Skip to content

Commit

Permalink
fix longest_match for i686 and s390x
Browse files Browse the repository at this point in the history
as noted in https://github.com/zlib-ng/zlib-ng/releases/tag/2.2.3, the `UNALIGNED_OK` checks have been removed. That is OK for us, because we read values as `[u8; N]` and hence alignment is not a problem for those loads
  • Loading branch information
folkertdev committed Jan 9, 2025
1 parent 2fbba06 commit 3ff9e2b
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 88 deletions.
40 changes: 2 additions & 38 deletions test-libz-rs-sys/src/deflate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1852,13 +1852,8 @@ mod fuzz_based_tests {
}

#[test]
#[cfg_attr(
target_family = "wasm",
ignore = "zlib-ng compresses differently on wasm"
)]
fn longest_match_difference() {
// the output on aarch64 and x86_64: fully featured modern targets
let output_other = &[
let output = &[
24, 87, 83, 81, 97, 100, 96, 96, 228, 98, 0, 3, 123, 6, 6, 77, 21, 16, 67, 5, 36, 10,
102, 73, 139, 67, 164, 24, 194, 64, 32, 144, 75, 55, 16, 5, 248, 65, 65, 52, 152, 116,
99, 100, 96, 96, 16, 98, 96, 151, 241, 243, 243, 11, 12, 52, 128, 41, 2, 153, 206, 6,
Expand All @@ -1868,31 +1863,6 @@ mod fuzz_based_tests {
82,
];

// longest_match does not perform unaligned 64-bit reads/writes on i686
let output_i686 = &[
24, 87, 83, 81, 97, 100, 96, 96, 228, 98, 0, 3, 123, 6, 6, 77, 21, 16, 67, 5, 36, 10,
102, 73, 139, 67, 164, 24, 194, 64, 32, 144, 75, 55, 16, 5, 248, 65, 65, 52, 152, 116,
99, 100, 96, 96, 16, 98, 96, 151, 241, 243, 243, 11, 12, 52, 128, 41, 2, 153, 206, 6,
50, 21, 106, 20, 20, 56, 66, 40, 5, 48, 169, 98, 13, 166, 4, 24, 98, 25, 20, 192, 138,
173, 37, 24, 184, 32, 64, 65, 26, 68, 50, 112, 128, 57, 26, 32, 83, 224, 134, 73, 162,
154, 8, 7, 14, 40, 60, 78, 20, 30, 8, 48, 97, 136, 48, 48, 128, 125, 128, 36, 0, 0,
125, 74, 22, 82,
];

// longest_match does not perform unaligned 32-bit reads/writes on s390x
let output_s390x = &[
24, 87, 83, 81, 97, 100, 96, 96, 228, 98, 0, 3, 123, 6, 6, 77, 21, 16, 67, 5, 36, 10,
102, 73, 139, 67, 164, 24, 194, 64, 32, 144, 75, 55, 16, 5, 248, 65, 65, 52, 152, 116,
99, 100, 96, 96, 16, 98, 96, 151, 241, 243, 243, 11, 12, 52, 128, 41, 2, 153, 206, 6,
50, 21, 106, 20, 20, 56, 66, 40, 5, 48, 169, 98, 13, 166, 4, 24, 98, 25, 20, 192, 138,
173, 37, 24, 184, 32, 64, 65, 26, 68, 50, 112, 128, 57, 26, 32, 83, 224, 134, 73, 66,
140, 192, 0, 14, 40, 60, 78, 20, 30, 8, 48, 97, 136, 48, 48, 128, 125, 128, 36, 0, 0,
125, 74, 22, 82,
];

assert_ne!(output_i686.as_slice(), output_other.as_slice());
assert_ne!(output_i686.as_slice(), output_s390x.as_slice());

fuzz_based_test(
&[
36, 36, 1, 0, 0, 1, 10, 0, 0, 0, 0, 0, 0, 63, 0, 0, 41, 36, 0, 0, 0, 0, 36, 36, 1,
Expand All @@ -1915,13 +1885,7 @@ mod fuzz_based_tests {
mem_level: 6,
strategy: Strategy::Default,
},
if cfg!(target_arch = "x86") {
output_i686
} else if cfg!(target_arch = "s390x") {
output_s390x
} else {
output_other
},
output,
)
}

Expand Down
69 changes: 19 additions & 50 deletions zlib-rs/src/deflate/longest_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,6 @@ use crate::deflate::{Pos, State, MIN_LOOKAHEAD, STD_MAX_MATCH, STD_MIN_MATCH};

const EARLY_EXIT_TRIGGER_LEVEL: i8 = 5;

const UNALIGNED_OK: bool = cfg!(any(
target_arch = "wasm32",
target_arch = "x86",
target_arch = "x86_64",
target_arch = "arm",
target_arch = "aarch64",
target_arch = "powerpc64",
));

const UNALIGNED64_OK: bool = cfg!(any(
target_arch = "wasm32",
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "powerpc64",
));

/// Find the (length, offset) in the window of the longest match for the string
/// at offset cur_match
pub fn longest_match(state: &crate::deflate::State, cur_match: u16) -> (usize, u16) {
Expand Down Expand Up @@ -75,9 +59,9 @@ fn longest_match_help<const SLOW: bool>(

// Calculate read offset which should only extend an extra byte to find the next best match length.
let mut offset = best_len - 1;
if best_len >= core::mem::size_of::<u32>() && UNALIGNED_OK {
if best_len >= core::mem::size_of::<u32>() {
offset -= 2;
if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
if best_len >= core::mem::size_of::<u64>() {
offset -= 4;
}
}
Expand Down Expand Up @@ -203,44 +187,29 @@ fn longest_match_help<const SLOW: bool>(
//
// scan_end is a bit trickier: it reads at a bounded offset from scan_start:
//
// - UNALIGNED64_OK: scan_end is bounded by `258 - (4 + 2 + 1)`, so an 8-byte read is in-bounds
// - UNALIGNED_OK: scan_end is bounded by `258 - (2 + 1)`, so a 4-byte read is in-bounds
// - otherwise: scan_end is bounded by `258 - 1`, so a 2-byte read is in-bounds
// - >= 8: scan_end is bounded by `258 - (4 + 2 + 1)`, so an 8-byte read is in-bounds
// - >= 4: scan_end is bounded by `258 - (2 + 1)`, so a 4-byte read is in-bounds
// - >= 2: scan_end is bounded by `258 - 1`, so a 2-byte read is in-bounds
unsafe {
if UNALIGNED_OK {
if best_len < core::mem::size_of::<u32>() {
loop {
if is_match::<2>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}

goto_next_in_chain!();
if best_len < core::mem::size_of::<u32>() {
loop {
if is_match::<2>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}
} else if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
loop {
if is_match::<8>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}

goto_next_in_chain!();
goto_next_in_chain!();
}
} else if best_len >= core::mem::size_of::<u64>() {
loop {
if is_match::<8>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}
} else {
loop {
if is_match::<4>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}

goto_next_in_chain!();
}
goto_next_in_chain!();
}
} else {
loop {
if memcmp_n_ptr::<2>(mbase_end.wrapping_add(cur_match as usize), scan_end)
&& memcmp_n_ptr::<2>(
mbase_start.wrapping_add(cur_match as usize),
scan.as_ptr(),
)
{
if is_match::<4>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}

Expand Down Expand Up @@ -278,9 +247,9 @@ fn longest_match_help<const SLOW: bool>(
}

offset = best_len - 1;
if best_len >= core::mem::size_of::<u32>() && UNALIGNED_OK {
if best_len >= core::mem::size_of::<u32>() {
offset -= 2;
if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
if best_len >= core::mem::size_of::<u64>() {
offset -= 4;
}
}
Expand Down

0 comments on commit 3ff9e2b

Please sign in to comment.