Skip to content

Commit

Permalink
Refactoring to allow inlining of quick_insert_string for the common CRC32 case
Browse files Browse the repository at this point in the history

Before-and-after benchmarks from an x86_64 test system:
```
Benchmark 1 (56 runs): ./compress-baseline 1 rs silesia-small.tar
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          89.8ms ± 2.42ms    88.2ms …  106ms          3 ( 5%)        0%
  peak_rss           26.7MB ± 65.7KB    26.5MB … 26.7MB          0 ( 0%)        0%
  cpu_cycles          333M  ±  836K      332M  …  335M           0 ( 0%)        0%
  instructions        747M  ±  254       747M  …  747M           0 ( 0%)        0%
  cache_references    400K  ± 6.41K      396K  …  434K           4 ( 7%)        0%
  cache_misses        299K  ± 4.24K      282K  …  311K           6 (11%)        0%
  branch_misses      3.15M  ± 5.68K     3.14M  … 3.16M           0 ( 0%)        0%
Benchmark 2 (56 runs): ./target/release/examples/compress 1 rs silesia-small.tar
  measurement          mean ± σ            min … max           outliers         delta
  wall_time          89.3ms ±  582us    88.3ms … 90.8ms          2 ( 4%)          -  0.5% ±  0.7%
  peak_rss           26.7MB ± 78.2KB    26.5MB … 26.7MB          0 ( 0%)          -  0.1% ±  0.1%
  cpu_cycles          333M  ± 1.45M      331M  …  341M           1 ( 2%)          -  0.1% ±  0.1%
  instructions        736M  ±  268       736M  …  736M           1 ( 2%)        ⚡-  1.5% ±  0.0%
  cache_references    400K  ± 3.33K      397K  …  411K           3 ( 5%)          +  0.1% ±  0.5%
  cache_misses        296K  ± 6.42K      277K  …  306K           6 (11%)          -  0.9% ±  0.7%
  branch_misses      3.09M  ± 7.74K     3.07M  … 3.11M           2 ( 4%)        ⚡-  1.9% ±  0.1%
```
  • Loading branch information
brianpane committed Dec 12, 2024
1 parent 47afe59 commit 75fd9f0
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 5 deletions.
24 changes: 20 additions & 4 deletions zlib-rs/src/deflate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1353,11 +1353,27 @@ impl<'a> State<'a> {
#[inline(always)]
pub(crate) fn quick_insert_string(&mut self, string: usize) -> u16 {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::quick_insert_string(self, string),
// SAFETY: self.hash_calc_variant is set by HashCalcVariant::for_max_chain_length,
// which avoids choosing Crc32 if the system doesn't have support.
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::quick_insert_string(self, string) },
HashCalcVariant::Roll => RollHashCalc::quick_insert_string(self, string),
_ => {
// Standard and Crc32 both can process 4 bytes at a time.
let slice = &self.window.filled()[string..];
let val = u32::from_le_bytes(slice[..4].try_into().unwrap());

let hm = match self.hash_calc_variant {
// SAFETY: self.hash_calc_variant is set by HashCalcVariant::for_max_chain_length,
// which avoids choosing Crc32 if the system doesn't have support.
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::update_hash(0, val) }
_ => StandardHashCalc::update_hash(0, val)
} as usize;

let head = self.head.as_slice()[hm];
if head != string as u16 {
self.prev.as_mut_slice()[string & self.w_mask] = head;
self.head.as_mut_slice()[hm] = string as u16;
}

head
}
}
}

Expand Down
4 changes: 3 additions & 1 deletion zlib-rs/src/deflate/algorithm/quick.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ pub fn deflate_quick(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSt
}

if state.lookahead >= WANT_MIN_MATCH {
let slice = &state.window.filled()[state.strstart..];
let str_prefetch = u32::from_le_bytes(slice[..4].try_into().unwrap());
let hash_head = state.quick_insert_string(state.strstart);
let dist = state.strstart as isize - hash_head as isize;

Expand All @@ -105,7 +107,7 @@ pub fn deflate_quick(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSt
$slice[$offset] as u16 | ($slice[$offset + 1] as u16) << 8
}
}
if first_two_bytes!(str_start, 0) == first_two_bytes!(match_start, 0) {
if str_prefetch as u16 == first_two_bytes!(match_start, 0) {
let mut match_len = crate::deflate::compare256::compare256_slice(
&str_start[2..],
&match_start[2..],
Expand Down

0 comments on commit 75fd9f0

Please sign in to comment.