diff --git a/Cargo.toml b/Cargo.toml index ded053c..c7a3866 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,7 @@ perf_event = ["perf-event"] default = [] [[bench]] -name = "fibonacci_bench" +name = "bench" harness = false [[bench]] diff --git a/benches/bench.rs b/benches/bench.rs new file mode 100644 index 0000000..c213feb --- /dev/null +++ b/benches/bench.rs @@ -0,0 +1,28 @@ +use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM}; + +#[global_allocator] +pub static GLOBAL: &PeakMemAlloc = &INSTRUMENTED_SYSTEM; + +pub fn factorial(mut n: usize) -> usize { + let mut result = 1usize; + while n > 0 { + result = result.wrapping_mul(black_box(n)); + n -= 1; + } + result +} + +fn bench_factorial() { + let mut runner = BenchRunner::new(); + runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting. + + for val in [100, 400] { + runner.bench_function(format!("factorial {}", val), move |_| { + factorial(black_box(val)); + }); + } +} + +fn main() { + bench_factorial(); +} diff --git a/benches/fibonacci_bench.rs b/benches/fibonacci_bench.rs deleted file mode 100644 index 0ee7790..0000000 --- a/benches/fibonacci_bench.rs +++ /dev/null @@ -1,29 +0,0 @@ -use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM}; - -#[global_allocator] -pub static GLOBAL: &PeakMemAlloc = &INSTRUMENTED_SYSTEM; - -pub fn factorial(mut n: usize) -> usize { - let mut result = 1usize; - while n > 0 { - result = result.wrapping_mul(black_box(n)); - n -= 1; - } - result -} - -fn bench_fibonacci_group(mut runner: InputGroup) { - runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting. 
- runner.register("factorial", move |_| { - factorial(black_box(400)); - }); - //runner.register("fibonacci_alt", move |_| {}); - runner.run(); -} - -fn main() { - bench_fibonacci_group(InputGroup::new().name("fibonacci_plain")); - bench_fibonacci_group( - InputGroup::new_with_inputs(vec![("10", 10), ("15", 15)]).name("fibonacci_input"), - ); -} diff --git a/src/bench.rs b/src/bench.rs index c5650a3..16ba0da 100644 --- a/src/bench.rs +++ b/src/bench.rs @@ -11,13 +11,13 @@ pub trait Bench<'a> { fn get_input_name(&self) -> &str; fn set_num_iter(&mut self, num_iter: usize); /// Sample the number of iterations the benchmark should do - fn sample_num_iter(&self) -> usize; + fn sample_num_iter(&mut self) -> usize; fn exec_bench(&mut self, alloc: &Option); fn get_results(&mut self, group_name: Option<&str>) -> BenchResult; fn clear_results(&mut self); } -type CallBench<'a, I> = Box; +type CallBench<'a, I> = Box; pub(crate) struct NamedBench<'a, I> { pub name: String, @@ -83,7 +83,7 @@ impl<'a, I> Bench<'a> for InputWithBenchmark<'a, I> { &self.input.name } #[inline] - fn sample_num_iter(&self) -> usize { + fn sample_num_iter(&mut self) -> usize { self.bench.sample_and_get_iter(&self.input) } fn set_num_iter(&mut self, num_iter: usize) { @@ -146,7 +146,7 @@ impl RunResult { impl<'a, I> NamedBench<'a, I> { #[inline] /// Each group has its own number of iterations. 
This is not the final num_iter - pub fn sample_and_get_iter(&self, input: &NamedInput<'a, I>) -> usize { + pub fn sample_and_get_iter(&mut self, input: &NamedInput<'a, I>) -> usize { // We want to run the benchmark for 100ms const TARGET_MS_PER_BENCH: u64 = 100; { @@ -174,7 +174,7 @@ impl<'a, I> NamedBench<'a, I> { } #[inline] pub fn exec_bench( - &self, + &mut self, input: &NamedInput<'a, I>, alloc: &Option, profiler: &mut Option, diff --git a/src/bench_runner.rs b/src/bench_runner.rs index 359646a..ec414f7 100644 --- a/src/bench_runner.rs +++ b/src/bench_runner.rs @@ -124,6 +124,12 @@ impl BenchRunner { /// Manually set the number of iterations each benchmark is called. /// /// This disables the automatic detection of the number of iterations. + /// + /// # Note + /// Use this to get more stable and comparable benchmark results, as the number of + /// iterations has a big impact on measurement and the iteration detection may + /// not always get the same number of iterations between runs. There are ways implemented + /// to mitigate that but they are limited. pub fn set_num_iter(&mut self, num_iter: usize) { self.num_iter = Some(num_iter); } @@ -156,11 +162,11 @@ impl BenchRunner { } /// Run a single function - pub fn bench_function(&mut self, name: String, f: F) -> &mut Self + pub fn bench_function>(&mut self, name: S, f: F) -> &mut Self where F: Fn(&()) + 'static, { - let named_bench = NamedBench::new(name, Box::new(f)); + let named_bench = NamedBench::new(name.into(), Box::new(f)); let bundle = InputWithBenchmark::new( EMPTY_INPUT, self.input_size_in_bytes, @@ -213,19 +219,12 @@ impl BenchRunner { // We report at the end, so the alignment is correct (could be calculated up front) report_group(name, group, self.alloc.is_some()); - //self.clear_results(); + // TODO: clearing should be optional, to check the results yourself, e.g. in CI for bench in group { bench.clear_results(); } } - // /// Clear the stored results of the benchmarks.
- //pub fn clear_results(&mut self) { - //for bench in &mut self.benches { - //bench.clear_results(); - //} - //} - fn run_sequential<'a>(benches: &mut [Box + 'a>], alloc: &Option) { for bench in benches { for iteration in 0..NUM_RUNS { @@ -301,7 +300,7 @@ impl BenchRunner { // In order to make the benchmarks in a group comparable, it is imperative to call them // the same number of times let (min_num_iter, max_num_iter) = - minmax(benches.iter().map(|b| b.sample_num_iter())).unwrap(); + minmax(benches.iter_mut().map(|b| b.sample_num_iter())).unwrap(); if verbose { println!(