diff --git a/Cargo.toml b/Cargo.toml index 320779a..ded053c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,7 +41,7 @@ name = "fibonacci_bench" harness = false [[bench]] -name = "bench" +name = "bench_group" harness = false [[bench]] diff --git a/benches/bench.rs b/benches/bench_group.rs similarity index 82% rename from benches/bench.rs rename to benches/bench_group.rs index 2b25415..d15248e 100644 --- a/benches/bench.rs +++ b/benches/bench_group.rs @@ -36,16 +36,17 @@ fn run_bench() { runner.enable_perf(); runner.set_cache_trasher(true); + let mut group = runner.new_group(); for (input_name, data) in inputs.iter() { - runner.set_input_size(data.len() * std::mem::size_of::()); - runner.register_with_input("vec", input_name, data, move |data| { + group.set_input_size(data.len() * std::mem::size_of::()); + group.register_with_input("vec", input_name, data, move |data| { black_box(test_vec(data)); }); - runner.register_with_input("hashmap", input_name, data, move |data| { + group.register_with_input("hashmap", input_name, data, move |data| { black_box(test_hashmap(data)); }); } - runner.run(); + group.run(); } fn main() { diff --git a/src/bench.rs b/src/bench.rs index aeee048..c5650a3 100644 --- a/src/bench.rs +++ b/src/bench.rs @@ -13,7 +13,7 @@ pub trait Bench<'a> { /// Sample the number of iterations the benchmark should do fn sample_num_iter(&self) -> usize; fn exec_bench(&mut self, alloc: &Option); - fn get_results(&mut self, group_name: &Option) -> BenchResult; + fn get_results(&mut self, group_name: Option<&str>) -> BenchResult; fn clear_results(&mut self); } @@ -99,10 +99,10 @@ impl<'a, I> Bench<'a> for InputWithBenchmark<'a, I> { self.results.push(res); } - fn get_results(&mut self, group_name: &Option) -> BenchResult { + fn get_results(&mut self, group_name: Option<&str>) -> BenchResult { let bench_id = format!( "{}_{}_{}", - group_name.as_ref().unwrap_or(&"".to_string()), + group_name.as_ref().unwrap_or(&""), self.input.name, self.bench.name ) diff --git a/src/bench_group.rs b/src/bench_group.rs new file mode 100644 index 0000000..6641308 --- /dev/null +++ b/src/bench_group.rs @@ -0,0 +1,129 @@ +use std::borrow::Cow; + +use crate::{ + bench::{Bench, InputWithBenchmark, NamedBench}, + bench_runner::{BenchRunner, EMPTY_INPUT}, + NamedInput, +}; + +/// `BenchGroup` is a group of benchmarks run together. +/// +pub struct BenchGroup<'a> { + name: Option, + pub(crate) benches: Vec + 'a>>, + /// The size of the input. + /// Enables throughput reporting. + input_size_in_bytes: Option, + pub(crate) runner: BenchRunner, +} + +impl<'a> BenchGroup<'a> { + /// Create a new BenchGroup with no benchmarks. + pub fn new(runner: BenchRunner) -> Self { + Self { + name: None, + benches: Vec::new(), + input_size_in_bytes: None, + runner, + } + } + + /// Create a new BenchGroup with no benchmarks. + pub fn with_name>(runner: BenchRunner, name: S) -> Self { + Self { + name: Some(name.into()), + benches: Vec::new(), + input_size_in_bytes: None, + runner, + } + } + + /// Sets name of the group and returns the group. + pub fn name>(mut self, name: S) -> Self { + self.name = Some(name.into()); + self + } + + /// Enables throughput reporting. The throughput will be valid for all inputs that are + /// registered afterwards. + pub fn set_input_size(&mut self, input_size: usize) { + self.input_size_in_bytes = Some(input_size); + } + + /// Register a benchmark with the given name and function. + pub fn register_with_input>( + &mut self, + bench_name: S, + input_name: S, + input: &'a I, + fun: F, + ) where + F: Fn(&'a I) + 'static, + { + let name = bench_name.into(); + let input_name = input_name.into(); + + let bench = NamedBench::new(name, Box::new(fun)); + self.register_named_with_input( + bench, + NamedInput { + name: Cow::Owned(input_name), + data: input, + }, + ); + } + + /// Register a benchmark with the given name and function. + pub fn register>(&mut self, name: S, fun: F) + where + F: Fn(&'a ()) + 'static, + { + let name = name.into(); + let bench = NamedBench::new(name, Box::new(fun)); + + self.register_named_with_input(bench, EMPTY_INPUT); + } + + /// Register a benchmark with the given name and function. + pub(crate) fn register_named_with_input( + &mut self, + bench: NamedBench<'a, I>, + input: NamedInput<'a, I>, + ) { + if let Some(filter) = &self.runner.options.filter { + if !bench.name.contains(filter) && !input.name.contains(filter) { + return; + } + } + + let bundle = InputWithBenchmark::new( + input, + self.input_size_in_bytes, + bench, + self.runner.options.enable_perf, + ); + + self.benches.push(Box::new(bundle)); + } + + /// Set the name of the group. + /// The name is printed before the benchmarks are run. + /// It is also used to distinguish when writing the results to disk. + pub fn set_name>(&mut self, name: S) { + self.name = Some(name.into()); + } + + /// Sets the filter, which is used to filter the benchmarks by name. + /// The filter is fetched from the command line arguments. + /// + /// It can also match an input name. + pub fn set_filter(&mut self, filter: Option) { + self.runner.set_filter(filter); + } + + /// Run the benchmarks and report the results. + pub fn run(&mut self) { + self.runner + .run_group(self.name.as_deref(), &mut self.benches); + } +} diff --git a/src/bench_input_group.rs b/src/bench_input_group.rs index 7039947..c188dd7 100644 --- a/src/bench_input_group.rs +++ b/src/bench_input_group.rs @@ -1,7 +1,12 @@ use std::{alloc::GlobalAlloc, borrow::Cow, collections::HashMap}; -use crate::{bench::NamedBench, bench_runner::BenchRunner, parse_args, NamedInput, Options}; +use crate::{ + bench::NamedBench, + bench_runner::{group_by_mut, BenchRunner}, + parse_args, BenchGroup, NamedInput, Options, +}; use peakmem_alloc::*; +use yansi::Paint; pub(crate) type Alloc = &'static dyn PeakMemAllocTrait; @@ -13,7 +18,8 @@ pub(crate) type Alloc = &'static dyn PeakMemAllocTrait; /// to the `InputGroup`. If this is not possible, use [BenchRunner](crate::BenchRunner) instead. pub struct InputGroup { inputs: Vec>, - runner: BenchRunner<'static>, + bench_group: BenchGroup<'static>, + pub(crate) name: Option, } impl Default for InputGroup<()> { @@ -39,7 +45,7 @@ pub struct OwnedNamedInput { impl InputGroup { /// Sets name of the group and returns the group. pub fn name>(mut self, name: S) -> Self { - self.runner.set_name(name.into()); + self.name = Some(name.into()); self } /// The inputs are a vector of tuples, where the first element is the name of the input and the @@ -67,12 +73,16 @@ impl InputGroup { let mut runner = BenchRunner::new(); runner.set_options(options); - InputGroup { inputs, runner } + InputGroup { + inputs, + name: None, + bench_group: BenchGroup::new(runner), + } } /// Set the peak mem allocator to be used for the benchmarks. /// This will report the peak memory consumption of the benchmarks. pub fn set_alloc(&mut self, alloc: &'static PeakMemAlloc) { - self.runner.set_alloc(alloc); + self.bench_group.runner.set_alloc(alloc); } /// Enable perf profiling + report /// @@ -91,7 +101,7 @@ impl InputGroup { /// L1dA: 2.001 L1dM: 0.000 Br: 6.001 BrM: 0.000 /// ``` pub fn enable_perf(&mut self) { - self.runner.options.enable_perf = true; + self.bench_group.runner.options.enable_perf = true; } /// Enables throughput reporting. @@ -108,8 +118,8 @@ impl InputGroup { /// Set the name of the group. /// The name is printed before the benchmarks are run. /// It is also used to distinguish when writing the results to disk. - pub fn set_name(&mut self, name: String) { - self.runner.set_name(name); + pub fn set_name>(&mut self, name: S) { + self.name = Some(name.into()); } /// Set the options to the given value. @@ -117,24 +127,24 @@ impl InputGroup { /// /// See the Options struct for more information. pub fn set_options(&mut self, options: Options) { - self.runner.set_options(options); + self.bench_group.runner.set_options(options); } /// Manully set the number of iterations each benchmark is called. /// /// This disables the automatic detection of the number of iterations. pub fn set_num_iter(&mut self, num_iter: usize) { - self.runner.set_num_iter(num_iter); + self.bench_group.runner.set_num_iter(num_iter); } /// Trash CPU cache between bench runs. Defaults to false. pub fn set_cache_trasher(&mut self, enable: bool) { - self.runner.set_cache_trasher(enable); + self.bench_group.runner.set_cache_trasher(enable); } /// Sets the interleave option to the given value. pub fn set_interleave(&mut self, interleave: bool) { - self.runner.set_interleave(interleave); + self.bench_group.runner.set_interleave(interleave); } /// Sets the filter, which is used to filter the benchmarks by name. @@ -142,7 +152,7 @@ impl InputGroup { /// /// It can also match an input name. pub fn set_filter(&mut self, filter: Option) { - self.runner.set_filter(filter); + self.bench_group.runner.set_filter(filter); } /// Register a benchmark with the given name and function. @@ -163,25 +173,38 @@ impl InputGroup { // (probably). let named_input: NamedInput<'static, I> = unsafe { transmute_lifetime(named_input) }; if let Some(input_size) = input.input_size_in_bytes { - self.runner.set_input_size(input_size); + self.bench_group.runner.set_input_size(input_size); } - self.runner + self.bench_group .register_named_with_input(named_bench, named_input); } } /// Run the benchmarks and report the results. pub fn run(&mut self) { + if let Some(name) = &self.name { + println!("{}", name.black().on_red().invert().bold()); + } let input_name_to_ordinal: HashMap = self .inputs .iter() .enumerate() .map(|(i, input)| (input.name.clone(), i)) .collect(); - self.runner + self.bench_group .benches .sort_by_key(|bench| std::cmp::Reverse(input_name_to_ordinal[bench.get_input_name()])); - self.runner.run(); + group_by_mut( + self.bench_group.benches.as_mut_slice(), + |b| b.get_input_name(), + |group| { + let input_name = group[0].get_input_name().to_owned(); + //if !input_name.is_empty() { + //println!("{}", input_name.black().on_yellow().invert().italic()); + //} + self.bench_group.runner.run_group(Some(&input_name), group); + }, + ); } } diff --git a/src/bench_runner.rs b/src/bench_runner.rs index 8c520d8..359646a 100644 --- a/src/bench_runner.rs +++ b/src/bench_runner.rs @@ -4,7 +4,7 @@ use crate::{ bench::{Bench, InputWithBenchmark, NamedBench}, black_box, parse_args, report::report_group, - Options, + BenchGroup, Options, }; use peakmem_alloc::*; use yansi::Paint; @@ -16,10 +16,9 @@ pub(crate) const NUM_RUNS: usize = 32; /// /// BenchRunner is a collection of benchmarks. /// It is self-contained and can be run independently. -pub struct BenchRunner<'a> { - /// Name of the benchmark group. - name: Option, - pub(crate) benches: Vec + 'a>>, +#[derive(Clone)] +pub struct BenchRunner { + //pub(crate) benches: Vec + 'a>>, alloc: Option, cache_trasher: CacheTrasher, pub(crate) options: Options, @@ -39,23 +38,32 @@ pub struct NamedInput<'a, I> { pub(crate) data: &'a I, } -const EMPTY_INPUT: NamedInput<()> = NamedInput { +pub const EMPTY_INPUT: NamedInput<()> = NamedInput { name: Cow::Borrowed(""), data: &(), }; -impl<'a> Default for BenchRunner<'a> { +impl Default for BenchRunner { fn default() -> Self { Self::new() } } -impl<'a> BenchRunner<'a> { +impl BenchRunner { /// The inputs are a vector of tuples, where the first element is the name of the input and the /// second element is the input itself. pub fn new() -> Self { Self::new_with_options(parse_args()) } + + /// The inputs are a vector of tuples, where the first element is the name of the input and the + /// second element is the input itself. + pub fn with_name>(name: S) -> Self { + println!("{}", name.as_ref().black().on_red().invert().bold()); + + Self::new_with_options(parse_args()) + } + /// The inputs are a vector of tuples, where the first element is the name of the input and the /// second element is the input itself. pub(crate) fn new_with_options(options: Options) -> Self { @@ -63,15 +71,25 @@ impl<'a> BenchRunner<'a> { yansi::whenever(Condition::TTY_AND_COLOR); BenchRunner { - benches: Vec::new(), cache_trasher: CacheTrasher::new(1024 * 1024 * 16), options, alloc: None, - name: None, input_size_in_bytes: None, num_iter: None, } } + + /// Creates a new group + /// The group is a collection of benchmarks that are run together. + pub fn new_group<'a>(&self) -> BenchGroup<'a> { + BenchGroup::new(self.clone()) + } + /// Creates a new group + /// The group is a collection of benchmarks that are run together. + pub fn new_group_with_name<'a, S: Into>(&self, name: S) -> BenchGroup<'a> { + BenchGroup::with_name(self.clone(), name) + } + /// Set the peak mem allocator to be used for the benchmarks. /// This will report the peak memory consumption of the benchmarks. pub fn set_alloc(&mut self, alloc: &'static PeakMemAlloc) { @@ -110,13 +128,6 @@ impl<'a> BenchRunner<'a> { self.num_iter = Some(num_iter); } - /// Set the name of the group. - /// The name is printed before the benchmarks are run. - /// It is also used to distinguish when writing the results to disk. - pub fn set_name>(&mut self, name: S) { - self.name = Some(name.into()); - } - /// Set the options to the given value. /// This will overwrite all current options. /// @@ -144,57 +155,21 @@ impl<'a> BenchRunner<'a> { self.options.filter = filter; } - /// Register a benchmark with the given name and function. - pub fn register_with_input>( - &mut self, - bench_name: S, - input_name: S, - input: &'a I, - fun: F, - ) where - F: Fn(&'a I) + 'static, + /// Run a single function + pub fn bench_function(&mut self, name: String, f: F) -> &mut Self + where + F: Fn(&()) + 'static, { - let name = bench_name.into(); - let input_name = input_name.into(); - - let bench = NamedBench::new(name, Box::new(fun)); - self.register_named_with_input( - bench, - NamedInput { - name: Cow::Owned(input_name), - data: input, - }, - ); - } - /// Register a benchmark with the given name and function. - pub(crate) fn register_named_with_input( - &mut self, - bench: NamedBench<'a, I>, - input: NamedInput<'a, I>, - ) { - if let Some(filter) = &self.options.filter { - if !bench.name.contains(filter) && !input.name.contains(filter) { - return; - } - } - + let named_bench = NamedBench::new(name, Box::new(f)); let bundle = InputWithBenchmark::new( - input, + EMPTY_INPUT, self.input_size_in_bytes, - bench, + named_bench, self.options.enable_perf, ); - self.benches.push(Box::new(bundle)); - } - /// Register a benchmark with the given name and function. - pub fn register>(&mut self, name: S, fun: F) - where - F: Fn(&'a ()) + 'static, - { - let name = name.into(); - let bench = NamedBench::new(name, Box::new(fun)); - self.register_named_with_input(bench, EMPTY_INPUT); + self.run_group(None, &mut [Box::new(bundle)]); + self } /// Trash CPU cache between bench runs. Defaults to false. @@ -203,64 +178,55 @@ impl<'a> BenchRunner<'a> { } /// Run the benchmarks and report the results. - pub fn run(&mut self) { - if self.benches.is_empty() { + pub fn run_group<'a>(&self, name: Option<&str>, group: &mut [Box + 'a>]) { + if group.is_empty() { return; } - if let Some(name) = &self.name { - println!("{}", name.black().on_red().invert().bold()); + if let Some(name) = &name { + println!("{}", name.black().on_yellow().invert().bold()); } - // TODO: group by should be configurable - group_by_mut( - &mut self.benches, - |b| b.get_input_name(), - |group| { - let input_name = group[0].get_input_name(); - if !input_name.is_empty() { - println!("{}", input_name.black().on_yellow().invert().italic()); - } - - const MAX_GROUP_SIZE: usize = 5; - if self.options.verbose && group.len() > MAX_GROUP_SIZE { - println!( - "Group is quite big, splitting into chunks of {} elements", - MAX_GROUP_SIZE - ); - } + const MAX_GROUP_SIZE: usize = 5; + if self.options.verbose && group.len() > MAX_GROUP_SIZE { + println!( + "Group is quite big, splitting into chunks of {} elements", + MAX_GROUP_SIZE + ); + } - // If the group is quite big, we don't want to create too big chunks, which causes - // slow tests, therefore a chunk is at most 5 elements large. - for group in group.chunks_mut(MAX_GROUP_SIZE) { - Self::warm_up_group_and_set_iter(group, self.num_iter, self.options.verbose); - - if self.options.interleave { - Self::run_interleaved( - group, - &self.alloc, - self.options.cache_trasher.then_some(&self.cache_trasher), - ); - } else { - Self::run_sequential(group, &self.alloc); - } - } - // We report at the end, so the alignment is correct (could be calculated up front) - report_group(&self.name, group, self.alloc.is_some()); - }, - ); + // If the group is quite big, we don't want to create too big chunks, which causes + // slow tests, therefore a chunk is at most 5 elements large. + for group in group.chunks_mut(MAX_GROUP_SIZE) { + Self::warm_up_group_and_set_iter(group, self.num_iter, self.options.verbose); - self.clear_results(); - } + if self.options.interleave { + Self::run_interleaved( + group, + &self.alloc, + self.options.cache_trasher.then_some(&self.cache_trasher), + ); + } else { + Self::run_sequential(group, &self.alloc); + } + } + // We report at the end, so the alignment is correct (could be calculated up front) + report_group(name, group, self.alloc.is_some()); - /// Clear the stored results of the benchmarks. - pub fn clear_results(&mut self) { - for bench in &mut self.benches { + //self.clear_results(); + for bench in group { bench.clear_results(); } } - fn run_sequential(benches: &mut [Box + 'a>], alloc: &Option) { + // /// Clear the stored results of the benchmarks. + //pub fn clear_results(&mut self) { + //for bench in &mut self.benches { + //bench.clear_results(); + //} + //} + + fn run_sequential<'a>(benches: &mut [Box + 'a>], alloc: &Option) { for bench in benches { for iteration in 0..NUM_RUNS { alloca::with_alloca( @@ -274,7 +240,7 @@ impl<'a> BenchRunner<'a> { } } - fn run_interleaved( + fn run_interleaved<'a>( benches: &mut [Box + 'a>], alloc: &Option, cache_trasher: Option<&CacheTrasher>, @@ -317,8 +283,8 @@ impl<'a> BenchRunner<'a> { } } - fn warm_up_group_and_set_iter( - benches: &mut [Box + 'a>], + fn warm_up_group_and_set_iter<'b>( + benches: &mut [Box + 'b>], num_iter: Option, verbose: bool, ) { @@ -428,6 +394,7 @@ pub fn group_by_mut( /// Performs a dummy reads from memory to spoil given amount of CPU cache /// /// Uses cache aligned data arrays to perform minimum amount of reads possible to spoil the cache +#[derive(Clone)] struct CacheTrasher { cache_lines: Vec, } diff --git a/src/lib.rs b/src/lib.rs index 048aa55..8af4a7c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,6 +75,7 @@ extern crate test; pub use peakmem_alloc::*; pub(crate) mod bench; +mod bench_group; mod bench_input_group; pub(crate) mod bench_runner; pub(crate) mod format; @@ -82,6 +83,7 @@ pub(crate) mod profiler; pub(crate) mod report; pub(crate) mod stats; +pub use bench_group::BenchGroup; pub use bench_input_group::InputGroup; pub use bench_runner::BenchRunner; pub use bench_runner::NamedInput; @@ -93,7 +95,7 @@ pub use std::hint::black_box; /// The options to configure the benchmarking. /// The can be set on `InputGroup`. -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct Options { /// Interleave benchmarks pub interleave: bool, diff --git a/src/profiler/mod.rs b/src/profiler/mod.rs index 06709c1..fd3f943 100644 --- a/src/profiler/mod.rs +++ b/src/profiler/mod.rs @@ -53,7 +53,22 @@ fn print_counter_value f64>( }) .unwrap_or_default(); - format!("{}: {:.3} {}", name, f(stats), diff_str,) + format!("{}: {:.3} {}", name, format_number(f(stats)), diff_str,) +} + +fn format_number(n: f64) -> String { + let max_digits = 5; + let integer_part = n.trunc() as i64; + let integer_length = if integer_part != 0 { + integer_part.abs().to_string().len() as i32 + } else if n == 0.0 { + 1 // Special handling for 0 to consider the digit before the decimal point + } else { + 0 // For numbers less than 1 but not zero + }; + + let precision = (max_digits - integer_length).max(0) as usize; + format!("{:.*}", precision, n) } impl CounterValues { diff --git a/src/report.rs b/src/report.rs index 1d1d55d..5a9669a 100644 --- a/src/report.rs +++ b/src/report.rs @@ -31,7 +31,7 @@ pub fn get_output_directory() -> PathBuf { } pub(crate) fn report_group<'a>( - bench_group_name: &Option, + bench_group_name: Option<&str>, benches: &mut [Box + 'a>], report_memory: bool, ) {