add event system and move perf counters to it

PSeitz · Sep 29, 2024 · d8f12bb · d8f12bb
1 parent 1930d83
commit d8f12bb
Show file tree

Hide file tree

Showing 10 changed files with 216 additions and 54 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -28,6 +28,7 @@ prettytable-rs = "0.10.0"
 unicode-width = "0.1.11"
 yansi = { version = "1.0.1", features = ["detect-env", "detect-tty"] }
 rustop = "=1.1.4"
+rustc-hash = "2.0.0"
 
 [target.'cfg(target_os = "linux")'.dependencies]
 perf-event = { version = "0.4.8" }

diff --git a/src/bench.rs b/src/bench.rs
@@ -1,5 +1,10 @@
 use crate::{
-    bench_id::BenchId, bench_input_group::*, black_box, output_value::OutputValue, profiler::*,
+    bench_id::BenchId,
+    bench_input_group::*,
+    black_box,
+    events::{BingganEvents, EventManager},
+    output_value::OutputValue,
+    profiler::*,
     stats::*,
 };
 
@@ -10,8 +15,8 @@ pub trait Bench<'a> {
     fn set_num_iter(&mut self, num_iter: usize);
     /// Sample the number of iterations the benchmark should do
     fn sample_num_iter(&mut self) -> usize;
-    fn exec_bench(&mut self, alloc: &Option<Alloc>);
-    fn get_results(&mut self, report_memory: bool) -> BenchResult;
+    fn exec_bench(&mut self, alloc: &Option<Alloc>, events: &mut EventManager);
+    fn get_results(&mut self, report_memory: bool, events: &mut EventManager) -> BenchResult;
     fn clear_results(&mut self);
 }
 
@@ -33,6 +38,7 @@ impl<'a, I, O> NamedBench<'a, I, O> {
 }
 
 /// The result of a benchmark run.
+#[derive(Debug, Clone)]
 pub struct BenchResult {
     /// The bench id uniquely identifies the benchmark.
     /// It is a combination of the group name, input name and benchmark name.
@@ -59,7 +65,6 @@ pub(crate) struct InputWithBenchmark<'a, I, O> {
     pub(crate) input_size_in_bytes: Option<usize>,
     pub(crate) bench: NamedBench<'a, I, O>,
     pub(crate) results: Vec<RunResult<O>>,
-    pub profiler: Option<PerfProfiler>,
     pub num_iter: Option<usize>,
 }
 
@@ -68,7 +73,6 @@ impl<'a, I, O> InputWithBenchmark<'a, I, O> {
         input: &'a I,
         input_size_in_bytes: Option<usize>,
         bench: NamedBench<'a, I, O>,
-        enable_perf: bool,
         num_iter: Option<usize>,
     ) -> Self {
         InputWithBenchmark {
@@ -77,11 +81,6 @@ impl<'a, I, O> InputWithBenchmark<'a, I, O> {
             results: Vec::with_capacity(bench.num_group_iter),
             bench,
             num_iter,
-            profiler: if enable_perf {
-                PerfProfiler::new().ok()
-            } else {
-                None
-            },
         }
     }
 }
@@ -105,22 +104,28 @@ impl<'a, I, O: OutputValue> Bench<'a> for InputWithBenchmark<'a, I, O> {
     }
 
     #[inline]
-    fn exec_bench(&mut self, alloc: &Option<Alloc>) {
+    fn exec_bench(&mut self, alloc: &Option<Alloc>, events: &mut EventManager) {
         let num_iter = self.get_num_iter_or_fail();
-        let res = self
-            .bench
-            .exec_bench(self.input, alloc, &mut self.profiler, num_iter);
+        let res = self.bench.exec_bench(self.input, alloc, num_iter, events);
         self.results.push(res);
     }
 
-    fn get_results(&mut self, report_memory: bool) -> BenchResult {
+    fn get_results(&mut self, report_memory: bool, events: &mut EventManager) -> BenchResult {
         let num_iter = self.get_num_iter_or_fail();
+        let total_num_iter = self.bench.num_group_iter as u64 * num_iter as u64;
         let stats = compute_stats(&self.results, num_iter);
-        let perf_counter: Option<CounterValues> = self.profiler.as_mut().and_then(|profiler| {
-            profiler
-                .finish(self.bench.num_group_iter as u64 * num_iter as u64)
-                .ok()
-        });
+        let perf_counter: Option<CounterValues> = events
+            .get_listener(PERF_CNT_EVENT_LISTENER_NAME)
+            .and_then(|listener| {
+                let counters = listener
+                    .as_any()
+                    .downcast_mut::<PerfCounterPerBench>()
+                    .expect("Expected PerfCounterPerBench");
+                counters
+                    .get_by_bench_id_mut(&self.bench.bench_id)
+                    .and_then(|perf_cnt| perf_cnt.finish(total_num_iter).ok())
+            });
+
         let output_value = (self.bench.fun)(self.input);
         BenchResult {
             bench_id: self.bench.bench_id.clone(),
@@ -140,8 +145,9 @@ impl<'a, I, O: OutputValue> Bench<'a> for InputWithBenchmark<'a, I, O> {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
-/// The result of a single benchmark run.
-/// There are multiple runs for each benchmark which will be collected to a vector
+/// The result of a single benchmark run. It is already aggregated, since a single bench may be
+/// run multiple times to improve accuracy.
+/// Each benchmark is run multiple times per group; those runs are collected into a vector.
 pub struct RunResult<O> {
     pub duration_ns: u64,
     pub memory_consumption: usize,
@@ -194,30 +200,30 @@ impl<'a, I, O> NamedBench<'a, I, O> {
         &mut self,
         input: &'a I,
         alloc: &Option<Alloc>,
-        profiler: &mut Option<PerfProfiler>,
         num_iter: usize,
+        events: &mut EventManager,
     ) -> RunResult<O> {
         if let Some(alloc) = alloc {
             alloc.reset_peak_memory();
         }
-        if let Some(profiler) = profiler {
-            profiler.enable();
-        }
+        events.emit(BingganEvents::BenchStart(&self.bench_id));
         let start = std::time::Instant::now();
         let mut res = None;
         for _ in 0..num_iter {
             res = black_box((self.fun)(input));
         }
         let elapsed = start.elapsed();
-        if let Some(profiler) = profiler {
-            profiler.disable();
-        }
         let mem = if let Some(alloc) = alloc {
             alloc.get_peak_memory()
         } else {
             0
         };
 
-        RunResult::new(elapsed.as_nanos() as u64 / num_iter as u64, mem, res)
+        let run_result = RunResult::new(elapsed.as_nanos() as u64 / num_iter as u64, mem, res);
+        events.emit(BingganEvents::BenchStop(
+            &self.bench_id,
+            run_result.duration_ns,
+        ));
+        run_result
     }
 }
diff --git a/src/bench_group.rs b/src/bench_group.rs
@@ -1,5 +1,5 @@
 use crate::{
-    bench::{Bench, BenchResult, InputWithBenchmark, NamedBench},
+    bench::{Bench, InputWithBenchmark, NamedBench},
     bench_id::BenchId,
     bench_runner::BenchRunner,
     output_value::OutputValue,
@@ -105,7 +105,6 @@ impl<'a, 'runner> BenchGroup<'a, 'runner> {
             input,
             self.input_size_in_bytes,
             bench,
-            self.runner.config.enable_perf,
             self.runner.config.num_iter_bench,
         );
 
@@ -120,7 +119,7 @@ impl<'a, 'runner> BenchGroup<'a, 'runner> {
     }
 
     /// Run the benchmarks and report the results.
-    pub fn run(&mut self) -> Vec<BenchResult> {
+    pub fn run(&mut self) {
         self.runner.run_group(
             self.group_name.as_deref(),
             &mut self.benches,

diff --git a/src/bench_id.rs b/src/bench_id.rs
@@ -49,7 +49,7 @@ impl PrintOnce {
 /// - runner_name: The name of the runner that executed the benchmark.
 /// - group_name: The name of the group that the benchmark belongs to. This is typically the input name.
 /// - bench_name: The name of the benchmark.
-#[derive(Clone)]
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct BenchId {
     runner_name: Option<String>,
     /// This is typically the input name

diff --git a/src/bench_runner.rs b/src/bench_runner.rs
@@ -1,8 +1,11 @@
+use std::env;
 use std::{alloc::GlobalAlloc, cmp::Ordering};
 
+use crate::events::{BingganEvents, EventManager};
 use crate::output_value::OutputValue;
+use crate::profiler::PerfCounterPerBench;
 use crate::{
-    bench::{Bench, BenchResult, InputWithBenchmark, NamedBench},
+    bench::{Bench, InputWithBenchmark, NamedBench},
     bench_id::{BenchId, PrintOnce},
     black_box, parse_args,
     report::{report_group, Reporter},
@@ -28,6 +31,7 @@ pub struct BenchRunner {
     pub(crate) name: Option<PrintOnce>,
 
     reporter: Box<dyn Reporter>,
+    listeners: EventManager,
 }
 
 pub const EMPTY_INPUT: &() = &();
@@ -51,6 +55,12 @@ impl BenchRunner {
         new
     }
 
+    /// Returns the event manager, which can be used to add listeners to the benchmarks.
+    /// See [EventManager] for more information.
+    pub fn get_event_manager(&mut self) -> &mut EventManager {
+        &mut self.listeners
+    }
+
     /// Creates a new `BenchRunner` with custom options set.
     pub(crate) fn new_with_options(options: Config) -> Self {
         use yansi::Condition;
@@ -64,6 +74,7 @@ impl BenchRunner {
             name: None,
             //reporter: Box::new(crate::report::TableReporter {}),
             reporter: Box::new(crate::report::PlainReporter::new()),
+            listeners: EventManager::new(),
         }
     }
 
@@ -115,7 +126,6 @@ impl BenchRunner {
             EMPTY_INPUT,
             self.input_size_in_bytes,
             named_bench,
-            self.config.enable_perf,
             self.config().num_iter_bench,
         );
 
@@ -132,19 +142,24 @@ impl BenchRunner {
 
     /// Run the benchmarks and report the results.
     pub fn run_group<'a>(
-        &self,
+        &mut self,
         group_name: Option<&str>,
         group: &mut [Box<dyn Bench<'a> + 'a>],
         output_value_column_title: &'static str,
-    ) -> Vec<BenchResult> {
+    ) {
         if group.is_empty() {
-            return Vec::new();
+            return;
         }
         if let Some(runner_name) = &self.name {
             runner_name.print_name();
         }
+        if self.config().enable_perf {
+            self.listeners
+                .add_listener_if_absent(PerfCounterPerBench::default());
+        }
 
         if let Some(name) = &group_name {
+            self.listeners.emit(BingganEvents::GroupStart(name));
             println!("{}", name.black().on_yellow().invert().bold());
         }
 
@@ -168,42 +183,42 @@ impl BenchRunner {
                     &self.alloc,
                     self.config.cache_trasher.then_some(&self.cache_trasher),
                     num_group_iter,
+                    &mut self.listeners,
                 );
             } else {
-                Self::run_sequential(group, &self.alloc, num_group_iter);
+                Self::run_sequential(group, &self.alloc, num_group_iter, &mut self.listeners);
             }
         }
 
         let report_memory = self.alloc.is_some();
 
         report_group(
+            group_name,
             group,
             &*self.reporter,
             report_memory,
             output_value_column_title,
+            &mut self.listeners,
         );
 
         // TODO: clearing should be optional, to check the results yourself, e.g. in CI
         //for bench in group {
         //bench.clear_results();
         //}
-        group
-            .iter_mut()
-            .map(|b| b.get_results(report_memory))
-            .collect()
     }
 
     fn run_sequential<'a>(
         benches: &mut [Box<dyn Bench<'a> + 'a>],
         alloc: &Option<Alloc>,
         num_group_iter: usize,
+        events: &mut EventManager,
     ) {
         for bench in benches {
             for iteration in 0..num_group_iter {
                 alloca::with_alloca(
                     iteration, // we increase the byte offset by 1 for each iteration
                     |_memory: &mut [core::mem::MaybeUninit<u8>]| {
-                        bench.exec_bench(alloc);
+                        bench.exec_bench(alloc, events);
                         black_box(());
                     },
                 );
@@ -216,6 +231,7 @@ impl BenchRunner {
         alloc: &Option<Alloc>,
         cache_trasher: Option<&CacheTrasher>,
         num_group_iter: usize,
+        events: &mut EventManager,
     ) {
         let mut bench_indices: Vec<usize> = (0..benches.len()).collect();
         for iteration in 0..num_group_iter {
@@ -241,7 +257,7 @@ impl BenchRunner {
                     alloca::with_alloca(
                         iteration, // we increase the byte offset by 1 for each iteration
                         |_memory: &mut [core::mem::MaybeUninit<u8>]| {
-                            bench.exec_bench(alloc);
+                            bench.exec_bench(alloc, events);
                             black_box(());
                         },
                     );
@@ -256,6 +272,15 @@ impl BenchRunner {
 
     /// Detect how often each bench should be run if it is not set manually.
     fn detect_and_set_num_iter<'b>(benches: &mut [Box<dyn Bench<'b> + 'b>], verbose: bool) {
+        if let Some(num_iter) = env::var("NUM_ITER_BENCH")
+            .ok()
+            .and_then(|val| val.parse::<usize>().ok())
+        {
+            for input_and_bench in benches {
+                input_and_bench.set_num_iter(num_iter);
+            }
+            return;
+        }
         // Filter benches that already have num_iter set
         let mut benches: Vec<_> = benches
             .iter_mut()