Skip to content

Commit

Permalink
add event system and move perf counters to it
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Sep 29, 2024
1 parent 1930d83 commit d8f12bb
Show file tree
Hide file tree
Showing 10 changed files with 216 additions and 54 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ prettytable-rs = "0.10.0"
unicode-width = "0.1.11"
yansi = { version = "1.0.1", features = ["detect-env", "detect-tty"] }
rustop = "=1.1.4"
rustc-hash = "2.0.0"

[target.'cfg(target_os = "linux")'.dependencies]
perf-event = { version = "0.4.8" }
Expand Down
66 changes: 36 additions & 30 deletions src/bench.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
use crate::{
bench_id::BenchId, bench_input_group::*, black_box, output_value::OutputValue, profiler::*,
bench_id::BenchId,
bench_input_group::*,
black_box,
events::{BingganEvents, EventManager},
output_value::OutputValue,
profiler::*,
stats::*,
};

Expand All @@ -10,8 +15,8 @@ pub trait Bench<'a> {
fn set_num_iter(&mut self, num_iter: usize);
/// Sample the number of iterations the benchmark should do
fn sample_num_iter(&mut self) -> usize;
fn exec_bench(&mut self, alloc: &Option<Alloc>);
fn get_results(&mut self, report_memory: bool) -> BenchResult;
fn exec_bench(&mut self, alloc: &Option<Alloc>, events: &mut EventManager);
fn get_results(&mut self, report_memory: bool, events: &mut EventManager) -> BenchResult;
fn clear_results(&mut self);
}

Expand All @@ -33,6 +38,7 @@ impl<'a, I, O> NamedBench<'a, I, O> {
}

/// The result of a benchmark run.
#[derive(Debug, Clone)]
pub struct BenchResult {
/// The bench id uniquely identifies the benchmark.
/// It is a combination of the group name, input name and benchmark name.
Expand All @@ -59,7 +65,6 @@ pub(crate) struct InputWithBenchmark<'a, I, O> {
pub(crate) input_size_in_bytes: Option<usize>,
pub(crate) bench: NamedBench<'a, I, O>,
pub(crate) results: Vec<RunResult<O>>,
pub profiler: Option<PerfProfiler>,
pub num_iter: Option<usize>,
}

Expand All @@ -68,7 +73,6 @@ impl<'a, I, O> InputWithBenchmark<'a, I, O> {
input: &'a I,
input_size_in_bytes: Option<usize>,
bench: NamedBench<'a, I, O>,
enable_perf: bool,
num_iter: Option<usize>,
) -> Self {
InputWithBenchmark {
Expand All @@ -77,11 +81,6 @@ impl<'a, I, O> InputWithBenchmark<'a, I, O> {
results: Vec::with_capacity(bench.num_group_iter),
bench,
num_iter,
profiler: if enable_perf {
PerfProfiler::new().ok()
} else {
None
},
}
}
}
Expand All @@ -105,22 +104,28 @@ impl<'a, I, O: OutputValue> Bench<'a> for InputWithBenchmark<'a, I, O> {
}

#[inline]
fn exec_bench(&mut self, alloc: &Option<Alloc>) {
fn exec_bench(&mut self, alloc: &Option<Alloc>, events: &mut EventManager) {
let num_iter = self.get_num_iter_or_fail();
let res = self
.bench
.exec_bench(self.input, alloc, &mut self.profiler, num_iter);
let res = self.bench.exec_bench(self.input, alloc, num_iter, events);
self.results.push(res);
}

fn get_results(&mut self, report_memory: bool) -> BenchResult {
fn get_results(&mut self, report_memory: bool, events: &mut EventManager) -> BenchResult {
let num_iter = self.get_num_iter_or_fail();
let total_num_iter = self.bench.num_group_iter as u64 * num_iter as u64;
let stats = compute_stats(&self.results, num_iter);
let perf_counter: Option<CounterValues> = self.profiler.as_mut().and_then(|profiler| {
profiler
.finish(self.bench.num_group_iter as u64 * num_iter as u64)
.ok()
});
let perf_counter: Option<CounterValues> = events
.get_listener(PERF_CNT_EVENT_LISTENER_NAME)
.and_then(|listener| {
let counters = listener
.as_any()
.downcast_mut::<PerfCounterPerBench>()
.expect("Expected PerfCounterPerBench");
counters
.get_by_bench_id_mut(&self.bench.bench_id)
.and_then(|perf_cnt| perf_cnt.finish(total_num_iter).ok())
});

let output_value = (self.bench.fun)(self.input);
BenchResult {
bench_id: self.bench.bench_id.clone(),
Expand All @@ -140,8 +145,9 @@ impl<'a, I, O: OutputValue> Bench<'a> for InputWithBenchmark<'a, I, O> {
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
/// The result of a single benchmark run.
/// There are multiple runs for each benchmark which will be collected to a vector
/// The result of a single benchmark run. The values are already aggregated, since a single
/// bench may be run multiple times to improve accuracy.
/// Each benchmark is run multiple times within a group; these runs are collected into a vector.
pub struct RunResult<O> {
pub duration_ns: u64,
pub memory_consumption: usize,
Expand Down Expand Up @@ -194,30 +200,30 @@ impl<'a, I, O> NamedBench<'a, I, O> {
&mut self,
input: &'a I,
alloc: &Option<Alloc>,
profiler: &mut Option<PerfProfiler>,
num_iter: usize,
events: &mut EventManager,
) -> RunResult<O> {
if let Some(alloc) = alloc {
alloc.reset_peak_memory();
}
if let Some(profiler) = profiler {
profiler.enable();
}
events.emit(BingganEvents::BenchStart(&self.bench_id));
let start = std::time::Instant::now();
let mut res = None;
for _ in 0..num_iter {
res = black_box((self.fun)(input));
}
let elapsed = start.elapsed();
if let Some(profiler) = profiler {
profiler.disable();
}
let mem = if let Some(alloc) = alloc {
alloc.get_peak_memory()
} else {
0
};

RunResult::new(elapsed.as_nanos() as u64 / num_iter as u64, mem, res)
let run_result = RunResult::new(elapsed.as_nanos() as u64 / num_iter as u64, mem, res);
events.emit(BingganEvents::BenchStop(
&self.bench_id,
run_result.duration_ns,
));
run_result
}
}
5 changes: 2 additions & 3 deletions src/bench_group.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
bench::{Bench, BenchResult, InputWithBenchmark, NamedBench},
bench::{Bench, InputWithBenchmark, NamedBench},
bench_id::BenchId,
bench_runner::BenchRunner,
output_value::OutputValue,
Expand Down Expand Up @@ -105,7 +105,6 @@ impl<'a, 'runner> BenchGroup<'a, 'runner> {
input,
self.input_size_in_bytes,
bench,
self.runner.config.enable_perf,
self.runner.config.num_iter_bench,
);

Expand All @@ -120,7 +119,7 @@ impl<'a, 'runner> BenchGroup<'a, 'runner> {
}

/// Run the benchmarks and report the results.
pub fn run(&mut self) -> Vec<BenchResult> {
pub fn run(&mut self) {
self.runner.run_group(
self.group_name.as_deref(),
&mut self.benches,
Expand Down
2 changes: 1 addition & 1 deletion src/bench_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ impl PrintOnce {
/// - runner_name: The name of the runner that executed the benchmark.
/// - group_name: The name of the group that the benchmark belongs to. This is typically the input name.
/// - bench_name: The name of the benchmark.
#[derive(Clone)]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BenchId {
runner_name: Option<String>,
/// This is typically the input name
Expand Down
49 changes: 37 additions & 12 deletions src/bench_runner.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use std::env;
use std::{alloc::GlobalAlloc, cmp::Ordering};

use crate::events::{BingganEvents, EventManager};
use crate::output_value::OutputValue;
use crate::profiler::PerfCounterPerBench;
use crate::{
bench::{Bench, BenchResult, InputWithBenchmark, NamedBench},
bench::{Bench, InputWithBenchmark, NamedBench},
bench_id::{BenchId, PrintOnce},
black_box, parse_args,
report::{report_group, Reporter},
Expand All @@ -28,6 +31,7 @@ pub struct BenchRunner {
pub(crate) name: Option<PrintOnce>,

reporter: Box<dyn Reporter>,
listeners: EventManager,
}

pub const EMPTY_INPUT: &() = &();
Expand All @@ -51,6 +55,12 @@ impl BenchRunner {
new
}

/// Returns a mutable reference to the runner's event manager, which can be used to add
/// listeners to the benchmarks.
///
/// See [`EventManager`] for more information.
pub fn get_event_manager(&mut self) -> &mut EventManager {
&mut self.listeners
}

/// Creates a new `BenchRunner` with custom options set.
pub(crate) fn new_with_options(options: Config) -> Self {
use yansi::Condition;
Expand All @@ -64,6 +74,7 @@ impl BenchRunner {
name: None,
//reporter: Box::new(crate::report::TableReporter {}),
reporter: Box::new(crate::report::PlainReporter::new()),
listeners: EventManager::new(),
}
}

Expand Down Expand Up @@ -115,7 +126,6 @@ impl BenchRunner {
EMPTY_INPUT,
self.input_size_in_bytes,
named_bench,
self.config.enable_perf,
self.config().num_iter_bench,
);

Expand All @@ -132,19 +142,24 @@ impl BenchRunner {

/// Run the benchmarks and report the results.
pub fn run_group<'a>(
&self,
&mut self,
group_name: Option<&str>,
group: &mut [Box<dyn Bench<'a> + 'a>],
output_value_column_title: &'static str,
) -> Vec<BenchResult> {
) {
if group.is_empty() {
return Vec::new();
return;
}
if let Some(runner_name) = &self.name {
runner_name.print_name();
}
if self.config().enable_perf {
self.listeners
.add_listener_if_absent(PerfCounterPerBench::default());
}

if let Some(name) = &group_name {
self.listeners.emit(BingganEvents::GroupStart(name));
println!("{}", name.black().on_yellow().invert().bold());
}

Expand All @@ -168,42 +183,42 @@ impl BenchRunner {
&self.alloc,
self.config.cache_trasher.then_some(&self.cache_trasher),
num_group_iter,
&mut self.listeners,
);
} else {
Self::run_sequential(group, &self.alloc, num_group_iter);
Self::run_sequential(group, &self.alloc, num_group_iter, &mut self.listeners);
}
}

let report_memory = self.alloc.is_some();

report_group(
group_name,
group,
&*self.reporter,
report_memory,
output_value_column_title,
&mut self.listeners,
);

// TODO: clearing should be optional, to check the results yourself, e.g. in CI
//for bench in group {
//bench.clear_results();
//}
group
.iter_mut()
.map(|b| b.get_results(report_memory))
.collect()
}

fn run_sequential<'a>(
benches: &mut [Box<dyn Bench<'a> + 'a>],
alloc: &Option<Alloc>,
num_group_iter: usize,
events: &mut EventManager,
) {
for bench in benches {
for iteration in 0..num_group_iter {
alloca::with_alloca(
iteration, // we increase the byte offset by 1 for each iteration
|_memory: &mut [core::mem::MaybeUninit<u8>]| {
bench.exec_bench(alloc);
bench.exec_bench(alloc, events);
black_box(());
},
);
Expand All @@ -216,6 +231,7 @@ impl BenchRunner {
alloc: &Option<Alloc>,
cache_trasher: Option<&CacheTrasher>,
num_group_iter: usize,
events: &mut EventManager,
) {
let mut bench_indices: Vec<usize> = (0..benches.len()).collect();
for iteration in 0..num_group_iter {
Expand All @@ -241,7 +257,7 @@ impl BenchRunner {
alloca::with_alloca(
iteration, // we increase the byte offset by 1 for each iteration
|_memory: &mut [core::mem::MaybeUninit<u8>]| {
bench.exec_bench(alloc);
bench.exec_bench(alloc, events);
black_box(());
},
);
Expand All @@ -256,6 +272,15 @@ impl BenchRunner {

/// Detect how often each bench should be run if it is not set manually.
fn detect_and_set_num_iter<'b>(benches: &mut [Box<dyn Bench<'b> + 'b>], verbose: bool) {
if let Some(num_iter) = env::var("NUM_ITER_BENCH")
.ok()
.and_then(|val| val.parse::<usize>().ok())
{
for input_and_bench in benches {
input_and_bench.set_num_iter(num_iter);
}
return;
}
// Filter benches that already have num_iter set
let mut benches: Vec<_> = benches
.iter_mut()
Expand Down
Loading

0 comments on commit d8f12bb

Please sign in to comment.