
move cache trasher to plugins, refactor plugins
PSeitz committed Oct 14, 2024
1 parent dc5d88e commit 836ccd4
Showing 17 changed files with 187 additions and 161 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -27,7 +27,7 @@ It is designed to be simple to use and to provide a good overview of the perform
### Example

```rust
use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use binggan::{black_box, plugins::CacheTrasher, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};

#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
@@ -46,7 +46,9 @@ fn bench_group(mut runner: InputGroup<Vec<usize>>) {
// Enables the perf integration. Only on Linux, noop on other OS.
runner.config().enable_perf();
// Trashes the CPU cache between runs
runner.config().set_cache_trasher(true);
runner
.get_plugin_manager()
.add_plugin(CacheTrasher::default());
// Enables throughput reporting
runner.throughput(|input| input.len() * std::mem::size_of::<usize>());
runner.register("vec", |data| {
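Taken together, the README hunks above amount to a single call-site swap for users: the `cache_trasher` config flag is gone, and the `CacheTrasher` plugin is registered on the plugin manager instead. A minimal before/after sketch of just that swap, assuming `BenchRunner::new()` as the constructor (the construction site is outside the hunks shown here):

```rust
use binggan::{plugins::CacheTrasher, BenchRunner};

fn main() {
    // Constructor assumed; only the two configuration calls below appear in this diff.
    let mut runner = BenchRunner::new();

    // Before this commit: cache trashing was a config flag.
    // runner.config().set_cache_trasher(true);

    // After this commit: cache trashing is an ordinary plugin.
    runner
        .get_plugin_manager()
        .add_plugin(CacheTrasher::default());
}
```

The same swap repeats in each of the `benches/*.rs` files below.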
6 changes: 4 additions & 2 deletions benches/bench_group.rs
@@ -1,6 +1,6 @@
use std::collections::HashMap;

use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use binggan::{black_box, plugins::CacheTrasher, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};

#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
@@ -35,7 +35,9 @@ fn run_bench() {
runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.

runner.config().enable_perf();
runner.config().set_cache_trasher(true);
runner
.get_plugin_manager()
.add_plugin(CacheTrasher::default());

for (input_name, data) in inputs.iter() {
let mut group = runner.new_group();
6 changes: 4 additions & 2 deletions benches/bench_input.rs
@@ -1,6 +1,6 @@
use std::collections::HashMap;

use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use binggan::{black_box, plugins::CacheTrasher, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};

#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
@@ -29,7 +29,9 @@ fn bench_group(mut runner: InputGroup<Vec<usize>, u64>) {
// Enables the perf integration. Only on Linux, noop on other OS.
runner.config().enable_perf();
// Trashes the CPU cache between runs
runner.config().set_cache_trasher(true);
runner
.get_plugin_manager()
.add_plugin(CacheTrasher::default());
// Enables throughput reporting
runner.throughput(|input| input.len() * std::mem::size_of::<usize>());
runner.register("vec", |data| {
6 changes: 4 additions & 2 deletions benches/test_throughput.rs
@@ -1,6 +1,6 @@
use std::time::{Duration, Instant};

use binggan::{BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use binggan::{plugins::CacheTrasher, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};

#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
Expand All @@ -10,7 +10,9 @@ fn run_bench() {
runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.

runner.config().enable_perf();
runner.config().set_cache_trasher(true);
runner
.get_plugin_manager()
.add_plugin(CacheTrasher::default());
runner.config().set_num_iter_for_group(128);

let mut group = runner.new_group();
20 changes: 10 additions & 10 deletions src/bench.rs
@@ -13,8 +13,8 @@ pub trait Bench<'a> {
fn set_num_iter(&mut self, num_iter: usize);
/// Sample the number of iterations the benchmark should do
fn sample_num_iter(&mut self) -> usize;
fn exec_bench(&mut self, events: &mut EventManager);
fn get_results(&mut self, events: &mut EventManager) -> BenchResult;
fn exec_bench(&mut self, events: &mut PluginManager);
fn get_results(&mut self, events: &mut PluginManager) -> BenchResult;
fn clear_results(&mut self);
}

@@ -102,17 +102,17 @@ impl<'a, I, O: OutputValue> Bench<'a> for InputWithBenchmark<'a, I, O> {
}

#[inline]
fn exec_bench(&mut self, events: &mut EventManager) {
fn exec_bench(&mut self, events: &mut PluginManager) {
let num_iter = self.get_num_iter_or_fail();
let res = self.bench.exec_bench(self.input, num_iter, events);
self.results.push(res);
}

fn get_results(&mut self, events: &mut EventManager) -> BenchResult {
fn get_results(&mut self, events: &mut PluginManager) -> BenchResult {
let num_iter = self.get_num_iter_or_fail();
let total_num_iter = self.bench.num_group_iter as u64 * num_iter as u64;
let memory_consumption: Option<&Vec<usize>> = events
.downcast_listener::<AllocPerBench>(ALLOC_EVENT_LISTENER_NAME)
.downcast_plugin::<AllocPerBench>(ALLOC_EVENT_LISTENER_NAME)
.and_then(|counters| counters.get_by_bench_id(&self.bench.bench_id));
let stats = compute_stats(&self.results, memory_consumption);
let tracked_memory = memory_consumption.is_some();
Expand All @@ -137,14 +137,14 @@ impl<'a, I, O: OutputValue> Bench<'a> for InputWithBenchmark<'a, I, O> {
}

fn get_perf_counter(
_events: &mut EventManager,
_events: &mut PluginManager,
_bench_id: &BenchId,
_total_num_iter: u64,
) -> Option<CounterValues> {
#[cfg(target_os = "linux")]
{
_events
.downcast_listener::<PerfCounterPerBench>(PERF_CNT_EVENT_LISTENER_NAME)
.downcast_plugin::<PerfCounterPerBench>(PERF_CNT_EVENT_LISTENER_NAME)
.and_then(|counters| {
counters
.get_by_bench_id_mut(_bench_id)
@@ -211,9 +211,9 @@ impl<'a, I, O> NamedBench<'a, I, O> {
&mut self,
input: &'a I,
num_iter: usize,
events: &mut EventManager,
events: &mut PluginManager,
) -> RunResult<O> {
events.emit(BingganEvents::BenchStart {
events.emit(PluginEvents::BenchStart {
bench_id: &self.bench_id,
});
let start = std::time::Instant::now();
Expand All @@ -224,7 +224,7 @@ impl<'a, I, O> NamedBench<'a, I, O> {
let elapsed = start.elapsed();

let run_result = RunResult::new(elapsed.as_nanos() as u64 / num_iter as u64, res);
events.emit(BingganEvents::BenchStop {
events.emit(PluginEvents::BenchStop {
bench_id: &self.bench_id,
duration: run_result.duration_ns,
});
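The `exec_bench` hunk above also shows the whole lifecycle a plugin observes for a single bench: a `BenchStart` event before the timed loop and a `BenchStop` event carrying the averaged per-iteration duration. The sketch below models that flow with stand-in types to make the observer pattern explicit; the enum, trait, and manager here are simplified illustrations, not binggan's actual definitions (which are not visible in the loaded hunks).

```rust
use std::time::Instant;

// Stand-ins for the PluginEvents / PluginManager pair touched in this
// commit; binggan's real trait and signatures may differ.
enum Event<'a> {
    BenchStart { bench_id: &'a str },
    BenchStop { bench_id: &'a str, duration_ns: u64 },
}

trait Plugin {
    fn on_event(&mut self, event: &Event);
}

struct Manager {
    plugins: Vec<Box<dyn Plugin>>,
}

impl Manager {
    fn emit(&mut self, event: Event) {
        for plugin in &mut self.plugins {
            plugin.on_event(&event);
        }
    }
}

// Example plugin: print the per-iteration duration reported at BenchStop.
struct DurationLogger;

impl Plugin for DurationLogger {
    fn on_event(&mut self, event: &Event) {
        if let Event::BenchStop { bench_id, duration_ns } = event {
            println!("{bench_id}: {duration_ns} ns/iter");
        }
    }
}

// Mirrors the shape of NamedBench::exec_bench: emit start, run the closure
// num_iter times, then emit stop with the elapsed time per iteration.
fn exec_bench(manager: &mut Manager, bench_id: &str, num_iter: u64, mut f: impl FnMut()) {
    manager.emit(Event::BenchStart { bench_id });
    let start = Instant::now();
    for _ in 0..num_iter {
        f();
    }
    let elapsed = start.elapsed();
    manager.emit(Event::BenchStop {
        bench_id,
        duration_ns: elapsed.as_nanos() as u64 / num_iter,
    });
}

fn main() {
    let mut manager = Manager {
        plugins: vec![Box::new(DurationLogger)],
    };
    exec_bench(&mut manager, "vec_push", 1_000, || {
        let v: Vec<usize> = (0..100).collect();
        std::hint::black_box(v);
    });
}
```

Plugins registered through `add_plugin` sit behind `emit` calls of this general shape, which is what lets the cache trasher move out of the runner core further down in this commit.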
10 changes: 5 additions & 5 deletions src/bench_input_group.rs
@@ -1,7 +1,7 @@
use std::{alloc::GlobalAlloc, mem};

use crate::output_value::OutputValue;
use crate::plugins::EventManager;
use crate::plugins::PluginManager;
use crate::{
bench::NamedBench, bench_id::BenchId, bench_runner::BenchRunner, parse_args, BenchGroup, Config,
};
@@ -152,10 +152,10 @@ impl<I: 'static, O: OutputValue + 'static> InputGroup<I, O> {
&mut self.runner.config
}

/// Returns the event manager, which can be used to add listeners to the benchmarks.
/// See [crate::plugins::EventManager] for more information.
pub fn get_event_manager(&mut self) -> &mut EventManager {
self.runner.get_event_manager()
/// Returns the plugin manager, which can be used to add plugins.
/// See [crate::plugins::PluginManager] for more information.
pub fn get_plugin_manager(&mut self) -> &mut PluginManager {
self.runner.get_plugin_manager()
}
}

76 changes: 15 additions & 61 deletions src/bench_runner.rs
@@ -3,7 +3,7 @@ use std::{alloc::GlobalAlloc, cmp::Ordering};

use crate::output_value::OutputValue;
use crate::plugins::alloc::AllocPerBench;
use crate::plugins::{BingganEvents, EventManager};
use crate::plugins::{PluginEvents, PluginManager};
use crate::report::PlainReporter;
use crate::{
bench::{Bench, InputWithBenchmark, NamedBench},
Expand All @@ -12,13 +12,11 @@ use crate::{
report::report_group,
BenchGroup, Config,
};
use core::mem::size_of;
use peakmem_alloc::*;

/// The main struct to run benchmarks.
///
pub struct BenchRunner {
cache_trasher: CacheTrasher,
pub(crate) config: Config,
/// The size of the input.
/// Enables throughput reporting.
Expand All @@ -27,7 +25,7 @@ pub struct BenchRunner {
/// Name of the test
pub(crate) name: Option<String>,

listeners: EventManager,
listeners: PluginManager,
}

pub const EMPTY_INPUT: &() = &();
Expand All @@ -51,9 +49,9 @@ impl BenchRunner {
new
}

/// Returns the event manager, which can be used to add listeners to the benchmarks.
/// See [crate::plugins::EventManager] for more information.
pub fn get_event_manager(&mut self) -> &mut EventManager {
/// Returns the plugin manager, which can be used to add plugins.
/// See [crate::plugins::PluginManager] for more information.
pub fn get_plugin_manager(&mut self) -> &mut PluginManager {
&mut self.listeners
}

Expand All @@ -62,11 +60,10 @@ impl BenchRunner {
use yansi::Condition;
yansi::whenever(Condition::TTY_AND_COLOR);

let mut event_manager = EventManager::new();
event_manager.add_listener_if_absent(PlainReporter::new());
let mut event_manager = PluginManager::new();
event_manager.add_plugin_if_absent(PlainReporter::new());

BenchRunner {
cache_trasher: CacheTrasher::new(1024 * 1024 * 16),
config: options,
input_size_in_bytes: None,
name: None,
Expand All @@ -91,7 +88,7 @@ impl BenchRunner {
/// This will report the peak memory consumption of the benchmarks.
pub fn set_alloc<A: GlobalAlloc + 'static>(&mut self, alloc: &'static PeakMemAlloc<A>) {
let alloc = AllocPerBench::new(alloc);
self.listeners.add_listener_if_absent(alloc);
self.listeners.add_plugin_if_absent(alloc);
}

/// Enables throughput reporting. The throughput will be valid for all inputs that are
@@ -147,11 +144,11 @@ impl BenchRunner {
use crate::plugins::perf_counter::PerfCounterPerBench;
if self.config().enable_perf {
self.listeners
.add_listener_if_absent(PerfCounterPerBench::default());
.add_plugin_if_absent(PerfCounterPerBench::default());
}
}

self.listeners.emit(BingganEvents::GroupStart {
self.listeners.emit(PluginEvents::GroupStart {
runner_name: self.name.as_deref(),
group_name,
output_value_column_title,
Expand All @@ -172,12 +169,7 @@ impl BenchRunner {
Self::detect_and_set_num_iter(group, self.config.verbose, &mut self.listeners);

if self.config.interleave {
Self::run_interleaved(
group,
self.config.cache_trasher.then_some(&self.cache_trasher),
num_group_iter,
&mut self.listeners,
);
Self::run_interleaved(group, num_group_iter, &mut self.listeners);
} else {
Self::run_sequential(group, num_group_iter, &mut self.listeners);
}
Expand All @@ -200,7 +192,7 @@ impl BenchRunner {
fn run_sequential<'a>(
benches: &mut [Box<dyn Bench<'a> + 'a>],
num_group_iter: usize,
events: &mut EventManager,
events: &mut PluginManager,
) {
for bench in benches {
for iteration in 0..num_group_iter {
Expand All @@ -217,9 +209,8 @@ impl BenchRunner {

fn run_interleaved<'a>(
benches: &mut [Box<dyn Bench<'a> + 'a>],
cache_trasher: Option<&CacheTrasher>,
num_group_iter: usize,
events: &mut EventManager,
events: &mut PluginManager,
) {
let mut bench_indices: Vec<usize> = (0..benches.len()).collect();
for iteration in 0..num_group_iter {
Expand All @@ -229,9 +220,6 @@ impl BenchRunner {
shuffle(&mut bench_indices, iteration as u64);

for bench_idx in bench_indices.iter() {
if let Some(cache_trasher) = cache_trasher {
cache_trasher.issue_read();
}
let bench = &mut benches[*bench_idx];
// We use alloca to address memory layout randomness issues
// So the whole stack moves down by 1 byte for each iteration
@@ -262,7 +250,7 @@ impl BenchRunner {
fn detect_and_set_num_iter<'b>(
benches: &mut [Box<dyn Bench<'b> + 'b>],
verbose: bool,
events: &mut EventManager,
events: &mut PluginManager,
) {
if let Some(num_iter) = env::var("NUM_ITER_BENCH")
.ok()
@@ -297,7 +285,7 @@ impl BenchRunner {
let max_num_iter = max_num_iter.min(min_num_iter * 10);
// We round up, so that we may get the same number of iterations between runs
let max_num_iter = round_up(max_num_iter as u64) as usize;
events.emit(BingganEvents::GroupNumIters {
events.emit(PluginEvents::GroupNumIters {
num_iter: max_num_iter,
});
if verbose {
@@ -353,40 +341,6 @@ where
Some((min_so_far, max_so_far))
}

/// Performs dummy reads from memory to spoil a given amount of CPU cache
///
/// Uses cache-aligned data arrays to perform the minimum number of reads needed to spoil the cache
#[derive(Clone)]
struct CacheTrasher {
cache_lines: Vec<CacheLine>,
}
impl Default for CacheTrasher {
fn default() -> Self {
Self::new(1024 * 1024 * 16) // 16MB
}
}

impl CacheTrasher {
fn new(bytes: usize) -> Self {
let n = bytes / size_of::<CacheLine>();
let cache_lines = vec![CacheLine::default(); n];
Self { cache_lines }
}

fn issue_read(&self) {
for line in &self.cache_lines {
// Because CacheLine is aligned on 64 bytes it is enough to read single element from the array
// to spoil the whole cache line
unsafe { std::ptr::read_volatile(&line.0[0]) };
}
}
}

#[repr(C)]
#[repr(align(64))]
#[derive(Default, Clone, Copy)]
struct CacheLine([u16; 32]);

fn shuffle(indices: &mut [usize], seed: u64) {
let mut rng = SimpleRng::new(seed);

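The `CacheTrasher`, `CacheLine`, and `issue_read` items deleted above are the entire cache-trashing mechanism: a buffer of 64-byte-aligned lines (16 MB by default) and one volatile read per line, enough to displace whatever the previous benchmark left in the CPU caches. The new home in the `plugins` module is not part of the loaded hunks, so the standalone sketch below simply mirrors the removed implementation rather than the actual `plugins::CacheTrasher`:

```rust
/// One 64-byte cache line; because of the alignment, reading a single
/// element is enough to pull (and thus evict) the whole line.
#[repr(C)]
#[repr(align(64))]
#[derive(Default, Clone, Copy)]
struct CacheLine([u16; 32]);

/// Performs dummy reads over a buffer larger than the CPU caches to
/// spoil their contents between benchmark runs.
struct CacheTrasher {
    cache_lines: Vec<CacheLine>,
}

impl CacheTrasher {
    fn new(bytes: usize) -> Self {
        let n = bytes / std::mem::size_of::<CacheLine>();
        Self {
            cache_lines: vec![CacheLine::default(); n],
        }
    }

    fn issue_read(&self) {
        for line in &self.cache_lines {
            // One volatile read per aligned line is the minimum work
            // needed to touch every cache line in the buffer.
            unsafe { std::ptr::read_volatile(&line.0[0]) };
        }
    }
}

fn main() {
    // 16 MB, matching the default used before the refactor.
    let trasher = CacheTrasher::new(1024 * 1024 * 16);
    trasher.issue_read();
}
```

Registering `CacheTrasher::default()` on the plugin manager, as in the README hunk, presumably issues this read before each bench run, replacing the `cache_trasher.issue_read()` call removed from `run_interleaved` above.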
9 changes: 0 additions & 9 deletions src/config.rs
@@ -10,8 +10,6 @@ pub struct Config {
pub filter: Option<String>,
/// Enable/disable perf integration
pub enable_perf: bool,
/// Trash CPU cache between bench runs.
pub cache_trasher: bool,
/// Verbose output of binggan. Prints the number of iterations.
pub verbose: bool,
/// Manually set the number of iterations the benchmarks registered afterwards are called.
Expand All @@ -29,7 +27,6 @@ impl Default for Config {
interleave: true,
filter: None,
enable_perf: false,
cache_trasher: false,
verbose: false,
num_iter_bench: None,
num_iter_group: None,
@@ -117,12 +114,6 @@ impl Config {
self.enable_perf = true;
self
}

/// Trash CPU cache between bench runs. Defaults to false.
pub fn set_cache_trasher(&mut self, enable: bool) -> &mut Self {
self.cache_trasher = enable;
self
}
}

pub(crate) fn parse_args() -> Config {
