Skip to content

Commit

Permalink
move perf counter to plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Oct 14, 2024
1 parent 06a7b4f commit 1132596
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 62 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ It is designed to be simple to use and to provide a good overview of the perform
### Example

```rust
use binggan::{black_box, plugins::CacheTrasher, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use binggan::{black_box, plugins::*, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};

#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
Expand All @@ -43,12 +43,12 @@ fn test_hashmap(data: &Vec<usize>) {
fn bench_group(mut runner: InputGroup<Vec<usize>>) {
runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.

// Enables the perf integration. Only on Linux, noop on other OS.
runner.config().enable_perf();
// Trashes the CPU cache between runs
runner
.get_plugin_manager()
// Trashes the CPU cache between runs
.add_plugin(CacheTrasher::default())
// Enables the perf integration. Only on Linux, noop on other OS.
.add_plugin(PerfCounterPlugin::default());
// Enables throughput reporting
runner.throughput(|input| input.len() * std::mem::size_of::<usize>());
runner.register("vec", |data| {
Expand Down
10 changes: 7 additions & 3 deletions benches/bench_group.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use std::collections::HashMap;

use binggan::{black_box, plugins::CacheTrasher, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use binggan::{
black_box,
plugins::{CacheTrasher, PerfCounterPlugin},
BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM,
};

#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
Expand Down Expand Up @@ -34,10 +38,10 @@ fn run_bench() {
let mut runner: BenchRunner = BenchRunner::new();
runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.

runner.config().enable_perf();
runner
.get_plugin_manager()
.add_plugin(CacheTrasher::default());
.add_plugin(CacheTrasher::default())
.add_plugin(PerfCounterPlugin::default());

for (input_name, data) in inputs.iter() {
let mut group = runner.new_group();
Expand Down
14 changes: 9 additions & 5 deletions benches/bench_input.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use std::collections::HashMap;

use binggan::{black_box, plugins::CacheTrasher, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use binggan::{
black_box,
plugins::{CacheTrasher, PerfCounterPlugin},
InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM,
};

#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
Expand All @@ -26,12 +30,12 @@ fn test_hashmap(data: &Vec<usize>) -> HashMap<usize, i32> {
fn bench_group(mut runner: InputGroup<Vec<usize>, u64>) {
runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.

// Enables the perf integration. Only on Linux, noop on other OS.
runner.config().enable_perf();
// Trashes the CPU cache between runs
runner
.get_plugin_manager()
.add_plugin(CacheTrasher::default());
// Trashes the CPU cache between runs
.add_plugin(CacheTrasher::default())
// Enables the perf integration. Only on Linux, noop on other OS.
.add_plugin(PerfCounterPlugin::default());
// Enables throughput reporting
runner.throughput(|input| input.len() * std::mem::size_of::<usize>());
runner.register("vec", |data| {
Expand Down
9 changes: 6 additions & 3 deletions benches/test_throughput.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use std::time::{Duration, Instant};

use binggan::{plugins::CacheTrasher, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use binggan::{
plugins::{CacheTrasher, PerfCounterPlugin},
BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM,
};

#[global_allocator]
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
Expand All @@ -9,10 +12,10 @@ fn run_bench() {
let mut runner: BenchRunner = BenchRunner::new();
runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.

runner.config().enable_perf();
runner
.get_plugin_manager()
.add_plugin(CacheTrasher::default());
.add_plugin(CacheTrasher::default())
.add_plugin(PerfCounterPlugin::default());
runner.config().set_num_iter_for_group(128);

let mut group = runner.new_group();
Expand Down
8 changes: 0 additions & 8 deletions src/bench_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,6 @@ impl BenchRunner {
if group.is_empty() {
return;
}
#[cfg(target_os = "linux")]
{
use crate::plugins::perf_counter::PerfCounterPlugin;
if self.config().enable_perf {
self.plugins
.add_plugin_if_absent(PerfCounterPlugin::default());
}
}

self.plugins.emit(PluginEvents::GroupStart {
runner_name: self.name.as_deref(),
Expand Down
32 changes: 0 additions & 32 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ pub struct Config {
/// The filter for the benchmarks
/// This is read from the command line by default.
pub filter: Option<String>,
/// Enable/disable perf integration
pub enable_perf: bool,
/// Verbose output of binggan. Prints the number of iterations.
pub verbose: bool,
/// Manually set the number of iterations the benchmarks registered afterwards are called.
Expand All @@ -26,7 +24,6 @@ impl Default for Config {
Config {
interleave: true,
filter: None,
enable_perf: false,
verbose: false,
num_iter_bench: None,
num_iter_group: None,
Expand Down Expand Up @@ -85,35 +82,6 @@ impl Config {
self.interleave = interleave;
self
}

/// Enable perf profiling + report
///
/// The numbers are reported with the following legend:
/// ```bash
/// L1dA: L1 data access
/// L1dM: L1 data misses
/// Br: branches
/// BrM: missed branches
/// ```
/// e.g.
/// ```bash
/// fibonacci Memory: 0 B Avg: 135ns Median: 136ns 132ns 140ns
/// L1dA: 809.310 L1dM: 0.002 Br: 685.059 BrM: 0.010
/// baseline Memory: 0 B Avg: 1ns Median: 1ns 1ns 1ns
/// L1dA: 2.001 L1dM: 0.000 Br: 6.001 BrM: 0.000
/// ```
///
/// # Note:
/// This is only available on Linux. On other OSs this does nothing.
///
/// Perf may run into limitations where all counters are reported as zero. <https://github.com/jimblandy/perf-event/issues/2>.
/// Disabling the NMI watchdog should help:
///
/// `sudo sh -c "echo '0' > /proc/sys/kernel/nmi_watchdog"`
pub fn enable_perf(&mut self) -> &mut Self {
self.enable_perf = true;
self
}
}

pub(crate) fn parse_args() -> Config {
Expand Down
9 changes: 5 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@
//!
//! # Perf Integration
//! Binggan can integrate with perf to report hardware performance counters.
//! See [Config::enable_perf](crate::Config::enable_perf) for more information.
//! See [PerfCounterPlugin](crate::plugins::PerfCounterPlugin) for more information.
//!
//! # Example for InputGroup
//! ```rust
//! use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
//! use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM, plugins::*};
//!
//! #[global_allocator]
//! pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
Expand All @@ -64,7 +64,9 @@
//! // Run the benchmark for the group with input `Vec<usize>`
//! fn bench_group(mut runner: InputGroup<Vec<usize>, u64>) {
//! runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.
//! runner.config().enable_perf(); // Enable perf integration. This only works on linux.
//! runner
//! .get_plugin_manager()
//! .add_plugin(PerfCounterPlugin::default());
//! runner.register("vec", move |data| {
//! let vec = test_vec(data);
//! Some(vec.len() as u64)
Expand Down Expand Up @@ -135,7 +137,6 @@
//! let mut runner: BenchRunner = BenchRunner::new();
//! runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.
//!
//! runner.config().enable_perf();
//! runner
//! .get_plugin_manager()
//! .add_plugin(CacheTrasher::default());
Expand Down
39 changes: 37 additions & 2 deletions src/plugins/perf_counter/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,44 @@ impl PerfCounters {
}
}

/// Perf Counter Plugin.
///
/// Stores one counter group per bench id.
/// Plugin to report perf counters.
///
/// The numbers are reported with the following legend:
/// ```bash
/// Br: Branches
/// MBr: Missed Branches
/// L1dA: L1 Data Access
/// L1dM: L1 Data Access Misses
/// TLBdA: Translation Lookaside Buffer Data Access
/// TLBdM: Translation Lookaside Buffer Data Access Misses
/// ```
/// e.g.
/// ```bash
/// fibonacci Memory: 0 B Avg: 135ns Median: 136ns 132ns 140ns
/// L1dA: 809.310 L1dM: 0.002 Br: 685.059 MBr: 0.010
/// baseline Memory: 0 B Avg: 1ns Median: 1ns 1ns 1ns
/// L1dA: 2.001 L1dM: 0.000 Br: 6.001 MBr: 0.000
/// ```
///
/// # Note:
/// This is only available on Linux. On other OSs this does nothing.
///
/// Perf may run into limitations where all counters are reported as zero. <https://github.com/jimblandy/perf-event/issues/2>.
/// Disabling the NMI watchdog should help:
///
/// `sudo sh -c "echo '0' > /proc/sys/kernel/nmi_watchdog"`
///
/// ## Usage Example
/// ```rust
/// use binggan::{*, plugins::*};
///
/// let mut runner = BenchRunner::new();
/// runner
/// .get_plugin_manager()
/// .add_plugin(PerfCounterPlugin::default());
/// ```
#[derive(Default)]
pub struct PerfCounterPlugin {
perf_per_bench: PerBenchData<Option<PerfCounters>>,
Expand Down
7 changes: 6 additions & 1 deletion src/plugins/perf_counter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ pub(crate) mod linux;
#[cfg(target_os = "linux")]
pub use linux::*;

#[cfg(not(target_os = "linux"))]
pub(crate) mod dummy;

#[cfg(not(target_os = "linux"))]
pub use dummy::*;

use crate::stats::*;
use miniserde::Deserialize;
use miniserde::Serialize;
Expand Down Expand Up @@ -92,7 +98,6 @@ impl CounterValues {
print_counter_value("L1dM", &self, other, |stats| stats.l1d_miss_count),
print_counter_value("TLBdA", &self, other, |stats| stats.tlbd_access_count),
print_counter_value("TLBdM", &self, other, |stats| stats.tlbd_miss_count),
print_counter_value("L1dA", &self, other, |stats| stats.l1d_access_count),
]
}
}

0 comments on commit 1132596

Please sign in to comment.