From c220d17d0a73e4cbc178db186e9c1f025b538f09 Mon Sep 17 00:00:00 2001
From: Pascal Seitz <pascal.seitz@gmail.com>
Date: Sun, 19 May 2024 19:01:48 +0800
Subject: [PATCH] remove feature flag for perf integration

make compilation depend on target_os
---
 Cargo.toml                     |  5 +++--
 src/bench_input_group.rs       | 19 ++++++++++++-------
 src/bench_runner.rs            | 20 ++++++++++----------
 src/profiler/dummy_profiler.rs |  8 ++------
 src/profiler/mod.rs            |  8 ++++----
 5 files changed, 31 insertions(+), 29 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index c7a3866..7586a1b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,16 +24,17 @@ suspicious   = { priority = -1, level = "deny" }
 alloca = "0.4.0"
 miniserde = "0.1.38"
 peakmem-alloc = "0.3.0"
-perf-event = { version = "0.4.8", optional = true }
 #prettytable-rs = "0.10.0"
 unicode-width = "0.1.11"
 yansi = { version = "1.0.1", features = ["detect-env", "detect-tty"] }
 rustop = "=1.1.4"
 
+[target.'cfg(target_os = "linux")'.dependencies]
+perf-event = { version = "0.4.8" }
+
 [features]
 real_blackbox = []
 # Enable perf profiler integration
-perf_event = ["perf-event"]
 default = []
 
 [[bench]]
diff --git a/src/bench_input_group.rs b/src/bench_input_group.rs
index c188dd7..83d300c 100644
--- a/src/bench_input_group.rs
+++ b/src/bench_input_group.rs
@@ -70,8 +70,7 @@ impl<I: 'static> InputGroup<I> {
                 input_size_in_bytes: None,
             })
             .collect();
-        let mut runner = BenchRunner::new();
-        runner.set_options(options);
+        let runner = BenchRunner::new_with_options(options);
 
         InputGroup {
             inputs,
@@ -100,8 +99,11 @@ impl<I: 'static> InputGroup<I> {
     /// baseline     Memory: 0 B       Avg: 1ns        Median: 1ns       1ns            1ns      
     ///              L1dA: 2.001       L1dM: 0.000     Br: 6.001         BrM: 0.000     
     /// ```
+    ///
+    /// # Note
+    /// This is only available on Linux. On other OSs this uses `dummy_profiler`, which does nothing.
     pub fn enable_perf(&mut self) {
-        self.bench_group.runner.options.enable_perf = true;
+        self.bench_group.runner.enable_perf();
     }
 
     /// Enables throughput reporting.
@@ -133,6 +135,12 @@ impl<I: 'static> InputGroup<I> {
     /// Manully set the number of iterations each benchmark is called.
     ///
     /// This disables the automatic detection of the number of iterations.
+    ///
+    /// # Note
+    /// Use this to get more stable and comparable benchmark results: the number of
+    /// iterations has a big impact on the measurement, and the automatic detection may
+    /// not pick the same number of iterations on every run. Some mitigations for that
+    /// are implemented, but they are limited.
     pub fn set_num_iter(&mut self, num_iter: usize) {
         self.bench_group.runner.set_num_iter(num_iter);
     }
@@ -148,7 +156,7 @@ impl<I: 'static> InputGroup<I> {
     }
 
     /// Sets the filter, which is used to filter the benchmarks by name.
-    /// The filter is fetched from the command line arguments.
+    /// The filter is fetched from the command line arguments by default.
     ///
     /// It can also match an input name.
     pub fn set_filter(&mut self, filter: Option<String>) {
@@ -199,9 +207,6 @@ impl<I: 'static> InputGroup<I> {
             |b| b.get_input_name(),
             |group| {
                 let input_name = group[0].get_input_name().to_owned();
-                //if !input_name.is_empty() {
-                //println!("{}", input_name.black().on_yellow().invert().italic());
-                //}
                 self.bench_group.runner.run_group(Some(&input_name), group);
             },
         );
diff --git a/src/bench_runner.rs b/src/bench_runner.rs
index ec414f7..3a3c809 100644
--- a/src/bench_runner.rs
+++ b/src/bench_runner.rs
@@ -50,22 +50,18 @@ impl Default for BenchRunner {
 }
 
 impl BenchRunner {
-    /// The inputs are a vector of tuples, where the first element is the name of the input and the
-    /// second element is the input itself.
+    /// Creates a new `BenchRunner`.
     pub fn new() -> Self {
         Self::new_with_options(parse_args())
     }
 
-    /// The inputs are a vector of tuples, where the first element is the name of the input and the
-    /// second element is the input itself.
+    /// Creates a new `BenchRunner` and prints the bench name.
     pub fn with_name<S: AsRef<str>>(name: S) -> Self {
         println!("{}", name.as_ref().black().on_red().invert().bold());
-
-        Self::new_with_options(parse_args())
+        Self::new()
     }
 
-    /// The inputs are a vector of tuples, where the first element is the name of the input and the
-    /// second element is the input itself.
+    /// Creates a new `BenchRunner` with custom options set.
     pub(crate) fn new_with_options(options: Options) -> Self {
         use yansi::Condition;
         yansi::whenever(Condition::TTY_AND_COLOR);
@@ -79,12 +75,12 @@ impl BenchRunner {
         }
     }
 
-    /// Creates a new group
+    /// Creates a new `BenchGroup`.
     /// The group is a collection of benchmarks that are run together.
     pub fn new_group<'a>(&self) -> BenchGroup<'a> {
         BenchGroup::new(self.clone())
     }
-    /// Creates a new group
+    /// Creates a new named `BenchGroup`.
     /// The group is a collection of benchmarks that are run together.
     pub fn new_group_with_name<'a, S: Into<String>>(&self, name: S) -> BenchGroup<'a> {
         BenchGroup::with_name(self.clone(), name)
@@ -95,6 +91,7 @@ impl BenchRunner {
     pub fn set_alloc<A: GlobalAlloc + 'static>(&mut self, alloc: &'static PeakMemAlloc<A>) {
         self.alloc = Some(alloc);
     }
+
     /// Enable perf profiling + report
     ///
     /// The numbers are reported with the following legend:
@@ -111,6 +108,9 @@ impl BenchRunner {
     /// baseline     Memory: 0 B       Avg: 1ns        Median: 1ns       1ns            1ns      
     ///              L1dA: 2.001       L1dM: 0.000     Br: 6.001         BrM: 0.000     
     /// ```
+    ///
+    /// # Note
+    /// This is only available on Linux. On other OSs this uses `dummy_profiler`, which does nothing.
     pub fn enable_perf(&mut self) {
         self.options.enable_perf = true;
     }
diff --git a/src/profiler/dummy_profiler.rs b/src/profiler/dummy_profiler.rs
index 7e627d4..ba6f9d1 100644
--- a/src/profiler/dummy_profiler.rs
+++ b/src/profiler/dummy_profiler.rs
@@ -5,14 +5,19 @@ use super::CounterValues;
 
 pub(crate) struct PerfProfiler {}
 impl PerfProfiler {
-    pub fn new() -> Result<Self, Box<dyn Error>> {
-        panic!("not compiled with perf_event feature flag")
-    }
+    /// Always returns an error: perf counters are only available on Linux.
+    pub fn new() -> Result<Self, Box<dyn Error>> {
+        Err("perf profiling is only available on Linux".into())
+    }
 }
 impl Profiler for PerfProfiler {
     fn enable(&mut self) {}
     fn disable(&mut self) {}
-    fn finish(&mut self, _num_iter: u64) -> std::io::Result<CounterValues> {
-        unreachable!()
-    }
+    /// Always returns an `Unsupported` error, as this profiler collects no counters.
+    fn finish(&mut self, _num_iter: u64) -> std::io::Result<CounterValues> {
+        Err(std::io::Error::new(
+            std::io::ErrorKind::Unsupported,
+            "perf profiling is only available on Linux",
+        ))
+    }
 }
diff --git a/src/profiler/mod.rs b/src/profiler/mod.rs
index fd3f943..cf3c8c3 100644
--- a/src/profiler/mod.rs
+++ b/src/profiler/mod.rs
@@ -1,14 +1,14 @@
 use crate::stats::*;
 use miniserde::*;
 
-#[cfg(not(feature = "perf_event"))]
+#[cfg(not(target_os = "linux"))]
 pub(crate) mod dummy_profiler;
-#[cfg(feature = "perf_event")]
+#[cfg(target_os = "linux")]
 pub(crate) mod perf_profiler;
 
-#[cfg(not(feature = "perf_event"))]
+#[cfg(not(target_os = "linux"))]
 pub(crate) use dummy_profiler::*;
-#[cfg(feature = "perf_event")]
+#[cfg(target_os = "linux")]
 pub(crate) use perf_profiler::*;
 
 use yansi::Paint;