diff --git a/Cargo.toml b/Cargo.toml
index ded053c..c7a3866 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,7 +37,7 @@ perf_event = ["perf-event"]
 default = []
 
 [[bench]]
-name = "fibonacci_bench"
+name = "bench"
 harness = false
 
 [[bench]]
diff --git a/benches/bench.rs b/benches/bench.rs
new file mode 100644
index 0000000..c213feb
--- /dev/null
+++ b/benches/bench.rs
@@ -0,0 +1,28 @@
+use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
+
+#[global_allocator] // instrumented allocator; handed to `set_alloc` for peak memory reporting
+pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
+
+/// Computes `n!` with wrapping multiplication, so overflow wraps instead of panicking.
+///
+/// Factors are multiplied from `n` down to 1, each passed through `black_box`.
+pub fn factorial(n: usize) -> usize {
+    (1..=n)
+        .rev()
+        .fold(1usize, |acc, factor| acc.wrapping_mul(black_box(factor)))
+}
+
+/// Benchmarks `factorial` for the inputs 100 and 400, with peak memory reporting enabled.
+fn bench_factorial() {
+    let mut runner = BenchRunner::new();
+    runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.
+    for val in [100, 400] {
+        runner.bench_function(format!("factorial {}", val), move |_| {
+            black_box(factorial(black_box(val))); // consume the result so the work is not elided
+        });
+    }
+}
+
+fn main() {
+    bench_factorial(); // `harness = false` in Cargo.toml, so this `main` is the bench entry point
+}
diff --git a/benches/fibonacci_bench.rs b/benches/fibonacci_bench.rs
deleted file mode 100644
index 0ee7790..0000000
--- a/benches/fibonacci_bench.rs
+++ /dev/null
@@ -1,29 +0,0 @@
-use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
-
-#[global_allocator]
-pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
-
-pub fn factorial(mut n: usize) -> usize {
-    let mut result = 1usize;
-    while n > 0 {
-        result = result.wrapping_mul(black_box(n));
-        n -= 1;
-    }
-    result
-}
-
-fn bench_fibonacci_group<I: 'static>(mut runner: InputGroup<I>) {
-    runner.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting.
-    runner.register("factorial", move |_| {
-        factorial(black_box(400));
-    });
-    //runner.register("fibonacci_alt", move |_| {});
-    runner.run();
-}
-
-fn main() {
-    bench_fibonacci_group(InputGroup::new().name("fibonacci_plain"));
-    bench_fibonacci_group(
-        InputGroup::new_with_inputs(vec![("10", 10), ("15", 15)]).name("fibonacci_input"),
-    );
-}
diff --git a/src/bench.rs b/src/bench.rs
index c5650a3..16ba0da 100644
--- a/src/bench.rs
+++ b/src/bench.rs
@@ -11,13 +11,13 @@ pub trait Bench<'a> {
     fn get_input_name(&self) -> &str;
     fn set_num_iter(&mut self, num_iter: usize);
     /// Sample the number of iterations the benchmark should do
-    fn sample_num_iter(&self) -> usize;
+    fn sample_num_iter(&mut self) -> usize;
     fn exec_bench(&mut self, alloc: &Option<Alloc>);
     fn get_results(&mut self, group_name: Option<&str>) -> BenchResult;
     fn clear_results(&mut self);
 }
 
-type CallBench<'a, I> = Box<dyn Fn(&'a I)>;
+type CallBench<'a, I> = Box<dyn FnMut(&'a I)>;
 
 pub(crate) struct NamedBench<'a, I> {
     pub name: String,
@@ -83,7 +83,7 @@ impl<'a, I> Bench<'a> for InputWithBenchmark<'a, I> {
         &self.input.name
     }
     #[inline]
-    fn sample_num_iter(&self) -> usize {
+    fn sample_num_iter(&mut self) -> usize {
         self.bench.sample_and_get_iter(&self.input)
     }
     fn set_num_iter(&mut self, num_iter: usize) {
@@ -146,7 +146,7 @@ impl RunResult {
 impl<'a, I> NamedBench<'a, I> {
     #[inline]
     /// Each group has its own number of iterations. This is not the final num_iter
-    pub fn sample_and_get_iter(&self, input: &NamedInput<'a, I>) -> usize {
+    pub fn sample_and_get_iter(&mut self, input: &NamedInput<'a, I>) -> usize {
         // We want to run the benchmark for 100ms
         const TARGET_MS_PER_BENCH: u64 = 100;
         {
@@ -174,7 +174,7 @@ impl<'a, I> NamedBench<'a, I> {
     }
     #[inline]
     pub fn exec_bench(
-        &self,
+        &mut self,
         input: &NamedInput<'a, I>,
         alloc: &Option<Alloc>,
         profiler: &mut Option<PerfProfiler>,
diff --git a/src/bench_runner.rs b/src/bench_runner.rs
index 359646a..ec414f7 100644
--- a/src/bench_runner.rs
+++ b/src/bench_runner.rs
@@ -124,6 +124,12 @@ impl BenchRunner {
     /// Manully set the number of iterations each benchmark is called.
     ///
     /// This disables the automatic detection of the number of iterations.
+    ///
+    /// # Note
+    /// Use this to get more stable and comparable benchmark results: the number of
+    /// iterations has a big impact on the measurement, and the automatic detection may
+    /// not arrive at the same number of iterations on every run. Some mitigations for
+    /// that are implemented, but they are limited.
     pub fn set_num_iter(&mut self, num_iter: usize) {
         self.num_iter = Some(num_iter);
     }
@@ -156,11 +162,11 @@ impl BenchRunner {
     }
 
     /// Run a single function
-    pub fn bench_function<F>(&mut self, name: String, f: F) -> &mut Self
+    pub fn bench_function<F, S: Into<String>>(&mut self, name: S, f: F) -> &mut Self
     where
         F: Fn(&()) + 'static,
     {
-        let named_bench = NamedBench::new(name, Box::new(f));
+        let named_bench = NamedBench::new(name.into(), Box::new(f));
         let bundle = InputWithBenchmark::new(
             EMPTY_INPUT,
             self.input_size_in_bytes,
@@ -213,19 +219,12 @@ impl BenchRunner {
         // We report at the end, so the alignment is correct (could be calculated up front)
         report_group(name, group, self.alloc.is_some());
 
-        //self.clear_results();
+        // TODO: clearing should be optional, to check the results yourself, e.g. in CI
         for bench in group {
             bench.clear_results();
         }
     }
 
-    // /// Clear the stored results of the benchmarks.
-    //pub fn clear_results(&mut self) {
-    //for bench in &mut self.benches {
-    //bench.clear_results();
-    //}
-    //}
-
     fn run_sequential<'a>(benches: &mut [Box<dyn Bench<'a> + 'a>], alloc: &Option<Alloc>) {
         for bench in benches {
             for iteration in 0..NUM_RUNS {
@@ -301,7 +300,7 @@ impl BenchRunner {
         // In order to make the benchmarks in a group comparable, it is imperative to call them
         // the same numer of times
         let (min_num_iter, max_num_iter) =
-            minmax(benches.iter().map(|b| b.sample_num_iter())).unwrap();
+            minmax(benches.iter_mut().map(|b| b.sample_num_iter())).unwrap();
 
         if verbose {
             println!(