diff --git a/Cargo.toml b/Cargo.toml
index 320779a..ded053c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,7 +41,7 @@ name = "fibonacci_bench"
 harness = false
 
 [[bench]]
-name = "bench"
+name = "bench_group"
 harness = false
 
 [[bench]]
diff --git a/benches/bench.rs b/benches/bench_group.rs
similarity index 82%
rename from benches/bench.rs
rename to benches/bench_group.rs
index 2b25415..d15248e 100644
--- a/benches/bench.rs
+++ b/benches/bench_group.rs
@@ -36,16 +36,17 @@ fn run_bench() {
     runner.enable_perf();
 
     runner.set_cache_trasher(true);
+    let mut group = runner.new_group();
     for (input_name, data) in inputs.iter() {
-        runner.set_input_size(data.len() * std::mem::size_of::<usize>());
-        runner.register_with_input("vec", input_name, data, move |data| {
+        group.set_input_size(data.len() * std::mem::size_of::<usize>());
+        group.register_with_input("vec", input_name, data, move |data| {
             black_box(test_vec(data));
         });
-        runner.register_with_input("hashmap", input_name, data, move |data| {
+        group.register_with_input("hashmap", input_name, data, move |data| {
             black_box(test_hashmap(data));
         });
     }
-    runner.run();
+    group.run();
 }
 
 fn main() {
diff --git a/src/bench.rs b/src/bench.rs
index aeee048..c5650a3 100644
--- a/src/bench.rs
+++ b/src/bench.rs
@@ -13,7 +13,7 @@ pub trait Bench<'a> {
     /// Sample the number of iterations the benchmark should do
     fn sample_num_iter(&self) -> usize;
     fn exec_bench(&mut self, alloc: &Option<Alloc>);
-    fn get_results(&mut self, group_name: &Option<String>) -> BenchResult;
+    fn get_results(&mut self, group_name: Option<&str>) -> BenchResult;
     fn clear_results(&mut self);
 }
 
@@ -99,10 +99,10 @@ impl<'a, I> Bench<'a> for InputWithBenchmark<'a, I> {
         self.results.push(res);
     }
 
-    fn get_results(&mut self, group_name: &Option<String>) -> BenchResult {
+    fn get_results(&mut self, group_name: Option<&str>) -> BenchResult {
         let bench_id = format!(
             "{}_{}_{}",
-            group_name.as_ref().unwrap_or(&"".to_string()),
+            group_name.as_ref().unwrap_or(&""),
             self.input.name,
             self.bench.name
         )
diff --git a/src/bench_group.rs b/src/bench_group.rs
new file mode 100644
index 0000000..6641308
--- /dev/null
+++ b/src/bench_group.rs
@@ -0,0 +1,129 @@
+use std::borrow::Cow;
+
+use crate::{
+    bench::{Bench, InputWithBenchmark, NamedBench},
+    bench_runner::{BenchRunner, EMPTY_INPUT},
+    NamedInput,
+};
+
+/// `BenchGroup` is an optionally named group of benchmarks that are run
+/// together by the `BenchRunner` the group owns.
+pub struct BenchGroup<'a> {
+    name: Option<String>,
+    pub(crate) benches: Vec<Box<dyn Bench<'a> + 'a>>,
+    /// Input size in bytes, handed to every benchmark registered while it is set.
+    /// Enables throughput reporting.
+    input_size_in_bytes: Option<usize>,
+    pub(crate) runner: BenchRunner,
+}
+
+impl<'a> BenchGroup<'a> {
+    /// Builds an empty, unnamed group whose benchmarks will be run by `runner`.
+    pub fn new(runner: BenchRunner) -> Self {
+        BenchGroup {
+            runner,
+            benches: vec![],
+            input_size_in_bytes: None,
+            name: None,
+        }
+    }
+
+    /// Builds an empty group that already carries `name`.
+    ///
+    /// Shorthand for [`BenchGroup::new`] followed by [`BenchGroup::name`]: no
+    /// benchmarks are registered and no input size is set.
+    pub fn with_name<S: Into<String>>(runner: BenchRunner, name: S) -> Self {
+        // Start from the unnamed group and attach the name via the builder.
+        let unnamed = Self::new(runner);
+        unnamed.name(name)
+    }
+
+    /// Builder-style variant of `set_name`: names the group and hands it back.
+    pub fn name<S: Into<String>>(mut self, name: S) -> Self {
+        self.set_name(name);
+        self
+    }
+
+    /// Records the input size in bytes, which enables throughput reporting.
+    /// It applies to every benchmark that is registered after this call.
+    pub fn set_input_size(&mut self, input_size: usize) {
+        self.input_size_in_bytes = Option::from(input_size);
+    }
+
+    /// Registers `fun` as a benchmark called `bench_name` that is run on `input`.
+    ///
+    /// `input_name` labels the input. Both names must be of the same type `S`.
+    /// The benchmark is dropped right away if the runner has a filter that
+    /// matches neither `bench_name` nor `input_name`.
+    pub fn register_with_input<I, F, S: Into<String>>(
+        &mut self,
+        bench_name: S,
+        input_name: S,
+        input: &'a I,
+        fun: F,
+    ) where
+        F: Fn(&'a I) + 'static,
+    {
+        let (bench_name, input_name): (String, String) = (bench_name.into(), input_name.into());
+        let named_input = NamedInput {
+            name: Cow::Owned(input_name),
+            data: input,
+        };
+        let named_bench = NamedBench::new(bench_name, Box::new(fun));
+        self.register_named_with_input(named_bench, named_input);
+    }
+
+    /// Registers `fun` as an input-less benchmark called `name`, paired with
+    /// `EMPTY_INPUT`. NOTE(review): `I` appears nowhere else in the signature,
+    /// so it cannot be inferred at call sites — consider whether it is needed.
+    pub fn register<I, F, S: Into<String>>(&mut self, name: S, fun: F)
+    where
+        F: Fn(&'a ()) + 'static,
+    {
+        let named_bench = NamedBench::new(name.into(), Box::new(fun));
+        self.register_named_with_input(named_bench, EMPTY_INPUT);
+    }
+
+    /// Bundles `bench` with `input` and queues the pair for the next run.
+    ///
+    /// If the runner has a filter, the pair is skipped unless the filter is a
+    /// substring of the benchmark name or of the input name.
+    pub(crate) fn register_named_with_input<I>(
+        &mut self,
+        bench: NamedBench<'a, I>,
+        input: NamedInput<'a, I>,
+    ) {
+        let options = &self.runner.options;
+        let selected = match &options.filter {
+            Some(filter) => bench.name.contains(filter) || input.name.contains(filter),
+            None => true,
+        };
+        if !selected {
+            return;
+        }
+        let (input_size, enable_perf) = (self.input_size_in_bytes, options.enable_perf);
+        let bundle = InputWithBenchmark::new(input, input_size, bench, enable_perf);
+        self.benches.push(Box::new(bundle));
+    }
+
+    /// Names the group. The name is printed before the benchmarks run and
+    /// is also used to tell results apart when they are written to disk.
+    pub fn set_name<S: Into<String>>(&mut self, name: S) {
+        let name: String = name.into();
+        self.name = Some(name);
+    }
+
+    /// Replaces the filter of this group's runner. Benchmarks registered from
+    /// now on are kept only if the filter matches their name or input name.
+    /// Unless overridden here, the filter comes from the command line arguments.
+    pub fn set_filter(&mut self, filter: Option<String>) {
+        let runner = &mut self.runner;
+        runner.set_filter(filter);
+    }
+
+    /// Runs every registered benchmark as one group and reports the results.
+    pub fn run(&mut self) {
+        let group_name = self.name.as_deref();
+        self.runner.run_group(group_name, &mut self.benches);
+    }
+}
diff --git a/src/bench_input_group.rs b/src/bench_input_group.rs
index 7039947..c188dd7 100644
--- a/src/bench_input_group.rs
+++ b/src/bench_input_group.rs
@@ -1,7 +1,12 @@
 use std::{alloc::GlobalAlloc, borrow::Cow, collections::HashMap};
 
-use crate::{bench::NamedBench, bench_runner::BenchRunner, parse_args, NamedInput, Options};
+use crate::{
+    bench::NamedBench,
+    bench_runner::{group_by_mut, BenchRunner},
+    parse_args, BenchGroup, NamedInput, Options,
+};
 use peakmem_alloc::*;
+use yansi::Paint;
 
 pub(crate) type Alloc = &'static dyn PeakMemAllocTrait;
 
@@ -13,7 +18,8 @@ pub(crate) type Alloc = &'static dyn PeakMemAllocTrait;
 /// to the `InputGroup`. If this is not possible, use [BenchRunner](crate::BenchRunner) instead.
 pub struct InputGroup<I = ()> {
     inputs: Vec<OwnedNamedInput<I>>,
-    runner: BenchRunner<'static>,
+    bench_group: BenchGroup<'static>,
+    pub(crate) name: Option<String>,
 }
 
 impl Default for InputGroup<()> {
@@ -39,7 +45,7 @@ pub struct OwnedNamedInput<I> {
 impl<I: 'static> InputGroup<I> {
     /// Sets name of the group and returns the group.
     pub fn name<S: Into<String>>(mut self, name: S) -> Self {
-        self.runner.set_name(name.into());
+        self.name = Some(name.into());
         self
     }
     /// The inputs are a vector of tuples, where the first element is the name of the input and the
@@ -67,12 +73,16 @@ impl<I: 'static> InputGroup<I> {
         let mut runner = BenchRunner::new();
         runner.set_options(options);
 
-        InputGroup { inputs, runner }
+        InputGroup {
+            inputs,
+            name: None,
+            bench_group: BenchGroup::new(runner),
+        }
     }
     /// Set the peak mem allocator to be used for the benchmarks.
     /// This will report the peak memory consumption of the benchmarks.
     pub fn set_alloc<A: GlobalAlloc + 'static>(&mut self, alloc: &'static PeakMemAlloc<A>) {
-        self.runner.set_alloc(alloc);
+        self.bench_group.runner.set_alloc(alloc);
     }
     /// Enable perf profiling + report
     ///
@@ -91,7 +101,7 @@ impl<I: 'static> InputGroup<I> {
     ///              L1dA: 2.001       L1dM: 0.000     Br: 6.001         BrM: 0.000     
     /// ```
     pub fn enable_perf(&mut self) {
-        self.runner.options.enable_perf = true;
+        self.bench_group.runner.options.enable_perf = true;
     }
 
     /// Enables throughput reporting.
@@ -108,8 +118,8 @@ impl<I: 'static> InputGroup<I> {
     /// Set the name of the group.
     /// The name is printed before the benchmarks are run.
     /// It is also used to distinguish when writing the results to disk.
-    pub fn set_name(&mut self, name: String) {
-        self.runner.set_name(name);
+    pub fn set_name<S: Into<String>>(&mut self, name: S) {
+        self.name = Some(name.into());
     }
 
     /// Set the options to the given value.
@@ -117,24 +127,24 @@ impl<I: 'static> InputGroup<I> {
     ///
     /// See the Options struct for more information.
     pub fn set_options(&mut self, options: Options) {
-        self.runner.set_options(options);
+        self.bench_group.runner.set_options(options);
     }
 
     /// Manully set the number of iterations each benchmark is called.
     ///
     /// This disables the automatic detection of the number of iterations.
     pub fn set_num_iter(&mut self, num_iter: usize) {
-        self.runner.set_num_iter(num_iter);
+        self.bench_group.runner.set_num_iter(num_iter);
     }
 
     /// Trash CPU cache between bench runs. Defaults to false.
     pub fn set_cache_trasher(&mut self, enable: bool) {
-        self.runner.set_cache_trasher(enable);
+        self.bench_group.runner.set_cache_trasher(enable);
     }
 
     /// Sets the interleave option to the given value.
     pub fn set_interleave(&mut self, interleave: bool) {
-        self.runner.set_interleave(interleave);
+        self.bench_group.runner.set_interleave(interleave);
     }
 
     /// Sets the filter, which is used to filter the benchmarks by name.
@@ -142,7 +152,7 @@ impl<I: 'static> InputGroup<I> {
     ///
     /// It can also match an input name.
     pub fn set_filter(&mut self, filter: Option<String>) {
-        self.runner.set_filter(filter);
+        self.bench_group.runner.set_filter(filter);
     }
 
     /// Register a benchmark with the given name and function.
@@ -163,25 +173,38 @@ impl<I: 'static> InputGroup<I> {
             // (probably).
             let named_input: NamedInput<'static, I> = unsafe { transmute_lifetime(named_input) };
             if let Some(input_size) = input.input_size_in_bytes {
-                self.runner.set_input_size(input_size);
+                self.bench_group.set_input_size(input_size); // group reads it on register
             }
-            self.runner
+            self.bench_group
                 .register_named_with_input(named_bench, named_input);
         }
     }
 
     /// Run the benchmarks and report the results.
     pub fn run(&mut self) {
+        if let Some(name) = &self.name {
+            println!("{}", name.black().on_red().invert().bold());
+        }
         let input_name_to_ordinal: HashMap<String, usize> = self
             .inputs
             .iter()
             .enumerate()
             .map(|(i, input)| (input.name.clone(), i))
             .collect();
-        self.runner
+        self.bench_group
             .benches
             .sort_by_key(|bench| std::cmp::Reverse(input_name_to_ordinal[bench.get_input_name()]));
-        self.runner.run();
+        group_by_mut(
+            self.bench_group.benches.as_mut_slice(),
+            |b| b.get_input_name(),
+            |group| {
+                let input_name = group[0].get_input_name().to_owned();
+                // NOTE(review): the input name doubles as the group name here, so
+                // `run_group` prints the header, also when the input name is empty
+                // (a blank header line). Confirm this is intended.
+                self.bench_group.runner.run_group(Some(&input_name), group);
+            },
+        );
     }
 }
 
diff --git a/src/bench_runner.rs b/src/bench_runner.rs
index 8c520d8..359646a 100644
--- a/src/bench_runner.rs
+++ b/src/bench_runner.rs
@@ -4,7 +4,7 @@ use crate::{
     bench::{Bench, InputWithBenchmark, NamedBench},
     black_box, parse_args,
     report::report_group,
-    Options,
+    BenchGroup, Options,
 };
 use peakmem_alloc::*;
 use yansi::Paint;
@@ -16,10 +16,9 @@ pub(crate) const NUM_RUNS: usize = 32;
 ///
 /// BenchRunner is a collection of benchmarks.
 /// It is self-contained and can be run independently.
-pub struct BenchRunner<'a> {
-    /// Name of the benchmark group.
-    name: Option<String>,
-    pub(crate) benches: Vec<Box<dyn Bench<'a> + 'a>>,
+#[derive(Clone)]
+pub struct BenchRunner {
+    //pub(crate) benches: Vec<Box<dyn Bench<'a> + 'a>>,
     alloc: Option<Alloc>,
     cache_trasher: CacheTrasher,
     pub(crate) options: Options,
@@ -39,23 +38,32 @@ pub struct NamedInput<'a, I> {
     pub(crate) data: &'a I,
 }
 
-const EMPTY_INPUT: NamedInput<()> = NamedInput {
+pub const EMPTY_INPUT: NamedInput<()> = NamedInput {
     name: Cow::Borrowed(""),
     data: &(),
 };
 
-impl<'a> Default for BenchRunner<'a> {
+impl Default for BenchRunner {
     fn default() -> Self {
         Self::new()
     }
 }
 
-impl<'a> BenchRunner<'a> {
+impl BenchRunner {
     /// The inputs are a vector of tuples, where the first element is the name of the input and the
     /// second element is the input itself.
     pub fn new() -> Self {
         Self::new_with_options(parse_args())
     }
+
+    /// Prints `name` as a header line, then creates a runner with the options
+    /// returned by `parse_args()`. The name itself is not stored on the runner.
+    pub fn with_name<S: AsRef<str>>(name: S) -> Self {
+        println!("{}", name.as_ref().black().on_red().invert().bold());
+
+        Self::new_with_options(parse_args())
+    }
+
     /// The inputs are a vector of tuples, where the first element is the name of the input and the
     /// second element is the input itself.
     pub(crate) fn new_with_options(options: Options) -> Self {
@@ -63,15 +71,25 @@ impl<'a> BenchRunner<'a> {
         yansi::whenever(Condition::TTY_AND_COLOR);
 
         BenchRunner {
-            benches: Vec::new(),
             cache_trasher: CacheTrasher::new(1024 * 1024 * 16),
             options,
             alloc: None,
-            name: None,
             input_size_in_bytes: None,
             num_iter: None,
         }
     }
+
+    /// Creates an unnamed group that runs with a clone of this runner's current state.
+    pub fn new_group<'a>(&self) -> BenchGroup<'a> {
+        let runner = self.clone();
+        BenchGroup::new(runner)
+    }
+    /// Creates a group called `name` that runs with a clone of this runner's current state.
+    pub fn new_group_with_name<'a, S: Into<String>>(&self, name: S) -> BenchGroup<'a> {
+        let runner = self.clone();
+        BenchGroup::with_name(runner, name)
+    }
+
     /// Set the peak mem allocator to be used for the benchmarks.
     /// This will report the peak memory consumption of the benchmarks.
     pub fn set_alloc<A: GlobalAlloc + 'static>(&mut self, alloc: &'static PeakMemAlloc<A>) {
@@ -110,13 +128,6 @@ impl<'a> BenchRunner<'a> {
         self.num_iter = Some(num_iter);
     }
 
-    /// Set the name of the group.
-    /// The name is printed before the benchmarks are run.
-    /// It is also used to distinguish when writing the results to disk.
-    pub fn set_name<S: Into<String>>(&mut self, name: S) {
-        self.name = Some(name.into());
-    }
-
     /// Set the options to the given value.
     /// This will overwrite all current options.
     ///
@@ -144,57 +155,21 @@ impl<'a> BenchRunner<'a> {
         self.options.filter = filter;
     }
 
-    /// Register a benchmark with the given name and function.
-    pub fn register_with_input<I, F, S: Into<String>>(
-        &mut self,
-        bench_name: S,
-        input_name: S,
-        input: &'a I,
-        fun: F,
-    ) where
-        F: Fn(&'a I) + 'static,
+    /// Benchmarks a single function right away, as its own unnamed group without input.
+    pub fn bench_function<F>(&mut self, name: String, f: F) -> &mut Self
+    where
+        F: Fn(&()) + 'static,
     {
-        let name = bench_name.into();
-        let input_name = input_name.into();
-
-        let bench = NamedBench::new(name, Box::new(fun));
-        self.register_named_with_input(
-            bench,
-            NamedInput {
-                name: Cow::Owned(input_name),
-                data: input,
-            },
-        );
-    }
-    /// Register a benchmark with the given name and function.
-    pub(crate) fn register_named_with_input<I>(
-        &mut self,
-        bench: NamedBench<'a, I>,
-        input: NamedInput<'a, I>,
-    ) {
-        if let Some(filter) = &self.options.filter {
-            if !bench.name.contains(filter) && !input.name.contains(filter) {
-                return;
-            }
-        }
-
+        let named_bench = NamedBench::new(name, Box::new(f));
         let bundle = InputWithBenchmark::new(
-            input,
+            EMPTY_INPUT,
             self.input_size_in_bytes,
-            bench,
+            named_bench,
             self.options.enable_perf,
         );
 
-        self.benches.push(Box::new(bundle));
-    }
-    /// Register a benchmark with the given name and function.
-    pub fn register<I, F, S: Into<String>>(&mut self, name: S, fun: F)
-    where
-        F: Fn(&'a ()) + 'static,
-    {
-        let name = name.into();
-        let bench = NamedBench::new(name, Box::new(fun));
-        self.register_named_with_input(bench, EMPTY_INPUT);
+        self.run_group(None, &mut [Box::new(bundle)]);
+        self
     }
 
     /// Trash CPU cache between bench runs. Defaults to false.
@@ -203,64 +178,55 @@ impl<'a> BenchRunner<'a> {
     }
 
     /// Run the benchmarks and report the results.
-    pub fn run(&mut self) {
-        if self.benches.is_empty() {
+    pub fn run_group<'a>(&self, name: Option<&str>, group: &mut [Box<dyn Bench<'a> + 'a>]) {
+        if group.is_empty() {
             return;
         }
 
-        if let Some(name) = &self.name {
-            println!("{}", name.black().on_red().invert().bold());
+        if let Some(name) = &name {
+            println!("{}", name.black().on_yellow().invert().bold());
         }
 
-        // TODO: group by should be configurable
-        group_by_mut(
-            &mut self.benches,
-            |b| b.get_input_name(),
-            |group| {
-                let input_name = group[0].get_input_name();
-                if !input_name.is_empty() {
-                    println!("{}", input_name.black().on_yellow().invert().italic());
-                }
-
-                const MAX_GROUP_SIZE: usize = 5;
-                if self.options.verbose && group.len() > MAX_GROUP_SIZE {
-                    println!(
-                        "Group is quite big, splitting into chunks of {} elements",
-                        MAX_GROUP_SIZE
-                    );
-                }
+        const MAX_GROUP_SIZE: usize = 5;
+        if self.options.verbose && group.len() > MAX_GROUP_SIZE {
+            println!(
+                "Group is quite big, splitting into chunks of {} elements",
+                MAX_GROUP_SIZE
+            );
+        }
 
-                // If the group is quite big, we don't want to create too big chunks, which causes
-                // slow tests, therefore a chunk is at most 5 elements large.
-                for group in group.chunks_mut(MAX_GROUP_SIZE) {
-                    Self::warm_up_group_and_set_iter(group, self.num_iter, self.options.verbose);
-
-                    if self.options.interleave {
-                        Self::run_interleaved(
-                            group,
-                            &self.alloc,
-                            self.options.cache_trasher.then_some(&self.cache_trasher),
-                        );
-                    } else {
-                        Self::run_sequential(group, &self.alloc);
-                    }
-                }
-                // We report at the end, so the alignment is correct (could be calculated up front)
-                report_group(&self.name, group, self.alloc.is_some());
-            },
-        );
+        // If the group is quite big, we don't want to create too big chunks, which causes
+        // slow tests, therefore a chunk is at most 5 elements large.
+        for group in group.chunks_mut(MAX_GROUP_SIZE) {
+            Self::warm_up_group_and_set_iter(group, self.num_iter, self.options.verbose);
 
-        self.clear_results();
-    }
+            if self.options.interleave {
+                Self::run_interleaved(
+                    group,
+                    &self.alloc,
+                    self.options.cache_trasher.then_some(&self.cache_trasher),
+                );
+            } else {
+                Self::run_sequential(group, &self.alloc);
+            }
+        }
+        // We report at the end, so the alignment is correct (could be calculated up front)
+        report_group(name, group, self.alloc.is_some());
 
-    /// Clear the stored results of the benchmarks.
-    pub fn clear_results(&mut self) {
-        for bench in &mut self.benches {
+        //self.clear_results();
+        for bench in group {
             bench.clear_results();
         }
     }
 
-    fn run_sequential(benches: &mut [Box<dyn Bench<'a> + 'a>], alloc: &Option<Alloc>) {
+    // /// Clear the stored results of the benchmarks.
+    //pub fn clear_results(&mut self) {
+    //for bench in &mut self.benches {
+    //bench.clear_results();
+    //}
+    //}
+
+    fn run_sequential<'a>(benches: &mut [Box<dyn Bench<'a> + 'a>], alloc: &Option<Alloc>) {
         for bench in benches {
             for iteration in 0..NUM_RUNS {
                 alloca::with_alloca(
@@ -274,7 +240,7 @@ impl<'a> BenchRunner<'a> {
         }
     }
 
-    fn run_interleaved(
+    fn run_interleaved<'a>(
         benches: &mut [Box<dyn Bench<'a> + 'a>],
         alloc: &Option<Alloc>,
         cache_trasher: Option<&CacheTrasher>,
@@ -317,8 +283,8 @@ impl<'a> BenchRunner<'a> {
         }
     }
 
-    fn warm_up_group_and_set_iter(
-        benches: &mut [Box<dyn Bench<'a> + 'a>],
+    fn warm_up_group_and_set_iter<'b>(
+        benches: &mut [Box<dyn Bench<'b> + 'b>],
         num_iter: Option<usize>,
         verbose: bool,
     ) {
@@ -428,6 +394,7 @@ pub fn group_by_mut<T, K: Ord + ?Sized, F>(
 /// Performs a dummy reads from memory to spoil given amount of CPU cache
 ///
 /// Uses cache aligned data arrays to perform minimum amount of reads possible to spoil the cache
+#[derive(Clone)]
 struct CacheTrasher {
     cache_lines: Vec<CacheLine>,
 }
diff --git a/src/lib.rs b/src/lib.rs
index 048aa55..8af4a7c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -75,6 +75,7 @@ extern crate test;
 pub use peakmem_alloc::*;
 
 pub(crate) mod bench;
+mod bench_group;
 mod bench_input_group;
 pub(crate) mod bench_runner;
 pub(crate) mod format;
@@ -82,6 +83,7 @@ pub(crate) mod profiler;
 
 pub(crate) mod report;
 pub(crate) mod stats;
+pub use bench_group::BenchGroup;
 pub use bench_input_group::InputGroup;
 pub use bench_runner::BenchRunner;
 pub use bench_runner::NamedInput;
@@ -93,7 +95,7 @@ pub use std::hint::black_box;
 
 /// The options to configure the benchmarking.
 /// The can be set on `InputGroup`.
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
 pub struct Options {
     /// Interleave benchmarks
     pub interleave: bool,
diff --git a/src/profiler/mod.rs b/src/profiler/mod.rs
index 06709c1..fd3f943 100644
--- a/src/profiler/mod.rs
+++ b/src/profiler/mod.rs
@@ -53,7 +53,22 @@ fn print_counter_value<F: Fn(&CounterValues) -> f64>(
         })
         .unwrap_or_default();
 
-    format!("{}: {:.3} {}", name, f(stats), diff_str,)
+    format!("{}: {} {}", name, format_number(f(stats)), diff_str,)
+}
+
+/// Formats `n` so that integer and fractional digits add up to about five.
+fn format_number(n: f64) -> String {
+    const MAX_DIGITS: usize = 5;
+    // `unsigned_abs`: `abs()` would overflow on `i64::MIN` (very negative or -inf `n`).
+    let integer_part = (n.trunc() as i64).unsigned_abs();
+    let integer_length = if integer_part != 0 {
+        integer_part.to_string().len()
+    } else if n == 0.0 {
+        1 // Zero still occupies the digit before the decimal point
+    } else {
+        0 // Magnitude below 1: no integer digit counts against the budget
+    };
+    format!("{:.*}", MAX_DIGITS.saturating_sub(integer_length), n)
+}
 
 impl CounterValues {
diff --git a/src/report.rs b/src/report.rs
index 1d1d55d..5a9669a 100644
--- a/src/report.rs
+++ b/src/report.rs
@@ -31,7 +31,7 @@ pub fn get_output_directory() -> PathBuf {
 }
 
 pub(crate) fn report_group<'a>(
-    bench_group_name: &Option<String>,
+    bench_group_name: Option<&str>,
     benches: &mut [Box<dyn Bench<'a> + 'a>],
     report_memory: bool,
 ) {