From 8546a000923fe7cd6b57c1d9fcbd65970addf506 Mon Sep 17 00:00:00 2001 From: nsosio Date: Thu, 16 Nov 2023 13:30:43 +0100 Subject: [PATCH 01/25] gha on push --- .github/workflows/precommit.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/precommit.yaml b/.github/workflows/precommit.yaml index 076db3ba..6a7a6b7d 100644 --- a/.github/workflows/precommit.yaml +++ b/.github/workflows/precommit.yaml @@ -3,6 +3,7 @@ name: pre-commit on: pull_request: branches: [main] + push: jobs: pre-commit: From 718027ca89be988712ec051437a8869956556129 Mon Sep 17 00:00:00 2001 From: nsosio Date: Thu, 16 Nov 2023 13:57:11 +0100 Subject: [PATCH 02/25] gha on push main branch --- .github/workflows/precommit.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/precommit.yaml b/.github/workflows/precommit.yaml index 6a7a6b7d..f1478189 100644 --- a/.github/workflows/precommit.yaml +++ b/.github/workflows/precommit.yaml @@ -4,6 +4,7 @@ on: pull_request: branches: [main] push: + branches: [main] jobs: pre-commit: From 9b4bfbf688f5a778966640f3bfc1f8b5d92b7344 Mon Sep 17 00:00:00 2001 From: nsosio Date: Thu, 16 Nov 2023 16:35:51 +0000 Subject: [PATCH 03/25] replace hardcoded cuda version with cuda from system --- benchmark.sh | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/benchmark.sh b/benchmark.sh index d2ca1d3c..c6204a0b 100755 --- a/benchmark.sh +++ b/benchmark.sh @@ -87,6 +87,20 @@ check_jq() { fi } +get_torch_cuda_version() { + # Get the full CUDA version using nvcc + CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}') + + # Remove dots from the CUDA version + CUDA_VERSION_NUMERIC=$(echo "${CUDA_VERSION}" | tr -d '.') + + # Set TORCH_CUDA_VERSION variable + TORCH_CUDA_VERSION="cu${CUDA_VERSION_NUMERIC}" + + # Return the dynamically set variable + echo "${TORCH_CUDA_VERSION}" +} + # Function to download models download_models() { echo -e "\nDownloading models..." 
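For reference, the string this helper emits is the `cuXYZ` suffix that the Rust build (via `tch`/`torch-sys`) reads from `TORCH_CUDA_VERSION`. A minimal sketch of the same mapping, assuming `nvcc --version` prints a release line like `Cuda compilation tools, release 11.7, V11.7.64` — note that `awk '{print $6}'` above is tied to nvcc's exact output layout (with the line shown here, the version number is field 5, `11.7,`), so the emitted value is worth verifying:

```bash
# Map nvcc's "release 11.7" to the "cu117" suffix expected by torch-sys
nvcc --version | sed -n 's/.*release \([0-9][0-9]*\)\.\([0-9][0-9]*\).*/cu\1\2/p'
```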
@@ -126,7 +140,7 @@ run_benchmarks() { if [ "$DEVICE" == "cpu" ] || [ "$USE_NVIDIA" == true ]; then # Run Rust benchmarks if [ "$DEVICE" == "gpu" ] && [ "$PLATFORM" != "Darwin" ]; then - TORCH_CUDA_VERSION=cu117 + TORCH_CUDA_VERSION=$(get_torch_cuda_version) fi cargo run --release --bin sample \ --manifest-path="$DIR/rust_bench/llama2-burn/Cargo.toml" \ From 3ceae9be14b20df2541111407237a95c0ececfea Mon Sep 17 00:00:00 2001 From: nsosio Date: Mon, 20 Nov 2023 10:32:45 +0100 Subject: [PATCH 04/25] gha to update README.md update date --- .github/workflows/update_readme.yaml | 29 +++++++ README.md.template | 118 +++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 .github/workflows/update_readme.yaml create mode 100644 README.md.template diff --git a/.github/workflows/update_readme.yaml b/.github/workflows/update_readme.yaml new file mode 100644 index 00000000..27bad38b --- /dev/null +++ b/.github/workflows/update_readme.yaml @@ -0,0 +1,29 @@ +name: Update README + +on: + push: + branches: ["main"] + paths: + - README.md.template + +jobs: + update-readme: + runs-on: ubuntu-latest + steps: + - name: Checkout Code Repository + uses: actions/checkout@v3 + + - name: Update README + run: sed "s||$(date -u +"%dth %B %Y")|g" README.md.template > README.md + + - name: Commit changes + run: | + git config --global user.email "actions@github.com" + git config --global user.name "GitHub Actions" + git add README.md + git commit -m "Update placeholder in README.md" || true + + - name: Push changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md.template b/README.md.template new file mode 100644 index 00000000..23c3a2c1 --- /dev/null +++ b/README.md.template @@ -0,0 +1,118 @@ +# benchmarks +MLOps Engines, Frameworks, and Languages benchmarks over main stream AI Models. + +## Tool + +The benchmarking tool comprises three main scripts: +- `benchmark.sh` for running the end-to-end benchmarking +- `download.sh` which is internally used by the benchmark script to download the needed model files based on a configuration +- `setup.sh` script for setup of dependencies and needed formats conversion + +### benchmark + +This script runs benchmarks for a transformer model using both Rust and Python implementations. It provides options to customize the benchmarks, such as the prompt, repetitions, maximum tokens, device, and NVIDIA flag. + +```bash +./benchmark.sh [OPTIONS] +``` +where `OPTIONS`: +- `-p, --prompt`: Prompt for benchmarks (default: 'Explain what is a transformer') +- `-r, --repetitions`: Number of repetitions for benchmarks (default: 2) +- `-m, --max_tokens`: Maximum number of tokens for benchmarks (default: 100) +- `-d, --device`: Device for benchmarks (possible values: 'gpu' or 'cpu', default: 'cpu') +- `--nvidia`: Use NVIDIA for benchmarks (default: false) + +### download + +Downloads files from a list of URLs specified in a JSON file. The JSON file should contain an array of objects, each with a 'url', 'file', and 'folder' property. The script checks if the file already exists before downloading it. + +```bash +./download.sh --models --cache --force-download +``` +Options +- `--models`: JSON file specifying the models to download (default: models.json) +- `--cache`: Cache file to keep track of downloaded files (default: cache.log) +- `--force-download`: Force download of all files, removing existing files and cache + +### setup +1. 
Creates a python virtual environment `venv` and installs project requirements. +3. Converts and stores models in different formats. + +```bash +./setup.sh +``` + +## ML Engines: Feature Table + +| Features | pytorch | burn | llama.cpp | candle | tinygrad | onnxruntime | CTranslate2 | +| --------------------------- | ------- | ---- | --------- | ------ | -------- | ----------- | ----------- | +| Inference support | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| 16-bit quantization support | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| 8-bit quantization support | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | +| 4-bit quantization support | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | +| 2/3bit quantization support | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | +| CUDA support | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| ROCM support | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| Intel OneAPI/SYCL support | ✅** | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| Mac M1/M2 support | ✅ | ✅ | ✅ | ⭐ | ✅ | ✅ | ⭐ | +| BLAS support(CPU) | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | +| Model Parallel support | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | +| Tensor Parallel support | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | +| Onnx Format support | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| Training support | ✅ | 🌟 | ❌ | 🌟 | ❌ | ❌ | ❌ | + +⭐ = No Metal Support +🌟 = Partial Support for Training (Finetuning already works, but training from scratch may not work) + +## Benchmarking ML Engines + +### A100 80GB Inference Bench: + +Model: LLAMA-2-7B + +CUDA Version: 11.7 + +Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --nvidia --prompt 'Explain what is a transformer'` + +| Engine | float32 | float16 | int8 | int4 | +|-------------|--------------|--------------|--------------|--------------| +| burn | 13.28 ± 0.79 | - | - | - | +| candle | - | 26.30 ± 0.29 | - | - | +| llama.cpp | - | - | 67.64 ± 22.57| 106.21 ± 2.21| +| ctranslate | - | 58.54 ± 13.24| 34.22 ± 6.29 | - | +| tinygrad | - | 20.13 ± 1.35 | - | - | + +*(data updated: ) + + +### M2 MAX 32GB Inference Bench: + +#### CPU + +Model: LLAMA-2-7B + +CUDA Version: NA + +Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cpu --prompt 'Explain what is a transformer'` + +| Engine | float32 | float16 | int8 | int4 | +|-------------|--------------|--------------|--------------|--------------| +| burn | 0.30 ± 0.09 | - | - | - | +| candle | - | 3.43 ± 0.02 | - | - | +| llama.cpp | - | - | 14.41 ± 1.59 | 20.96 ± 1.94 | +| ctranslate | - | - | 2.11 ± 0.73 | - | +| tinygrad | - | 4.21 ± 0.38 | - | - | + +#### GPU (Metal) + +Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --prompt 'Explain what is a transformer'` + +| Engine | float32 | float16 | int8 | int4 | +|-------------|--------------|--------------|--------------|--------------| +| burn | - | - | - | - | +| candle | - | - | - | - | +| llama.cpp | - | - | 31.24 ± 7.82 | 46.75 ± 9.55 | +| ctranslate | - | - | - | - | +| tinygrad | - | 29.78 ± 1.18 | - | - | + +*(data updated: ) From f6189a42cb229345940546ed593c6a0a3dbf10f3 Mon Sep 17 00:00:00 2001 From: nsosio Date: Mon, 20 Nov 2023 11:03:44 +0100 Subject: [PATCH 05/25] fixed GHA --- .github/workflows/update_readme.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/update_readme.yaml b/.github/workflows/update_readme.yaml index 27bad38b..b641f284 100644 --- a/.github/workflows/update_readme.yaml +++ b/.github/workflows/update_readme.yaml @@ -27,3 +27,4 @@ jobs: uses: ad-m/github-push-action@master with: github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ${{ github.ref }} From 5582fb46386ef0587805b2642cb9bde8612048c8 Mon Sep 17 00:00:00 2001 From: GitHub 
Actions Date: Mon, 20 Nov 2023 11:51:50 +0000 Subject: [PATCH 06/25] Update placeholder in README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6e12d2cf..8d8eefdd 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --nvidia | ctranslate | - | 58.54 ± 13.24| 34.22 ± 6.29 | - | | tinygrad | - | 20.13 ± 1.35 | - | - | -*(data updated: 15th November 2023) +*(data updated: 20th November 2023) ### M2 MAX 32GB Inference Bench: @@ -115,4 +115,4 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --prompt | ctranslate | - | - | - | - | | tinygrad | - | 29.78 ± 1.18 | - | - | -*(data updated: 15th November 2023) +*(data updated: 20th November 2023) From 114b8182d118f0800bb58aefe614e6da5c6b7782 Mon Sep 17 00:00:00 2001 From: nsosio Date: Mon, 20 Nov 2023 14:36:56 +0000 Subject: [PATCH 07/25] refactored ctranslate benchmark --- ctranslate_bench/bench.py | 153 ++++++++++++++++++++++++++++++ ctranslate_bench/bench.sh | 120 +++++++++++++++++++++++ ctranslate_bench/requirements.txt | 4 + ctranslate_bench/setup.sh | 38 ++++++++ 4 files changed, 315 insertions(+) create mode 100644 ctranslate_bench/bench.py create mode 100755 ctranslate_bench/bench.sh create mode 100644 ctranslate_bench/requirements.txt create mode 100644 ctranslate_bench/setup.sh diff --git a/ctranslate_bench/bench.py b/ctranslate_bench/bench.py new file mode 100644 index 00000000..e3aa1fb5 --- /dev/null +++ b/ctranslate_bench/bench.py @@ -0,0 +1,153 @@ +import argparse +import logging +import os +import sys +import time +from collections import defaultdict + +import ctranslate2 +import numpy as np +import sentencepiece as spm + +logging.getLogger("ctranslate2").setLevel(logging.ERROR) +logging.basicConfig( + stream=sys.stdout, + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +B_INST, E_INST = "[INST]", "[/INST]" +B_SYS, E_SYS = "<>\n", "\n<>\n\n" + + +def get_compute_types(device): + compute_types = set() + if device in ("cuda", "cpu"): + return set(ctranslate2.get_supported_compute_types(device)) + else: + return compute_types + + +class CTranslateBenchmark: + def __init__(self, model_path, device, compute_type): + self.model_path = model_path + self.results = [] + self.device = device + self.compute_type = compute_type + + def load_model(self): + self.generator = ctranslate2.Generator( + self.model_path, + device=self.device, + compute_type=self.compute_type, + ) + self.sp = spm.SentencePieceProcessor( + os.path.join(self.model_path, "tokenizer.model") + ) + return self + + def run_model(self, prompt, max_tokens): + prompt_tokens = [""] + self.sp.encode_as_pieces( + f"{B_INST} {prompt.strip()} {E_INST}" + ) + start = time.time() + step_results = self.generator.generate_tokens( + prompt_tokens, + max_length=max_tokens, + sampling_temperature=0.6, + sampling_topk=20, + sampling_topp=1, + ) + count = 0 + for _ in self.generate_words(step_results): + count += 1 + return count / (time.time() - start) + + def benchmark(self, prompt, max_tokens, repetitions): + for i in range(repetitions): + logging.info( + f"Running repetition [{str(i+1).zfill(len(str(repetitions)))}/{repetitions}]" + ) + tokens_per_second = self.run_model(prompt, max_tokens) + self.results.append(tokens_per_second) + + def generate_words(self, step_results): + tokens_buffer = [] + + for step_result in step_results: + is_new_word = step_result.token.startswith("▁") + + if 
is_new_word and tokens_buffer: + word = self.sp.decode(tokens_buffer) + if word: + yield word + tokens_buffer = [] + + tokens_buffer.append(step_result.token_id) + + if tokens_buffer: + word = self.sp.decode(tokens_buffer) + if word: + yield word + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="CTranslate Benchmark Llama model.") + parser.add_argument( + "--prompt", + type=str, + help="The prompt for the model.", + ) + parser.add_argument("--max_tokens", type=int, help="The maximum number of tokens.") + parser.add_argument( + "--repetitions", + type=int, + help="The number of repetitions for the benchmark.", + ) + parser.add_argument( + "--device", + help="Device to use for the benchmark.", + ) + parser.add_argument( + "--log_file", + type=str, + help="Path to the log file for writing logs (in append mode).", + ) + parser.add_argument( + "--models_dir", + type=str, + help="Path to the models directory.", + ) + args = parser.parse_args() + logging.info( + f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} " + + f"repetitions={args.repetitions} device={args.device}" + ) + report = defaultdict(lambda: defaultdict(float)) + compute_types = get_compute_types(args.device) + for compute_type in compute_types.intersection({"float16", "int8"}): + logging.info(f"Running ctranslate benchmark with {compute_type}") + ctranslate_bench = CTranslateBenchmark( + f"{args.models_dir}/llama-2-7b-hf-float16", + device=args.device, + compute_type=compute_type, + ).load_model() + ctranslate_bench.benchmark( + max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions + ) + report["ctranslate"][compute_type] = { + "mean": np.mean(ctranslate_bench.results), + "std": np.std(ctranslate_bench.results), + } + + logging.info("Benchmark report") + with open(args.log_file, "a") as file: + for framework, quantizations in report.items(): + for quantization, stats in quantizations.items(): + logging.info( + f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}" + ) + print( + f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}", + file=file, + ) diff --git a/ctranslate_bench/bench.sh b/ctranslate_bench/bench.sh new file mode 100755 index 00000000..608b289c --- /dev/null +++ b/ctranslate_bench/bench.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Function to print script usage +print_usage() { + echo "Usage: $0 [OPTIONS]" + echo "OPTIONS:" + echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" + echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" + echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -lf, --log_file Logging file name." + echo " -md, --models_dir Models directory." + echo " -h, --help Show this help message" + exit 1 +} + +check_platform() { + local platform=$(uname -s) + if [[ "$platform" == "Linux" ]]; then + echo "Running on Linux." + elif [[ "$platform" == "Darwin" ]]; then + echo "Running on Mac OS." + else + echo "Unknown platform." + exit 1 + fi +} + +check_python() { + if command -v python &> /dev/null + then + echo -e "\nUsing $(python --version)." + else + echo -e "\nPython does not exist." + exit 1 + fi +} + +setup() { + echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." 
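+    # setup.sh (below) creates the venv, installs requirements and converts
+    # the llama-2-7b-hf checkpoint to a CTranslate2 float16 model.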
+ bash $SCRIPT_DIR/setup.sh "$1" +} + +run_benchmarks() { + local PROMPT="$1" + local REPETITIONS="$2" + local MAX_TOKENS="$3" + local DEVICE="$4" + local LOG_FILENAME="$5" + local MODELS_DIR="$6" + + python $SCRIPT_DIR/bench.py \ + --prompt "$PROMPT" \ + --repetitions "$REPETITIONS" \ + --max_tokens $MAX_TOKENS \ + --log_file "$LOG_FILENAME" \ + --models_dir "$MODELS_DIR" \ + --device "$DEVICE" + +} + +# Parse command-line arguments +while [ "$#" -gt 0 ]; do + case "$1" in + -p|--prompt) + PROMPT="$2" + shift 2 + ;; + -r|--repetitions) + REPETITIONS="$2" + shift 2 + ;; + -m|--max_tokens) + MAX_TOKENS="$2" + shift 2 + ;; + -d|--device) + DEVICE="$2" + case "$DEVICE" in + "cuda" | "metal" | "cpu") + ;; + *) + echo "Invalid value for --device. Please use 'cuda', 'gpu' or 'cpu'." + print_usage + ;; + esac + shift 2 + ;; + -lf|--log_file) + LOG_FILENAME="$2" + shift 2 + ;; + -md|--models_dir) + MODELS_DIR="$2" + shift 2 + ;; + -h|--help) + print_usage + ;; + *) + echo "Unknown option: $1" + print_usage + ;; + esac +done +# Set default values if not provided +PROMPT="${PROMPT:-"Explain what is a transformer"}" +REPETITIONS="${REPETITIONS:-10}" +MAX_TOKENS="${MAX_TOKENS:-100}" +DEVICE="${DEVICE:-'cpu'}" +LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}" +MODELS_DIR="${MODELS_DIR:-"./models"}" + +check_platform +check_python +setup "$MODELS_DIR" +run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR" diff --git a/ctranslate_bench/requirements.txt b/ctranslate_bench/requirements.txt new file mode 100644 index 00000000..e516d057 --- /dev/null +++ b/ctranslate_bench/requirements.txt @@ -0,0 +1,4 @@ +sentencepiece==0.1.99 +ctranslate2==3.20.0 +transformers==4.35.0 +torch==2.1.0 diff --git a/ctranslate_bench/setup.sh b/ctranslate_bench/setup.sh new file mode 100644 index 00000000..ff98dec8 --- /dev/null +++ b/ctranslate_bench/setup.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +################################################################################ +# Script: setup.sh +# Description: This script automates the setup of a virtual environment, +# installs project requirements, converts and stores models. +################################################################################ + +set -euo pipefail + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +# Define directory paths +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +VENV_DIR="$SCRIPT_DIR/venv" +MODELS_FOLDER="$1" +LLAMA_HF_MODEL_DIR="$MODELS_FOLDER/llama-2-7b-hf" + +if [ ! -d "$VENV_DIR" ]; then + python -m venv "$VENV_DIR" + echo "Virtual environment '$VENV_DIR' created." + source $VENV_DIR/bin/activate + $VENV_DIR/bin/pip install --upgrade pip > /dev/null +else + source $VENV_DIR/bin/activate +fi + +$VENV_DIR/bin/pip install -r $SCRIPT_DIR/requirements.txt > /dev/null + +if [ ! -d "$LLAMA_HF_MODEL_DIR-float16" ]; then + echo "Creating llama-2-7b-hf-float16 model..." + ct2-transformers-converter --model "$LLAMA_HF_MODEL_DIR/" --quantization float16 --output_dir "$LLAMA_HF_MODEL_DIR-float16" --copy_files tokenizer.model +else + echo "Model llama-2-7b-hf-float16 already exists!" 
+fi From ad4e543f3cfafed6025c057470bff2342e497e43 Mon Sep 17 00:00:00 2001 From: nsosio Date: Mon, 20 Nov 2023 18:19:29 +0000 Subject: [PATCH 08/25] started refactored ctranslate benchmark; cuda not working --- llamacpp_bench/bench.py | 101 +++++++++++++++++++++++++++ llamacpp_bench/bench.sh | 120 ++++++++++++++++++++++++++++++++ llamacpp_bench/requirements.txt | 2 + llamacpp_bench/setup.sh | 40 +++++++++++ 4 files changed, 263 insertions(+) create mode 100644 llamacpp_bench/bench.py create mode 100755 llamacpp_bench/bench.sh create mode 100644 llamacpp_bench/requirements.txt create mode 100755 llamacpp_bench/setup.sh diff --git a/llamacpp_bench/bench.py b/llamacpp_bench/bench.py new file mode 100644 index 00000000..d494cde1 --- /dev/null +++ b/llamacpp_bench/bench.py @@ -0,0 +1,101 @@ +import argparse +import logging +import sys +import time +from collections import defaultdict + +import numpy as np +from llama_cpp import Llama + +logging.getLogger("llama_cpp").setLevel(logging.ERROR) +logging.basicConfig( + stream=sys.stdout, + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + + +class LlamaCPPBenchmark: + def __init__(self, model_path, device): + self.model_path = model_path + self.device = device + self.results = [] + + def load_model(self): + self.model = Llama(model_path=self.model_path, n_gpu_layers=-1, verbose=True) + return self + + def run_model(self, prompt, max_tokens): + start = time.time() + output = self.model.create_completion(prompt, max_tokens=max_tokens) + tokens = output["usage"]["completion_tokens"] + return tokens / (time.time() - start) + + def benchmark(self, prompt, max_tokens, repetitions): + for i in range(repetitions): + logging.info( + f"Running repetition [{str(i+1).zfill(len(str(repetitions)))}/{repetitions}]" + ) + tokens_per_second = self.run_model(prompt, max_tokens) + self.results.append(tokens_per_second) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="llama.cpp Benchmark Llama model.") + parser.add_argument( + "--prompt", + type=str, + help="The prompt for the model.", + ) + parser.add_argument("--max_tokens", type=int, help="The maximum number of tokens.") + parser.add_argument( + "--repetitions", + type=int, + help="The number of repetitions for the benchmark.", + ) + parser.add_argument( + "--device", + help="Device to use for the benchmark.", + ) + parser.add_argument( + "--log_file", + type=str, + help="Path to the log file for writing logs (in append mode).", + ) + parser.add_argument( + "--models_dir", + type=str, + help="Path to the models directory.", + ) + args = parser.parse_args() + logging.info( + f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} " + + f"repetitions={args.repetitions} device={args.device}" + ) + report = defaultdict(lambda: defaultdict(float)) + for quantize in ("Q8_0", "Q4_0"): + logging.info(f"Running llama-cpp benchmark with {quantize}") + llamacpp_bench = LlamaCPPBenchmark( + f"{args.models_dir}/llama-2-7b-gguf/llama-2-7b.{quantize}.gguf", + device=args.device, + ).load_model() + llamacpp_bench.benchmark( + max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions + ) + q = "int8" if quantize == "Q8_0" else "int4" + report["llama.cpp"][q] = { + "mean": np.mean(llamacpp_bench.results), + "std": np.std(llamacpp_bench.results), + } + + logging.info("Benchmark report") + with open(args.log_file, "a") as file: + for framework, quantizations in report.items(): + for quantization, stats in quantizations.items(): + 
logging.info( + f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}" + ) + print( + f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}", + file=file, + ) diff --git a/llamacpp_bench/bench.sh b/llamacpp_bench/bench.sh new file mode 100755 index 00000000..5d626811 --- /dev/null +++ b/llamacpp_bench/bench.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Function to print script usage +print_usage() { + echo "Usage: $0 [OPTIONS]" + echo "OPTIONS:" + echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" + echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" + echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -lf, --log_file Logging file name." + echo " -md, --models_dir Models directory." + echo " -h, --help Show this help message" + exit 1 +} + +check_platform() { + local platform=$(uname -s) + if [[ "$platform" == "Linux" ]]; then + echo "Running on Linux." + elif [[ "$platform" == "Darwin" ]]; then + echo "Running on Mac OS." + else + echo "Unknown platform." + exit 1 + fi +} + +check_python() { + if command -v python &> /dev/null + then + echo -e "\nUsing $(python --version)." + else + echo -e "\nPython does not exist." + exit 1 + fi +} + +setup() { + echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." + bash $SCRIPT_DIR/setup.sh "$1" +} + +run_benchmarks() { + local PROMPT="$1" + local REPETITIONS="$2" + local MAX_TOKENS="$3" + local DEVICE="$4" + local LOG_FILENAME="$5" + local MODELS_DIR="$6" + + python $SCRIPT_DIR/bench.py \ + --prompt "$PROMPT" \ + --repetitions "$REPETITIONS" \ + --max_tokens $MAX_TOKENS \ + --log_file "$LOG_FILENAME" \ + --models_dir "$MODELS_DIR" \ + --device "$DEVICE" + +} + +# Parse command-line arguments +while [ "$#" -gt 0 ]; do + case "$1" in + -p|--prompt) + PROMPT="$2" + shift 2 + ;; + -r|--repetitions) + REPETITIONS="$2" + shift 2 + ;; + -m|--max_tokens) + MAX_TOKENS="$2" + shift 2 + ;; + -d|--device) + DEVICE="$2" + case "$DEVICE" in + "cuda" | "metal" | "cpu") + ;; + *) + echo "Invalid value for --device. Please use 'cuda', 'gpu' or 'cpu'." + print_usage + ;; + esac + shift 2 + ;; + -lf|--log_file) + LOG_FILENAME="$2" + shift 2 + ;; + -md|--models_dir) + MODELS_DIR="$2" + shift 2 + ;; + -h|--help) + print_usage + ;; + *) + echo "Unknown option: $1" + print_usage + ;; + esac +done +# Set default values if not provided +PROMPT="${PROMPT:-"Explain what is a transformer"}" +REPETITIONS="${REPETITIONS:-10}" +MAX_TOKENS="${MAX_TOKENS:-100}" +DEVICE="${DEVICE:-'cpu'}" +LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}" +MODELS_DIR="${MODELS_DIR:-"./models"}" + +check_platform +check_python +setup "$DEVICE" +run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR" diff --git a/llamacpp_bench/requirements.txt b/llamacpp_bench/requirements.txt new file mode 100644 index 00000000..918e0647 --- /dev/null +++ b/llamacpp_bench/requirements.txt @@ -0,0 +1,2 @@ +llama_cpp_python==0.2.15 +sentencepiece==0.1.99 diff --git a/llamacpp_bench/setup.sh b/llamacpp_bench/setup.sh new file mode 100755 index 00000000..3b5a5956 --- /dev/null +++ b/llamacpp_bench/setup.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +################################################################################ +# Script: setup.sh +# Description: This script automates the setup of a virtual environment, +# installs project requirements. 
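+# For cuda/metal it exports CMAKE_ARGS so that
+# llama-cpp-python is built with cuBLAS/Metal enabled.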
+################################################################################ + +set -euo pipefail + +# if [ "$#" -ne 1 ]; then +# echo "Usage: $0 " +# exit 1 +# fi + + +# Define directory paths +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +VENV_DIR="$SCRIPT_DIR/venv" +DEVICE="$1" +if [ "$DEVICE" == "cuda" ]; then + export CMAKE_ARGS=-DLLAMA_CUBLAS=on +elif [ "$DEVICE" == "metal" ]; then + export CMAKE_ARGS=-DLLAMA_METAL=on +else + export CMAKE_ARGS=-DLLAMA_CUBLAS=off +fi +export FORCE_CMAKE=1 + +if [ ! -d "$VENV_DIR" ]; then + python -m venv "$VENV_DIR" + echo "Virtual environment '$VENV_DIR' created." + source $VENV_DIR/bin/activate + pip install --upgrade pip > /dev/null +else + source $VENV_DIR/bin/activate +fi + +echo "Installing requirements with CMAKE_ARGS=$CMAKE_ARGS and FORCE_CMAKE=$FORCE_CMAKE" +pip install -r $SCRIPT_DIR/requirements.txt --no-cache-dir llama-cpp-python From f34bb0c1ccc4a9f0dfc25e61c985748514552245 Mon Sep 17 00:00:00 2001 From: nsosio Date: Mon, 20 Nov 2023 18:21:51 +0000 Subject: [PATCH 09/25] force reinstall and check setup args --- llamacpp_bench/setup.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llamacpp_bench/setup.sh b/llamacpp_bench/setup.sh index 3b5a5956..3e72c02e 100755 --- a/llamacpp_bench/setup.sh +++ b/llamacpp_bench/setup.sh @@ -8,10 +8,10 @@ set -euo pipefail -# if [ "$#" -ne 1 ]; then -# echo "Usage: $0 " -# exit 1 -# fi +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi # Define directory paths @@ -37,4 +37,4 @@ else fi echo "Installing requirements with CMAKE_ARGS=$CMAKE_ARGS and FORCE_CMAKE=$FORCE_CMAKE" -pip install -r $SCRIPT_DIR/requirements.txt --no-cache-dir llama-cpp-python +pip install -r $SCRIPT_DIR/requirements.txt --no-cache-dir --force-reinstall llama-cpp-python > /dev/null From 5aa63ce3c71697e02a2fa0e1c7f9172e2862ffba Mon Sep 17 00:00:00 2001 From: nsosio Date: Mon, 20 Nov 2023 18:24:11 +0000 Subject: [PATCH 10/25] minor fix --- llamacpp_bench/bench.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llamacpp_bench/bench.py b/llamacpp_bench/bench.py index d494cde1..e4f05679 100644 --- a/llamacpp_bench/bench.py +++ b/llamacpp_bench/bench.py @@ -22,7 +22,11 @@ def __init__(self, model_path, device): self.results = [] def load_model(self): - self.model = Llama(model_path=self.model_path, n_gpu_layers=-1, verbose=True) + self.model = Llama( + model_path=self.model_path, + n_gpu_layers=0 if self.device == "cpu" else -1, + verbose=True, + ) return self def run_model(self, prompt, max_tokens): From 73d12e97f5960c71416357b10e597dbe286c1c5d Mon Sep 17 00:00:00 2001 From: nsosio Date: Mon, 20 Nov 2023 18:28:03 +0000 Subject: [PATCH 11/25] venv per device --- .gitignore | 2 ++ llamacpp_bench/setup.sh | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index eb4a4006..09069f6a 100644 --- a/.gitignore +++ b/.gitignore @@ -122,8 +122,10 @@ celerybeat.pid # Environments .env .venv +.venv_* env/ venv/ +venv_*/ ENV/ env.bak/ venv.bak/ diff --git a/llamacpp_bench/setup.sh b/llamacpp_bench/setup.sh index 3e72c02e..32f74250 100755 --- a/llamacpp_bench/setup.sh +++ b/llamacpp_bench/setup.sh @@ -16,8 +16,8 @@ fi # Define directory paths SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -VENV_DIR="$SCRIPT_DIR/venv" DEVICE="$1" +VENV_DIR="$SCRIPT_DIR/venv_$DEVICE" if [ "$DEVICE" == "cuda" ]; then export CMAKE_ARGS=-DLLAMA_CUBLAS=on elif [ "$DEVICE" == "metal" ]; then @@ -37,4 +37,4 @@ else fi echo "Installing 
requirements with CMAKE_ARGS=$CMAKE_ARGS and FORCE_CMAKE=$FORCE_CMAKE" -pip install -r $SCRIPT_DIR/requirements.txt --no-cache-dir --force-reinstall llama-cpp-python > /dev/null +pip install -r $SCRIPT_DIR/requirements.txt --no-cache-dir > /dev/null From 1d690232b43ba36ae3b13eef277a4c8614b43b2e Mon Sep 17 00:00:00 2001 From: nsosio Date: Tue, 21 Nov 2023 09:55:00 +0000 Subject: [PATCH 12/25] bugfix llamacpp build --- llamacpp_bench/bench.sh | 5 ++ llamacpp_bench/requirements.txt | 4 +- llamacpp_bench/setup.sh | 82 ++++++++++++++++++++++++++------- 3 files changed, 72 insertions(+), 19 deletions(-) diff --git a/llamacpp_bench/bench.sh b/llamacpp_bench/bench.sh index 5d626811..bd2f153a 100755 --- a/llamacpp_bench/bench.sh +++ b/llamacpp_bench/bench.sh @@ -52,6 +52,11 @@ run_benchmarks() { local LOG_FILENAME="$5" local MODELS_DIR="$6" + if [ "$DEVICE" == "cuda" ] || [ "$DEVICE" == "metal" ]; then + export LLAMA_CPP_LIB=$SCRIPT_DIR/venv/libllama_$DEVICE.so + echo "LLAMA_CPP_LIB=$LLAMA_CPP_LIB" + fi + python $SCRIPT_DIR/bench.py \ --prompt "$PROMPT" \ --repetitions "$REPETITIONS" \ diff --git a/llamacpp_bench/requirements.txt b/llamacpp_bench/requirements.txt index 918e0647..4ae348fb 100644 --- a/llamacpp_bench/requirements.txt +++ b/llamacpp_bench/requirements.txt @@ -1,2 +1,2 @@ -llama_cpp_python==0.2.15 -sentencepiece==0.1.99 +llama_cpp_python +sentencepiece diff --git a/llamacpp_bench/setup.sh b/llamacpp_bench/setup.sh index 32f74250..d51470a0 100755 --- a/llamacpp_bench/setup.sh +++ b/llamacpp_bench/setup.sh @@ -2,39 +2,87 @@ ################################################################################ # Script: setup.sh -# Description: This script automates the setup of a virtual environment, -# installs project requirements. +# Description: Automates the setup of a virtual environment and installs project +# requirements. ################################################################################ set -euo pipefail +# Function to clone and build llama.cpp +clone_and_build_llama() { + local DEVICE="$1" + local VENV_DIR="$2" + local SCRIPT_DIR="$3" + + # Check if DEVICE and ENV are provided as arguments + if [ "$#" -ne 3 ]; then + echo "Usage: $0 " + exit 1 + fi + + case "$DEVICE" in + cuda) + export LLAMA_CUBLAS=on + ;; + metal) + export LLAMA_METAL=on + ;; + cpu) + return 0 + ;; + *) + echo "Unsupported DEVICE: $DEVICE" + return 1 + ;; + esac + + local LIBLLAMA_FILE="$VENV_DIR/libllama_$DEVICE.so" + + if [ -e "$LIBLLAMA_FILE" ]; then + echo "File $LIBLLAMA_FILE exists." + exit 0 + fi + + # Remove existing llama.cpp directory if it exists + if [ -d "$SCRIPT_DIR/llama.cpp" ]; then + echo "Removing existing llama.cpp directory..." + rm -rf $SCRIPT_DIR/llama.cpp + fi + + git clone --depth=1 https://github.com/ggerganov/llama.cpp $SCRIPT_DIR/llama.cpp + cd $SCRIPT_DIR/llama.cpp + + # Build llama.cpp + make clean > /dev/null + echo "Building llama.cpp..." 
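+    # The per-device libllama_$DEVICE.so built here is picked up at bench
+    # time via the LLAMA_CPP_LIB environment variable (exported in bench.sh),
+    # so llama-cpp-python loads this build instead of its bundled library.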
+ make libllama.so > /dev/null + cp libllama.so "$LIBLLAMA_FILE" + cd $SCRIPT_DIR + + rm -rf $SCRIPT_DIR/llama.cpp +} + +# Main script starts here + if [ "$#" -ne 1 ]; then echo "Usage: $0 " exit 1 fi - # Define directory paths -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" DEVICE="$1" -VENV_DIR="$SCRIPT_DIR/venv_$DEVICE" -if [ "$DEVICE" == "cuda" ]; then - export CMAKE_ARGS=-DLLAMA_CUBLAS=on -elif [ "$DEVICE" == "metal" ]; then - export CMAKE_ARGS=-DLLAMA_METAL=on -else - export CMAKE_ARGS=-DLLAMA_CUBLAS=off -fi -export FORCE_CMAKE=1 +VENV_DIR="$SCRIPT_DIR/venv" +LIBLLAMA_FILE="$VENV_DIR/libllama_$DEVICE.so" if [ ! -d "$VENV_DIR" ]; then python -m venv "$VENV_DIR" echo "Virtual environment '$VENV_DIR' created." - source $VENV_DIR/bin/activate + source "$VENV_DIR/bin/activate" pip install --upgrade pip > /dev/null + pip install -r "$SCRIPT_DIR/requirements.txt" --no-cache-dir > /dev/null else - source $VENV_DIR/bin/activate + source "$VENV_DIR/bin/activate" fi -echo "Installing requirements with CMAKE_ARGS=$CMAKE_ARGS and FORCE_CMAKE=$FORCE_CMAKE" -pip install -r $SCRIPT_DIR/requirements.txt --no-cache-dir > /dev/null +clone_and_build_llama "$DEVICE" "$VENV_DIR" "$SCRIPT_DIR" From d1a30a7802d5114a756d30328392de493ef8cb56 Mon Sep 17 00:00:00 2001 From: nsosio Date: Tue, 21 Nov 2023 11:49:26 +0000 Subject: [PATCH 13/25] minor fix in llama.cpp; added tinygrad bench --- llamacpp_bench/requirements.txt | 1 - tinygrad_bench/bench.sh | 161 ++++++++++++++++++++++++++++++++ tinygrad_bench/setup.sh | 25 +++++ 3 files changed, 186 insertions(+), 1 deletion(-) create mode 100755 tinygrad_bench/bench.sh create mode 100755 tinygrad_bench/setup.sh diff --git a/llamacpp_bench/requirements.txt b/llamacpp_bench/requirements.txt index 4ae348fb..0b5641bd 100644 --- a/llamacpp_bench/requirements.txt +++ b/llamacpp_bench/requirements.txt @@ -1,2 +1 @@ llama_cpp_python -sentencepiece diff --git a/tinygrad_bench/bench.sh b/tinygrad_bench/bench.sh new file mode 100755 index 00000000..5933e0f2 --- /dev/null +++ b/tinygrad_bench/bench.sh @@ -0,0 +1,161 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Function to print script usage +print_usage() { + echo "Usage: $0 [OPTIONS]" + echo "OPTIONS:" + echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" + echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" + echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -lf, --log_file Logging file name." + echo " -md, --models_dir Models directory." + echo " -h, --help Show this help message" + exit 1 +} + +check_platform() { + local platform=$(uname -s) + if [[ "$platform" == "Linux" ]]; then + echo "Running on Linux." + elif [[ "$platform" == "Darwin" ]]; then + echo "Running on Mac OS." + else + echo "Unknown platform." + exit 1 + fi +} + +check_python() { + if command -v python &> /dev/null + then + echo -e "\nUsing $(python --version)." + else + echo -e "\nPython does not exist." + exit 1 + fi +} + +setup() { + echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." 
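+    # setup.sh (below) clones tinygrad into this folder and installs it into
+    # the venv in editable mode, along with sentencepiece.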
+ bash $SCRIPT_DIR/setup.sh "$1" +} + +run_llama_experiment() { + models_dir=$1 + script_dir=$2 + prompt=$3 + max_tokens=$4 + repetitions=$5 + device=$6 + + # if [ "$device" != "cuda" ]; then + # export CUDA_VISIBLE_DEVICES="" + # fi + + declare -a tokens_per_second_array=() + + for ((i=1; i<=$repetitions; i++)); do + tokens_per_second=$(python3 "$script_dir/tinygrad/examples/llama.py" \ + --model "$models_dir/llama-2-7b-hf/pytorch_model.bin.index.json" \ + --prompt "$prompt" \ + --count "$max_tokens" \ + --timing \ + | grep -E 'total [0-9]+[.][0-9]+ ms, [0-9]+[.][0-9]+ tok/sec' \ + | awk -F '[:, ]' '{ sum += $(NF-1); count++ } END { if (count > 0) print sum/count }' + ) + tokens_per_second_array+=("$tokens_per_second") + done + + # Return the array of values + echo "${tokens_per_second_array[@]}" +} + +run_benchmarks() { + local PROMPT="$1" + local REPETITIONS="$2" + local MAX_TOKENS="$3" + local DEVICE="$4" + local LOG_FILENAME="$5" + local MODELS_DIR="$6" + + source "$SCRIPT_DIR/venv/bin/activate" + + # Assign the result to an array variable + result_array=($(run_llama_experiment "$MODELS_DIR" "$SCRIPT_DIR" "$PROMPT" "$MAX_TOKENS" $REPETITIONS "$DEVICE")) + + total=0 + for value in "${result_array[@]}"; do + total=$(echo "$total + $value" | bc -l) + done + mean=$(echo "$total / ${#result_array[@]}" | bc -l) + + sum_squared_diff=0 + for value in "${result_array[@]}"; do + diff=$(echo "$value - $mean" | bc -l) + sum_squared_diff=$(echo "$sum_squared_diff + ($diff * $diff)" | bc -l) + done + variance=$(echo "$sum_squared_diff / ${#result_array[@]}" | bc -l) + std=$(echo "sqrt($variance)" | bc -l) + echo "tinygrad, float16 : $(printf "%.2f" $mean) ± $(printf "%.2f" $std)" >> "$LOG_FILENAME" +} + +# Parse command-line arguments +while [ "$#" -gt 0 ]; do + case "$1" in + -p|--prompt) + PROMPT="$2" + shift 2 + ;; + -r|--repetitions) + REPETITIONS="$2" + shift 2 + ;; + -m|--max_tokens) + MAX_TOKENS="$2" + shift 2 + ;; + -d|--device) + DEVICE="$2" + case "$DEVICE" in + "cuda" | "metal" | "cpu") + ;; + *) + echo "Invalid value for --device. Please use 'cuda', 'gpu' or 'cpu'." + print_usage + ;; + esac + shift 2 + ;; + -lf|--log_file) + LOG_FILENAME="$2" + shift 2 + ;; + -md|--models_dir) + MODELS_DIR="$2" + shift 2 + ;; + -h|--help) + print_usage + ;; + *) + echo "Unknown option: $1" + print_usage + ;; + esac +done +# Set default values if not provided +PROMPT="${PROMPT:-"Explain what is a transformer"}" +REPETITIONS="${REPETITIONS:-10}" +MAX_TOKENS="${MAX_TOKENS:-100}" +DEVICE="${DEVICE:-'cpu'}" +LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}" +MODELS_DIR="${MODELS_DIR:-"./models"}" + +check_platform +check_python +setup "$DEVICE" +run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR" diff --git a/tinygrad_bench/setup.sh b/tinygrad_bench/setup.sh new file mode 100755 index 00000000..ac6a6f73 --- /dev/null +++ b/tinygrad_bench/setup.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +################################################################################ +# Script: setup.sh +# Description: Automates the setup of a virtual environment and installs project +# requirements. +################################################################################ + +set -euo pipefail + +# Define directory paths +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="$SCRIPT_DIR/venv" + +if [ ! -d "$VENV_DIR" ]; then + python -m venv "$VENV_DIR" + echo "Virtual environment '$VENV_DIR' created." 
+ source "$VENV_DIR/bin/activate" + pip install --upgrade pip > /dev/null + git clone --depth=1 https://github.com/tinygrad/tinygrad.git $SCRIPT_DIR/tinygrad + cd $SCRIPT_DIR/tinygrad + pip install -e . + pip install sentencepiece + cd .. +fi From b1efe3c7af84b02da577c1600dba36a0f86e969c Mon Sep 17 00:00:00 2001 From: nsosio Date: Tue, 21 Nov 2023 11:54:47 +0000 Subject: [PATCH 14/25] added cpu support and logs to stdout --- .gitignore | 5 +++-- tinygrad_bench/bench.sh | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 09069f6a..7e86b38d 100644 --- a/.gitignore +++ b/.gitignore @@ -122,10 +122,8 @@ celerybeat.pid # Environments .env .venv -.venv_* env/ venv/ -venv_*/ ENV/ env.bak/ venv.bak/ @@ -164,3 +162,6 @@ cython_debug/ # don't check-in sub folder models/* !models/.gitkeep + +# Repositories +tinygrad_bench/tinygrad diff --git a/tinygrad_bench/bench.sh b/tinygrad_bench/bench.sh index 5933e0f2..89a34efe 100755 --- a/tinygrad_bench/bench.sh +++ b/tinygrad_bench/bench.sh @@ -52,9 +52,9 @@ run_llama_experiment() { repetitions=$5 device=$6 - # if [ "$device" != "cuda" ]; then - # export CUDA_VISIBLE_DEVICES="" - # fi + if [ "$device" != "cuda" ]; then + export CUDA_VISIBLE_DEVICES="" + fi declare -a tokens_per_second_array=() @@ -64,6 +64,7 @@ run_llama_experiment() { --prompt "$prompt" \ --count "$max_tokens" \ --timing \ + | tee /dev/tty \ | grep -E 'total [0-9]+[.][0-9]+ ms, [0-9]+[.][0-9]+ tok/sec' \ | awk -F '[:, ]' '{ sum += $(NF-1); count++ } END { if (count > 0) print sum/count }' ) From 1bc390db219cc3a3993f10fe20f9ee0078cf6f54 Mon Sep 17 00:00:00 2001 From: nsosio Date: Tue, 21 Nov 2023 11:58:20 +0000 Subject: [PATCH 15/25] renamed bench folders and removed python benchmarks --- .gitignore | 2 +- .../bench.py | 0 .../bench.sh | 0 .../requirements.txt | 0 .../setup.sh | 0 {llamacpp_bench => bench_llamacpp}/bench.py | 0 {llamacpp_bench => bench_llamacpp}/bench.sh | 0 .../requirements.txt | 0 {llamacpp_bench => bench_llamacpp}/setup.sh | 0 {tinygrad_bench => bench_tinygrad}/bench.sh | 0 {tinygrad_bench => bench_tinygrad}/setup.sh | 0 python_bench/__init__.py | 0 python_bench/benchmark.py | 46 -- python_bench/ctranslate.py | 76 -- python_bench/llama_cpp.py | 28 - python_bench/tinygrad.py | 664 ------------------ 16 files changed, 1 insertion(+), 815 deletions(-) rename {ctranslate_bench => bench_ctranslate}/bench.py (100%) rename {ctranslate_bench => bench_ctranslate}/bench.sh (100%) rename {ctranslate_bench => bench_ctranslate}/requirements.txt (100%) rename {ctranslate_bench => bench_ctranslate}/setup.sh (100%) rename {llamacpp_bench => bench_llamacpp}/bench.py (100%) rename {llamacpp_bench => bench_llamacpp}/bench.sh (100%) rename {llamacpp_bench => bench_llamacpp}/requirements.txt (100%) rename {llamacpp_bench => bench_llamacpp}/setup.sh (100%) rename {tinygrad_bench => bench_tinygrad}/bench.sh (100%) rename {tinygrad_bench => bench_tinygrad}/setup.sh (100%) delete mode 100644 python_bench/__init__.py delete mode 100644 python_bench/benchmark.py delete mode 100644 python_bench/ctranslate.py delete mode 100644 python_bench/llama_cpp.py delete mode 100644 python_bench/tinygrad.py diff --git a/.gitignore b/.gitignore index 7e86b38d..1d47faa4 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,4 @@ models/* !models/.gitkeep # Repositories -tinygrad_bench/tinygrad +bench_tinygrad/tinygrad diff --git a/ctranslate_bench/bench.py b/bench_ctranslate/bench.py similarity index 100% rename from ctranslate_bench/bench.py rename to 
bench_ctranslate/bench.py diff --git a/ctranslate_bench/bench.sh b/bench_ctranslate/bench.sh similarity index 100% rename from ctranslate_bench/bench.sh rename to bench_ctranslate/bench.sh diff --git a/ctranslate_bench/requirements.txt b/bench_ctranslate/requirements.txt similarity index 100% rename from ctranslate_bench/requirements.txt rename to bench_ctranslate/requirements.txt diff --git a/ctranslate_bench/setup.sh b/bench_ctranslate/setup.sh similarity index 100% rename from ctranslate_bench/setup.sh rename to bench_ctranslate/setup.sh diff --git a/llamacpp_bench/bench.py b/bench_llamacpp/bench.py similarity index 100% rename from llamacpp_bench/bench.py rename to bench_llamacpp/bench.py diff --git a/llamacpp_bench/bench.sh b/bench_llamacpp/bench.sh similarity index 100% rename from llamacpp_bench/bench.sh rename to bench_llamacpp/bench.sh diff --git a/llamacpp_bench/requirements.txt b/bench_llamacpp/requirements.txt similarity index 100% rename from llamacpp_bench/requirements.txt rename to bench_llamacpp/requirements.txt diff --git a/llamacpp_bench/setup.sh b/bench_llamacpp/setup.sh similarity index 100% rename from llamacpp_bench/setup.sh rename to bench_llamacpp/setup.sh diff --git a/tinygrad_bench/bench.sh b/bench_tinygrad/bench.sh similarity index 100% rename from tinygrad_bench/bench.sh rename to bench_tinygrad/bench.sh diff --git a/tinygrad_bench/setup.sh b/bench_tinygrad/setup.sh similarity index 100% rename from tinygrad_bench/setup.sh rename to bench_tinygrad/setup.sh diff --git a/python_bench/__init__.py b/python_bench/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/python_bench/benchmark.py b/python_bench/benchmark.py deleted file mode 100644 index efbbeba2..00000000 --- a/python_bench/benchmark.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -import logging -from abc import ABC, abstractmethod - -logger = logging.getLogger(__name__) - - -class Benchmark(ABC): - """ - An abstract class for benchmarking different machine learning frameworks. - - This class provides a skeleton for benchmarking the performance of different - machine learning frameworks. It includes methods for loading a model, running - the model, and benchmarking the model's performance. The actual implementation - of these methods is left to the subclasses. - - Attributes: - model_path (str): The path to the model file. - - Methods: - load_model(): An abstract method that loads the model. - run_model(prompt, max_tokens): An abstract method that runs the model and estimate tokens/second. - benchmark(prompt, max_tokens, repetitions=10): Runs the model several times - and calculates the average tokens per second. 
- """ - - def __init__(self, model_path): - self.model_path = model_path - self.results = [] - - @abstractmethod - def load_model(self) -> Benchmark: - pass - - @abstractmethod - def run_model(self, prompt, max_tokens) -> float: - pass - - def benchmark(self, prompt, max_tokens, repetitions): - for i in range(repetitions): - logger.info( - f"Running repetition [{str(i+1).zfill(len(str(repetitions)))}/{repetitions}]" - ) - tokens_per_second = self.run_model(prompt, max_tokens) - self.results.append(tokens_per_second) diff --git a/python_bench/ctranslate.py b/python_bench/ctranslate.py deleted file mode 100644 index 37cc10b3..00000000 --- a/python_bench/ctranslate.py +++ /dev/null @@ -1,76 +0,0 @@ -import logging -import os -import time - -import ctranslate2 -import sentencepiece as spm - -from python_bench.benchmark import Benchmark - -logging.getLogger("ctranslate2").setLevel(logging.ERROR) - -B_INST, E_INST = "[INST]", "[/INST]" -B_SYS, E_SYS = "<>\n", "\n<>\n\n" - - -def get_compute_types(gpu, nvidia): - compute_types = set() - if gpu and nvidia: - compute_types = set(ctranslate2.get_supported_compute_types("cuda")) - elif not gpu: - compute_types = set(ctranslate2.get_supported_compute_types("cpu")) - return compute_types - - -class CTranslateBenchmark(Benchmark): - def __init__(self, model_path, gpu, compute_type): - super().__init__(model_path) - self.gpu = gpu - self.compute_type = compute_type - - def load_model(self) -> Benchmark: - self.generator = ctranslate2.Generator( - self.model_path, - device="cuda" if self.gpu else "cpu", - compute_type=self.compute_type, - ) - self.sp = spm.SentencePieceProcessor( - os.path.join(self.model_path, "tokenizer.model") - ) - return self - - def run_model(self, prompt, max_tokens): - prompt_tokens = [""] + self.sp.encode_as_pieces( - f"{B_INST} {prompt.strip()} {E_INST}" - ) - start = time.time() - step_results = self.generator.generate_tokens( - prompt_tokens, - max_length=max_tokens, - sampling_temperature=0.6, - sampling_topk=20, - sampling_topp=1, - ) - count = 0 - for _ in self.generate_words(step_results): - count += 1 - return count / (time.time() - start) - - def generate_words(self, step_results): - tokens_buffer = [] - - for step_result in step_results: - is_new_word = step_result.token.startswith("▁") - - if is_new_word and tokens_buffer: - word = self.sp.decode(tokens_buffer) - if word: - yield word - tokens_buffer = [] - - tokens_buffer.append(step_result.token_id) - - if tokens_buffer: - word = self.sp.decode(tokens_buffer) - if word: - yield word diff --git a/python_bench/llama_cpp.py b/python_bench/llama_cpp.py deleted file mode 100644 index f7c1365c..00000000 --- a/python_bench/llama_cpp.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging -import time - -from llama_cpp import Llama - -from python_bench.benchmark import Benchmark - -logging.getLogger("llama_cpp").setLevel(logging.ERROR) - - -class LlamaCPPBenchmark(Benchmark): - def __init__(self, model_path, gpu): - super().__init__(model_path) - self.gpu = gpu - - def load_model(self) -> Benchmark: - self.model = Llama( - model_path=self.model_path, - n_gpu_layers=-1 if self.gpu else 0, - verbose=False, - ) - return self - - def run_model(self, prompt, max_tokens): - start = time.time() - output = self.model.create_completion(prompt, max_tokens=max_tokens) - tokens = output["usage"]["completion_tokens"] - return tokens / (time.time() - start) diff --git a/python_bench/tinygrad.py b/python_bench/tinygrad.py deleted file mode 100644 index 15d976e7..00000000 --- 
a/python_bench/tinygrad.py +++ /dev/null @@ -1,664 +0,0 @@ -import json -import logging -import os -import time -from pathlib import Path -from typing import Optional, Union - -import numpy as np -from tinygrad.helpers import CI, dtypes, getenv -from tinygrad.jit import JIT_SUPPORTED_DEVICE, TinyJit -from tinygrad.nn import Embedding, Linear -from tinygrad.nn.state import load_state_dict, safe_load, torch_load -from tinygrad.shape.symbolic import Variable -from tinygrad.tensor import Tensor - -from python_bench.benchmark import Benchmark - -logging.getLogger("tinygrad").setLevel(logging.ERROR) -np.set_printoptions(linewidth=200) - - -MAX_CONTEXT = 1024 - - -# https://github.com/facebookresearch/llama/blob/1076b9c51c77ad06e9d7ba8a4c6df775741732bd/llama/model.py#L47 -def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0) -> Tensor: - freqs = 1.0 / (theta ** (Tensor.arange(0, dim, 2)[: (dim // 2)] / dim)) - freqs = Tensor.arange(end).unsqueeze(dim=1) * freqs.unsqueeze(dim=0) - return Tensor.stack([Tensor.cos(freqs), Tensor.sin(freqs)], dim=-1).reshape( - 1, end, 1, dim // 2, 2 - ) - - -# (a+i*b) * (c+i*d) = (ac-bd) + i*(ad+bc) -def complex_mult(A, c, d): - a, b = A[:, :, :, :, 0:1], A[:, :, :, :, 1:2] - ro = a * c - b * d - co = a * d + b * c - return ro.cat(co, dim=-1) - - -def apply_rotary_emb(xq, xk, freqs_cis) -> tuple[Tensor, Tensor]: - assert ( - freqs_cis.shape[1] == xq.shape[1] and freqs_cis.shape[1] == xk.shape[1] - ), f"freqs_cis shape mismatch {freqs_cis.shape} xq:{xq.shape} xk:{xk.shape}" - xq = xq.reshape(*xq.shape[0:-1], -1, 2) - xk = xk.reshape(*xk.shape[0:-1], -1, 2) - assert len(xq.shape) == 5 and len(xk.shape) == 5 and len(freqs_cis.shape) == 5 - c, d = ( - freqs_cis[:, : xq.shape[1], :, :, 0:1], - freqs_cis[:, : xq.shape[1], :, :, 1:2], - ) - xq_out = complex_mult(xq, c, d) - xk_out = complex_mult(xk, c, d) - return xq_out.flatten(3), xk_out.flatten(3) - - -def repeat_kv(x: Tensor, n_rep: int) -> Tensor: - bs, seqlen, n_kv_heads, head_dim = x.shape - if n_rep == 1: - return x - return ( - x.reshape(bs, seqlen, n_kv_heads, 1, head_dim) - .expand(bs, seqlen, n_kv_heads, n_rep, head_dim) - .reshape(bs, seqlen, n_kv_heads * n_rep, head_dim) - ) - - -class RMSNorm: - def __init__(self, dim, eps=1e-6): - self.eps = eps - self.weight = Tensor.ones(dim) - - def __call__(self, x: Tensor): - # TODO: convert to float? 
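-        # (the TODO above refers to upcasting: the reference llama RMSNorm
-        # computes this in float32 and casts back, since x.pow(2).mean() can
-        # overflow in half precision)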
- return (x * (x.pow(2).mean(-1, keepdim=True) + self.eps).rsqrt()) * self.weight - - -class Attention: - def __init__(self, dim, n_heads, n_kv_heads, linear=Linear): - self.n_heads = n_heads - self.n_kv_heads = n_kv_heads if n_kv_heads is not None else n_heads - self.head_dim = dim // n_heads - self.n_rep = self.n_heads // self.n_kv_heads - - self.wq = linear(dim, self.n_heads * self.head_dim, bias=False) - self.wk = linear(dim, self.n_kv_heads * self.head_dim, bias=False) - self.wv = linear(dim, self.n_kv_heads * self.head_dim, bias=False) - self.wo = linear(self.n_heads * self.head_dim, dim, bias=False) - - def __call__( - self, - x: Tensor, - start_pos: Union[Variable, int], - freqs_cis: Tensor, - mask: Optional[Tensor], - ) -> Tensor: - xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) - xq = xq.reshape(xq.shape[0], xq.shape[1], self.n_heads, self.head_dim) - xk = xk.reshape(xk.shape[0], xk.shape[1], self.n_kv_heads, self.head_dim) - xv = xv.reshape(xv.shape[0], xv.shape[1], self.n_kv_heads, self.head_dim) - xq, xk = apply_rotary_emb(xq, xk, freqs_cis=freqs_cis) - bsz, seqlen, n_heads, head_dim = xq.shape - - # create kv cache - if not hasattr(self, "cache_k"): - self.cache_k, self.cache_v = Tensor.zeros( - bsz, MAX_CONTEXT, self.n_kv_heads, self.head_dim - ), Tensor.zeros(bsz, MAX_CONTEXT, self.n_kv_heads, self.head_dim) - - keys = self.cache_k.shrink((None, (0, start_pos), None, None)).cat(xk, dim=1) - values = self.cache_v.shrink((None, (0, start_pos), None, None)).cat(xv, dim=1) - - # update the cache - self.cache_k.assign( - keys.pad( - (None, (0, MAX_CONTEXT - start_pos - seqlen), None, None) - ).contiguous() - ).realize() - self.cache_v.assign( - values.pad( - (None, (0, MAX_CONTEXT - start_pos - seqlen), None, None) - ).contiguous() - ).realize() - - keys, values = repeat_kv(keys, self.n_rep), repeat_kv(values, self.n_rep) - - xq, keys, values = ( - xq.transpose(1, 2), - keys.transpose(1, 2), - values.transpose(1, 2), - ) - attn = ( - xq.scaled_dot_product_attention(keys, values, mask) - .transpose(1, 2) - .reshape(bsz, seqlen, -1) - ) - return self.wo(attn) - - -class FeedForward: - def __init__( - self, dim, hidden_dim, multiple_of, linear=Linear, ffn_dim_multiplier=None - ): - # TODO: what is this? 
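-        # (SwiGLU uses three weight matrices instead of two, so the hidden
-        # dim is scaled by 2/3 to keep the parameter count comparable to a
-        # standard 4*dim feed-forward layer)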
- hidden_dim = int(2 * hidden_dim / 3) - # custom dim factor multiplier - if ffn_dim_multiplier is not None: - hidden_dim = int(ffn_dim_multiplier * hidden_dim) - hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of) - self.w1 = linear(dim, hidden_dim, bias=False) - self.w2 = linear(hidden_dim, dim, bias=False) - self.w3 = linear(dim, hidden_dim, bias=False) - - def __call__(self, x: Tensor) -> Tensor: - return self.w2(self.w1(x).silu() * self.w3(x)) - - -class TransformerBlock: - def __init__( - self, - dim, - multiple_of, - n_heads, - n_kv_heads, - norm_eps, - linear=Linear, - ffn_dim_multiplier=None, - ): - self.attention = Attention(dim, n_heads, n_kv_heads, linear) - self.feed_forward = FeedForward( - dim, 4 * dim, multiple_of, linear, ffn_dim_multiplier - ) - self.attention_norm = RMSNorm(dim, norm_eps) - self.ffn_norm = RMSNorm(dim, norm_eps) - - def __call__( - self, - x: Tensor, - start_pos: Union[Variable, int], - freqs_cis: Tensor, - mask: Union[Tensor, None], - ): - h = x + self.attention(self.attention_norm(x), start_pos, freqs_cis, mask) - return (h + self.feed_forward(self.ffn_norm(h))).realize() - - -class Transformer: - def __init__( - self, - dim, - multiple_of, - n_heads, - n_layers, - norm_eps, - vocab_size, - device, - linear=Linear, - max_batch_size=32, - max_seq_len=1024, - ffn_dim_multiplier=None, - n_kv_heads=None, - rope_theta=10000, - ): - self.JIT = getenv("JIT", 0 if CI else int(device in JIT_SUPPORTED_DEVICE)) - self.layers = [ - TransformerBlock( - dim, - multiple_of, - n_heads, - n_kv_heads, - norm_eps, - linear, - ffn_dim_multiplier, - ) - for _ in range(n_layers) - ] - self.norm = RMSNorm(dim, norm_eps) - self.tok_embeddings = Embedding(vocab_size, dim) - self.output = linear(dim, vocab_size, bias=False) - self.freqs_cis = precompute_freqs_cis( - dim // n_heads, max_seq_len * 2, rope_theta - ) - self.forward_jit = TinyJit(self.forward) - - def forward( - self, tokens: Tensor, start_pos: Union[Variable, int], temperature: float = 0.0 - ): - _bsz, seqlen = tokens.shape - freqs_cis = self.freqs_cis.shrink( - (None, (start_pos, start_pos + seqlen), None, None, None) - ) - mask = ( - Tensor.full( - (1, 1, seqlen, start_pos + seqlen), float("-inf"), dtype=dtypes.float32 - ) - .triu(start_pos + 1) - .realize() - if seqlen > 1 - else None - ) - - h = self.tok_embeddings(tokens) - for layer in self.layers: - h = layer(h, start_pos, freqs_cis, mask) - logits = self.output(self.norm(h)) - return (logits[:, -1, :] / (temperature + 1e-10)).softmax().flatten().realize() - - def __call__(self, tokens: Tensor, start_pos: Variable, temperature: float = 0.0): - # TODO: better way to handle the first call v.s. the rest? 
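-        # (the first call pushes the whole prompt through the un-jitted
-        # forward; later single-token calls reuse the TinyJit-compiled graph
-        # with a symbolic start_pos, avoiding a recompile per position)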
- if tokens.shape[0:2] == (1, 1) and self.JIT: - assert start_pos > 0 - return self.forward_jit( - tokens, - Variable("start_pos", 1, MAX_CONTEXT).bind(start_pos), - temperature, - ) - return self.forward(tokens, start_pos, temperature) - - -# **** files and arguments **** -MODEL_PARAMS = { - "1": { - "7B": { - "args": { - "dim": 4096, - "multiple_of": 256, - "n_heads": 32, - "n_layers": 32, - "norm_eps": 1e-06, - "vocab_size": 32000, - }, - "files": 1, - }, - "13B": { - "args": { - "dim": 5120, - "multiple_of": 256, - "n_heads": 40, - "n_layers": 40, - "norm_eps": 1e-06, - "vocab_size": 32000, - }, - "files": 2, - }, - "30B": { - "args": { - "dim": 6656, - "multiple_of": 256, - "n_heads": 52, - "n_layers": 60, - "norm_eps": 1e-06, - "vocab_size": 32000, - }, - "files": 4, - }, - "65B": { - "args": { - "dim": 8192, - "multiple_of": 256, - "n_heads": 64, - "n_layers": 80, - "norm_eps": 1e-05, - "vocab_size": 32000, - }, - "files": 8, - }, - }, - "2": { - "7B": { - "args": { - "dim": 4096, - "multiple_of": 256, - "n_heads": 32, - "n_layers": 32, - "norm_eps": 1e-05, - "vocab_size": 32000, - }, - "files": 1, - }, - "13B": { - "args": { - "dim": 5120, - "multiple_of": 256, - "n_heads": 40, - "n_layers": 40, - "norm_eps": 1e-05, - "vocab_size": 32000, - }, - "files": 2, - }, - "70B": { - "args": { - "dim": 8192, - "multiple_of": 4096, - "ffn_dim_multiplier": 1.3, - "n_heads": 64, - "n_kv_heads": 8, - "n_layers": 80, - "norm_eps": 1e-05, - "vocab_size": 32000, - }, - "files": 8, - }, - }, - "code": { - "7B": { - "args": { - "dim": 4096, - "n_layers": 32, - "n_heads": 32, - "multiple_of": 256, - "ffn_dim_multiplier": 1.0, - "norm_eps": 1e-5, - "rope_theta": 1000000, - "vocab_size": 32016, - }, - "files": 1, - }, - "7B-Python": { - "args": { - "dim": 4096, - "n_layers": 32, - "n_heads": 32, - "multiple_of": 256, - "ffn_dim_multiplier": 1.0, - "norm_eps": 1e-5, - "rope_theta": 1000000, - "vocab_size": 32000, - }, - "files": 1, - }, - "7B-Instruct": { - "args": { - "dim": 4096, - "n_layers": 32, - "n_heads": 32, - "multiple_of": 256, - "ffn_dim_multiplier": 1.0, - "norm_eps": 1e-5, - "rope_theta": 1000000, - "vocab_size": 32016, - }, - "files": 1, - }, - "13B": { - "args": { - "dim": 5120, - "n_layers": 40, - "n_heads": 40, - "multiple_of": 256, - "ffn_dim_multiplier": 1.0, - "norm_eps": 1e-5, - "rope_theta": 1000000, - "vocab_size": 32016, - }, - "files": 2, - }, - "13B-Python": { - "args": { - "dim": 5120, - "n_layers": 40, - "n_heads": 40, - "multiple_of": 256, - "ffn_dim_multiplier": 1.0, - "norm_eps": 1e-5, - "rope_theta": 1000000, - "vocab_size": 32000, - }, - "files": 2, - }, - "13B-Instruct": { - "args": { - "dim": 5120, - "n_layers": 40, - "n_heads": 40, - "multiple_of": 256, - "ffn_dim_multiplier": 1.0, - "norm_eps": 1e-5, - "rope_theta": 1000000, - "vocab_size": 32016, - }, - "files": 2, - }, - "34B": { - "args": { - "dim": 8192, - "n_layers": 48, - "n_heads": 64, - "n_kv_heads": 8, - "multiple_of": 256, - "ffn_dim_multiplier": 1.0, - "norm_eps": 1e-5, - "rope_theta": 1000000, - "vocab_size": 32000, - }, - "files": 4, - }, - "34B-Python": { - "args": { - "dim": 8192, - "n_layers": 48, - "n_heads": 64, - "n_kv_heads": 8, - "multiple_of": 256, - "ffn_dim_multiplier": 1.0, - "norm_eps": 1e-5, - "rope_theta": 1000000, - "vocab_size": 32000, - }, - "files": 4, - }, - "34B-Instruct": { - "args": { - "dim": 8192, - "n_layers": 48, - "n_heads": 64, - "n_kv_heads": 8, - "multiple_of": 256, - "ffn_dim_multiplier": 1.0, - "norm_eps": 1e-5, - "rope_theta": 1000000, - "vocab_size": 32000, - }, - 
"files": 4, - }, - }, -} - - -# **** helper functions **** -def concat_weights(models, device): - def convert(name) -> Tensor: - disk_tensors = [model[name] for model in models] - if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: - return disk_tensors[0].to(device=device) - axis = ( - 1 - if name.startswith("tok_embeddings.") - or name.endswith(".attention.wo.weight") - or name.endswith(".feed_forward.w2.weight") - else 0 - ) - lazy_tensors = [data.to(device=device) for data in disk_tensors] - return lazy_tensors[0].cat(*lazy_tensors[1:], dim=axis) - - return { - name: convert(name) - for name in {name: None for model in models for name in model} - } - - -def load(fn: str): - if fn.endswith(".index.json"): - with open(fn) as fp: - weight_map = json.load(fp)["weight_map"] - parts = { - n: load(str(Path(fn).parent / Path(n).name)) - for n in set(weight_map.values()) - } - return {k: parts[n][k] for k, n in weight_map.items()} - elif fn.endswith(".safetensors"): - return safe_load(fn) - else: - return torch_load(fn) - - -def convert_from_huggingface(weights, model): - keymap = { - "model.embed_tokens.weight": "tok_embeddings.weight", - **{ - f"model.layers.{layer}.input_layernorm.weight": f"layers.{layer}.attention_norm.weight" - for layer in range(len(model.layers)) - }, - **{ - f"model.layers.{layer}.self_attn.{x}_proj.weight": f"layers.{layer}.attention.w{x}.weight" - for x in ["q", "k", "v", "o"] - for layer in range(len(model.layers)) - }, - **{ - f"model.layers.{layer}.post_attention_layernorm.weight": f"layers.{layer}.ffn_norm.weight" - for layer in range(len(model.layers)) - }, - **{ - f"model.layers.{layer}.mlp.{x}_proj.weight": f"layers.{layer}.feed_forward.w{y}.weight" - for x, y in {"gate": "1", "down": "2", "up": "3"}.items() - for layer in range(len(model.layers)) - }, - "model.norm.weight": "norm.weight", - "lm_head.weight": "output.weight", - } - return {keymap[k]: v for k, v in weights.items() if ".rotary_emb." 
not in k} - - -class AbsmaxQuantizedLinear: - def __init__(self, in_features, out_features, bias=False): - assert not bias - self.weight = Tensor.ones(out_features, in_features, dtype=dtypes.int8) - self.scale = Tensor.ones(out_features, dtype=dtypes.half) - - def __call__(self, x): - return x.dot(self.weight.cast(dtype=dtypes.half).T * self.scale) - - @staticmethod - def quantize(tensors): - new_tensors = {} - for name, v in tensors.items(): - if ( - "feed_forward" in name - or ("attention.w") in name - or name == "output.weight" - ): - scale = v.abs().max(axis=1) / 127.0 - int8_weight = (v.T / scale).T.cast(dtype=dtypes.int8) - new_tensors[name] = int8_weight - new_tensors[name.replace("weight", "scale")] = scale - else: - new_tensors[name] = v - return new_tensors - - -class LLaMa: - @staticmethod - def build( - model_path, - tokenizer_path, - device, - model_gen="1", - model_size="7B", - quantize=False, - ): - from sentencepiece import SentencePieceProcessor - - sp_model = SentencePieceProcessor(model_file=str(tokenizer_path)) - assert ( - sp_model.vocab_size() - == MODEL_PARAMS[model_gen][model_size]["args"]["vocab_size"] - ), f"{sp_model.vocab_size()=} not equal to {MODEL_PARAMS[model_gen][model_size]['args']['vocab_size']}" - - params = MODEL_PARAMS[model_gen][model_size] - model = ( - Transformer(**params["args"], device=device, linear=AbsmaxQuantizedLinear) - if quantize - else Transformer(**params["args"], device=device) - ) - - if model_path.is_dir(): - weights = concat_weights( - [ - load(filename) - for filename in [ - f"{model_path}/consolidated.{i:02d}.pth" - for i in range(params["files"]) - ] - ], - device=device, - ) - else: - weights = load(str(model_path)) - if "model.embed_tokens.weight" in weights: - weights = convert_from_huggingface(weights, model) - - if quantize: - weights = AbsmaxQuantizedLinear.quantize(weights) - for _, v in weights.items(): - v.realize() - load_state_dict(model, weights, strict=False) - - return LLaMa(model, sp_model) - - def __init__(self, model, tokenizer): - self.model = model - self.tokenizer = tokenizer - - -class TinyGradBenchmark(Benchmark): - def __init__( - self, model_path, device, quantize, gen="2", temperature=0.7, model_size="7B" - ): - super().__init__(model_path) - self.model = None - self.quantize = quantize - self.model_gen = gen - self.temperature = temperature - self.model_size = model_size - self.device = device - - def load_model(self) -> Benchmark: - self.model = LLaMa.build( - Path(os.path.join(self.model_path, "pytorch_model.bin.index.json")), - Path(os.path.join(self.model_path, "tokenizer.model")), - model_gen=self.model_gen, - model_size=self.model_size, - quantize=self.quantize, - device=self.device, - ) - return self - - def run_model(self, prompt, max_tokens) -> float: - Tensor.no_grad = True - toks = [self.model.tokenizer.bos_id()] + self.model.tokenizer.encode(prompt) - start_pos = 0 - outputted = self.model.tokenizer.decode(toks) - - new_toks = [self.model.tokenizer.bos_id()] + self.model.tokenizer.encode( - outputted - ) - assert toks == new_toks[: len(toks)] - toks = new_toks - assert outputted == self.model.tokenizer.decode(toks) - times = [] - for _ in range(max_tokens): - start = time.time() - probs = self.model.model( - Tensor([toks[start_pos:]]), start_pos, self.temperature - ).realize() - times.append(time.time() - start) - probs_np = probs.numpy() - tok = int(np.random.choice(len(probs_np), p=probs_np)) - start_pos = len(toks) - toks.append(tok) - cur = self.model.tokenizer.decode(toks) - outputted = 
cur
-        return len(times) / sum(times)

From 873bef53f3459bb1712a47c314c1c25831824d9f Mon Sep 17 00:00:00 2001
From: nsosio
Date: Tue, 21 Nov 2023 13:04:46 +0000
Subject: [PATCH 16/25] disabled ctranslate for metal; check cuda for llama.cpp and tinygrad with cuda device

---
 bench_ctranslate/bench.py |  4 ++++
 bench_llamacpp/bench.sh   | 14 ++++++++++++++
 bench_tinygrad/bench.sh   | 14 ++++++++++++++
 3 files changed, 32 insertions(+)

diff --git a/bench_ctranslate/bench.py b/bench_ctranslate/bench.py
index e3aa1fb5..5ee07808 100644
--- a/bench_ctranslate/bench.py
+++ b/bench_ctranslate/bench.py
@@ -119,6 +119,10 @@ def generate_words(self, step_results):
         help="Path to the models directory.",
     )
     args = parser.parse_args()
+    if args.device == "metal":
+        logging.info(f"Skipping benchmark with device={args.device}")
+        sys.exit(0)
+
     logging.info(
         f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} "
         + f"repetitions={args.repetitions} device={args.device}"
diff --git a/bench_llamacpp/bench.sh b/bench_llamacpp/bench.sh
index bd2f153a..2a916c91 100755
--- a/bench_llamacpp/bench.sh
+++ b/bench_llamacpp/bench.sh
@@ -17,6 +17,17 @@ print_usage() {
     exit 1
 }
 
+check_cuda() {
+    if command -v nvcc &> /dev/null
+    then
+        echo -e "\nUsing CUDA"
+        nvcc --version
+    else
+        echo -e "\nCUDA is not available."
+        exit 1
+    fi
+}
+
 check_platform() {
     local platform=$(uname -s)
     if [[ "$platform" == "Linux" ]]; then
@@ -92,6 +103,9 @@ while [ "$#" -gt 0 ]; do
                     print_usage
                     ;;
             esac
+            if [ "$DEVICE" == "cuda"]; then
+                check_cuda
+            fi
             shift 2
             ;;
         -lf|--log_file)
diff --git a/bench_tinygrad/bench.sh b/bench_tinygrad/bench.sh
index 89a34efe..907bff48 100755
--- a/bench_tinygrad/bench.sh
+++ b/bench_tinygrad/bench.sh
@@ -17,6 +17,17 @@ print_usage() {
     exit 1
 }
 
+check_cuda() {
+    if command -v nvcc &> /dev/null
+    then
+        echo -e "\nUsing CUDA"
+        nvcc --version
+    else
+        echo -e "\nCUDA is not available."
+        exit 1
+    fi
+}
+
 check_platform() {
     local platform=$(uname -s)
     if [[ "$platform" == "Linux" ]]; then
@@ -129,6 +140,9 @@ while [ "$#" -gt 0 ]; do
                     print_usage
                     ;;
             esac
+            if [ "$DEVICE" == "cuda"]; then
+                check_cuda
+            fi
             shift 2
             ;;
         -lf|--log_file)

From dfc7aa5acf0860a5eba9cddba9a4a6f1a917e12b Mon Sep 17 00:00:00 2001
From: nsosio
Date: Wed, 22 Nov 2023 10:45:07 +0000
Subject: [PATCH 17/25] added burn benchmark

---
 .gitignore                |   1 +
 bench_burn/bench.sh       | 167 ++++++++++++++++++++++++++++++++++++++
 bench_burn/setup.sh       |  61 ++++++++++++++
 bench_ctranslate/bench.sh |  16 +++-
 bench_ctranslate/setup.sh |   2 +-
 bench_llamacpp/bench.sh   |  18 +++-
 bench_llamacpp/setup.sh   |   2 -
 7 files changed, 261 insertions(+), 6 deletions(-)
 create mode 100755 bench_burn/bench.sh
 create mode 100755 bench_burn/setup.sh

diff --git a/.gitignore b/.gitignore
index 1d47faa4..c8c2d70e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,3 +165,4 @@ models/*
 
 # Repositories
 bench_tinygrad/tinygrad
+bench_burn/llama2-burn
diff --git a/bench_burn/bench.sh b/bench_burn/bench.sh
new file mode 100755
index 00000000..c2aaac3b
--- /dev/null
+++ b/bench_burn/bench.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+
+########################################################################################################
+# Script: bench.sh
+# Description: This script runs the llama2-burn benchmark.
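+#              It builds the llama2-burn crate, runs it for the configured number of
+#              repetitions, and appends the mean ± std tokens/sec to the log file,
+#              e.g. `./bench.sh -r 10 -m 100 -d cuda -lf bench.log -md ./models`.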
+#
+# Usage: ./bench.sh [OPTIONS]
+# OPTIONS:
+#   -p, --prompt      Prompt for benchmarks (default: 'Explain what is a transformer')
+#   -r, --repetitions Number of repetitions for benchmarks (default: 2)
+#   -m, --max_tokens  Maximum number of tokens for benchmarks (default: 100)
+#   -d, --device      Device for benchmarks (possible values: 'cuda', 'metal', and 'cpu', default: 'cpu')
+#   -lf, --log_file   Logging file name.
+#   -md, --models_dir Models directory.
+#   -h, --help        Show this help message
+########################################################################################################
+
+set -euo pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+print_usage() {
+    echo "Usage: $0 [OPTIONS]"
+    echo "OPTIONS:"
+    echo "  -p, --prompt      Prompt for benchmarks (default: 'Explain what is a transformer')"
+    echo "  -r, --repetitions Number of repetitions for benchmarks (default: 2)"
+    echo "  -m, --max_tokens  Maximum number of tokens for benchmarks (default: 100)"
+    echo "  -lf, --log_file   Logging file name."
+    echo "  -md, --models_dir Models directory."
+    echo "  -h, --help        Show this help message"
+    exit 1
+}
+
+check_cuda() {
+    if command -v nvcc &> /dev/null
+    then
+        echo -e "\nUsing CUDA"
+        nvcc --version
+    else
+        echo -e "\nCUDA is not available."
+        exit 1
+    fi
+}
+
+check_platform() {
+    local platform=$(uname -s)
+    if [[ "$platform" == "Linux" ]]; then
+        echo "Running on Linux."
+    elif [[ "$platform" == "Darwin" ]]; then
+        echo "Running on Mac OS."
+    else
+        echo "Unknown platform."
+        exit 1
+    fi
+}
+
+check_python() {
+    if command -v python &> /dev/null
+    then
+        echo -e "\nUsing $(python --version)."
+    else
+        echo -e "\nPython does not exist."
+        exit 1
+    fi
+}
+
+setup() {
+    echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
+    bash $SCRIPT_DIR/setup.sh "$1"
+}
+
+run_benchmarks() {
+    local PROMPT="$1"
+    local REPETITIONS="$2"
+    local MAX_TOKENS="$3"
+    local DEVICE="$4"
+    local LOG_FILENAME="$5"
+    local MODELS_DIR="$6"
+
+    cargo clean --manifest-path="$SCRIPT_DIR/llama2-burn/Cargo.toml"
+
+    echo "Building burn"
+    if [ "$DEVICE" == "cuda" ]; then
+        export TORCH_CUDA_VERSION=cu117
+        DEVICE=gpu
+    fi
+    cargo build --release --manifest-path="$SCRIPT_DIR/llama2-burn/Cargo.toml"
+    echo "Running benchmarks"
+
+    benchmark_output=$(
+        cargo run --release --bin benchmark \
+            --manifest-path="$SCRIPT_DIR/llama2-burn/Cargo.toml" \
+            "$MODELS_DIR/llama-2-7b-burn/llama-2-7b-burn" \
+            "$MODELS_DIR/llama-2-7b-burn/tokenizer.model" \
+            "$PROMPT" \
+            $MAX_TOKENS \
+            $DEVICE \
+            $REPETITIONS
+    )
+    mean=$(echo "$benchmark_output" | grep -oP '\d+\.\d+ ± \d+\.\d+' | awk -F ' ± ' '{print $1}')
+    std=$(echo "$benchmark_output" | grep -oP '\d+\.\d+ ± \d+\.\d+' | awk -F ' ± ' '{print $2}')
+    echo "burn, float16 : $(printf "%.2f" $mean) ± $(printf "%.2f" $std)" >> "$LOG_FILENAME"
+}
+# Parse command-line arguments
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        -p|--prompt)
+            PROMPT="$2"
+            shift 2
+            ;;
+        -r|--repetitions)
+            REPETITIONS="$2"
+            shift 2
+            ;;
+        -m|--max_tokens)
+            MAX_TOKENS="$2"
+            shift 2
+            ;;
+        -d|--device)
+            DEVICE="$2"
+            case "$DEVICE" in
+                "cuda" | "metal" | "cpu")
+                    ;;
+                *)
+                    echo "Invalid value for --device. Please use 'cuda', 'metal' or 'cpu'."
+                    print_usage
+                    ;;
+            esac
+            if [ "$DEVICE" == "cuda" ]; then
+                check_cuda
+            fi
+            if [ "$DEVICE" == "metal" ]; then
+                echo "Metal not supported!"
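+                # metal is reported as unsupported and skipped with a zero exit
+                # code, so callers can continue instead of treating it as a failure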
+                exit 0
+            fi
+            shift 2
+            ;;
+        -lf|--log_file)
+            LOG_FILENAME="$2"
+            shift 2
+            ;;
+        -md|--models_dir)
+            MODELS_DIR="$2"
+            shift 2
+            ;;
+        -h|--help)
+            print_usage
+            ;;
+        *)
+            echo "Unknown option: $1"
+            print_usage
+            ;;
+    esac
+done
+
+# Set default values if not provided
+PROMPT="${PROMPT:-"Explain what is a transformer"}"
+REPETITIONS="${REPETITIONS:-10}"
+MAX_TOKENS="${MAX_TOKENS:-100}"
+DEVICE="${DEVICE:-'cpu'}"
+LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}"
+MODELS_DIR="${MODELS_DIR:-"./models"}"
+
+check_platform
+check_python
+setup "$MODELS_DIR"
+run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR"
diff --git a/bench_burn/setup.sh b/bench_burn/setup.sh
new file mode 100755
index 00000000..90f5698b
--- /dev/null
+++ b/bench_burn/setup.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+####################################################################################
+# Script: setup.sh
+# Description: Automates the setup of a virtual environment, clones the llama2-burn
+# repo, installs project requirements, and handles model conversion.
+####################################################################################
+
+set -euo pipefail
+
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <models_folder>"
+    exit 1
+fi
+
+# Define directory paths
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV_DIR="$SCRIPT_DIR/venv"
+MODELS_FOLDER="$1"
+BURN_MODEL_INPUT_DIR=$MODELS_FOLDER/llama-2-7b-raw
+BURN_FOLDER=$SCRIPT_DIR/llama2-burn
+BURN_MODEL_FOLDER=$MODELS_FOLDER/llama-2-7b-burn
+BURN_MODEL_NAME="llama-2-7b-burn"
+
+check_and_create_directory() {
+    if [ ! -d "$1" ]; then
+        mkdir -p "$1"
+    fi
+}
+
+if [ ! -d "$VENV_DIR" ]; then
+    python -m venv "$VENV_DIR"
+    echo "Virtual environment '$VENV_DIR' created."
+    source "$VENV_DIR/bin/activate"
+    pip install --upgrade pip > /dev/null
+    if [ -d "$BURN_FOLDER" ]; then
+        rm -rf $BURN_FOLDER
+    fi
+    git clone --depth=1 https://github.com/premAI-io/llama2-burn.git $BURN_FOLDER
+    pip install -r $BURN_FOLDER/llama-py/requirements.txt > /dev/null
+fi
+
+# Check and create llama-2-7b-burn model
+if [ ! -e "$BURN_MODEL_FOLDER/$BURN_MODEL_NAME.cfg" ]; then
+    check_and_create_directory "$BURN_MODEL_FOLDER"
+
+    if [ ! -d "$BURN_MODEL_FOLDER/params" ]; then
+        echo "Dumping model from $BURN_MODEL_INPUT_DIR to $BURN_MODEL_FOLDER"
+        python "$BURN_FOLDER/llama-py/dump_model.py" --model-dir "$BURN_MODEL_INPUT_DIR" --output-dir "$BURN_MODEL_FOLDER"
+        cp "$BURN_MODEL_INPUT_DIR/tokenizer.model" "$BURN_MODEL_FOLDER"
+        rm -r $BURN_MODEL_INPUT_DIR
+    else
+        echo "Model already dumped at $BURN_MODEL_FOLDER/params."
+    fi
+
+    echo "Converting dumped model to burn"
+    cargo run --manifest-path="$BURN_FOLDER/Cargo.toml" --bin convert -- "$BURN_MODEL_FOLDER/params" "$BURN_MODEL_NAME" "$BURN_MODEL_FOLDER"
+    rm -r "$BURN_MODEL_FOLDER/params"
+else
+    echo "Model llama-2-7b-burn already exists!"
+fi
diff --git a/bench_ctranslate/bench.sh b/bench_ctranslate/bench.sh
index 608b289c..1754cf23 100755
--- a/bench_ctranslate/bench.sh
+++ b/bench_ctranslate/bench.sh
@@ -1,10 +1,24 @@
 #!/bin/bash
 
+########################################################################################################
+# Script: bench.sh
+# Description: This script runs the ctranslate benchmark.
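+#              Note: with device 'metal', the underlying bench.py logs a skip
+#              message and exits 0, since ctranslate is disabled for metal.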
+#
+# Usage: ./bench.sh [OPTIONS]
+# OPTIONS:
+#   -p, --prompt      Prompt for benchmarks (default: 'Explain what is a transformer')
+#   -r, --repetitions Number of repetitions for benchmarks (default: 2)
+#   -m, --max_tokens  Maximum number of tokens for benchmarks (default: 100)
+#   -d, --device      Device for benchmarks (possible values: 'cuda', 'metal', and 'cpu', default: 'cpu')
+#   -lf, --log_file   Logging file name.
+#   -md, --models_dir Models directory.
+#   -h, --help        Show this help message
+########################################################################################################
+
 set -euo pipefail
 
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
-# Function to print script usage
 print_usage() {
     echo "Usage: $0 [OPTIONS]"
     echo "OPTIONS:"
diff --git a/bench_ctranslate/setup.sh b/bench_ctranslate/setup.sh
index ff98dec8..c2becea5 100644
--- a/bench_ctranslate/setup.sh
+++ b/bench_ctranslate/setup.sh
@@ -3,7 +3,7 @@
 ################################################################################
 # Script: setup.sh
 # Description: This script automates the setup of a virtual environment,
-# installs project requirements, converts and stores models.
+# installs project requirements, converts the model.
 ################################################################################
 
 set -euo pipefail
diff --git a/bench_llamacpp/bench.sh b/bench_llamacpp/bench.sh
index 2a916c91..9f8aea41 100755
--- a/bench_llamacpp/bench.sh
+++ b/bench_llamacpp/bench.sh
@@ -1,10 +1,24 @@
 #!/bin/bash
 
+########################################################################################################
+# Script: bench.sh
+# Description: This script runs the llama.cpp benchmark.
+#
+# Usage: ./bench.sh [OPTIONS]
+# OPTIONS:
+#   -p, --prompt      Prompt for benchmarks (default: 'Explain what is a transformer')
+#   -r, --repetitions Number of repetitions for benchmarks (default: 2)
+#   -m, --max_tokens  Maximum number of tokens for benchmarks (default: 100)
+#   -d, --device      Device for benchmarks (possible values: 'cuda', 'metal', and 'cpu', default: 'cpu')
+#   -lf, --log_file   Logging file name.
+#   -md, --models_dir Models directory.
+#   -h, --help        Show this help message
+########################################################################################################
+
 set -euo pipefail
 
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
-# Function to print script usage
 print_usage() {
     echo "Usage: $0 [OPTIONS]"
     echo "OPTIONS:"
@@ -103,7 +117,7 @@ while [ "$#" -gt 0 ]; do
                     print_usage
                     ;;
             esac
-            if [ "$DEVICE" == "cuda"]; then
+            if [ "$DEVICE" == "cuda" ]; then
                 check_cuda
             fi
             shift 2
diff --git a/bench_llamacpp/setup.sh b/bench_llamacpp/setup.sh
index d51470a0..340b6c67 100755
--- a/bench_llamacpp/setup.sh
+++ b/bench_llamacpp/setup.sh
@@ -8,13 +8,11 @@
 
 set -euo pipefail
 
-# Function to clone and build llama.cpp
 clone_and_build_llama() {
     local DEVICE="$1"
     local VENV_DIR="$2"
     local SCRIPT_DIR="$3"
 
-    # Check if DEVICE and ENV are provided as arguments
     if [ "$#" -ne 3 ]; then
         echo "Usage: $0 <DEVICE> <VENV_DIR> <SCRIPT_DIR>"
         exit 1
     fi

From 6b77d1ffb6781e463a7f4a4c329e9b9929b7e379 Mon Sep 17 00:00:00 2001
From: nsosio
Date: Wed, 22 Nov 2023 12:15:45 +0000
Subject: [PATCH 18/25] bugfix for tinygrad

---
 bench_burn/setup.sh     | 1 -
 bench_tinygrad/bench.sh | 7 +++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/bench_burn/setup.sh b/bench_burn/setup.sh
index 90f5698b..ad9b205e 100755
--- a/bench_burn/setup.sh
+++ b/bench_burn/setup.sh
@@ -48,7 +48,6 @@ if [ ! -e "$BURN_MODEL_FOLDER/$BURN_MODEL_NAME.cfg" ]; then
         echo "Dumping model from $BURN_MODEL_INPUT_DIR to $BURN_MODEL_FOLDER"
         python "$BURN_FOLDER/llama-py/dump_model.py" --model-dir "$BURN_MODEL_INPUT_DIR" --output-dir "$BURN_MODEL_FOLDER"
         cp "$BURN_MODEL_INPUT_DIR/tokenizer.model" "$BURN_MODEL_FOLDER"
-        rm -r $BURN_MODEL_INPUT_DIR
     else
         echo "Model already dumped at $BURN_MODEL_FOLDER/params."
     fi
diff --git a/bench_tinygrad/bench.sh b/bench_tinygrad/bench.sh
index 907bff48..a8f46b97 100755
--- a/bench_tinygrad/bench.sh
+++ b/bench_tinygrad/bench.sh
@@ -70,12 +70,11 @@ run_llama_experiment() {
     declare -a tokens_per_second_array=()
 
     for ((i=1; i<=$repetitions; i++)); do
-        tokens_per_second=$(python3 "$script_dir/tinygrad/examples/llama.py" \
-            --model "$models_dir/llama-2-7b-hf/pytorch_model.bin.index.json" \
+        tokens_per_second=$(python "$script_dir/tinygrad/examples/llama.py" \
+            --model "$models_dir/llama-2-7b-raw" \
             --prompt "$prompt" \
             --count "$max_tokens" \
             --timing \
-            | tee /dev/tty \
             | grep -E 'total [0-9]+[.][0-9]+ ms, [0-9]+[.][0-9]+ tok/sec' \
             | awk -F '[:, ]' '{ sum += $(NF-1); count++ } END { if (count > 0) print sum/count }'
         )
@@ -140,7 +139,7 @@ while [ "$#" -gt 0 ]; do
                     print_usage
                     ;;
             esac
-            if [ "$DEVICE" == "cuda"]; then
+            if [ "$DEVICE" == "cuda" ]; then
                 check_cuda
             fi
             shift 2

From 9e79b125ab42db18207a0c93a7b70c9ed69af68e Mon Sep 17 00:00:00 2001
From: nsosio
Date: Wed, 22 Nov 2023 15:02:24 +0000
Subject: [PATCH 19/25] bugfixes for burn setup; added shellcheck; removed rust_bench/llama2-burn

---
 .pre-commit-config.yaml                       |    6 +
 bench.py                                      |    1 -
 bench_burn/bench.sh                           |   23 +-
 bench_burn/setup.sh                           |   13 +-
 bench_ctranslate/bench.sh                     |    9 +-
 bench_ctranslate/setup.sh                     |    9 +-
 bench_llamacpp/bench.sh                       |    9 +-
 bench_tinygrad/bench.sh                       |   13 +-
 bench_tinygrad/setup.sh                       |    8 +-
 benchmark.sh                                  |  240 +-
 download.sh                                   |    2 +-
 rust_bench/llama2-burn/Cargo.lock             | 2990 -----------------
 rust_bench/llama2-burn/Cargo.toml             |   17 -
 rust_bench/llama2-burn/LICENSE                |   21 -
 rust_bench/llama2-burn/README.md              |  109 -
 rust_bench/llama2-burn/llama-py/dump.py       |   83 -
 rust_bench/llama2-burn/llama-py/dump_model.py |  109 -
 rust_bench/llama2-burn/llama-py/dump_test.py  |   37 -
 rust_bench/llama2-burn/llama-py/model.py      |  269 --
 .../llama2-burn/llama-py/requirements.txt     |    3 -
 rust_bench/llama2-burn/llama-py/test.py       |   88 -
 .../llama2-burn/llama-py/test_tokenizer.py    |   12 -
 rust_bench/llama2-burn/llama-py/tokenizer.py  |   38 -
 .../llama2-burn/src/bin/convert/main.rs       |   69 -
 rust_bench/llama2-burn/src/bin/sample/main.rs |  196 --
 rust_bench/llama2-burn/src/lib.rs             |    2 -
 rust_bench/llama2-burn/src/model.rs           |  664 ----
 rust_bench/llama2-burn/src/token.rs           |   66 -
 28 files changed, 104 insertions(+), 5002 deletions(-)
 delete mode 100644 rust_bench/llama2-burn/Cargo.lock
 delete mode 100644 rust_bench/llama2-burn/Cargo.toml
 delete mode 100644 rust_bench/llama2-burn/LICENSE
 delete mode 100644 rust_bench/llama2-burn/README.md
 delete mode 100644 rust_bench/llama2-burn/llama-py/dump.py
 delete mode 100644 rust_bench/llama2-burn/llama-py/dump_model.py
 delete mode 100644 rust_bench/llama2-burn/llama-py/dump_test.py
 delete mode 100644 rust_bench/llama2-burn/llama-py/model.py
 delete mode 100644 rust_bench/llama2-burn/llama-py/requirements.txt
 delete mode 100644 rust_bench/llama2-burn/llama-py/test.py
 delete mode 100644 rust_bench/llama2-burn/llama-py/test_tokenizer.py
 delete mode 100644 rust_bench/llama2-burn/llama-py/tokenizer.py
 delete mode 100644 rust_bench/llama2-burn/src/bin/convert/main.rs
 delete mode 100644 rust_bench/llama2-burn/src/bin/sample/main.rs
delete mode 100644 rust_bench/llama2-burn/src/lib.rs delete mode 100644 rust_bench/llama2-burn/src/model.rs delete mode 100644 rust_bench/llama2-burn/src/token.rs diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index af896c6a..b9ae8d39 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,6 +31,12 @@ repos: args: ["--config=setup.cfg"] additional_dependencies: [flake8-isort] + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.9.0.6 + hooks: + - id: shellcheck + exclude: setup.sh + ci: autoupdate_schedule: weekly skip: [] diff --git a/bench.py b/bench.py index ca316af5..59d92e61 100644 --- a/bench.py +++ b/bench.py @@ -4,7 +4,6 @@ from collections import defaultdict import numpy as np - from python_bench.ctranslate import CTranslateBenchmark, get_compute_types from python_bench.llama_cpp import LlamaCPPBenchmark from python_bench.tinygrad import TinyGradBenchmark diff --git a/bench_burn/bench.sh b/bench_burn/bench.sh index c2aaac3b..640654c2 100755 --- a/bench_burn/bench.sh +++ b/bench_burn/bench.sh @@ -42,8 +42,18 @@ check_cuda() { fi } +check_rust() { + if which cargo &>/dev/null ; then + echo -e "\nRust is installed. Using $(which cargo)" + else + echo -e "\nRust is not installed. Please install Rust before proceeding." + exit 1 # Error exit code + fi +} + check_platform() { - local platform=$(uname -s) + local platform + platform=$(uname -s) if [[ "$platform" == "Linux" ]]; then echo "Running on Linux." elif [[ "$platform" == "Darwin" ]]; then @@ -66,7 +76,7 @@ check_python() { setup() { echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." - bash $SCRIPT_DIR/setup.sh "$1" + bash "$SCRIPT_DIR/setup.sh" "$1" } run_benchmarks() { @@ -93,13 +103,13 @@ run_benchmarks() { "$MODELS_DIR/llama-2-7b-burn/llama-2-7b-burn" \ "$MODELS_DIR/llama-2-7b-burn/tokenizer.model" \ "$PROMPT" \ - $MAX_TOKENS \ - $DEVICE \ - $REPETITIONS + "$MAX_TOKENS" \ + "$DEVICE" \ + "$REPETITIONS" ) mean=$(echo "$benchmark_output" | grep -oP '\d+\.\d+ ± \d+\.\d+' | awk -F ' ± ' '{print $1}') std=$(echo "$benchmark_output" | grep -oP '\d+\.\d+ ± \d+\.\d+' | awk -F ' ± ' '{print $2}') - echo "burn, float16 : $(printf "%.2f" $mean) ± $(printf "%.2f" $std)" >> "$LOG_FILENAME" + echo "burn, float16 : $(printf "%.2f" "$mean") ± $(printf "%.2f" "$std")" >> "$LOG_FILENAME" } # Parse command-line arguments while [ "$#" -gt 0 ]; do @@ -162,6 +172,7 @@ LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}" MODELS_DIR="${MODELS_DIR:-"./models"}" check_platform +check_rust check_python setup "$MODELS_DIR" run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR" diff --git a/bench_burn/setup.sh b/bench_burn/setup.sh index ad9b205e..4b668e0c 100755 --- a/bench_burn/setup.sh +++ b/bench_burn/setup.sh @@ -34,10 +34,10 @@ if [ ! -d "$VENV_DIR" ]; then source "$VENV_DIR/bin/activate" pip install --upgrade pip > /dev/null if [ -d "$BURN_FOLDER" ]; then - rm -rf $BURN_FOLDER + rm -rf "$BURN_FOLDER" fi - git clone --depth=1 https://github.com/premAI-io/llama2-burn.git $BURN_FOLDER - pip install -r $BURN_FOLDER/llama-py/requirements.txt > /dev/null + git clone --depth=1 https://github.com/premAI-io/llama2-burn.git "$BURN_FOLDER" + pip install -r "$BURN_FOLDER"/llama-py/requirements.txt > /dev/null fi # Check and create llama-2-7b-burn model @@ -46,14 +46,17 @@ if [ ! -e "$BURN_MODEL_FOLDER/$BURN_MODEL_NAME.cfg" ]; then if [ ! 
-d "$BURN_MODEL_FOLDER/params" ]; then echo "Dumping model from $BURN_MODEL_INPUT_DIR to $BURN_MODEL_FOLDER" - python "$BURN_FOLDER/llama-py/dump_model.py" --model-dir "$BURN_MODEL_INPUT_DIR" --output-dir "$BURN_MODEL_FOLDER" + python "$BURN_FOLDER/llama-py/dump_model.py" "$BURN_MODEL_INPUT_DIR" "$BURN_MODEL_INPUT_DIR/tokenizer.model" + mv "$(pwd)/params" "$BURN_MODEL_FOLDER" cp "$BURN_MODEL_INPUT_DIR/tokenizer.model" "$BURN_MODEL_FOLDER" else echo "Model already dumped at $BURN_MODEL_FOLDER/params." fi echo "Converting dumped model to burn" - cargo run --manifest-path="$BURN_FOLDER/Cargo.toml" --bin convert -- "$BURN_MODEL_FOLDER/params" "$BURN_MODEL_NAME" "$BURN_MODEL_FOLDER" + cargo run --manifest-path="$BURN_FOLDER/Cargo.toml" --bin convert -- "$BURN_MODEL_FOLDER/params" "$BURN_MODEL_NAME" + mv "$BURN_MODEL_NAME.bin" "$BURN_MODEL_FOLDER" + mv "$BURN_MODEL_NAME.cfg" "$BURN_MODEL_FOLDER" rm -r "$BURN_MODEL_FOLDER/params" else echo "Model llama-2-7b-burn already exists!" diff --git a/bench_ctranslate/bench.sh b/bench_ctranslate/bench.sh index 1754cf23..9e823e40 100755 --- a/bench_ctranslate/bench.sh +++ b/bench_ctranslate/bench.sh @@ -32,7 +32,8 @@ print_usage() { } check_platform() { - local platform=$(uname -s) + local platform + platform=$(uname -s) if [[ "$platform" == "Linux" ]]; then echo "Running on Linux." elif [[ "$platform" == "Darwin" ]]; then @@ -55,7 +56,7 @@ check_python() { setup() { echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." - bash $SCRIPT_DIR/setup.sh "$1" + bash "$SCRIPT_DIR"/setup.sh "$1" } run_benchmarks() { @@ -66,10 +67,10 @@ run_benchmarks() { local LOG_FILENAME="$5" local MODELS_DIR="$6" - python $SCRIPT_DIR/bench.py \ + python "$SCRIPT_DIR"/bench.py \ --prompt "$PROMPT" \ --repetitions "$REPETITIONS" \ - --max_tokens $MAX_TOKENS \ + --max_tokens "$MAX_TOKENS" \ --log_file "$LOG_FILENAME" \ --models_dir "$MODELS_DIR" \ --device "$DEVICE" diff --git a/bench_ctranslate/setup.sh b/bench_ctranslate/setup.sh index c2becea5..d38480b3 100644 --- a/bench_ctranslate/setup.sh +++ b/bench_ctranslate/setup.sh @@ -22,14 +22,13 @@ LLAMA_HF_MODEL_DIR="$MODELS_FOLDER/llama-2-7b-hf" if [ ! -d "$VENV_DIR" ]; then python -m venv "$VENV_DIR" echo "Virtual environment '$VENV_DIR' created." - source $VENV_DIR/bin/activate - $VENV_DIR/bin/pip install --upgrade pip > /dev/null + source "$VENV_DIR"/bin/activate + pip install --upgrade pip > /dev/null + pip install -r "$SCRIPT_DIR"/requirements.txt > /dev/null else - source $VENV_DIR/bin/activate + source "$VENV_DIR"/bin/activate fi -$VENV_DIR/bin/pip install -r $SCRIPT_DIR/requirements.txt > /dev/null - if [ ! -d "$LLAMA_HF_MODEL_DIR-float16" ]; then echo "Creating llama-2-7b-hf-float16 model..." ct2-transformers-converter --model "$LLAMA_HF_MODEL_DIR/" --quantization float16 --output_dir "$LLAMA_HF_MODEL_DIR-float16" --copy_files tokenizer.model diff --git a/bench_llamacpp/bench.sh b/bench_llamacpp/bench.sh index 9f8aea41..af5c7e62 100755 --- a/bench_llamacpp/bench.sh +++ b/bench_llamacpp/bench.sh @@ -43,7 +43,8 @@ check_cuda() { } check_platform() { - local platform=$(uname -s) + local platform + platform=$(uname -s) if [[ "$platform" == "Linux" ]]; then echo "Running on Linux." elif [[ "$platform" == "Darwin" ]]; then @@ -66,7 +67,7 @@ check_python() { setup() { echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." 
- bash $SCRIPT_DIR/setup.sh "$1" + bash "$SCRIPT_DIR"/setup.sh "$1" } run_benchmarks() { @@ -82,10 +83,10 @@ run_benchmarks() { echo "LLAMA_CPP_LIB=$LLAMA_CPP_LIB" fi - python $SCRIPT_DIR/bench.py \ + python "$SCRIPT_DIR"/bench.py \ --prompt "$PROMPT" \ --repetitions "$REPETITIONS" \ - --max_tokens $MAX_TOKENS \ + --max_tokens "$MAX_TOKENS" \ --log_file "$LOG_FILENAME" \ --models_dir "$MODELS_DIR" \ --device "$DEVICE" diff --git a/bench_tinygrad/bench.sh b/bench_tinygrad/bench.sh index a8f46b97..bfd3b74d 100755 --- a/bench_tinygrad/bench.sh +++ b/bench_tinygrad/bench.sh @@ -29,7 +29,8 @@ check_cuda() { } check_platform() { - local platform=$(uname -s) + local platform + platform=$(uname -s) if [[ "$platform" == "Linux" ]]; then echo "Running on Linux." elif [[ "$platform" == "Darwin" ]]; then @@ -52,7 +53,7 @@ check_python() { setup() { echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." - bash $SCRIPT_DIR/setup.sh "$1" + bash "$SCRIPT_DIR"/setup.sh "$1" } run_llama_experiment() { @@ -69,7 +70,7 @@ run_llama_experiment() { declare -a tokens_per_second_array=() - for ((i=1; i<=$repetitions; i++)); do + for ((i=1; i<=repetitions; i++)); do tokens_per_second=$(python "$script_dir/tinygrad/examples/llama.py" \ --model "$models_dir/llama-2-7b-raw" \ --prompt "$prompt" \ @@ -93,10 +94,12 @@ run_benchmarks() { local LOG_FILENAME="$5" local MODELS_DIR="$6" + # shellcheck disable=SC1091 source "$SCRIPT_DIR/venv/bin/activate" # Assign the result to an array variable - result_array=($(run_llama_experiment "$MODELS_DIR" "$SCRIPT_DIR" "$PROMPT" "$MAX_TOKENS" $REPETITIONS "$DEVICE")) + # shellcheck disable=SC2207 + result_array=($(run_llama_experiment "$MODELS_DIR" "$SCRIPT_DIR" "$PROMPT" "$MAX_TOKENS" "$REPETITIONS" "$DEVICE")) total=0 for value in "${result_array[@]}"; do @@ -111,7 +114,7 @@ run_benchmarks() { done variance=$(echo "$sum_squared_diff / ${#result_array[@]}" | bc -l) std=$(echo "sqrt($variance)" | bc -l) - echo "tinygrad, float16 : $(printf "%.2f" $mean) ± $(printf "%.2f" $std)" >> "$LOG_FILENAME" + echo "tinygrad, float16 : $(printf "%.2f" "$mean") ± $(printf "%.2f" "$std")" >> "$LOG_FILENAME" } # Parse command-line arguments diff --git a/bench_tinygrad/setup.sh b/bench_tinygrad/setup.sh index ac6a6f73..e1baa4a9 100755 --- a/bench_tinygrad/setup.sh +++ b/bench_tinygrad/setup.sh @@ -17,9 +17,9 @@ if [ ! -d "$VENV_DIR" ]; then echo "Virtual environment '$VENV_DIR' created." source "$VENV_DIR/bin/activate" pip install --upgrade pip > /dev/null - git clone --depth=1 https://github.com/tinygrad/tinygrad.git $SCRIPT_DIR/tinygrad - cd $SCRIPT_DIR/tinygrad - pip install -e . - pip install sentencepiece + git clone --depth=1 https://github.com/tinygrad/tinygrad.git "$SCRIPT_DIR"/tinygrad + cd "$SCRIPT_DIR"/tinygrad + pip install -e . > /dev/null + pip install sentencepiece > /dev/null cd .. fi diff --git a/benchmark.sh b/benchmark.sh index c6204a0b..b92d9e51 100755 --- a/benchmark.sh +++ b/benchmark.sh @@ -1,84 +1,24 @@ #!/bin/bash - -############################################################################################## -# Script: run_benchmarks.sh -# Description: This script runs benchmarks for a transformer model using both -# Rust and Python implementations. It provides options to customize the -# benchmarks, such as the prompt, repetitions, maximum tokens, device, and NVIDIA flag. 
-# -# Usage: ./run_benchmarks.sh [OPTIONS] -# OPTIONS: -# -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer') -# -r, --repetitions Number of repetitions for benchmarks (default: 2) -# -m, --max_tokens Maximum number of tokens for benchmarks (default: 100) -# -d, --device Device for benchmarks (possible values: 'gpu' or 'cpu', default: 'cpu') -# --nvidia Use NVIDIA for benchmarks (default: false) -# -h, --help Show this help message -############################################################################################## - set -euo pipefail -# Function to print script usage print_usage() { echo "Usage: $0 [OPTIONS]" echo "OPTIONS:" - echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" - echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" - echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" - echo " -d, --device Device for benchmarks (possible values: 'gpu' or 'cpu', default: 'cpu')" - echo " --nvidia Use NVIDIA for benchmarks (default: false)" - echo " -h, --help Show this help message" + echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" + echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" + echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -lf, --log_file Logging file name." + echo " -md, --models_dir Models directory." + echo " -h, --help Show this help message" exit 1 } -# Function to check the platform -check_platform() { - local platform=$(uname -s) - if [[ "$platform" == "Linux" ]]; then - echo "Running on Linux." - check_cuda - elif [[ "$platform" == "Darwin" ]]; then - echo "Running on Mac OS." - else - echo "Unknown platform." - exit 1 - fi -} - -# Function to check if CUDA is available -check_cuda() { - if command -v nvcc &> /dev/null - then - echo -e "\nUsing CUDA" - nvcc --version - else - echo -e "\nCUDA is not available." - exit 1 - fi -} - -# Function to check if Python exists -check_python() { - if command -v python &> /dev/null - then - echo -e "\nUsing $(python --version)." - else - echo -e "\nPython does not exist." - exit 1 - fi -} -# Function to check if rust is installed -check_rust() { - if which cargo &>/dev/null ; then - echo -e "\nRust is installed. Using $(which cargo)" - else - echo -e "\nRust is not installed. Please install Rust before proceeding." - exit 1 # Error exit code - fi +download_models() { + echo -e "\nDownloading models..." + bash ./download.sh --models models.json --cache cache.log } -# Function to check if jq is installed check_jq() { if ! command -v jq &> /dev/null then @@ -87,108 +27,6 @@ check_jq() { fi } -get_torch_cuda_version() { - # Get the full CUDA version using nvcc - CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}') - - # Remove dots from the CUDA version - CUDA_VERSION_NUMERIC=$(echo "${CUDA_VERSION}" | tr -d '.') - - # Set TORCH_CUDA_VERSION variable - TORCH_CUDA_VERSION="cu${CUDA_VERSION_NUMERIC}" - - # Return the dynamically set variable - echo "${TORCH_CUDA_VERSION}" -} - -# Function to download models -download_models() { - echo -e "\nDownloading models..." - bash ./download.sh --models models.json --cache cache.log -} - -# Function to set up -setup() { - echo -e "\nSetting up..." 
- bash ./setup.sh -} - -# Function to run python benchmarks -run_benchmarks() { - local PROMPT="$1" - local REPETITIONS="$2" - local MAX_TOKENS="$3" - local DEVICE="$4" - local USE_NVIDIA="$5" - local LOG_FILENAME="$6" - local DIR=$(pwd) - local CARGO_CANDLE_FEATURES="" - local PYTHON_DEVICE="" - local PYTHON_NVIDIA="" - local PLATFORM=$(uname -s) - - echo "Running benchmarks with the following parameters:" - echo " Prompt: $PROMPT" - echo " Repetitions: $REPETITIONS" - echo " Max Tokens: $MAX_TOKENS" - echo " Device: $DEVICE" - echo " NVIDIA: $USE_NVIDIA" - - echo "Running rust benchmarks..." - source ./venv/bin/activate - - if [ "$DEVICE" == "cpu" ] || [ "$USE_NVIDIA" == true ]; then - # Run Rust benchmarks - if [ "$DEVICE" == "gpu" ] && [ "$PLATFORM" != "Darwin" ]; then - TORCH_CUDA_VERSION=$(get_torch_cuda_version) - fi - cargo run --release --bin sample \ - --manifest-path="$DIR/rust_bench/llama2-burn/Cargo.toml" \ - "$DIR/models/llama-2-7b-burn/llama-2-7b-burn" \ - "$DIR/models/llama-2-7b-burn/tokenizer.model" \ - "$PROMPT" \ - $MAX_TOKENS \ - $DEVICE \ - $REPETITIONS \ - "$LOG_FILENAME" - fi - - if [ "$DEVICE" == "cpu" ] || [ "$USE_NVIDIA" == true ]; then - # Set features option based on $DEVICE - [ "$DEVICE" == "gpu" ] && CARGO_CANDLE_FEATURES="--features cuda" - - cargo run --release $CARGO_CANDLE_FEATURES \ - --manifest-path="$DIR/rust_bench/llama2-candle/Cargo.toml" \ - -- --local-weights "$DIR/models/llama-2-7b-st/" \ - --repetitions "$REPETITIONS" \ - --prompt "$PROMPT" \ - --sample-len $MAX_TOKENS \ - --log-file $LOG_FILENAME - fi - - # Set options based on $DEVICE and $USE_NVIDIA - [ "$DEVICE" == "gpu" ] && PYTHON_DEVICE="--gpu" - [ "$USE_NVIDIA" == true ] && PYTHON_NVIDIA="--nvidia" - - cd $DIR - echo "Running python benchmarks..." - python ./bench.py \ - --prompt "$PROMPT" \ - --repetitions "$REPETITIONS" \ - --max_tokens $MAX_TOKENS \ - --log_file "$LOG_FILENAME" \ - $PYTHON_DEVICE \ - $PYTHON_NVIDIA - deactivate -} - -# Default values -DEFAULT_PROMPT="Explain what is a transformer" -DEFAULT_REPETITIONS=10 -DEFAULT_MAX_TOKENS=100 -DEFAULT_DEVICE="gpu" -USE_NVIDIA=false - # Parse command-line arguments while [ "$#" -gt 0 ]; do case "$1" in @@ -207,22 +45,22 @@ while [ "$#" -gt 0 ]; do -d|--device) DEVICE="$2" case "$DEVICE" in - "gpu" | "cpu") + "cuda" | "metal" | "cpu") ;; *) - echo "Invalid value for --device. Please use 'gpu' or 'cpu'." + echo "Invalid value for --device. Please use 'cuda', 'gpu' or 'cpu'." print_usage ;; esac shift 2 ;; - --nvidia) - USE_NVIDIA=true - if [ "$DEVICE" != "gpu" ]; then - echo "Error: The '--nvidia' flag can only be used with 'gpu' as the device." 
- print_usage - fi - shift 1 + -lf|--log_file) + LOG_FILENAME="$2" + shift 2 + ;; + -md|--models_dir) + MODELS_DIR="$2" + shift 2 ;; -h|--help) print_usage @@ -234,19 +72,33 @@ while [ "$#" -gt 0 ]; do esac done -# Set default values if not provided -PROMPT="${PROMPT:-$DEFAULT_PROMPT}" -REPETITIONS="${REPETITIONS:-$DEFAULT_REPETITIONS}" -MAX_TOKENS="${MAX_TOKENS:-$DEFAULT_MAX_TOKENS}" -DEVICE="${DEVICE:-$DEFAULT_DEVICE}" - -timestamp=$(date +"%Y%m%d%H%M%S") -log_filename="benchmark_${timestamp}.log" - -check_platform -check_python -check_rust check_jq download_models -setup -run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" $USE_NVIDIA "$log_filename" + + +PROMPT="${PROMPT:-"Explain what is a transformer"}" +REPETITIONS="${REPETITIONS:-10}" +MAX_TOKENS="${MAX_TOKENS:-100}" +DEVICE="${DEVICE:-'cpu'}" +LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}" +MODELS_DIR="${MODELS_DIR:-"./models"}" + +folders=$(find . -type d -name "bench_*") + +for folder in $folders; do + if [ -d "$folder" ]; then + echo "Running benchmark $folder/bench.sh..." + + if ! bash "$folder/bench.sh" \ + --prompt "$PROMPT" \ + --repetitions "$REPETITIONS" \ + --max_tokens "$MAX_TOKENS" \ + --models_dir "$MODELS_DIR" \ + --log_file "$LOG_FILENAME" \ + --device "$DEVICE"; then + echo "Error: Something went wrong in $folder/bench.sh" + else + echo "Success: $folder/bench.sh completed successfully" + fi + fi +done diff --git a/download.sh b/download.sh index 85d31847..c6354b60 100755 --- a/download.sh +++ b/download.sh @@ -96,7 +96,7 @@ fi if $force_download; then echo "Force download enabled. Removing all files in the models folder and cache file." rm -rf ./models/* - > "$cache_file" + rm "$cache_file" fi # Read the JSON file diff --git a/rust_bench/llama2-burn/Cargo.lock b/rust_bench/llama2-burn/Cargo.lock deleted file mode 100644 index 75fb3335..00000000 --- a/rust_bench/llama2-burn/Cargo.lock +++ /dev/null @@ -1,2990 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "aes" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2" -dependencies = [ - "cfg-if", - "cipher", - "cpufeatures", -] - -[[package]] -name = "ahash" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", -] - -[[package]] -name = "aho-corasick" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" -dependencies = [ - "memchr 2.5.0", -] - -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - -[[package]] -name = "anstream" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" - -[[package]] -name = "anstyle-parse" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" -dependencies = [ - "windows-sys 0.48.0", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" -dependencies = [ - "anstyle", - "windows-sys 0.48.0", -] - -[[package]] -name = "anyhow" -version = "1.0.72" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" - -[[package]] -name = "arc-swap" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" - -[[package]] -name = "atty" 
-version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "base64" -version = "0.21.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" - -[[package]] -name = "base64ct" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" - -[[package]] -name = "bincode" -version = "2.0.0-rc.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f11ea1a0346b94ef188834a65c068a03aec181c94896d481d7a0a40d85b0ce95" -dependencies = [ - "serde", -] - -[[package]] -name = "bit_field" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "bumpalo" -version = "3.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" - -[[package]] -name = "burn" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "burn-core", - "burn-train", -] - -[[package]] -name = "burn-autodiff" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "burn-common", - "burn-tensor", - "burn-tensor-testgen", - "derive-new", - "spin 0.9.8", -] - -[[package]] -name = "burn-common" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "const-random", - "rand", - "spin 0.9.8", - "uuid", -] - -[[package]] -name = "burn-core" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "bincode", - "burn-autodiff", - "burn-common", - "burn-dataset", - "burn-derive", - "burn-tensor", - "derive-new", - "flate2", - "half", - "hashbrown 0.14.0", - "libm", - "log", - "rand", - "rmp-serde", - "serde", - "serde_json", - "spin 0.9.8", -] - -[[package]] -name = "burn-dataset" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "csv", - "derive-new", - "dirs", - "gix-tempfile", - "image", - "r2d2", - "r2d2_sqlite", - "rand", - "rmp-serde", - "rusqlite", - "sanitize-filename", - "serde", - "serde_json", - 
"serde_rusqlite", - "strum", - "strum_macros", - "tempfile", - "thiserror", -] - -[[package]] -name = "burn-derive" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.27", -] - -[[package]] -name = "burn-tch" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "burn-tensor", - "half", - "libc", - "rand", - "tch", -] - -[[package]] -name = "burn-tensor" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "derive-new", - "half", - "hashbrown 0.14.0", - "libm", - "num-traits", - "rand", - "rand_distr", - "serde", -] - -[[package]] -name = "burn-tensor-testgen" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.27", -] - -[[package]] -name = "burn-train" -version = "0.9.0" -source = "git+https://github.com/burn-rs/burn.git#ed255c5561b85876cf02cbc4d48f35e1f0d29ac0" -dependencies = [ - "burn-core", - "derive-new", - "indicatif", - "log", - "log4rs", - "nvml-wrapper", - "rgb", - "serde", - "terminal_size", - "textplots", -] - -[[package]] -name = "bytemuck" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "cc" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" -dependencies = [ - "jobserver", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", - "num-traits", - "time 0.1.45", - "wasm-bindgen", - "winapi", -] - -[[package]] -name = "cipher" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" -dependencies = [ - "crypto-common", - "inout", -] - -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags 1.3.2", - "strsim 0.8.0", - "textwrap", - "unicode-width", - "vec_map", 
-] - -[[package]] -name = "clap" -version = "4.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b" -dependencies = [ - "clap_builder", - "clap_derive", -] - -[[package]] -name = "clap_builder" -version = "4.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663" -dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim 0.10.0", -] - -[[package]] -name = "clap_derive" -version = "4.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "syn 2.0.27", -] - -[[package]] -name = "clap_lex" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" - -[[package]] -name = "color_quant" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" - -[[package]] -name = "colorchoice" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" - -[[package]] -name = "colored" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6" -dependencies = [ - "is-terminal", - "lazy_static", - "windows-sys 0.48.0", -] - -[[package]] -name = "console" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" -dependencies = [ - "encode_unicode", - "lazy_static", - "libc", - "unicode-width", - "windows-sys 0.45.0", -] - -[[package]] -name = "const-random" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" -dependencies = [ - "const-random-macro", - "proc-macro-hack", -] - -[[package]] -name = "const-random-macro" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" -dependencies = [ - "getrandom", - "once_cell", - "proc-macro-hack", - "tiny-keccak", -] - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "core-foundation-sys" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" - -[[package]] -name = "cpufeatures" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.8" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" -dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" -dependencies = [ - "autocfg", - "cfg-if", - "crossbeam-utils", - "memoffset", - "scopeguard", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "csv" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr 2.5.0", -] - -[[package]] -name = "darling" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.9.3", - "syn 1.0.109", -] - -[[package]] -name = "darling_macro" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" -dependencies = [ - "darling_core", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "dashmap" -version = "5.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6943ae99c34386c84a470c499d3414f66502a41340aa895406e0d2e4a207b91d" -dependencies = [ - "cfg-if", - "hashbrown 0.14.0", - "lock_api", - "once_cell", - "parking_lot_core", -] - -[[package]] -name = "derivative" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "derive-new" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "destructure_traitobject" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c877555693c14d2f84191cfd3ad8582790fc52b5e2274b40b59cf5f5cea25c7" - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "dirs" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" -dependencies = [ - "libc", - "option-ext", - "redox_users", - "windows-sys 0.48.0", -] - -[[package]] -name = "drawille" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e64e461c3f1e69d99372620640b3fd5f0309eeda2e26e4af69f6760c0e1df845" -dependencies = [ - "colored", - "fnv", -] - -[[package]] -name = "either" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" - -[[package]] -name = "encode_unicode" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" - -[[package]] -name = "env_logger" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95b3f3e67048839cb0d0781f445682a35113da7121f7c949db0e2be96a4fbece" -dependencies = [ - "humantime", - "is-terminal", - "log", - "regex", - "termcolor", -] - -[[package]] -name = "errno" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys 0.48.0", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "exr" -version = "1.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "279d3efcc55e19917fff7ab3ddd6c14afb6a90881a0078465196fe2f99d08c56" -dependencies = [ - "bit_field", - "flume", - "half", - "lebe", - "miniz_oxide", - "rayon-core", - "smallvec", - "zune-inflate", -] - -[[package]] -name = "fallible-iterator" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" - -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - -[[package]] -name = "fastrand" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" - -[[package]] -name = "fdeflate" 
-version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d329bdeac514ee06249dabc27877490f17f5d371ec693360768b838e19f3ae10" -dependencies = [ - "simd-adler32", -] - -[[package]] -name = "flate2" -version = "1.0.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "flume" -version = "0.10.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" -dependencies = [ - "futures-core", - "futures-sink", - "nanorand", - "pin-project", - "spin 0.9.8", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "form_urlencoded" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "futures-core" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" - -[[package]] -name = "futures-sink" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" -dependencies = [ - "cfg-if", - "js-sys", - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", - "wasm-bindgen", -] - -[[package]] -name = "gif" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80792593675e051cf94a4b111980da2ba60d4a83e43e0048c5693baab3977045" -dependencies = [ - "color_quant", - "weezl", -] - -[[package]] -name = "gix-features" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "882695cccf38da4c3cc7ee687bdb412cf25e37932d7f8f2c306112ea712449f1" -dependencies = [ - "gix-hash", - "gix-trace", - "libc", -] - -[[package]] -name = "gix-fs" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d5b6e9d34a2c61ea4a02bbca94c409ab6dbbca1348cbb67298cd7fed8758761" -dependencies = [ - "gix-features", -] - -[[package]] -name = "gix-hash" -version = "0.11.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b422ff2ad9a0628baaad6da468cf05385bf3f5ab495ad5a33cce99b9f41092f" -dependencies = [ - "hex", - "thiserror", -] - -[[package]] -name = "gix-tempfile" -version = "7.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa28d567848cec8fdd77d36ad4f5f78ecfaba7d78f647d4f63c8ae1a2cec7243" -dependencies = [ - "dashmap", - "gix-fs", - "libc", - "once_cell", - "parking_lot", - "signal-hook", - "signal-hook-registry", - "tempfile", -] - -[[package]] -name = "gix-trace" -version = 
"0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b6d623a1152c3facb79067d6e2ecdae48130030cf27d6eb21109f13bd7b836" - -[[package]] -name = "half" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" -dependencies = [ - "cfg-if", - "crunchy", - "num-traits", - "serde", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash", -] - -[[package]] -name = "hashbrown" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" -dependencies = [ - "ahash", - "allocator-api2", - "serde", -] - -[[package]] -name = "hashlink" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312f66718a2d7789ffef4f4b7b213138ed9f1eb3aa1d0d82fc99f88fb3ffd26f" -dependencies = [ - "hashbrown 0.14.0", -] - -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = "iana-time-zone" -version = "0.1.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "idna" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "image" -version = "0.24.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "527909aa81e20ac3a44803521443a765550f09b5130c2c2fa1ea59c2f8f50a3a" -dependencies = [ - "bytemuck", - "byteorder", - "color_quant", - "exr", - "gif", - "jpeg-decoder", - "num-rational", - "num-traits", - "png", - "qoi", - "tiff", -] - -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indicatif" -version = "0.17.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ff8cc23a7393a397ed1d7f56e6365cba772aba9f9912ab968b03043c395d057" -dependencies = [ - "console", - "instant", - "number_prefix", - "portable-atomic", - "unicode-width", -] - -[[package]] -name = "inout" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" -dependencies = [ - "generic-array", -] - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "io-lifetimes" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" -dependencies = [ - "hermit-abi 0.3.2", - "libc", - "windows-sys 0.48.0", -] - -[[package]] -name = "is-terminal" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" -dependencies = [ - "hermit-abi 0.3.2", - "rustix 0.38.4", - "windows-sys 0.48.0", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" - -[[package]] -name = "jobserver" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" -dependencies = [ - "libc", -] - -[[package]] -name = "jpeg-decoder" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc0000e42512c92e31c2252315bda326620a4e034105e900c98ec492fa077b3e" -dependencies = [ - "rayon", -] - -[[package]] -name = "js-sys" -version = "0.3.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "lebe" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" - -[[package]] -name = "libc" -version = "0.2.147" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" - -[[package]] -name = "libloading" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if", - "winapi", -] - -[[package]] -name = "libm" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" - -[[package]] -name = "libsqlite3-sys" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "linked-hash-map" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" - -[[package]] -name = "linux-raw-sys" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" - -[[package]] -name = "linux-raw-sys" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" - -[[package]] -name = "llama" -version = "0.1.0" -dependencies = [ - "burn", - "burn-tch", - "clap 4.4.7", - "env_logger", - "log", - "npy", - "num-traits", - "rust_tokenizers", - "serde", -] - -[[package]] -name = "lock_api" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" -dependencies = [ - "serde", -] - -[[package]] -name = "log-mdc" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a94d21414c1f4a51209ad204c1776a3d0765002c76c6abcb602a6f09f1e881c7" - -[[package]] -name = "log4rs" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d36ca1786d9e79b8193a68d480a0907b612f109537115c6ff655a3a1967533fd" -dependencies = [ - "anyhow", - "arc-swap", - "chrono", - "derivative", - "fnv", - "humantime", - "libc", - "log", - "log-mdc", - "parking_lot", - "serde", - "serde-value", - "serde_json", - "serde_yaml", - "thiserror", - "thread-id", - "typemap-ors", - "winapi", -] - -[[package]] -name = "matrixmultiply" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" -dependencies = [ - "autocfg", - "rawpointer", -] - -[[package]] -name = "memchr" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a" 
-dependencies = [ - "libc", -] - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "memoffset" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" -dependencies = [ - "autocfg", -] - -[[package]] -name = "meval" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79496a5651c8d57cd033c5add8ca7ee4e3d5f7587a4777484640d9cb60392d9" -dependencies = [ - "fnv", - "nom 1.2.4", -] - -[[package]] -name = "miniz_oxide" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" -dependencies = [ - "adler", - "simd-adler32", -] - -[[package]] -name = "nanorand" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" -dependencies = [ - "getrandom", -] - -[[package]] -name = "ndarray" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "rawpointer", -] - -[[package]] -name = "nom" -version = "1.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5b8c256fd9471521bcb84c3cdba98921497f1a331cbc15b8030fc63b82050ce" - -[[package]] -name = "nom" -version = "3.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05aec50c70fd288702bcd93284a8444607f3292dbdf2a30de5ea5dcdbe72287b" -dependencies = [ - "memchr 1.0.2", -] - -[[package]] -name = "npy" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "428bbb4dd63ca4318d430b61e64e9ba7809d11525605905eaa88c73b01d91ed4" -dependencies = [ - "byteorder", - "nom 3.2.1", -] - -[[package]] -name = "num-complex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.2", - "libc", -] - -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - -[[package]] -name = "nvml-wrapper" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cd21b9f5a1cce3c3515c9ffa85f5c7443e07162dae0ccf4339bb7ca38ad3454" -dependencies = [ - "bitflags 1.3.2", - "libloading", - "nvml-wrapper-sys", - "static_assertions", - "thiserror", - "wrapcenum-derive", -] - -[[package]] -name = "nvml-wrapper-sys" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c961a2ea9e91c59a69b78e69090f6f5b867bb46c0c56de9482da232437c4987e" -dependencies = [ - "libloading", -] - -[[package]] -name = "once_cell" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" - -[[package]] -name = "option-ext" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" - -[[package]] -name = "ordered-float" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" -dependencies = [ - "num-traits", -] - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall 0.3.5", - "smallvec", - "windows-targets 0.48.1", -] - -[[package]] -name = "password-hash" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" -dependencies = [ - "base64ct", - "rand_core", - "subtle", -] - -[[package]] -name = "paste" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" - -[[package]] -name = "pbkdf2" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" -dependencies = [ - "digest", - "hmac", - "password-hash", - "sha2", -] - -[[package]] -name = "percent-encoding" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" - -[[package]] -name = "pin-project" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "030ad2bc4db10a8944cb0d837f158bdfec4d4a4873ab701a95046770d11f8842" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.27", -] - -[[package]] -name = "pkg-config" -version = "0.3.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" - -[[package]] -name = 
"png" -version = "0.17.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59871cc5b6cce7eaccca5a802b4173377a1c2ba90654246789a8fa2334426d11" -dependencies = [ - "bitflags 1.3.2", - "crc32fast", - "fdeflate", - "flate2", - "miniz_oxide", -] - -[[package]] -name = "portable-atomic" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edc55135a600d700580e406b4de0d59cb9ad25e344a3a091a97ded2622ec4ec6" - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - -[[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" - -[[package]] -name = "proc-macro2" -version = "1.0.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "protobuf" -version = "2.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" - -[[package]] -name = "qoi" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001" -dependencies = [ - "bytemuck", -] - -[[package]] -name = "quote" -version = "1.0.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r2d2" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" -dependencies = [ - "log", - "parking_lot", - "scheduled-thread-pool", -] - -[[package]] -name = "r2d2_sqlite" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f31323d6161385f385046738df520e0e8694fa74852d35891fc0be08348ddc" -dependencies = [ - "r2d2", - "rusqlite", - "uuid", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_distr" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "rayon" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "num_cpus", -] - -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_users" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" -dependencies = [ - "getrandom", - "redox_syscall 0.2.16", - "thiserror", -] - -[[package]] -name = "regex" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" -dependencies = [ - "aho-corasick", - "memchr 2.5.0", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" -dependencies = [ - "aho-corasick", - "memchr 2.5.0", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" - -[[package]] -name = "rgb" -version = "0.8.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20ec2d3e3fc7a92ced357df9cebd5a10b6fb2aa1ee797bf7e9ce2f17dffc8f59" -dependencies = [ - "bytemuck", -] - -[[package]] -name = "ring" -version = "0.16.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted", - "web-sys", - "winapi", -] - -[[package]] -name = "rmp" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9860a6cc38ed1da53456442089b4dfa35e7cedaa326df63017af88385e6b20" -dependencies = [ - "byteorder", - "num-traits", - "paste", -] - -[[package]] -name = "rmp-serde" -version = "1.1.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffea85eea980d8a74453e5d02a8d93028f3c34725de143085a844ebe953258a" -dependencies = [ - "byteorder", - "rmp", - "serde", -] - -[[package]] -name = "rusqlite" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" -dependencies = [ - "bitflags 2.3.3", - "chrono", - "csv", - "fallible-iterator", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "serde_json", - "smallvec", - "time 0.3.23", - "url", - "uuid", -] - -[[package]] -name = "rust_tokenizers" -version = "8.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f367f6b13bc686e822237b97caeb4b2e366dd1936ec204f11d266ede402c31b" -dependencies = [ - "csv", - "hashbrown 0.13.2", - "itertools", - "lazy_static", - "protobuf", - "rayon", - "regex", - "serde", - "serde_json", - "thiserror", - "unicode-normalization", - "unicode-normalization-alignments", -] - -[[package]] -name = "rustix" -version = "0.37.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" -dependencies = [ - "bitflags 1.3.2", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys 0.3.8", - "windows-sys 0.48.0", -] - -[[package]] -name = "rustix" -version = "0.38.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" -dependencies = [ - "bitflags 2.3.3", - "errno", - "libc", - "linux-raw-sys 0.4.3", - "windows-sys 0.48.0", -] - -[[package]] -name = "rustls" -version = "0.21.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79ea77c539259495ce8ca47f53e66ae0330a8819f67e23ac96ca02f50e7b7d36" -dependencies = [ - "log", - "ring", - "rustls-webpki 0.101.2", - "sct", -] - -[[package]] -name = "rustls-webpki" -version = "0.100.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] -name = "rustls-webpki" -version = "0.101.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "513722fd73ad80a71f72b61009ea1b584bcfa1483ca93949c8f290298837fa59" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] -name = "rustversion" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" - -[[package]] -name = "ryu" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" - -[[package]] -name = "safetensors" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d818a2cb3f564a1844be835011acf5c7ec8ad1986a47f73abc7b5fea91cc3a" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "sanitize-filename" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603" -dependencies = [ - "lazy_static", - "regex", -] - -[[package]] -name = "scheduled-thread-pool" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" -dependencies = [ - "parking_lot", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "sct" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] -name = "serde" -version = "1.0.175" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde-value" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" -dependencies = [ - "ordered-float", - "serde", -] - -[[package]] -name = "serde_derive" -version = "1.0.175" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.27", -] - -[[package]] -name = "serde_json" -version = "1.0.103" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_rusqlite" -version = "0.33.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d47abf7a442693a91719a07972deed78a2a4a8cb418df8d454eee656c77cb8af" -dependencies = [ - "rusqlite", - "serde", -] - -[[package]] -name = "serde_yaml" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b" -dependencies = [ - "indexmap", - "ryu", - "serde", - "yaml-rust", -] - -[[package]] -name = "sha1" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sha2" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "signal-hook" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" -dependencies = [ - "libc", - "signal-hook-registry", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" -dependencies = [ - "libc", -] - -[[package]] -name = "simd-adler32" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" - -[[package]] -name = "smallvec" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" - -[[package]] -name = "spin" -version = "0.5.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -dependencies = [ - "lock_api", -] - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - -[[package]] -name = "strsim" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" - -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - -[[package]] -name = "structopt" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" -dependencies = [ - "clap 2.34.0", - "lazy_static", - "structopt-derive", -] - -[[package]] -name = "structopt-derive" -version = "0.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" -dependencies = [ - "heck 0.3.3", - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", -] - -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "tch" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cbd9ce6fb581a1b918db880b649d1364b50f7f6717eda8497bcdc929cddd4b9" -dependencies = [ - "half", - "lazy_static", - "libc", - "ndarray", - "rand", - "safetensors", - "thiserror", - "torch-sys", - "zip", -] - -[[package]] -name = "tempfile" -version = "3.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" -dependencies = [ - "cfg-if", - "fastrand", - 
"redox_syscall 0.3.5", - "rustix 0.38.4", - "windows-sys 0.48.0", -] - -[[package]] -name = "termcolor" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "terminal_size" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" -dependencies = [ - "rustix 0.37.23", - "windows-sys 0.48.0", -] - -[[package]] -name = "textplots" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c771910e9d7e37d50f6d39f5ce4c4351fd797dcaed9a348e911877401af1b799" -dependencies = [ - "drawille", - "meval", - "rgb", - "structopt", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "thiserror" -version = "1.0.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.27", -] - -[[package]] -name = "thread-id" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ee93aa2b8331c0fec9091548843f2c90019571814057da3b783f9de09349d73" -dependencies = [ - "libc", - "redox_syscall 0.2.16", - "winapi", -] - -[[package]] -name = "tiff" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7449334f9ff2baf290d55d73983a7d6fa15e01198faef72af07e2a8db851e471" -dependencies = [ - "flate2", - "jpeg-decoder", - "weezl", -] - -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446" -dependencies = [ - "itoa", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" - -[[package]] -name = "time-macros" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96ba15a897f3c86766b757e5ac7221554c6750054d74d5b28844fce5fb36a6c4" -dependencies = [ - "time-core", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - 
"tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "torch-sys" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42b2b81a479510717464df1d07c02cb4aebb26539a39b5db6637dda114a476cb" -dependencies = [ - "anyhow", - "cc", - "libc", - "serde", - "serde_json", - "ureq", - "zip", -] - -[[package]] -name = "typemap-ors" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a68c24b707f02dd18f1e4ccceb9d49f2058c2fb86384ef9972592904d7a28867" -dependencies = [ - "unsafe-any-ors", -] - -[[package]] -name = "typenum" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" - -[[package]] -name = "unicode-bidi" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" - -[[package]] -name = "unicode-ident" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" - -[[package]] -name = "unicode-normalization" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-normalization-alignments" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" -dependencies = [ - "smallvec", -] - -[[package]] -name = "unicode-segmentation" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" - -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - -[[package]] -name = "unsafe-any-ors" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a303d30665362d9680d7d91d78b23f5f899504d4f08b3c4cf08d055d87c0ad" -dependencies = [ - "destructure_traitobject", -] - -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - -[[package]] -name = "ureq" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9" -dependencies = [ - "base64", - "flate2", - "log", - "once_cell", - "rustls", - "rustls-webpki 0.100.1", - "serde", - "serde_json", - "url", - "webpki-roots", -] - -[[package]] -name = "url" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "utf8parse" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" - -[[package]] -name = "uuid" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" -dependencies = [ - "getrandom", - "rand", -] - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "wasm-bindgen" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn 2.0.27", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.27", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" - -[[package]] -name = "web-sys" -version = "0.3.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "webpki-roots" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338" -dependencies = [ - "rustls-webpki 0.100.1", -] - -[[package]] -name = "weezl" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb" - -[[package]] -name = "winapi" -version = "0.3.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-targets 0.48.1", -] - -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.1", -] - -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - -[[package]] -name = "windows-targets" -version = "0.48.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" -dependencies = [ - "windows_aarch64_gnullvm 0.48.0", - "windows_aarch64_msvc 0.48.0", - "windows_i686_gnu 0.48.0", - "windows_i686_msvc 0.48.0", - "windows_x86_64_gnu 0.48.0", - "windows_x86_64_gnullvm 0.48.0", - "windows_x86_64_msvc 0.48.0", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" - -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" - -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" - -[[package]] -name = "wrapcenum-derive" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bcc065c85ad2c3bd12aa4118bf164835712e25080c392557801a13292c60aec" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "yaml-rust" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" -dependencies = [ - "linked-hash-map", -] - -[[package]] -name = "zip" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" -dependencies = [ - "aes", - "byteorder", - "bzip2", - "constant_time_eq", - "crc32fast", - "crossbeam-utils", - "flate2", - "hmac", - "pbkdf2", - "sha1", - "time 0.3.23", - "zstd", -] - -[[package]] -name = "zstd" -version = "0.11.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "5.0.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" -dependencies = [ - "libc", - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.8+zstd.1.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "zune-inflate" -version = "0.2.54" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02" -dependencies = [ - "simd-adler32", -] diff --git a/rust_bench/llama2-burn/Cargo.toml b/rust_bench/llama2-burn/Cargo.toml deleted file mode 100644 index 19793648..00000000 --- a/rust_bench/llama2-burn/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] -name = "llama" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -burn = { git = "https://github.com/burn-rs/burn.git" } -burn-tch = { package = "burn-tch", git = "https://github.com/burn-rs/burn.git" } -serde = {version = "1.0.171", features = ["std", "derive"]} -npy = "0.4.0" -num-traits = "0.2.15" -rust_tokenizers = "8.1.0" -clap = { version = "4.2.4", features = ["derive"] } -env_logger = "0.10.0" -log = "0.4" diff --git a/rust_bench/llama2-burn/LICENSE b/rust_bench/llama2-burn/LICENSE deleted file mode 100644 index ed2a3394..00000000 --- a/rust_bench/llama2-burn/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2023 Gadersd - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/rust_bench/llama2-burn/README.md b/rust_bench/llama2-burn/README.md deleted file mode 100644 index 09448b06..00000000 --- a/rust_bench/llama2-burn/README.md +++ /dev/null @@ -1,109 +0,0 @@ -# Llama2-burn Project - -## Overview - -This project provides a port of Meta's large language model, Llama2, to the Burn, a Rust deep learning framework. Specifically, the project hosts Python and Rust scripts necessary for the conversion, loading, and verification of weights from the Llama2 model into parameters compatible with Burn's framework. - -## Pre-requisites - -You will need to obtain the Llama2 model files to use this project. These can be downloaded directly from Meta or via Hugging Face's Model Hub. - -## Project Structure - -The directory structure of the project is as follows: - -``` -. 
-├── Cargo.lock -├── Cargo.toml -├── LICENSE -├── README.md -├── llama-py -│   ├── dump.py -│   ├── dump_model.py -│   ├── dump_test.py -│   ├── model.py -│   ├── requirements.txt -│   ├── test.py -│   ├── test_tokenizer.py -│   └── tokenizer.py -└── src - ├── bin - │   ├── convert - │   │   └── main.rs - │   ├── sample - │   │   └── main.rs - │   └── test - │   └── main.rs - ├── helper.rs - ├── lib.rs - ├── model.rs - └── token.rs -``` - -## Usage - -Follow the steps below to go from the downloaded Llama2 model files to dumping, conversion, and running in Rust: - -### Step 1: Loading and Testing with Python Scripts - -Inside the `llama-py` folder, you will find the necessary Python scripts. Here, you will primarily use `test.py`, `dump_model.py`, and `test_tokenizer.py`. - -1. **Test the Model**: Run the `test.py` script to load the model and verify it with a short prompt. If the output is gibberish, then there might be an issue with the model. Execute this script using the command: -``` -python3 test.py -``` -Example: `python3 test.py llama2-7b-chat tokenizer.model` - -2. **Dump the Model Weights**: Run the `dump_model.py` script to load the model and dump the weights into the `params` folder ready for loading in Rust. Execute this script using the command: -``` -python3 dump_model.py -``` -Example: `python3 dump_model.py llama2-7b-chat tokenizer.model` - -3. **Test the Tokenizer**: Finally, run the `test_tokenizer.py` script to load the tokenizer.model file and verify an example encoding and decoding. This script should be run in the same directory as the tokenizer file. Execute this script using the command: -``` -python3 test_tokenizer.py -``` - -### Step 2: Conversion and Running with Rust Binaries - -Inside the 'src/bin' folder, you will find Rust binaries: `convert`, `sample`, and `test`. - -1. **Converting Dumped Weights**: The `convert` binary converts dumped weights into burn's model format. It saves them for further use. Execute this using the following command: -``` -cargo run --bin convert -``` -Example: `cargo run --release --bin convert params llama2-7b-chat` - -2. **Testing Weights And Rust Inference Code**: The `test` binary loads the dumped weights and tests an example prompt to examine if the model weights and rust inference code produce sensible output. It is a companion to `test.py`. Execute this using the command: -``` -cargo run --bin test -``` -Example: `cargo run --release --bin test tokenizer.model params` - -3. **Sampling Text**: The `sample` binary loads the converted burn model file and generates a sample output based on an input prompt. The model can run on either the cpu or gpu. Execute this using the following command: -``` -cargo run --bin sample -``` -Example: -``` -#export TORCH_CUDA_VERSION=cu113 # if running on gpu -cargo run --release --bin sample llama2-7b-chat tokenizer.model "Hello, I am " 10 cpu -``` - -## Note - -Loading and converting weights occur on your CPU. Please ensure your CPU has enough RAM to hold the model and some extra resources for smooth functioning. Be patient as the process may take a while. - -## Contribution - -Contributions to improve this project are most welcome. Feel free to clone the repository, test the conversions, and submit a pull request for any enhancements, bug fixes, or other improvements. - -## License - -This project is licensed as specified in the LICENSE file. - ---- - -Thanks for testing out the Llama2-Burn project! Happy coding! 
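Note: the README removed above had drifted from the actual CLIs of the tools it documents. The deleted `dump_model.py` takes `--model-dir`/`--output-dir` flags rather than positional arguments, the `convert` binary requires a third output-directory argument, and the `sample` binary also expects `<repetitions>` and `<log_file>` arguments. A minimal sketch of the removed end-to-end workflow against those actual interfaces (directory and file names are illustrative):

```bash
# Assumes the checkpoint directory llama2-7b-chat/ contains params.json,
# tokenizer.model and the *.pth shards; run from rust_bench/llama2-burn.
python3 llama-py/test.py llama2-7b-chat llama2-7b-chat/tokenizer.model    # sanity-check the checkpoint
python3 llama-py/dump_model.py --model-dir llama2-7b-chat --output-dir .  # dump weights into ./params
cargo run --release --bin convert params llama2-7b-chat .                 # convert the dump to burn's binary format
# export TORCH_CUDA_VERSION=cu117   # only if sampling on gpu; match your local CUDA toolkit
cargo run --release --bin sample llama2-7b-chat llama2-7b-chat/tokenizer.model "Hello, I am " 10 cpu 1 sample.log
```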
diff --git a/rust_bench/llama2-burn/llama-py/dump.py b/rust_bench/llama2-burn/llama-py/dump.py deleted file mode 100644 index f758084e..00000000 --- a/rust_bench/llama2-burn/llama-py/dump.py +++ /dev/null @@ -1,83 +0,0 @@ -import pathlib - -import numpy as np -import torch - - -def save_scalar(s, name, path): - s = np.array([1.0, float(s)]).astype(np.float32) - np.save(pathlib.Path(path, f"{name}.npy"), s) - - -def save_tensor(tensor, name, path): - tensor_numpy = tensor.numpy() - tensor_dims = np.array(tensor_numpy.shape) - tensor_values = tensor_numpy.flatten() - tensor_to_save = np.concatenate((tensor_dims, tensor_values)).astype(np.float32) - np.save(pathlib.Path(path, f"{name}.npy"), tensor_to_save) - - -def save_linear(linear, path): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_tensor( - linear.weight.t(), "weight", path - ) # PyTorch and Tinygrad strangely transpose linear weights so reverse that - if linear.bias is not None: - save_tensor(linear.bias, "bias", path) - - -def save_rmsnorm(norm, path): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_tensor(norm.weight, "weight", path) - save_scalar(norm.eps, "eps", path) - - -def save_attention(attention, path): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_linear(attention.wq, pathlib.Path(path, "wq")) - save_linear(attention.wk, pathlib.Path(path, "wk")) - save_linear(attention.wv, pathlib.Path(path, "wv")) - save_linear(attention.wo, pathlib.Path(path, "wo")) - n_kv_head = attention.n_kv_heads - n_head = n_kv_head * attention.n_rep - save_scalar(n_head, "n_head", path) - save_scalar(n_kv_head, "n_kv_head", path) - - -def save_feedforward(feed_forward, path): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_linear(feed_forward.w1, pathlib.Path(path, "w1")) - save_linear(feed_forward.w2, pathlib.Path(path, "w2")) - save_linear(feed_forward.w3, pathlib.Path(path, "w3")) - - -def save_embedding(embedding, path): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_tensor(embedding.weight, "weight", path) - - -def save_transformer_block(transformer_block, path): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_attention(transformer_block.attention, pathlib.Path(path, "attention")) - save_feedforward(transformer_block.feed_forward, pathlib.Path(path, "feedforward")) - save_rmsnorm(transformer_block.attention_norm, pathlib.Path(path, "attention_norm")) - save_rmsnorm(transformer_block.ffn_norm, pathlib.Path(path, "ffn_norm")) - - -def save_transformer(transformer, path): - with torch.no_grad(): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_scalar(len(transformer.layers), "n_layer", path) - for idx, layer in enumerate(transformer.layers): - save_transformer_block(layer, pathlib.Path(path, f"layer{idx}")) - save_rmsnorm(transformer.norm, pathlib.Path(path, "norm")) - save_embedding(transformer.tok_embeddings, pathlib.Path(path, "tok_embeddings")) - save_linear(transformer.output, pathlib.Path(path, "output")) - save_scalar(10000.0, "theta", path) - save_scalar(transformer.params.max_seq_len, "n_ctx", path) - save_scalar(transformer.params.multiple_of, "multiple_of", path) - if transformer.params.ffn_dim_multiplier is not None: - save_scalar( - transformer.params.ffn_dim_multiplier, "ffn_dim_multiplier", path - ) - # save_tensor(transformer.freqs_cis, 'freqs_cis', path) diff --git a/rust_bench/llama2-burn/llama-py/dump_model.py b/rust_bench/llama2-burn/llama-py/dump_model.py deleted file mode 100644 index 3cfcad6a..00000000 --- 
a/rust_bench/llama2-burn/llama-py/dump_model.py +++ /dev/null @@ -1,109 +0,0 @@ -import argparse -import json -import logging -import sys -from pathlib import Path - -import dump -import tokenizer -import torch -from model import ModelArgs, Transformer - -# Configure logging -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - - -def load_model(model_dir): - tok = tokenizer.Tokenizer(model_path=str(model_dir / "tokenizer.model")) - checkpoints = sorted((model_dir).glob("*.pth")) - if len(checkpoints) == 0: - raise ValueError(f"No checkpoint files found in {model_dir}") - - weights = [torch.load(filename, map_location="cpu") for filename in checkpoints] - with open(model_dir / "params.json") as f: - params = json.loads(f.read()) - - model_args: ModelArgs = ModelArgs( - max_batch_size=1, - **params, - ) - model_args.vocab_size = tok.n_words - model = Transformer(model_args) - model.load_state_dict(concat_weights(weights), strict=False) - model.max_seq_len = model.tok_embeddings.weight.shape[0] - logger.info("Loaded model") - - return model - - -def concat_weights(models): - def convert(name) -> torch.Tensor: - disk_tensors = [model[name] for model in models] - if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: - return disk_tensors[0] - axis = ( - 1 - if name.startswith("tok_embeddings.") - or name.endswith(".attention.wo.weight") - or name.endswith(".feed_forward.w2.weight") - else 0 - ) - return disk_tensors[0].cat(*disk_tensors[1:], dim=axis) - - return { - name: convert(name) - for name in {name: None for model in models for name in model} - } - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Load and dump transformer model.") - parser.add_argument( - "--model-dir", - type=Path, - required=True, - help="Path to the directory containing the model checkpoints", - ) - parser.add_argument( - "--output-dir", - type=Path, - required=True, - help="Path to the directory where to dump the model.", - ) - - args = parser.parse_args() - - model_dir = args.model_dir - output_dir = args.output_dir - - # Check if the model-dir/params directory already exists - params_dir = output_dir / "params" - if params_dir.is_dir(): - logger.info( - f"The {params_dir} directory already exists. Model dump will not be performed." 
- ) - sys.exit(0) - - # Check that the model dir contains the required files - if ( - not (model_dir / "params.json").is_file() - or not (model_dir / "tokenizer.model").is_file() - or not any(model_dir.glob("*.pth")) - ): - logger.error( - "The model directory must contain params.json, tokenizer.model, and at least one .pth file" - ) - sys.exit(1) - - try: - logger.info(f"Loading model from {model_dir}") - llama = load_model(model_dir) - - logger.info("Dumping model...") - dump.save_transformer(llama, params_dir) - logger.info(f"Dump saved in {params_dir} folder.") - except Exception as e: - logger.error(f"An error occurred: {e}") diff --git a/rust_bench/llama2-burn/llama-py/dump_test.py b/rust_bench/llama2-burn/llama-py/dump_test.py deleted file mode 100644 index d8a6f73f..00000000 --- a/rust_bench/llama2-burn/llama-py/dump_test.py +++ /dev/null @@ -1,37 +0,0 @@ -import dump -import torch -from model import ModelArgs, Transformer - -if __name__ == "__main__": - n_vocab = 10 - n_ctx = 15 - n_state = 8 - multiple_of = 3 - n_head = 4 - n_kv_head = 2 - n_layer = 3 - norm_eps = 1e-6 - max_batch_size = 1 - - model_args = ModelArgs( - dim=n_state, - n_layers=n_layer, - n_heads=n_head, - n_kv_heads=n_kv_head, - vocab_size=n_vocab, - multiple_of=multiple_of, - norm_eps=norm_eps, - max_batch_size=max_batch_size, - ) - - llama = Transformer(model_args) - - with torch.no_grad(): - tokens = torch.tensor([0, 2, 1], dtype=torch.int32).unsqueeze(0) - output = llama(tokens, 0) - print(f"Test input {tokens.numpy()}") - print(f"Test output {output.numpy()}") - - print("Dumping test model...") - dump.save_transformer(llama, "params") - print("Dump saved in params folder.") diff --git a/rust_bench/llama2-burn/llama-py/model.py b/rust_bench/llama2-burn/llama-py/model.py deleted file mode 100644 index 562ef0b9..00000000 --- a/rust_bench/llama2-burn/llama-py/model.py +++ /dev/null @@ -1,269 +0,0 @@ -# This file is adapted from the LLama project: -# https://github.com/facebookresearch/llama/blob/main/llama/model.py - -# Original LLama code by Facebook AI Research -# Adapted by Gadersd - -import math -from dataclasses import dataclass - -import torch -import torch.nn.functional as F -from torch import nn -from torch.nn import Embedding, Linear - - -@dataclass -class ModelArgs: - dim: int = 4096 - n_layers: int = 32 - n_heads: int = 32 - n_kv_heads: int | None = None - vocab_size: int = -1 # defined later by tokenizer - multiple_of: int = 256 # make SwiGLU hidden layer size multiple of large power of 2 - ffn_dim_multiplier: float | None = None - norm_eps: float = 1e-5 - - max_batch_size: int = 32 - max_seq_len: int = 2048 - - -class RMSNorm(torch.nn.Module): - def __init__(self, dim: int, eps: float = 1e-6): - super().__init__() - self.eps = eps - self.weight = nn.Parameter(torch.ones(dim)) - - def _norm(self, x): - return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) - - def forward(self, x): - output = self._norm(x.float()).type_as(x) - return output * self.weight - - -def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0): - freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) - t = torch.arange(end, device=freqs.device) # type: ignore - freqs = torch.outer(t, freqs).float() # type: ignore - freqs_cis = torch.polar(torch.ones_like(freqs), freqs) # complex64 - return freqs_cis - - -def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor): - ndim = x.ndim - assert 0 <= 1 < ndim - assert freqs_cis.shape == (x.shape[1], x.shape[-1]) - shape = [d if i 
== 1 or i == ndim - 1 else 1 for i, d in enumerate(x.shape)] - return freqs_cis.view(*shape) - - -def apply_rotary_emb( - xq: torch.Tensor, - xk: torch.Tensor, - freqs_cis: torch.Tensor, -) -> tuple[torch.Tensor, torch.Tensor]: - xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2)) - xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2)) - freqs_cis = reshape_for_broadcast(freqs_cis, xq_) - xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3) - xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3) - return xq_out.type_as(xq), xk_out.type_as(xk) - - -def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor: - """torch.repeat_interleave(x, dim=2, repeats=n_rep)""" - bs, slen, n_kv_heads, head_dim = x.shape - if n_rep == 1: - return x - return ( - x[:, :, :, None, :] - .expand(bs, slen, n_kv_heads, n_rep, head_dim) - .reshape(bs, slen, n_kv_heads * n_rep, head_dim) - ) - - -class Attention(nn.Module): - def __init__(self, args: ModelArgs): - super().__init__() - self.n_kv_heads = args.n_heads if args.n_kv_heads is None else args.n_kv_heads - model_parallel_size = 1 # fs_init.get_model_parallel_world_size() - self.n_local_heads = args.n_heads // model_parallel_size - self.n_local_kv_heads = self.n_kv_heads // model_parallel_size - self.n_rep = self.n_local_heads // self.n_local_kv_heads - self.head_dim = args.dim // args.n_heads - - self.wq = Linear( - args.dim, - args.n_heads * self.head_dim, - bias=False, - ) - self.wk = Linear( - args.dim, - self.n_kv_heads * self.head_dim, - bias=False, - ) - self.wv = Linear( - args.dim, - self.n_kv_heads * self.head_dim, - bias=False, - ) - self.wo = Linear( - args.n_heads * self.head_dim, - args.dim, - bias=False, - ) - - self.cache_k = torch.zeros( - ( - args.max_batch_size, - args.max_seq_len, - self.n_local_kv_heads, - self.head_dim, - ) - ) - self.cache_v = torch.zeros( - ( - args.max_batch_size, - args.max_seq_len, - self.n_local_kv_heads, - self.head_dim, - ) - ) - - def forward( - self, - x: torch.Tensor, - start_pos: int, - freqs_cis: torch.Tensor, - mask: torch.Tensor | None, - ): - bsz, seqlen, _ = x.shape - xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) - - xq = xq.view(bsz, seqlen, self.n_local_heads, self.head_dim) - xk = xk.view(bsz, seqlen, self.n_local_kv_heads, self.head_dim) - xv = xv.view(bsz, seqlen, self.n_local_kv_heads, self.head_dim) - - xq, xk = apply_rotary_emb(xq, xk, freqs_cis=freqs_cis) - - self.cache_k = self.cache_k.to(xq) - self.cache_v = self.cache_v.to(xq) - - self.cache_k[:bsz, start_pos : start_pos + seqlen] = xk # noqa - self.cache_v[:bsz, start_pos : start_pos + seqlen] = xv # noqa - - keys = self.cache_k[:bsz, : start_pos + seqlen] - values = self.cache_v[:bsz, : start_pos + seqlen] - - # repeat k/v heads if n_kv_heads < n_heads - keys = repeat_kv(keys, self.n_rep) # (bs, seqlen, n_local_heads, head_dim) - values = repeat_kv(values, self.n_rep) # (bs, seqlen, n_local_heads, head_dim) - - xq = xq.transpose(1, 2) # (bs, n_local_heads, seqlen, head_dim) - keys = keys.transpose(1, 2) - values = values.transpose(1, 2) - scores = torch.matmul(xq, keys.transpose(2, 3)) / math.sqrt(self.head_dim) - if mask is not None: - scores = scores + mask # (bs, n_local_heads, seqlen, cache_len + seqlen) - scores = F.softmax(scores.float(), dim=-1).type_as(xq) - output = torch.matmul(scores, values) # (bs, n_local_heads, seqlen, head_dim) - output = output.transpose(1, 2).contiguous().view(bsz, seqlen, -1) - return self.wo(output) - - -class FeedForward(nn.Module): - def __init__( - self, - 
dim: int, - hidden_dim: int, - multiple_of: int, - ffn_dim_multiplier: float | None, - ): - super().__init__() - hidden_dim = int(2 * hidden_dim / 3) - # custom dim factor multiplier - if ffn_dim_multiplier is not None: - hidden_dim = int(ffn_dim_multiplier * hidden_dim) - hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of) - - self.w1 = Linear(dim, hidden_dim, bias=False) - self.w2 = Linear(hidden_dim, dim, bias=False) - self.w3 = Linear(dim, hidden_dim, bias=False) - - def forward(self, x): - return self.w2(F.silu(self.w1(x)) * self.w3(x)) - - -class TransformerBlock(nn.Module): - def __init__(self, layer_id: int, args: ModelArgs): - super().__init__() - self.n_heads = args.n_heads - self.dim = args.dim - self.head_dim = args.dim // args.n_heads - self.attention = Attention(args) - self.feed_forward = FeedForward( - dim=args.dim, - hidden_dim=4 * args.dim, - multiple_of=args.multiple_of, - ffn_dim_multiplier=args.ffn_dim_multiplier, - ) - self.layer_id = layer_id - self.attention_norm = RMSNorm(args.dim, eps=args.norm_eps) - self.ffn_norm = RMSNorm(args.dim, eps=args.norm_eps) - - def forward( - self, - x: torch.Tensor, - start_pos: int, - freqs_cis: torch.Tensor, - mask: torch.Tensor | None, - ): - h = x + self.attention.forward( - self.attention_norm(x), start_pos, freqs_cis, mask - ) - out = h + self.feed_forward.forward(self.ffn_norm(h)) - return out - - -class Transformer(nn.Module): - def __init__(self, params: ModelArgs): - super().__init__() - self.params = params - self.vocab_size = params.vocab_size - self.n_layers = params.n_layers - - self.tok_embeddings = Embedding(params.vocab_size, params.dim) - - self.layers = torch.nn.ModuleList() - for layer_id in range(params.n_layers): - self.layers.append(TransformerBlock(layer_id, params)) - - self.norm = RMSNorm(params.dim, eps=params.norm_eps) - self.output = Linear(params.dim, params.vocab_size, bias=False) - - self.freqs_cis = precompute_freqs_cis( - self.params.dim // self.params.n_heads, self.params.max_seq_len * 2 - ) - - @torch.inference_mode() - def forward(self, tokens: torch.Tensor, start_pos: int): - _bsz, seqlen = tokens.shape - h = self.tok_embeddings(tokens) - # print(h.numpy()) - # print(h.shape) - self.freqs_cis = self.freqs_cis.to(h.device) - freqs_cis = self.freqs_cis[start_pos : start_pos + seqlen] # noqa - - mask = None - if seqlen > 1: - mask = torch.full( - (1, 1, seqlen, seqlen), float("-inf"), device=tokens.device - ) - mask = torch.triu(mask, diagonal=start_pos + 1).type_as(h) - - for layer in self.layers: - h = layer(h, start_pos, freqs_cis, mask) - h = self.norm(h) - output = self.output(h).float() - return output diff --git a/rust_bench/llama2-burn/llama-py/requirements.txt b/rust_bench/llama2-burn/llama-py/requirements.txt deleted file mode 100644 index 4ba53e28..00000000 --- a/rust_bench/llama2-burn/llama-py/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy -torch -sentencepiece diff --git a/rust_bench/llama2-burn/llama-py/test.py b/rust_bench/llama2-burn/llama-py/test.py deleted file mode 100644 index e8a3facc..00000000 --- a/rust_bench/llama2-burn/llama-py/test.py +++ /dev/null @@ -1,88 +0,0 @@ -import json -import sys -from pathlib import Path - -import tokenizer -import torch -from model import ModelArgs, Transformer - -# from safetensors.torch import load_file - - -def load_model(model_dir, tokenizer_path): - tok = tokenizer.Tokenizer(model_path=tokenizer_path) - checkpoints = sorted(Path(model_dir).glob("*.pth")) - if len(checkpoints) == 0: - raise ValueError(f"No 
checkpoint files found in {model_dir}") - - weights = [torch.load(filename, map_location="cpu") for filename in checkpoints] - with open(Path(model_dir) / "params.json") as f: - params = json.loads(f.read()) - - model_args: ModelArgs = ModelArgs( - max_batch_size=1, - **params, - ) - model_args.vocab_size = tok.n_words - model = Transformer(model_args) - model.load_state_dict(concat_weights(weights), strict=False) - model.max_seq_len = model.tok_embeddings.weight.shape[0] - print("Loaded model") - - return model - - -# The concat_weights function is adapted from the tinygrad library: -# https://github.com/tinygrad/tinygrad/blob/master/tinygrad/examples/llama.py -# Original code by TinyGrad authors -# Adapted by [Your Name] -def concat_weights(models): - def convert(name) -> torch.Tensor: - disk_tensors = [model[name] for model in models] - if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: - return disk_tensors[0] - axis = ( - 1 - if name.startswith("tok_embeddings.") - or name.endswith(".attention.wo.weight") - or name.endswith(".feed_forward.w2.weight") - else 0 - ) - return disk_tensors[0].cat(*disk_tensors[1:], dim=axis) - - return { - name: convert(name) - for name in {name: None for model in models for name in model} - } - - -if __name__ == "__main__": - if len(sys.argv) != 3: - raise ValueError( - "You must provide the model_dir and tok_path as command line parameters" - ) - - model_dir = sys.argv[1] - tokenizer_path = sys.argv[2] - - try: - with torch.no_grad(): - tok = tokenizer.Tokenizer(model_path=tokenizer_path) - llama = load_model(model_dir, tokenizer_path) - - """tokens = torch.tensor([0, 2, 1]) - out = llama(tokens.unsqueeze(0), 0) - - print(out[0, :3, :10].numpy())""" - - tokens = tok.encode("Hello, I am ", True, False) - for i in range(0, 10): - token_tensor = torch.tensor(tokens) - logits = llama(token_tensor.unsqueeze(0), 0) - sample = logits[:, -1, :].argmax(dim=-1).item() - print(f"Sample is {sample} {tok.decode(sample)}") - tokens = tokens + [sample] - decoded = tok.decode(tokens) - print(f"Sampled output: {decoded}") - except Exception as e: - print(f"An error occurred: {e}") diff --git a/rust_bench/llama2-burn/llama-py/test_tokenizer.py b/rust_bench/llama2-burn/llama-py/test_tokenizer.py deleted file mode 100644 index 1304b823..00000000 --- a/rust_bench/llama2-burn/llama-py/test_tokenizer.py +++ /dev/null @@ -1,12 +0,0 @@ -import tokenizer - -if __name__ == "__main__": - tok = tokenizer.Tokenizer("tokenizer.model") - - test_str = "Hello, I am Llama2!" 
- encoded = tok.encode(test_str, True, True) - decoded = tok.decode(encoded) - - print(f"Test string: {test_str}") - print(f"Encoded tokens: {encoded}") - print(f"Decoded string: {decoded}") diff --git a/rust_bench/llama2-burn/llama-py/tokenizer.py b/rust_bench/llama2-burn/llama-py/tokenizer.py deleted file mode 100644 index 40b701a8..00000000 --- a/rust_bench/llama2-burn/llama-py/tokenizer.py +++ /dev/null @@ -1,38 +0,0 @@ -# This file is adapted from the LLama project: -# https://github.com/facebookresearch/llama/blob/main/llama/tokenizer.py - -# Original LLama code by Facebook AI Research -# Adapted by Gadersd - -import logging - -from sentencepiece import SentencePieceProcessor - -logger = logging.getLogger(__name__) - - -class Tokenizer: - def __init__(self, model_path: str): - self.sp_model = SentencePieceProcessor(model_file=model_path) - - # BOS / EOS token IDs - self.n_words: int = self.sp_model.vocab_size() - self.bos_id: int = self.sp_model.bos_id() - self.eos_id: int = self.sp_model.eos_id() - self.pad_id: int = self.sp_model.pad_id() - - logger.info( - f"#words: {self.n_words} BOS ID: {self.bos_id} EOS ID: {self.eos_id} PAD ID: {self.pad_id}" - ) - - def encode(self, s: str, bos: bool, eos: bool) -> list[int]: - assert type(s) is str - t = self.sp_model.encode(s) - if bos: - t = [self.bos_id] + t - if eos: - t = t + [self.eos_id] - return t - - def decode(self, t: list[int]) -> str: - return self.sp_model.decode(t) diff --git a/rust_bench/llama2-burn/src/bin/convert/main.rs b/rust_bench/llama2-burn/src/bin/convert/main.rs deleted file mode 100644 index 4b909768..00000000 --- a/rust_bench/llama2-burn/src/bin/convert/main.rs +++ /dev/null @@ -1,69 +0,0 @@ -use llama::model::*; - -use std::{error::Error, process}; - -use burn_tch::{TchBackend, TchDevice}; -use clap::Parser; - -use burn::{config::Config, module::Module, tensor::backend::Backend}; - -use burn::record::{self, BinFileRecorder, HalfPrecisionSettings, Recorder}; - -fn convert_llama_dump_to_model( - dump_path: &str, - model_name: &str, - output_dir: &str, - device: &B::Device, -) -> Result<(), Box> { - let (llama, llama_config): (Llama, LlamaConfig) = load_llama_dump(dump_path, device)?; - - save_llama_model_file(llama, &format!("{output_dir}/{model_name}"))?; - llama_config.save(&format!("{output_dir}/{model_name}.cfg"))?; - - Ok(()) -} - -fn save_llama_model_file( - llama: Llama, - name: &str, -) -> Result<(), record::RecorderError> { - BinFileRecorder::::new().record(llama.into_record(), name.into()) -} - -#[derive(Parser, Debug)] -#[command(author, version, about, long_about = None)] -struct Args { - /// Path to the llama dump. - #[clap(required = true, help = "Path to the llama dump")] - dump_path: String, - - /// Name of the output model. - #[clap(required = true, help = "Name of the output model")] - model_name: String, - - /// Output directory for the model. 
- #[clap(required = true, help = "Output directory for the model")] - output_dir: String, -} - -fn main() { - type Backend = TchBackend; - - // might crash if lacking enough GPU memory so use CPU for conversion - let device = TchDevice::Cpu; - - let args = Args::parse(); - - let dump_path = &args.dump_path; - let model_name = &args.model_name; - let output_dir = &args.output_dir; - - if let Err(e) = - convert_llama_dump_to_model::(dump_path, model_name, &output_dir, &device) - { - eprintln!("Failed to convert llama dump to model: {:?}", e); - process::exit(1); - } - - println!("Successfully converted {} to {}", dump_path, model_name); -} diff --git a/rust_bench/llama2-burn/src/bin/sample/main.rs b/rust_bench/llama2-burn/src/bin/sample/main.rs deleted file mode 100644 index 3727ddc0..00000000 --- a/rust_bench/llama2-burn/src/bin/sample/main.rs +++ /dev/null @@ -1,196 +0,0 @@ -use llama::model::*; -use llama::token::LlamaTokenizer; - -use num_traits::cast::ToPrimitive; -use std::io::prelude::*; -use std::{env, error::Error, fs::OpenOptions, io, time::Instant}; - -use burn_tch::{TchBackend, TchDevice}; - -use burn::{ - config::Config, - module::Module, - tensor::{backend::Backend, Data, Tensor}, -}; -use env_logger::Env; -use log::info; - -use burn::record::{self, BinFileRecorder, HalfPrecisionSettings, Recorder}; - -fn init_logger() { - env::set_var("RUST_LOG", "info"); - env_logger::Builder::from_env(Env::default().default_filter_or("info")) - .format_timestamp(Some(env_logger::TimestampPrecision::Millis)) - .format_module_path(false) - .format_level(true) - .init(); -} - -fn load_llama(model_name: &str) -> Result<(Llama, LlamaConfig), Box> { - let config = LlamaConfig::load(&format!("{model_name}.cfg"))?; - let llama = load_llama_model_file(&config, model_name)?; - - Ok((llama, config)) -} - -fn load_llama_model_file( - config: &LlamaConfig, - filename: &str, -) -> Result, record::RecorderError> { - BinFileRecorder::::new() - .load(filename.into()) - .map(|record| config.init().load_record(record)) -} - -fn sample_llama( - llama: &Llama, - tokenizer: &LlamaTokenizer, - prompt: &str, - n_tokens: usize, -) -> String { - let device = llama.devices()[0].clone(); - - let mut tokens = tokenizer.encode(prompt, true, false); - let mut text = String::new(); - - for _ in 0..n_tokens { - let token_tensor = Tensor::from_ints(Data::from_usize(Data::new( - tokens.iter().map(|&t| t as usize).collect(), - [tokens.len()].into(), - ))) - .unsqueeze::<2>() - .to_device(&device); - - let out = llama.forward(token_tensor); - - let [_, n_token, _] = out.dims(); - let last_row: Tensor = out.slice([0..1, (n_token - 1)..n_token]).flatten(0, 2); - - let token_id = last_row.argmax(0).into_scalar().to_i64().unwrap(); - - tokens.push(token_id); - - let token_text = tokenizer.decode(&[token_id], true); - - text += &token_text; - } - - text -} - -#[cfg(feature = "f16")] -type Elem = burn::tensor::f16; -#[cfg(not(feature = "f16"))] -type Elem = f32; - -fn main() { - type Backend = TchBackend; - init_logger(); - - let args: Vec = std::env::args().collect(); - if args.len() != 8 { - panic!( - "Usage: {} ", - args[0] - ); - } - - let model_name = &args[1]; - let tokenizer_filepath = &args[2]; - let prompt = &args[3]; - let n_tokens: usize = args[4].parse().unwrap_or_else(|_| { - panic!("Error: Invalid number of tokens"); - }); - - // Specify device based on command line argument - let device_param = &args[5]; - let device = if device_param == "cpu" { - TchDevice::Cpu - } else if device_param == "gpu" { - #[cfg(not(target_os 
= "macos"))] - let device = TchDevice::Cuda(0); - #[cfg(target_os = "macos")] - let device = TchDevice::Mps; - - device - } else { - panic!("Error: Invalid device parameter (must be 'cpu' or 'gpu')"); - }; - - let repetitions: usize = args[6].parse().unwrap_or_else(|_| { - panic!("Error: Invalid number of repetitions"); - }); - - let log_file: String = args[7].parse().unwrap_or_else(|_| { - panic!("Error: Invalid log filename."); - }); - - let tokenizer = match LlamaTokenizer::new(tokenizer_filepath) { - Ok(tokenizer) => tokenizer, - Err(e) => { - panic!("Failed to load tokenizer: {:?}", e); - } - }; - - let (llama, _llama_config): (Llama, LlamaConfig) = match load_llama(model_name) { - Ok((llama, llama_config)) => (llama, llama_config), - Err(e) => { - panic!("Failed to load llama model: {:?}", e); - } - }; - - let llama = llama.to_device(&device); - - let mut tokens_per_second_values = Vec::new(); - info!("Running burn benchmark"); - - for r in 0..repetitions { - let start_time = Instant::now(); - let width = repetitions.to_string().len(); - info!("Running repetition [{:0width$}/{}]", r + 1, repetitions); - let _ = sample_llama(&llama, &tokenizer, prompt, n_tokens); - let elapsed_time = start_time.elapsed(); - let elapsed_seconds = elapsed_time.as_secs_f64(); - let tokens_per_second = n_tokens as f64 / elapsed_seconds; - tokens_per_second_values.push(tokens_per_second); - } - - let average_tokens_per_second = - tokens_per_second_values.iter().sum::() / repetitions as f64; - - let standard_deviation = if repetitions > 1 { - let variance = tokens_per_second_values - .iter() - .map(|&x| (x - average_tokens_per_second).powi(2)) - .sum::() - / repetitions as f64; - variance.sqrt() - } else { - 0.0 - }; - - let file = OpenOptions::new() - .create(true) - .append(true) - .open(log_file) - .unwrap(); - let mut file_writer = io::BufWriter::new(file); - let elem_type = if cfg!(feature = "f16") { - "float16" - } else { - "float32" - }; - info!( - "burn, {} : {:.2} ± {:.2}", - elem_type, average_tokens_per_second, standard_deviation - ); - writeln!( - file_writer, - "{}", - format!( - "burn, {} : {:.2} ± {:.2}", - elem_type, average_tokens_per_second, standard_deviation - ) - ) - .unwrap(); -} diff --git a/rust_bench/llama2-burn/src/lib.rs b/rust_bench/llama2-burn/src/lib.rs deleted file mode 100644 index bdba95cb..00000000 --- a/rust_bench/llama2-burn/src/lib.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod model; -pub mod token; diff --git a/rust_bench/llama2-burn/src/model.rs b/rust_bench/llama2-burn/src/model.rs deleted file mode 100644 index 5cb57fb3..00000000 --- a/rust_bench/llama2-burn/src/model.rs +++ /dev/null @@ -1,664 +0,0 @@ -use std::f32::NEG_INFINITY; - -use burn::{ - config::Config, - module::{Module, Param}, - nn, - tensor::{ - activation::{sigmoid, softmax}, - backend::Backend, - Data, Int, Tensor, - }, -}; - -#[derive(Config, Debug)] -pub struct LlamaConfig { - n_vocab: usize, - n_ctx: usize, - n_state: usize, - multiple_of: usize, - ffn_dim_multiplier: Option, - n_head: usize, - n_kv_head: usize, - n_layer: usize, - #[config(default = 1e-6)] - norm_eps: f64, -} - -impl LlamaConfig { - pub fn init(&self) -> Llama { - let token_embedding = nn::EmbeddingConfig::new(self.n_vocab, self.n_state).init(); - let rotary_encoder = - RotaryEncodingConfig::new(self.n_ctx, self.n_state / self.n_head, 10000.0).init(); - let blocks: Vec<_> = (0..self.n_layer) - .into_iter() - .map(|_| { - ResidualDecoderAttentionBlockConfig::new( - self.n_state, - self.multiple_of, - self.n_head, - self.n_kv_head, - 
self.norm_eps, - ) - .with_ffn_dim_multiplier(self.ffn_dim_multiplier) - .init() - }) - .collect(); - - let norm = RMSNormConfig::new(self.n_state, self.norm_eps).init(); - let output = nn::LinearConfig::new(self.n_state, self.n_vocab) - .with_bias(false) - .init(); - - let mask = attn_decoder_mask(self.n_ctx).into(); - - let n_vocab = self.n_vocab; - let n_ctx = self.n_ctx; - - Llama { - token_embedding, - rotary_encoder, - blocks, - norm, - output, - mask, - n_vocab, - n_ctx, - } - } -} - -#[derive(Module, Debug)] -pub struct Llama { - token_embedding: nn::Embedding, - rotary_encoder: RotaryEncoding, - blocks: Vec>, - norm: RMSNorm, - output: nn::Linear, - mask: Param>, - n_vocab: usize, - n_ctx: usize, -} - -impl Llama { - pub fn forward(&self, x: Tensor) -> Tensor { - let [_, seq_len] = x.dims(); - - assert!( - seq_len <= self.n_ctx, - "Token sequence length {} must not exceed {}.", - seq_len, - self.n_ctx - ); - - let x = self.token_embedding.forward(x); - - let mut x = x; - for block in &self.blocks { - x = block.forward(x, &self.rotary_encoder, self.mask.val()); - } - - self.output.forward(self.norm.forward(x)) - } -} - -#[derive(Config)] -pub struct ResidualDecoderAttentionBlockConfig { - n_state: usize, - multiple_of: usize, - ffn_dim_multiplier: Option, - n_head: usize, - n_kv_head: usize, - norm_eps: f64, -} - -impl ResidualDecoderAttentionBlockConfig { - fn init(&self) -> ResidualDecoderAttentionBlock { - let attn = - MultiHeadSelfAttentionConfig::new(self.n_state, self.n_head, self.n_kv_head).init(); - let attn_norm = RMSNormConfig::new(self.n_state, self.norm_eps).init(); - - let mlp = MLPConfig::new(self.n_state, 4 * self.n_state, self.multiple_of) - .with_ffn_dim_multiplier(self.ffn_dim_multiplier) - .init(); - let mlp_norm = RMSNormConfig::new(self.n_state, self.norm_eps).init(); - - ResidualDecoderAttentionBlock { - attn, - attn_norm, - mlp, - mlp_norm, - } - } -} - -#[derive(Module, Debug)] -pub struct ResidualDecoderAttentionBlock { - attn: MultiHeadSelfAttention, - attn_norm: RMSNorm, - mlp: MLP, - mlp_norm: RMSNorm, -} - -impl ResidualDecoderAttentionBlock { - fn forward( - &self, - x: Tensor, - rotary_encoder: &RotaryEncoding, - mask: Tensor, - ) -> Tensor { - let x = x.clone() - + self - .attn - .forward(self.attn_norm.forward(x), rotary_encoder, Some(mask)); - let x = x.clone() + self.mlp.forward(self.mlp_norm.forward(x)); - return x; - } -} - -#[derive(Config)] -pub struct MLPConfig { - n_state: usize, - n_state_hidden: usize, - multiple_of: usize, - ffn_dim_multiplier: Option, -} - -impl MLPConfig { - fn init(&self) -> MLP { - let mut hidden_dim = 2 * self.n_state_hidden / 3; - if let Some(ffn_dim_multiplier) = self.ffn_dim_multiplier { - hidden_dim = ffn_dim_multiplier * hidden_dim; - } - hidden_dim = self.multiple_of * ((hidden_dim + self.multiple_of - 1) / self.multiple_of); - - let w1 = nn::LinearConfig::new(self.n_state, hidden_dim) - .with_bias(false) - .init(); - let w2 = nn::LinearConfig::new(hidden_dim, self.n_state) - .with_bias(false) - .init(); - let w3 = nn::LinearConfig::new(self.n_state, hidden_dim) - .with_bias(false) - .init(); - - let silu = SILU::new(); - - MLP { w1, w2, w3, silu } - } -} - -#[derive(Module, Debug)] -pub struct MLP { - w1: nn::Linear, - w2: nn::Linear, - w3: nn::Linear, - silu: SILU, -} - -impl MLP { - fn forward(&self, x: Tensor) -> Tensor { - self.w2 - .forward(self.silu.forward(self.w1.forward(x.clone())) * self.w3.forward(x)) - } -} - -#[derive(Config)] -pub struct MultiHeadSelfAttentionConfig { - n_state: usize, - n_head: 
usize, - n_kv_head: usize, -} - -impl MultiHeadSelfAttentionConfig { - fn init(&self) -> MultiHeadSelfAttention { - assert!( - self.n_state % self.n_head == 0, - "State size {} must be a multiple of the number of heads {}", - self.n_state, - self.n_head - ); - assert!( - self.n_head % self.n_kv_head == 0, - "The number of query heads {} must be a multiple of the number of k/v heads {}", - self.n_head, - self.n_kv_head - ); - - let n_head_dim = self.n_state / self.n_head; - - let n_head = self.n_head; - let n_kv_head = self.n_kv_head; - let query = nn::LinearConfig::new(self.n_state, self.n_state) - .with_bias(false) - .init(); - let key = nn::LinearConfig::new(self.n_state, n_kv_head * n_head_dim) - .with_bias(false) - .init(); - let value = nn::LinearConfig::new(self.n_state, n_kv_head * n_head_dim) - .with_bias(false) - .init(); - let out = nn::LinearConfig::new(self.n_state, self.n_state) - .with_bias(false) - .init(); - - MultiHeadSelfAttention { - n_head, - n_kv_head, - query, - key, - value, - out, - } - } -} - -#[derive(Module, Debug)] -pub struct MultiHeadSelfAttention { - n_head: usize, - n_kv_head: usize, - query: nn::Linear, - key: nn::Linear, - value: nn::Linear, - out: nn::Linear, -} - -impl MultiHeadSelfAttention { - fn forward( - &self, - x: Tensor, - rotary_encoder: &RotaryEncoding, - mask: Option>, - ) -> Tensor { - let q = self.query.forward(x.clone()); - let k = self.key.forward(x.clone()); - let v = self.value.forward(x); - - let wv = qkv_attention_rotary(q, k, v, mask, self.n_head, self.n_kv_head, rotary_encoder); - - return self.out.forward(wv); - } -} - -fn qkv_attention_rotary( - q: Tensor, - k: Tensor, - v: Tensor, - mask: Option>, - n_head: usize, - n_kv_head: usize, - rotary_encoder: &RotaryEncoding, -) -> Tensor { - let [n_batch, n_qctx, n_state] = q.dims(); - let [_, n_ctx, _] = k.dims(); - - let n_hstate = n_state / n_head; - let scale = (n_hstate as f64).powf(-0.25); // keeps the value weightings roughly normally distributed - - let q = q.reshape([n_batch, n_qctx, n_head, n_hstate]); - // interleave kv heads to match the number of q heads - let n_repeat = n_head / n_kv_head; - let k = repeat_kv(k.reshape([n_batch, n_ctx, n_kv_head, n_hstate]), n_repeat); - let v = repeat_kv(v.reshape([n_batch, n_ctx, n_kv_head, n_hstate]), n_repeat); - - // the last two dims need to be (n_ctx, n_hstate) - let q = rotary_encoder.forward(q.swap_dims(1, 2)) * scale; - let k = rotary_encoder.forward(k.swap_dims(1, 2)) * scale; - let v = v.swap_dims(1, 2); - - // compute value weightings - let qk = q.matmul(k.transpose()); - - // apply mask - let qk = if let Some(mask) = mask { - qk + mask.slice([0..n_qctx, 0..n_ctx]).unsqueeze::<4>() - } else { - qk - }; - - // normalize value weightings - let w = softmax(qk, 3); - let o = w.matmul(v).swap_dims(1, 2).flatten(2, 3); - - return o; -} - -/// For a tensor of size (n_batch, n_ctx, n_kv_head, n_hstate), repeats the head keys or values in an interleaving manner so that the number -/// of heads is effectively multiplied by n_repeat -fn repeat_kv(x: Tensor, n_repeat: usize) -> Tensor { - if n_repeat > 1 { - let [n_batch, n_ctx, n_kv_head, n_hstate] = x.dims(); - x.repeat(3, n_repeat) - .reshape([n_batch, n_ctx, n_kv_head * n_repeat, n_hstate]) - } else { - x - } -} - -/// Generates a strictly upper triangular matrix filled with -inf that when added to an attention weight matrix prevents -/// vectors from attending to other vectors further in the sequence, preventing future information from flowing into the past -pub fn 
attn_decoder_mask(seq_length: usize) -> Tensor { - let mut mask = Tensor::::zeros([seq_length, seq_length]); - - for i in 0..(seq_length - 1) { - let values = Tensor::::zeros([1, seq_length - (i + 1)]).add_scalar(NEG_INFINITY); - mask = mask.slice_assign([i..i + 1, i + 1..seq_length], values); - } - - return mask; -} - -#[derive(Config, Debug)] -pub struct RotaryEncodingConfig { - max_sequence_length: usize, - state_size: usize, - theta: f64, -} - -impl RotaryEncodingConfig { - pub fn init(&self) -> RotaryEncoding { - assert!(self.state_size % 2 == 0, "Head dims must be even."); - assert!(self.theta > 0.0, "Theta must be positive."); - - let half_state_size = self.state_size / 2; - - let arange_m = Tensor::from_floats([[1.0, 0.0, 0.0, 1.0], [0.0, -1.0, 1.0, 0.0]]).into(); - - let inv_freq = powto( - self.theta, - Tensor::arange(0..half_state_size).float() * (2.0 / self.state_size as f64), - ) - .powf(-1.0); - - let periods = Tensor::arange(0..self.max_sequence_length) - .float() - .unsqueeze::<2>() - .transpose() - .repeat(1, half_state_size) - * inv_freq.unsqueeze(); - - let p_cos = periods.clone().cos(); - let p_sin = periods.sin(); - let freq_cis = Tensor::cat(vec![p_cos, p_sin], 1) - .reshape([self.max_sequence_length, 2, half_state_size]) - .transpose() - .repeat(2, 2) - .reshape([self.max_sequence_length, self.state_size, 2]) - .into(); - - RotaryEncoding { arange_m, freq_cis } - } -} - -fn powto(base: f64, x: Tensor) -> Tensor { - let logbase = base.ln(); - x.mul_scalar(logbase).exp() -} - -/// Conceptually, pairs the values of a vector (v0 v1 v2 ... vn) into complex numbers (c0 c1 c2 ... cn/2) -/// which are then rotated counter-clockwise by the angle seq_index / theta^(2*pair_index/n). -/// This encodes sequence positions in a way that is agnostic to the maximum sequence length -/// which potentially allows for arbitrarily long sequences without retraining. 
-#[derive(Module, Debug)] -pub struct RotaryEncoding { - arange_m: Param>, - freq_cis: Param>, -} - -impl RotaryEncoding { - /// Applies rotary positional encoding to a tensor of dimenions (..., seq_len, n_state) - fn forward(&self, x: Tensor) -> Tensor { - assert!(D >= 2); - let orig_shape = x.shape(); - let (n_ctx, n_state) = (orig_shape.dims[D - 2], orig_shape.dims[D - 1]); - let dummy_dim_size = orig_shape.num_elements() / (n_ctx * n_state); - - let out = x - .reshape([dummy_dim_size, n_ctx, n_state / 2, 2]) - .matmul(self.arange_m.val().unsqueeze()) - .reshape([dummy_dim_size, n_ctx, n_state, 2]) - * self.freq_cis.val().slice([0..n_ctx]).unsqueeze(); - - out.sum_dim(D - 1).reshape(orig_shape) - } -} - -#[derive(Config)] -pub struct RMSNormConfig { - layer_size: usize, - eps: f64, -} - -impl RMSNormConfig { - fn init(&self) -> RMSNorm { - assert!(self.eps > 0.0, "eps must be positive."); - - let weight = Tensor::ones([self.layer_size]); - let eps = self.eps; - - RMSNorm { weight, eps } - } -} - -#[derive(Module, Debug)] -pub struct RMSNorm { - weight: Tensor, - eps: f64, -} - -impl RMSNorm { - fn forward(&self, x: Tensor) -> Tensor { - let rms = (x.clone().powf(2.0).mean_dim(D - 1) + self.eps).sqrt(); - (x / rms) * self.weight.clone().unsqueeze() - } -} - -#[derive(Module, Clone, Debug)] -pub struct SILU {} - -impl SILU { - fn new() -> Self { - Self {} - } - - fn forward(&self, x: Tensor) -> Tensor { - x.clone() * sigmoid(x) - } -} - -use npy::{self, NpyData}; -use num_traits::cast::ToPrimitive; -use std::error::Error; -use std::io::Read; - -use burn::tensor::ElementConversion; - -fn numpy_to_tensor( - numpy_data: NpyData, - device: &B::Device, -) -> Tensor { - let v = numpy_data.to_vec(); - - let shape: Vec<_> = v[0..D].into_iter().map(|&v| v as usize).collect(); - let data: Vec = v[D..].into_iter().map(|e| e.elem()).collect(); - - Tensor::from_data_device(Data::new(data, shape.into()), device) -} - -fn load_tensor( - name: &str, - path: &str, - device: &B::Device, -) -> Result, Box> { - let tensor_path = format!("{}/{}.npy", path, name); - - let mut buf = vec![]; - std::fs::File::open(&tensor_path)?.read_to_end(&mut buf)?; - - let tensor_numpy: NpyData = NpyData::from_bytes(&buf)?; - - let tensor = numpy_to_tensor(tensor_numpy, device); - - println!("{}", tensor_path); - - Ok(tensor) -} - -fn load_f32(name: &str, path: &str, device: &B::Device) -> Result> { - load_tensor::(name, path, device).map(|t| t.into_scalar().to_f32().unwrap()) -} - -fn load_usize( - name: &str, - path: &str, - device: &B::Device, -) -> Result> { - load_tensor::(name, path, device).map(|t| t.into_scalar().to_usize().unwrap()) -} - -fn load_linear( - path: &str, - device: &B::Device, -) -> Result, Box> { - let weight = load_tensor::("weight", path, device)?; - let bias = load_tensor::("bias", path, device).ok(); - - let record = nn::LinearRecord { - weight: weight.into(), - bias: bias.map(|t| t.into()), - }; - - let linear: nn::Linear = nn::LinearConfig::new(3, 3).init_with(record); - Ok(linear) -} - -fn load_rmsnorm(path: &str, device: &B::Device) -> Result, Box> { - let weight = load_tensor::("weight", path, device)?; - let eps = load_f32::("eps", path, device)?.into(); - - let rmsnorm = RMSNorm { - weight: weight.into(), - eps: eps, - }; - - Ok(rmsnorm) -} - -fn load_attention( - path: &str, - device: &B::Device, -) -> Result, Box> { - let query = load_linear(&format!("{}/{}", path, "wq"), device)?; - let key = load_linear(&format!("{}/{}", path, "wk"), device)?; - let value = load_linear(&format!("{}/{}", 
path, "wv"), device)?; - let out = load_linear(&format!("{}/{}", path, "wo"), device)?; - - let n_head = load_usize::("n_head", path, device)?; - let n_kv_head = load_usize::("n_kv_head", path, device)?; - - let attention = MultiHeadSelfAttention { - n_head, - n_kv_head, - query, - key, - value, - out, - }; - - Ok(attention) -} - -fn load_feedforward(path: &str, device: &B::Device) -> Result, Box> { - let w1 = load_linear(&format!("{}/{}", path, "w1"), device)?; - let w2 = load_linear(&format!("{}/{}", path, "w2"), device)?; - let w3 = load_linear(&format!("{}/{}", path, "w3"), device)?; - - let mlp = MLP { - w1, - w2, - w3, - silu: SILU::new(), - }; - - Ok(mlp) -} - -fn load_transformer_block( - path: &str, - device: &B::Device, -) -> Result, Box> { - let attn = load_attention(&format!("{}/{}", path, "attention"), device)?; - let attn_norm = load_rmsnorm(&format!("{}/{}", path, "attention_norm"), device)?; - let mlp = load_feedforward(&format!("{}/{}", path, "feedforward"), device)?; - let mlp_norm = load_rmsnorm(&format!("{}/{}", path, "ffn_norm"), device)?; - - let block = ResidualDecoderAttentionBlock { - attn, - attn_norm, - mlp, - mlp_norm, - }; - - Ok(block) -} - -use burn::nn::{EmbeddingConfig, EmbeddingRecord}; - -pub fn load_llama_dump( - path: &str, - device: &B::Device, -) -> Result<(Llama, LlamaConfig), Box> { - let mut blocks: Vec> = vec![]; - let n_layer = load_usize::("n_layer", path, device)?; - for i in 0..n_layer { - let block = load_transformer_block(&format!("{}/layer{}", path, i), device)?; - blocks.push(block); - } - - let n_ctx = load_usize::("n_ctx", path, device)?; - let theta = load_f32::("theta", path, device)?; - let multiple_of = load_usize::("multiple_of", path, device)?; - let ffn_dim_multiplier = load_usize::("ffn_dim_multiplier", path, device).ok(); - - let token_embedding = load_tensor("tok_embeddings/weight", path, device)?; - let [n_vocab, n_state] = token_embedding.dims(); - let n_head = blocks[0].attn.n_head; - let n_kv_head = blocks[0].attn.n_kv_head; - let head_dim = n_state / n_head; - - let token_embedding = EmbeddingConfig::new(n_vocab, n_state).init_with(EmbeddingRecord { - weight: token_embedding.into(), - }); - let rotary_encoding = RotaryEncodingConfig::new(n_ctx, head_dim, theta.into()).init(); - - let norm = load_rmsnorm(&format!("{}/{}", path, "norm"), device)?; - let output = load_linear(&format!("{}/{}", path, "output"), device)?; - let mask = attn_decoder_mask(n_ctx).into(); - - let norm_eps = norm.eps; - - let llama = Llama { - token_embedding, - rotary_encoder: rotary_encoding, - blocks, - norm, - output, - mask, - n_vocab, - n_ctx, - }; - - let llama_config = LlamaConfig::new( - n_vocab, - n_ctx, - n_state, - multiple_of, - n_head, - n_kv_head, - n_layer, - ) - .with_norm_eps(norm_eps) - .with_ffn_dim_multiplier(ffn_dim_multiplier); - - Ok((llama, llama_config)) -} diff --git a/rust_bench/llama2-burn/src/token.rs b/rust_bench/llama2-burn/src/token.rs deleted file mode 100644 index 507d78d9..00000000 --- a/rust_bench/llama2-burn/src/token.rs +++ /dev/null @@ -1,66 +0,0 @@ -use rust_tokenizers::{ - error::TokenizerError, - tokenizer::{SentencePieceBpeTokenizer, Tokenizer, TruncationStrategy}, - vocab::Vocab, -}; -use std::result; - -const BOS_TOKEN_ID: i64 = 1; -const EOS_TOKEN_ID: i64 = 2; - -pub type Result = result::Result; - -pub struct LlamaTokenizer { - spm: SentencePieceBpeTokenizer, -} - -impl LlamaTokenizer { - pub fn new(tokenizer_path: &str) -> Result { - let lower_case = false; - 
SentencePieceBpeTokenizer::from_file(tokenizer_path, lower_case).map(|spm| Self { spm }) - } - - pub fn encode(&self, text: &str, include_bos: bool, include_eos: bool) -> Vec { - let pre = if include_bos { - vec![BOS_TOKEN_ID] - } else { - vec![] - }; - - let post = if include_eos { - vec![EOS_TOKEN_ID] - } else { - vec![] - }; - - let token_ids = self - .spm - .encode( - text, - None, - std::usize::MAX, - &TruncationStrategy::LongestFirst, - 0, - ) - .token_ids; - - [pre, token_ids, post] - .into_iter() - .flat_map(|v| v.into_iter()) - .collect() - } - - pub fn decode(&self, tokens: &[i64], skip_special_tokens: bool) -> String { - let clean_spaces = false; - self.spm.decode(tokens, skip_special_tokens, clean_spaces) - } - - pub fn vocab_size(&self, include_special_tokens: bool) -> usize { - let vocab = self.spm.vocab(); - if include_special_tokens { - vocab.values().len() + vocab.special_values().len() - } else { - vocab.values().len() - } - } -} From 7ab7b78124a2bdb7f6d5ba3561ec50789450e629 Mon Sep 17 00:00:00 2001 From: nsosio Date: Wed, 22 Nov 2023 16:05:42 +0000 Subject: [PATCH 20/25] added candle --- .pre-commit-config.yaml | 1 - README.md.template | 14 +- bench.py | 113 ------------ bench_burn/setup.sh | 1 + bench_candle/bench.sh | 166 ++++++++++++++++++ .../convert_to_safetensors.py | 0 .../llama2-candle/Cargo.lock | 0 .../llama2-candle/Cargo.toml | 0 .../llama2-candle/src/main.rs | 0 bench_candle/requirements.txt | 3 + bench_candle/setup.sh | 40 +++++ bench_ctranslate/setup.sh | 2 + bench_llamacpp/setup.sh | 12 +- bench_tinygrad/setup.sh | 1 + setup.sh | 83 --------- 15 files changed, 227 insertions(+), 209 deletions(-) delete mode 100644 bench.py create mode 100755 bench_candle/bench.sh rename convert_to_safetensors.py => bench_candle/convert_to_safetensors.py (100%) rename {rust_bench => bench_candle}/llama2-candle/Cargo.lock (100%) rename {rust_bench => bench_candle}/llama2-candle/Cargo.toml (100%) rename {rust_bench => bench_candle}/llama2-candle/src/main.rs (100%) create mode 100644 bench_candle/requirements.txt create mode 100755 bench_candle/setup.sh delete mode 100755 setup.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b9ae8d39..5a8b760e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,6 @@ repos: rev: v0.9.0.6 hooks: - id: shellcheck - exclude: setup.sh ci: autoupdate_schedule: weekly diff --git a/README.md.template b/README.md.template index 23c3a2c1..124caccc 100644 --- a/README.md.template +++ b/README.md.template @@ -74,13 +74,13 @@ CUDA Version: 11.7 Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --nvidia --prompt 'Explain what is a transformer'` -| Engine | float32 | float16 | int8 | int4 | -|-------------|--------------|--------------|--------------|--------------| -| burn | 13.28 ± 0.79 | - | - | - | -| candle | - | 26.30 ± 0.29 | - | - | -| llama.cpp | - | - | 67.64 ± 22.57| 106.21 ± 2.21| -| ctranslate | - | 58.54 ± 13.24| 34.22 ± 6.29 | - | -| tinygrad | - | 20.13 ± 1.35 | - | - | +| Engine | float32 | float16 | int8 | int4 | +|-------------|--------------|---------------|---------------|---------------| +| burn | 13.12 ± 0.85 | - | - | - | +| candle | - | 36.78 ± 2.17 | - | - | +| llama.cpp | - | - | 84.48 ± 3.76 | 106.76 ± 1.29 | +| ctranslate | - | 51.38 ± 16.01 | 36.12 ± 11.93 | - | +| tinygrad | - | 20.32 ± 0.06 | - | - | *(data updated: ) diff --git a/bench.py b/bench.py deleted file mode 100644 index 59d92e61..00000000 --- a/bench.py +++ /dev/null @@ -1,113 +0,0 @@ -import 
argparse -import logging -import sys -from collections import defaultdict - -import numpy as np -from python_bench.ctranslate import CTranslateBenchmark, get_compute_types -from python_bench.llama_cpp import LlamaCPPBenchmark -from python_bench.tinygrad import TinyGradBenchmark - -logging.basicConfig( - stream=sys.stdout, - level=logging.INFO, - format="%(asctime)s - %(levelname)s - %(message)s", -) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Benchmark Llama model.") - parser.add_argument( - "--prompt", - type=str, - help="The prompt for the model.", - default="Explain what is a transformer", - ) - parser.add_argument( - "--max_tokens", type=int, default=100, help="The maximum number of tokens." - ) - parser.add_argument( - "--repetitions", - type=int, - default=10, - help="The number of repetitions for the benchmark.", - ) - parser.add_argument( - "--gpu", - action="store_true", - default=False, - help="Flag to indicate whether to use GPU for the benchmark.", - ) - parser.add_argument( - "--nvidia", - action="store_true", - default=False, - help="Flag to indicate whether the system uses NVIDIA", - ) - parser.add_argument( - "--log_file", - type=str, - help="Path to the log file for writing logs (in append mode).", - default="benchmark.log", - ) - args = parser.parse_args() - - logging.info( - f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} " - + f"repetitions={args.repetitions} gpu={args.gpu} nvidia={args.gpu}" - ) - report = defaultdict(lambda: defaultdict(float)) - for quantize in ("Q8_0", "Q4_0"): - logging.info(f"Running llama-cpp benchmark with {quantize}") - llamacpp_bench = LlamaCPPBenchmark( - f"./models/llama-2-7b-gguf/llama-2-7b.{quantize}.gguf", gpu=args.gpu - ).load_model() - llamacpp_bench.benchmark( - max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions - ) - q = "int8" if quantize == "Q8_0" else "int4" - report["llama.cpp"][q] = { - "mean": np.mean(llamacpp_bench.results), - "std": np.std(llamacpp_bench.results), - } - - compute_types = get_compute_types(gpu=args.gpu, nvidia=args.nvidia) - for compute_type in compute_types.intersection({"float16", "int8"}): - logging.info(f"Running ctranslate benchmark with {compute_type}") - ctranslate_bench = CTranslateBenchmark( - "./models/llama-2-7b-hf-float16", - gpu=args.gpu, - compute_type=compute_type, - ).load_model() - ctranslate_bench.benchmark( - max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions - ) - report["ctranslate"][compute_type] = { - "mean": np.mean(ctranslate_bench.results), - "std": np.std(ctranslate_bench.results), - } - - logging.info("Running tinygrad benchmark") - tinygrad_bench = TinyGradBenchmark( - "./models/llama-2-7b-hf", - quantize=False, - device="CPU" if not args.gpu else "GPU" if args.nvidia else "METAL", - ).load_model() - tinygrad_bench.benchmark( - max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions - ) - report["tinygrad"]["float16"] = { - "mean": np.mean(tinygrad_bench.results), - "std": np.std(tinygrad_bench.results), - } - - logging.info("Benchmark report") - with open(args.log_file, "a") as file: - for framework, quantizations in report.items(): - for quantization, stats in quantizations.items(): - logging.info( - f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}" - ) - print( - f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}", - file=file, - ) diff --git a/bench_burn/setup.sh b/bench_burn/setup.sh index 
4b668e0c..de190c63 100755
--- a/bench_burn/setup.sh
+++ b/bench_burn/setup.sh
@@ -31,6 +31,7 @@ check_and_create_directory() {
 if [ ! -d "$VENV_DIR" ]; then
     python -m venv "$VENV_DIR"
     echo "Virtual environment '$VENV_DIR' created."
+    # shellcheck disable=SC1091
     source "$VENV_DIR/bin/activate"
     pip install --upgrade pip > /dev/null
     if [ -d "$BURN_FOLDER" ]; then
diff --git a/bench_candle/bench.sh b/bench_candle/bench.sh
new file mode 100755
index 00000000..9ac495b1
--- /dev/null
+++ b/bench_candle/bench.sh
@@ -0,0 +1,166 @@
+#!/bin/bash
+
+########################################################################################################
+# Script: bench.sh
+# Description: This script runs the candle llama benchmark.
+#
+# Usage: ./bench.sh [OPTIONS]
+# OPTIONS:
+#   -p, --prompt      Prompt for benchmarks (default: 'Explain what is a transformer')
+#   -r, --repetitions Number of repetitions for benchmarks (default: 2)
+#   -m, --max_tokens  Maximum number of tokens for benchmarks (default: 100)
+#   -d, --device      Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')
+#   -lf, --log_file   Logging file name.
+#   -md, --models_dir Models directory.
+#   -h, --help        Show this help message
+########################################################################################################
+
+set -euo pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+print_usage() {
+    echo "Usage: $0 [OPTIONS]"
+    echo "OPTIONS:"
+    echo "  -p, --prompt      Prompt for benchmarks (default: 'Explain what is a transformer')"
+    echo "  -r, --repetitions Number of repetitions for benchmarks (default: 2)"
+    echo "  -m, --max_tokens  Maximum number of tokens for benchmarks (default: 100)"
+    echo "  -lf, --log_file   Logging file name."
+    echo "  -md, --models_dir Models directory."
+    echo "  -h, --help        Show this help message"
+    exit 1
+}
+
+check_cuda() {
+    if command -v nvcc &> /dev/null
+    then
+        echo -e "\nUsing CUDA"
+        nvcc --version
+    else
+        echo -e "\nCUDA is not available."
+        exit 1
+    fi
+}
+
+check_rust() {
+    if which cargo &>/dev/null ; then
+        echo -e "\nRust is installed. Using $(which cargo)"
+    else
+        echo -e "\nRust is not installed. Please install Rust before proceeding."
+        exit 1  # Error exit code
+    fi
+}
+
+check_platform() {
+    local platform
+    platform=$(uname -s)
+    if [[ "$platform" == "Linux" ]]; then
+        echo "Running on Linux."
+    elif [[ "$platform" == "Darwin" ]]; then
+        echo "Running on Mac OS."
+    else
+        echo "Unknown platform."
+        exit 1
+    fi
+}
+
+check_python() {
+    if command -v python &> /dev/null
+    then
+        echo -e "\nUsing $(python --version)."
+    else
+        echo -e "\nPython does not exist."
+        exit 1
+    fi
+}
+
+setup() {
+    echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
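+    # setup.sh receives the models directory as its only argument; it creates
+    # the Python virtual environment and converts the HF weights to safetensors.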
+    bash "$SCRIPT_DIR/setup.sh" "$1"
+}
+
+run_benchmarks() {
+    local PROMPT="$1"
+    local REPETITIONS="$2"
+    local MAX_TOKENS="$3"
+    local DEVICE="$4"
+    local LOG_FILENAME="$5"
+    local MODELS_DIR="$6"
+
+    if [ "$DEVICE" == "cpu" ] || [ "$DEVICE" == "cuda" ]; then
+        CARGO_CANDLE_FEATURES=""
+        [ "$DEVICE" == "cuda" ] && CARGO_CANDLE_FEATURES="--features cuda"
+
+        # shellcheck disable=SC2086  # the optional feature flag must word-split
+        cargo run --release $CARGO_CANDLE_FEATURES \
+            --manifest-path="$SCRIPT_DIR/llama2-candle/Cargo.toml" \
+            -- --local-weights "$MODELS_DIR/llama-2-7b-st/" \
+            --repetitions "$REPETITIONS" \
+            --prompt "$PROMPT" \
+            --sample-len "$MAX_TOKENS" \
+            --log-file "$LOG_FILENAME"
+    fi
+}
+# Parse command-line arguments
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        -p|--prompt)
+            PROMPT="$2"
+            shift 2
+            ;;
+        -r|--repetitions)
+            REPETITIONS="$2"
+            shift 2
+            ;;
+        -m|--max_tokens)
+            MAX_TOKENS="$2"
+            shift 2
+            ;;
+        -d|--device)
+            DEVICE="$2"
+            case "$DEVICE" in
+                "cuda" | "metal" | "cpu")
+                    ;;
+                *)
+                    echo "Invalid value for --device. Please use 'cuda', 'metal' or 'cpu'."
+                    print_usage
+                    ;;
+            esac
+            if [ "$DEVICE" == "cuda" ]; then
+                check_cuda
+            fi
+            if [ "$DEVICE" == "metal" ]; then
+                echo "Metal not supported!"
+                exit 0
+            fi
+            shift 2
+            ;;
+        -lf|--log_file)
+            LOG_FILENAME="$2"
+            shift 2
+            ;;
+        -md|--models_dir)
+            MODELS_DIR="$2"
+            shift 2
+            ;;
+        -h|--help)
+            print_usage
+            ;;
+        *)
+            echo "Unknown option: $1"
+            print_usage
+            ;;
+    esac
+done
+
+# Set default values if not provided
+PROMPT="${PROMPT:-"Explain what is a transformer"}"
+REPETITIONS="${REPETITIONS:-10}"
+MAX_TOKENS="${MAX_TOKENS:-100}"
+DEVICE="${DEVICE:-cpu}"
+LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}"
+MODELS_DIR="${MODELS_DIR:-"./models"}"
+
+check_platform
+check_rust
+check_python
+setup "$MODELS_DIR"
+run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR"
diff --git a/convert_to_safetensors.py b/bench_candle/convert_to_safetensors.py
similarity index 100%
rename from convert_to_safetensors.py
rename to bench_candle/convert_to_safetensors.py
diff --git a/rust_bench/llama2-candle/Cargo.lock b/bench_candle/llama2-candle/Cargo.lock
similarity index 100%
rename from rust_bench/llama2-candle/Cargo.lock
rename to bench_candle/llama2-candle/Cargo.lock
diff --git a/rust_bench/llama2-candle/Cargo.toml b/bench_candle/llama2-candle/Cargo.toml
similarity index 100%
rename from rust_bench/llama2-candle/Cargo.toml
rename to bench_candle/llama2-candle/Cargo.toml
diff --git a/rust_bench/llama2-candle/src/main.rs b/bench_candle/llama2-candle/src/main.rs
similarity index 100%
rename from rust_bench/llama2-candle/src/main.rs
rename to bench_candle/llama2-candle/src/main.rs
diff --git a/bench_candle/requirements.txt b/bench_candle/requirements.txt
new file mode 100644
index 00000000..fc98f758
--- /dev/null
+++ b/bench_candle/requirements.txt
@@ -0,0 +1,3 @@
+torch==2.1.0
+safetensors==0.4.0
+numpy==1.26.2
diff --git a/bench_candle/setup.sh b/bench_candle/setup.sh
new file mode 100755
index 00000000..664dc524
--- /dev/null
+++ b/bench_candle/setup.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+################################################################################
+# Script: setup.sh
+# Description: This script automates the setup of a virtual environment,
+# installs project requirements, and converts the model to safetensors.
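+# Usage: ./setup.sh <models_folder>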
+################################################################################
+
+set -euo pipefail
+
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <models_folder>"
+    exit 1
+fi
+
+# Define directory paths
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+VENV_DIR="$SCRIPT_DIR/venv"
+MODELS_FOLDER="$1"
+LLAMA_HF_MODEL_DIR="$MODELS_FOLDER/llama-2-7b-hf"
+LLAMA_ST_MODEL_DIR="$MODELS_FOLDER/llama-2-7b-st"
+
+if [ ! -d "$VENV_DIR" ]; then
+    python -m venv "$VENV_DIR"
+    echo "Virtual environment '$VENV_DIR' created."
+    # shellcheck disable=SC1091
+    source "$VENV_DIR"/bin/activate
+    pip install --upgrade pip > /dev/null
+    pip install -r "$SCRIPT_DIR"/requirements.txt > /dev/null
+else
+    # shellcheck disable=SC1091
+    source "$VENV_DIR"/bin/activate
+fi
+
+if [ ! -d "$LLAMA_ST_MODEL_DIR" ]; then
+    echo "Storing llama-2-7b-hf in safetensors format..."
+    python "$SCRIPT_DIR"/convert_to_safetensors.py --input_dir "$LLAMA_HF_MODEL_DIR" --output_dir "$LLAMA_ST_MODEL_DIR"
+else
+    echo "Model llama-2-7b-hf in safetensors format already exists!"
+fi
diff --git a/bench_ctranslate/setup.sh b/bench_ctranslate/setup.sh
index d38480b3..7c322e06 100644
--- a/bench_ctranslate/setup.sh
+++ b/bench_ctranslate/setup.sh
@@ -22,10 +22,12 @@ LLAMA_HF_MODEL_DIR="$MODELS_FOLDER/llama-2-7b-hf"
 if [ ! -d "$VENV_DIR" ]; then
     python -m venv "$VENV_DIR"
     echo "Virtual environment '$VENV_DIR' created."
+    # shellcheck disable=SC1091
     source "$VENV_DIR"/bin/activate
     pip install --upgrade pip > /dev/null
     pip install -r "$SCRIPT_DIR"/requirements.txt > /dev/null
 else
+    # shellcheck disable=SC1091
     source "$VENV_DIR"/bin/activate
 fi
diff --git a/bench_llamacpp/setup.sh b/bench_llamacpp/setup.sh
index 340b6c67..87d4fc6c 100755
--- a/bench_llamacpp/setup.sh
+++ b/bench_llamacpp/setup.sh
@@ -44,20 +44,20 @@ clone_and_build_llama() {
     # Remove existing llama.cpp directory if it exists
     if [ -d "$SCRIPT_DIR/llama.cpp" ]; then
         echo "Removing existing llama.cpp directory..."
-        rm -rf $SCRIPT_DIR/llama.cpp
+        rm -rf "$SCRIPT_DIR"/llama.cpp
     fi
 
-    git clone --depth=1 https://github.com/ggerganov/llama.cpp $SCRIPT_DIR/llama.cpp
-    cd $SCRIPT_DIR/llama.cpp
+    git clone --depth=1 https://github.com/ggerganov/llama.cpp "$SCRIPT_DIR"/llama.cpp
+    cd "$SCRIPT_DIR"/llama.cpp
 
     # Build llama.cpp
     make clean > /dev/null
     echo "Building llama.cpp..."
     make libllama.so > /dev/null
     cp libllama.so "$LIBLLAMA_FILE"
-    cd $SCRIPT_DIR
+    cd "$SCRIPT_DIR"
 
-    rm -rf $SCRIPT_DIR/llama.cpp
+    rm -rf "$SCRIPT_DIR"/llama.cpp
 }
 
 # Main script starts here
@@ -76,10 +76,12 @@ LIBLLAMA_FILE="$VENV_DIR/libllama_$DEVICE.so"
 if [ ! -d "$VENV_DIR" ]; then
     python -m venv "$VENV_DIR"
     echo "Virtual environment '$VENV_DIR' created."
+    # shellcheck disable=SC1091
     source "$VENV_DIR/bin/activate"
     pip install --upgrade pip > /dev/null
     pip install -r "$SCRIPT_DIR/requirements.txt" --no-cache-dir > /dev/null
 else
+    # shellcheck disable=SC1091
     source "$VENV_DIR/bin/activate"
 fi
diff --git a/bench_tinygrad/setup.sh b/bench_tinygrad/setup.sh
index e1baa4a9..5f1daa51 100755
--- a/bench_tinygrad/setup.sh
+++ b/bench_tinygrad/setup.sh
@@ -15,6 +15,7 @@ VENV_DIR="$SCRIPT_DIR/venv"
 if [ ! -d "$VENV_DIR" ]; then
     python -m venv "$VENV_DIR"
     echo "Virtual environment '$VENV_DIR' created."
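+    # SC1091: shellcheck cannot follow the venv's activate script, which only
+    # exists once `python -m venv` has run, so the warning is suppressed.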
+ # shellcheck disable=SC1091 source "$VENV_DIR/bin/activate" pip install --upgrade pip > /dev/null git clone --depth=1 https://github.com/tinygrad/tinygrad.git "$SCRIPT_DIR"/tinygrad diff --git a/setup.sh b/setup.sh deleted file mode 100755 index d8e7cf6a..00000000 --- a/setup.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash - -################################################################################ -# Script: setup_and_convert.sh -# Description: This script automates the setup of a virtual environment, -# installs project requirements, converts and stores models. -################################################################################ - -set -euo pipefail - -# Define directory paths -VENV_DIR="venv" -LLAMA_HF_MODEL_DIR="./models/llama-2-7b-hf" -LLAMA_ST_MODEL_DIR="./models/llama-2-7b-st" -BURN_MODEL_INPUT_DIR=$(pwd)/models/llama-2-7b-raw -BURN_FOLDER=$(pwd)/rust_bench/llama2-burn -BURN_MODEL_FOLDER=$(pwd)/models/llama-2-7b-burn -BURN_MODEL_NAME="llama-2-7b-burn" - -create_and_activate_venv() { - if [ ! -d "$VENV_DIR" ]; then - python -m venv "$VENV_DIR" - echo "Virtual environment '$VENV_DIR' created." - fi - source "$VENV_DIR/bin/activate" - pip install --upgrade pip > /dev/null -} - -install_requirements() { - pip install -r "$1" -} - -check_and_create_directory() { - if [ ! -d "$1" ]; then - mkdir -p "$1" - fi -} - -# Check and create virtual environment -create_and_activate_venv - -# Install requirements for the project -CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install -r requirements.txt > /dev/null - -# Check and create llama-2-7b-hf-float16 model -if [ ! -d "$LLAMA_HF_MODEL_DIR-float16" ]; then - echo "Creating llama-2-7b-hf-float16 model..." - ct2-transformers-converter --model "$LLAMA_HF_MODEL_DIR/" --quantization float16 --output_dir "$LLAMA_HF_MODEL_DIR-float16" --copy_files tokenizer.model -else - echo "Model llama-2-7b-hf-float16 already exists!" -fi - -# Check and create llama-2-7b-st model -if [ ! -d "$LLAMA_ST_MODEL_DIR" ]; then - echo "Storing llama-2-7b-hf in safetensors format..." - python convert_to_safetensors.py --input_dir "$LLAMA_HF_MODEL_DIR" --output_dir "$LLAMA_ST_MODEL_DIR" -else - echo "Model llama-2-7b-hf in safetensors format already exists!" -fi - -# Check and create llama-2-7b-burn model -if [ ! -e "$BURN_MODEL_FOLDER/$BURN_MODEL_NAME.cfg" ]; then - check_and_create_directory "$BURN_MODEL_FOLDER" - - if [ ! -d "$BURN_MODEL_FOLDER/params" ]; then - create_and_activate_venv - echo "Installing requirements for dumping" - install_requirements "$BURN_FOLDER/llama-py/requirements.txt" > /dev/null - echo "Dumping model from $BURN_MODEL_INPUT_DIR to $BURN_MODEL_FOLDER" - python "$BURN_FOLDER/llama-py/dump_model.py" --model-dir "$BURN_MODEL_INPUT_DIR" --output-dir "$BURN_MODEL_FOLDER" - deactivate - cp "$BURN_MODEL_INPUT_DIR/tokenizer.model" "$BURN_MODEL_FOLDER" - rm -r $BURN_MODEL_INPUT_DIR - else - echo "Model already dumped at $BURN_MODEL_FOLDER/params." - fi - - echo "Converting dumped model to burn" - cargo run --manifest-path="$BURN_FOLDER/Cargo.toml" --bin convert -- "$BURN_MODEL_FOLDER/params" "$BURN_MODEL_NAME" "$BURN_MODEL_FOLDER" - rm -r "$BURN_MODEL_FOLDER/params" -else - echo "Model llama-2-7b-burn already exists!" 
-fi From 763ca314247a30bebb96ee86f0fd49f367916fcf Mon Sep 17 00:00:00 2001 From: nsosio Date: Wed, 22 Nov 2023 16:06:34 +0000 Subject: [PATCH 21/25] removed requirements.txt --- requirements.txt | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index b37ebfcc..00000000 --- a/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -llama_cpp_python==0.2.15 -sentencepiece==0.1.99 -ctranslate2==3.20.0 -huggingface-hub==0.17.3 -transformers==4.35.0 -torch==2.1.0 -# Using fixed commit (a72b3700) for tinygrad to ensure stability in benchmarking. -# Helps maintain reproducibility and guards against potential breaking changes. -git+https://github.com/tinygrad/tinygrad.git@a72b370066837af5b4d44eeb5c4fb30aebf5c502 From f7f7cf35f65a5619064e3ae0df3a3539fc1e67f6 Mon Sep 17 00:00:00 2001 From: nsosio Date: Wed, 22 Nov 2023 16:52:00 +0000 Subject: [PATCH 22/25] minor changes --- README.md.template | 14 +++++++------- bench_burn/bench.sh | 5 +++-- bench_candle/bench.sh | 5 +++-- bench_ctranslate/bench.sh | 7 ++++--- bench_llamacpp/bench.sh | 5 +++-- bench_tinygrad/bench.sh | 19 +++++++++++++++++-- benchmark.sh | 3 ++- 7 files changed, 39 insertions(+), 19 deletions(-) diff --git a/README.md.template b/README.md.template index 124caccc..f7189624 100644 --- a/README.md.template +++ b/README.md.template @@ -6,21 +6,21 @@ MLOps Engines, Frameworks, and Languages benchmarks over main stream AI Models. The benchmarking tool comprises three main scripts: - `benchmark.sh` for running the end-to-end benchmarking - `download.sh` which is internally used by the benchmark script to download the needed model files based on a configuration -- `setup.sh` script for setup of dependencies and needed formats conversion ### benchmark -This script runs benchmarks for a transformer model using both Rust and Python implementations. It provides options to customize the benchmarks, such as the prompt, repetitions, maximum tokens, device, and NVIDIA flag. +This script runs all the defined benchmarks (i.e. `bench_{benchmark_name}`). It provides options to customize the benchmarks, such as the prompt, repetitions, maximum tokens, device. ```bash ./benchmark.sh [OPTIONS] ``` where `OPTIONS`: -- `-p, --prompt`: Prompt for benchmarks (default: 'Explain what is a transformer') -- `-r, --repetitions`: Number of repetitions for benchmarks (default: 2) -- `-m, --max_tokens`: Maximum number of tokens for benchmarks (default: 100) -- `-d, --device`: Device for benchmarks (possible values: 'gpu' or 'cpu', default: 'cpu') -- `--nvidia`: Use NVIDIA for benchmarks (default: false) +- `-p, --prompt` Prompt for benchmarks (default: 'Explain what is a transformer') +- `-r, --repetitions` Number of repetitions for benchmarks (default: 10) +- `-m, --max_tokens` Maximum number of tokens for benchmarks (default: 100) +- `-d, --device` Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu') +- `-lf, --log_file` Logging file name. +- `-md, --models_dir` Models directory. ### download diff --git a/bench_burn/bench.sh b/bench_burn/bench.sh index 640654c2..7dc44c63 100755 --- a/bench_burn/bench.sh +++ b/bench_burn/bench.sh @@ -10,8 +10,8 @@ # -r, --repetitions Number of repetitions for benchmarks (default: 2) # -m, --max_tokens Maximum number of tokens for benchmarks (default: 100) # -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu') -# -lf, --log_file Logging file name. 
-# -md, --models_dir Models directory. +# -lf, --log_file Logging file name. +# -md, --models_dir Models directory. # -h, --help Show this help message ######################################################################################################## @@ -25,6 +25,7 @@ print_usage() { echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')" echo " -lf, --log_file Logging file name." echo " -md, --models_dir Models directory." echo " -h, --help Show this help message" diff --git a/bench_candle/bench.sh b/bench_candle/bench.sh index 9ac495b1..e5f793ec 100755 --- a/bench_candle/bench.sh +++ b/bench_candle/bench.sh @@ -10,8 +10,8 @@ # -r, --repetitions Number of repetitions for benchmarks (default: 2) # -m, --max_tokens Maximum number of tokens for benchmarks (default: 100) # -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu') -# -lf, --log_file Logging file name. -# -md, --models_dir Models directory. +# -lf, --log_file Logging file name. +# -md, --models_dir Models directory. # -h, --help Show this help message ######################################################################################################## @@ -25,6 +25,7 @@ print_usage() { echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')" echo " -lf, --log_file Logging file name." echo " -md, --models_dir Models directory." echo " -h, --help Show this help message" diff --git a/bench_ctranslate/bench.sh b/bench_ctranslate/bench.sh index 9e823e40..54c2c7d1 100755 --- a/bench_ctranslate/bench.sh +++ b/bench_ctranslate/bench.sh @@ -2,7 +2,7 @@ ######################################################################################################## # Script: bench.sh -# Description: This script runs benchmarks burn ctranslate benchmark. +# Description: This script runs ctranslate llama benchmark. # # Usage: ./bench.sh [OPTIONS] # OPTIONS: @@ -10,8 +10,8 @@ # -r, --repetitions Number of repetitions for benchmarks (default: 2) # -m, --max_tokens Maximum number of tokens for benchmarks (default: 100) # -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu') -# -lf, --log_file Logging file name. -# -md, --models_dir Models directory. +# -lf, --log_file Logging file name. +# -md, --models_dir Models directory. # -h, --help Show this help message ######################################################################################################## @@ -25,6 +25,7 @@ print_usage() { echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')" echo " -lf, --log_file Logging file name." echo " -md, --models_dir Models directory." 
echo " -h, --help Show this help message" diff --git a/bench_llamacpp/bench.sh b/bench_llamacpp/bench.sh index af5c7e62..a7c86cea 100755 --- a/bench_llamacpp/bench.sh +++ b/bench_llamacpp/bench.sh @@ -10,8 +10,8 @@ # -r, --repetitions Number of repetitions for benchmarks (default: 2) # -m, --max_tokens Maximum number of tokens for benchmarks (default: 100) # -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu') -# -lf, --log_file Logging file name. -# -md, --models_dir Models directory. +# -lf, --log_file Logging file name. +# -md, --models_dir Models directory. # -h, --help Show this help message ######################################################################################################## @@ -25,6 +25,7 @@ print_usage() { echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')" echo " -lf, --log_file Logging file name." echo " -md, --models_dir Models directory." echo " -h, --help Show this help message" diff --git a/bench_tinygrad/bench.sh b/bench_tinygrad/bench.sh index bfd3b74d..4bb09d75 100755 --- a/bench_tinygrad/bench.sh +++ b/bench_tinygrad/bench.sh @@ -1,16 +1,31 @@ #!/bin/bash +######################################################################################################## +# Script: bench.sh +# Description: This script runs benchmarks tinygrad llama benchmark. +# +# Usage: ./bench.sh [OPTIONS] +# OPTIONS: +# -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer') +# -r, --repetitions Number of repetitions for benchmarks (default: 2) +# -m, --max_tokens Maximum number of tokens for benchmarks (default: 100) +# -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu') +# -lf, --log_file Logging file name. +# -md, --models_dir Models directory. +# -h, --help Show this help message +######################################################################################################## + set -euo pipefail SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -# Function to print script usage print_usage() { echo "Usage: $0 [OPTIONS]" echo "OPTIONS:" echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" - echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" + echo " -r, --repetitions Number of repetitions for benchmarks (default: 10)" echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')" echo " -lf, --log_file Logging file name." echo " -md, --models_dir Models directory." 
echo " -h, --help Show this help message" diff --git a/benchmark.sh b/benchmark.sh index b92d9e51..ee6ba91b 100755 --- a/benchmark.sh +++ b/benchmark.sh @@ -5,8 +5,9 @@ print_usage() { echo "Usage: $0 [OPTIONS]" echo "OPTIONS:" echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" - echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" + echo " -r, --repetitions Number of repetitions for benchmarks (default: 10)" echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')" echo " -lf, --log_file Logging file name." echo " -md, --models_dir Models directory." echo " -h, --help Show this help message" From 8449a43211e12b6fa0f0918c65435b774e218250 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Wed, 22 Nov 2023 23:03:05 +0000 Subject: [PATCH 23/25] Update placeholder in README.md --- README.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 8d8eefdd..f17dbc64 100644 --- a/README.md +++ b/README.md @@ -6,21 +6,21 @@ MLOps Engines, Frameworks, and Languages benchmarks over main stream AI Models. The benchmarking tool comprises three main scripts: - `benchmark.sh` for running the end-to-end benchmarking - `download.sh` which is internally used by the benchmark script to download the needed model files based on a configuration -- `setup.sh` script for setup of dependencies and needed formats conversion ### benchmark -This script runs benchmarks for a transformer model using both Rust and Python implementations. It provides options to customize the benchmarks, such as the prompt, repetitions, maximum tokens, device, and NVIDIA flag. +This script runs all the defined benchmarks (i.e. `bench_{benchmark_name}`). It provides options to customize the benchmarks, such as the prompt, repetitions, maximum tokens, device. ```bash ./benchmark.sh [OPTIONS] ``` where `OPTIONS`: -- `-p, --prompt`: Prompt for benchmarks (default: 'Explain what is a transformer') -- `-r, --repetitions`: Number of repetitions for benchmarks (default: 2) -- `-m, --max_tokens`: Maximum number of tokens for benchmarks (default: 100) -- `-d, --device`: Device for benchmarks (possible values: 'gpu' or 'cpu', default: 'cpu') -- `--nvidia`: Use NVIDIA for benchmarks (default: false) +- `-p, --prompt` Prompt for benchmarks (default: 'Explain what is a transformer') +- `-r, --repetitions` Number of repetitions for benchmarks (default: 10) +- `-m, --max_tokens` Maximum number of tokens for benchmarks (default: 100) +- `-d, --device` Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu') +- `-lf, --log_file` Logging file name. +- `-md, --models_dir` Models directory. 
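+
+For example (values shown are illustrative):
+
+```bash
+./benchmark.sh --repetitions 10 --max_tokens 100 --device cpu --prompt 'Explain what is a transformer'
+```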
 ### download
 
@@ -74,15 +74,15 @@ CUDA Version: 11.7
 
 Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --nvidia --prompt 'Explain what is a transformer'`
 
-| Engine      | float32      | float16      | int8         | int4         |
-|-------------|--------------|--------------|--------------|--------------|
-| burn        | 13.28 ± 0.79 | -            | -            | -            |
-| candle      | -            | 26.30 ± 0.29 | -            | -            |
-| llama.cpp   | -            | -            | 67.64 ± 22.57| 106.21 ± 2.21|
-| ctranslate  | -            | 58.54 ± 13.24| 34.22 ± 6.29 | -            |
-| tinygrad    | -            | 20.13 ± 1.35 | -            | -            |
+| Engine      | float32      | float16       | int8          | int4          |
+|-------------|--------------|---------------|---------------|---------------|
+| burn        | 13.12 ± 0.85 | -             | -             | -             |
+| candle      | -            | 36.78 ± 2.17  | -             | -             |
+| llama.cpp   | -            | -             | 84.48 ± 3.76  | 106.76 ± 1.29 |
+| ctranslate  | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
+| tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
 
-*(data updated: 20th November 2023)
+*(data updated: 22nd November 2023)
 
 ### M2 MAX 32GB Inference Bench:
 
@@ -115,4 +115,4 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --prompt
 | ctranslate  | -            | -            | -            | -            |
 | tinygrad    | -            | 29.78 ± 1.18 | -            | -            |
 
-*(data updated: 20th November 2023)
+*(data updated: 22nd November 2023)

From 48f991dbc4830ba3c0697a20c3968f0f563ed08a Mon Sep 17 00:00:00 2001
From: nsosio
Date: Thu, 23 Nov 2023 09:30:32 +0100
Subject: [PATCH 24/25] updated readme

---
 README.md.template | 57 +++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/README.md.template b/README.md.template
index f7189624..fc32074d 100644
--- a/README.md.template
+++ b/README.md.template
@@ -1,47 +1,46 @@
 # benchmarks
 MLOps Engines, Frameworks, and Languages benchmarks over main stream AI Models.
 
-## Tool
+## Structure
 
-The benchmarking tool comprises three main scripts:
-- `benchmark.sh` for running the end-to-end benchmarking
-- `download.sh` which is internally used by the benchmark script to download the needed model files based on a configuration
+The repository is organized to facilitate benchmark management and execution through a consistent structure:
 
-### benchmark
+- Each benchmark, identified as `bench_name`, has a dedicated folder, `bench_{bench_name}`.
+- Within these benchmark folders, a common script named `bench.sh` handles setup, environment configuration, and execution.
 
-This script runs all the defined benchmarks (i.e. `bench_{benchmark_name}`). It provides options to customize the benchmarks, such as the prompt, repetitions, maximum tokens, device.
+### Benchmark Script
 
-```bash
-./benchmark.sh [OPTIONS]
-```
-where `OPTIONS`:
-- `-p, --prompt` Prompt for benchmarks (default: 'Explain what is a transformer')
-- `-r, --repetitions` Number of repetitions for benchmarks (default: 10)
-- `-m, --max_tokens` Maximum number of tokens for benchmarks (default: 100)
-- `-d, --device` Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')
-- `-lf, --log_file` Logging file name.
-- `-md, --models_dir` Models directory.
+The `bench.sh` script supports key parameters:
 
-### download
+- `prompt`: Benchmark-specific prompt.
+- `max_tokens`: Maximum tokens for the benchmark.
+- `repetitions`: Number of benchmark repetitions.
+- `log_file`: File for storing benchmark logs.
+- `device`: Device for benchmark execution (cpu, cuda, metal).
+- `models_dir`: Directory containing necessary model files.
 
-Downloads files from a list of URLs specified in a JSON file. The JSON file should contain an array of objects, each with a 'url', 'file', and 'folder' property. The script checks if the file already exists before downloading it.
+### Unified Execution
 
-```bash
-./download.sh --models --cache --force-download
-```
-Options
-- `--models`: JSON file specifying the models to download (default: models.json)
-- `--cache`: Cache file to keep track of downloaded files (default: cache.log)
-- `--force-download`: Force download of all files, removing existing files and cache
+An overarching `benchmark.sh` script at the repository root streamlines benchmark execution:
+
+- Downloads essential files for benchmarking.
+- Iterates through all benchmark folders in the repository.
 
-### setup
-1. Creates a python virtual environment `venv` and installs project requirements.
-3. Converts and stores models in different formats.
+This lets users run either the full suite or a single benchmark. To run a specific benchmark, navigate to the corresponding benchmark folder (e.g., `bench_{bench_name}`) and execute the `bench.sh` script with the required parameters.
+
+
+
+## Usage
 
 ```bash
-./setup.sh
+# Run a specific benchmark
+./bench_{bench_name}/bench.sh --prompt <prompt> --max_tokens <max_tokens> --repetitions <repetitions> --log_file <log_file> --device <device> --models_dir <models_dir>
+
+# Run all benchmarks collectively
+./benchmark.sh --prompt <prompt> --max_tokens <max_tokens> --repetitions <repetitions> --log_file <log_file> --device <device> --models_dir <models_dir>
 ```
+
 
 ## ML Engines: Feature Table
 
 | Features | pytorch | burn | llama.cpp | candle | tinygrad | onnxruntime | CTranslate2 |

From d1520a80a88eef07f62797f41f95b9313b770809 Mon Sep 17 00:00:00 2001
From: GitHub Actions
Date: Thu, 23 Nov 2023 08:38:03 +0000
Subject: [PATCH 25/25] Update placeholder in README.md

---
 README.md | 61 +++++++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index f17dbc64..e66d30d6 100644
--- a/README.md
+++ b/README.md
@@ -1,47 +1,46 @@
 # benchmarks
 MLOps Engines, Frameworks, and Languages benchmarks over main stream AI Models.
 
-## Tool
+## Structure
 
-The benchmarking tool comprises three main scripts:
-- `benchmark.sh` for running the end-to-end benchmarking
-- `download.sh` which is internally used by the benchmark script to download the needed model files based on a configuration
+The repository is organized to facilitate benchmark management and execution through a consistent structure:
 
-### benchmark
+- Each benchmark, identified as `bench_name`, has a dedicated folder, `bench_{bench_name}`.
+- Within these benchmark folders, a common script named `bench.sh` handles setup, environment configuration, and execution.
 
-This script runs all the defined benchmarks (i.e. `bench_{benchmark_name}`). It provides options to customize the benchmarks, such as the prompt, repetitions, maximum tokens, device.
+### Benchmark Script
 
-```bash
-./benchmark.sh [OPTIONS]
-```
-where `OPTIONS`:
-- `-p, --prompt` Prompt for benchmarks (default: 'Explain what is a transformer')
-- `-r, --repetitions` Number of repetitions for benchmarks (default: 10)
-- `-m, --max_tokens` Maximum number of tokens for benchmarks (default: 100)
-- `-d, --device` Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')
-- `-lf, --log_file` Logging file name.
-- `-md, --models_dir` Models directory.
+The `bench.sh` script supports key parameters:
 
-### download
+- `prompt`: Benchmark-specific prompt.
+- `max_tokens`: Maximum tokens for the benchmark.
+- `repetitions`: Number of benchmark repetitions.
+- `log_file`: File for storing benchmark logs.
+- `device`: Device for benchmark execution (cpu, cuda, metal).
+- `models_dir`: Directory containing necessary model files.
 
-Downloads files from a list of URLs specified in a JSON file. The JSON file should contain an array of objects, each with a 'url', 'file', and 'folder' property. The script checks if the file already exists before downloading it.
+### Unified Execution
 
-```bash
-./download.sh --models --cache --force-download
-```
-Options
-- `--models`: JSON file specifying the models to download (default: models.json)
-- `--cache`: Cache file to keep track of downloaded files (default: cache.log)
-- `--force-download`: Force download of all files, removing existing files and cache
+An overarching `benchmark.sh` script at the repository root streamlines benchmark execution:
+
+- Downloads essential files for benchmarking.
+- Iterates through all benchmark folders in the repository.
 
-### setup
-1. Creates a python virtual environment `venv` and installs project requirements.
-3. Converts and stores models in different formats.
+This lets users run either the full suite or a single benchmark. To run a specific benchmark, navigate to the corresponding benchmark folder (e.g., `bench_{bench_name}`) and execute the `bench.sh` script with the required parameters.
+
+
+
+## Usage
 
 ```bash
-./setup.sh
+# Run a specific benchmark
+./bench_{bench_name}/bench.sh --prompt <prompt> --max_tokens <max_tokens> --repetitions <repetitions> --log_file <log_file> --device <device> --models_dir <models_dir>
+
+# Run all benchmarks collectively
+./benchmark.sh --prompt <prompt> --max_tokens <max_tokens> --repetitions <repetitions> --log_file <log_file> --device <device> --models_dir <models_dir>
 ```
+
 
 ## ML Engines: Feature Table
 
 | Features | pytorch | burn | llama.cpp | candle | tinygrad | onnxruntime | CTranslate2 |
@@ -82,7 +81,7 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --nvidia
 | ctranslate  | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
 | tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
 
-*(data updated: 22nd November 2023)
+*(data updated: 23rd November 2023)
 
 ### M2 MAX 32GB Inference Bench:
 
@@ -115,4 +114,4 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --prompt
 | ctranslate  | -            | -            | -            | -            |
 | tinygrad    | -            | 29.78 ± 1.18 | -            | -            |
 
-*(data updated: 22nd November 2023)
+*(data updated: 23rd November 2023)
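
A closing note on the dates stamped by the automated "Update placeholder in README.md" commits: `date` has no format code for English ordinal suffixes, so producing strings such as "23rd November 2023" (rather than a fixed "th" for every day) needs a small helper. The sketch below is illustrative only — the `ordinal_date` name and the `<DATE_PLACEHOLDER>` token are hypothetical, and GNU `date` is assumed for the unpadded `%-d` format:

```bash
#!/bin/bash
# Sketch: emit dates like "23rd November 2023" with the correct ordinal suffix.
ordinal_date() {
    local day suffix
    day=$(date -u +"%-d")
    case "$day" in
        11|12|13) suffix="th" ;;  # the teens are always "th"
        *1) suffix="st" ;;
        *2) suffix="nd" ;;
        *3) suffix="rd" ;;
        *)  suffix="th" ;;
    esac
    echo "${day}${suffix} $(date -u +"%B %Y")"
}

# Example substitution into a template (placeholder name is hypothetical):
# sed "s|<DATE_PLACEHOLDER>|$(ordinal_date)|g" README.md.template > README.md
```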