From fd03e395c6930cdd307d8007c2ce9da97619147e Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Fri, 24 Nov 2023 10:00:57 +0530
Subject: [PATCH 01/22] Feat: Adding the integration for CTransformers for
 benchmarks.

---
 bench_ctransformers/bench.py | 142 +++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 bench_ctransformers/bench.py

diff --git a/bench_ctransformers/bench.py b/bench_ctransformers/bench.py
new file mode 100644
index 00000000..17faf740
--- /dev/null
+++ b/bench_ctransformers/bench.py
@@ -0,0 +1,142 @@
+import argparse
+import logging
+import sys
+import time
+from typing import Optional
+from collections import defaultdict
+import numpy as np
+from ctransformers import AutoModelForCausalLM
+
+logging.getLogger('ctransformers').setLevel(logging.ERROR)
+logging.basicConfig(
+    stream=sys.stdout,
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+class CTransformersBenchmark:
+    def __init__(self, model_path: str, device: Optional[str]='cpu', model_type: Optional[str]=None) -> None:
+        self.model_path, self.device = model_path, device
+        self.model_map = {
+            'gpt2' : {
+                'devices': ['cpu'],
+                'type': 'gpt2'
+            },
+            'gptj': {
+                'type': 'gptj',
+                'devices': ['cpu'],
+            },
+            'gpt4allj': {
+                'devices': ['cpu'],
+                'type': 'gptj'
+            },
+            'gpt-neo': {
+                'devices': ['cpu'],
+                'type': 'gpt_neox'
+            },
+            'falcon':{
+                'devices': ['cpu', 'cuda'],
+                'type': 'falcon'
+            },
+            'llama': {
+                'devices': ['cpu', 'cuda', 'metal'],
+                'type': 'llama'
+            },
+            'mpt': {
+                'devices': ['cpu', 'cuda'],
+                'type': 'mpt'
+            },
+            'starcoder': {
+                'devices': ['cpu'],
+                'type': 'gpt_bigcode'
+            },
+            'dolly': {
+                'devices': ['cpu'],
+                'type': 'dolly-v2'
+            },
+            'replit': {
+                'devices': ['cpu'],
+                'type': 'replit'
+            }
+        }
+        self.results = []
+        # check which supported architecture the model path corresponds to
+        _model_name = model_path.split('/')[-1].lower()
+        matched_key_from_map = [key for key in self.model_map if key in _model_name]
+        if not matched_key_from_map and model_type is None:
+            raise ValueError(
+                f"The model {_model_name} does not fall under any of the supported model categories: {list(self.model_map.keys())}. "
+                f"If your model does match one of these architectures, pass it explicitly via the model_type argument."
+            )
+
+        self.model_type = matched_key_from_map[0] if model_type is None else model_type
+
+        # if the selected model does not support the requested device, fall back to its default device (i.e. the first value of the list)
+        self.device = device if device is not None and device in self.model_map[self.model_type]['devices'] else self.model_map[self.model_type]['devices'][0]
+
+    def load_model(self):
+        # FIXME: Not sure how to get num layers for each model to know how many to fit into VRAM.
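+        # One possible approach for the FIXME above (an untested sketch, not wired
+        # into this benchmark): size gpu_layers from free VRAM instead of a
+        # hard-coded value. pynvml is an assumed extra dependency here, and the
+        # per-layer cost is a rough guess for a quantized 7B model:
+        #     import pynvml
+        #     pynvml.nvmlInit()
+        #     handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+        #     free_bytes = pynvml.nvmlDeviceGetMemoryInfo(handle).free
+        #     gpu_layers = int(free_bytes // (150 * 1024 ** 2))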
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_path,
+            model_type=self.model_type,
+            gpu_layers=50 if self.device == 'cuda' else 0
+        )
+        return self
+
+    def run_model(self, prompt: str, max_tokens: int) -> float:
+        start = time.time()
+        output = self.model(prompt, max_new_tokens=max_tokens)
+        tokens = len(self.model.tokenize(output))
+        return tokens / (time.time() - start)
+
+    def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None:
+        for i in range(repetitions):
+            logging.info(
+                f"Running repetition [{str(i+1).zfill(len(str(repetitions)))}/{repetitions}]"
+                )
+            tokens_per_second = self.run_model(prompt, max_tokens)
+            self.results.append(tokens_per_second)
+
+
+
+path = "/home/anindya/Downloads/replit-openorca.ggmlv1.q4_0.bin"
+ben = CTransformersBenchmark(
+    model_path=path, device='cpu'
+).load_model()
+
+ben.benchmark(prompt="hello", max_tokens=3, repetitions=2)
+print(ben.results)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="CTransformers Benchmark.")
+    parser.add_argument(
+        "--prompt",
+        type=str,
+        help="The prompt for the model.",
+    )
+    parser.add_argument("--max_tokens", type=int, help="The maximum number of tokens.")
+    parser.add_argument(
+        "--repetitions",
+        type=int,
+        help="The number of repetitions for the benchmark.",
+    )
+    parser.add_argument(
+        "--device",
+        help="Device to use for the benchmark.",
+    )
+    parser.add_argument(
+        "--log_file",
+        type=str,
+        help="Path to the log file for writing logs (in append mode).",
+    )
+    parser.add_argument(
+        "--models_dir",
+        type=str,
+        help="Path to the models directory.",
+    )
+    args = parser.parse_args()
+    logging.info(
+        f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} "
+        + f"repetitions={args.repetitions} device={args.device}"
+    )
+    report = defaultdict(lambda: defaultdict(float))
\ No newline at end of file

From acec0f6dad396724746945f4d964bbfdcb10a892 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Fri, 24 Nov 2023 10:01:45 +0530
Subject: [PATCH 02/22] Adding sh file to run the benchmarks for CTransformers

---
 bench_ctransformers/bench.sh | 74 ++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 bench_ctransformers/bench.sh

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
new file mode 100644
index 00000000..e8df4a46
--- /dev/null
+++ b/bench_ctransformers/bench.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+########################################################################################################
+# Script: bench.sh
+# Description: This script runs the llama.cpp llama benchmark.
+#
+# Usage: ./bench.sh [OPTIONS]
+# OPTIONS:
+#   -p, --prompt        Prompt for benchmarks (default: 'Explain what is a transformer')
+#   -r, --repetitions   Number of repetitions for benchmarks (default: 10)
+#   -m, --max_tokens    Maximum number of tokens for benchmarks (default: 100)
+#   -d, --device        Device for benchmarks (possible values: 'metal', 'cuda', and 'cpu', default: 'cpu')
+#   -lf, --log_file     Logging file name.
+#   -md, --models_dir   Models directory.
+#   -h, --help          Show this help message
+########################################################################################################
+
+
+set -euo pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+print_usage() {
+    echo "Usage: $0 [OPTIONS]"
+    echo "OPTIONS:"
+    echo "  -p, --prompt        Prompt for benchmarks (default: 'Explain what is a transformer')"
+    echo "  -r, --repetitions   Number of repetitions for benchmarks (default: 10)"
+    echo "  -m, --max_tokens    Maximum number of tokens for benchmarks (default: 100)"
+    echo "  -d, --device        Device for benchmarks (possible values: 'metal', 'cuda', and 'cpu', default: 'cpu')"
+    echo "  -lf, --log_file     Logging file name."
+    echo "  -md, --models_dir   Models directory."
+    echo "  -h, --help          Show this help message"
+    exit 1
+}
+
+check_cuda() {
+    if command -v nvcc &> /dev/null
+    then
+        echo -e "\nUsing CUDA"
+        nvcc --version
+    else
+        echo -e "\nCUDA is not available."
+        exit 1
+    fi
+}
+
+check_platform() {
+    local platform
+    platform=$(uname -s)
+    if [[ "$platform" == "Linux" ]]; then
+        echo "Running on Linux."
+    elif [[ "$platform" == "Darwin" ]]; then
+        echo "Running on Mac OS."
+    else
+        echo "Unknown platform."
+        exit 1
+    fi
+}
+
+check_python() {
+    if command -v python &> /dev/null
+    then
+        echo -e "\nUsing $(python --version)."
+    else
+        echo -e "\nPython does not exist."
+        exit 1
+    fi
+}
+
+setup() {
+    echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
+    bash "$SCRIPT_DIR"/setup.sh "$1"
+}
+

From 88d4ef83612717f88b3d134e015ab81f246ebe78 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Fri, 24 Nov 2023 10:02:04 +0530
Subject: [PATCH 03/22] adding requirements to install dependencies for
 ctransformers

---
 bench_ctransformers/requirements.txt | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 bench_ctransformers/requirements.txt

diff --git a/bench_ctransformers/requirements.txt b/bench_ctransformers/requirements.txt
new file mode 100644
index 00000000..f5d212fa
--- /dev/null
+++ b/bench_ctransformers/requirements.txt
@@ -0,0 +1 @@
+ctransformers
\ No newline at end of file

From 2f65ffa677599d413682d34c55434f49566f12ed Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Mon, 27 Nov 2023 10:05:22 +0530
Subject: [PATCH 04/22] Refactor: Bench CTransformers by removing model_type.

For now, only llama is supported, since it has all three types of device
support and is a standard model.
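A minimal usage sketch of the refactored class (the model path is a
placeholder; the API is exactly what this patch introduces):

    bench = LlamaCTransformersBenchmark(
        "models/llama-2-7b-gguf/llama-2-7b.Q4_0.gguf", device="cuda"
    ).load_model()
    bench.benchmark(prompt="Explain what is a transformer", max_tokens=100, repetitions=10)
    print(bench.results)  # one tokens/sec reading per repetition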
---
 bench_ctransformers/bench.py | 125 +++++++++++++----------------------
 1 file changed, 47 insertions(+), 78 deletions(-)

diff --git a/bench_ctransformers/bench.py b/bench_ctransformers/bench.py
index 17faf740..58c66e37 100644
--- a/bench_ctransformers/bench.py
+++ b/bench_ctransformers/bench.py
@@ -1,88 +1,40 @@
 import argparse
 import logging
 import sys
 import time
-from typing import Optional
 from collections import defaultdict
+from typing import Optional
+
 import numpy as np
 from ctransformers import AutoModelForCausalLM
 
-logging.getLogger('ctransformers').setLevel(logging.ERROR)
+logging.getLogger("ctransformers").setLevel(logging.ERROR)
 logging.basicConfig(
     stream=sys.stdout,
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(message)s",
 )
 
-class CTransformersBenchmark:
-    def __init__(self, model_path: str, device: Optional[str]='cpu', model_type: Optional[str]=None) -> None:
+
+class LlamaCTransformersBenchmark:
+    def __init__(
+        self,
+        model_path: str,
+        device: Optional[str] = "cpu",
+    ) -> None:
         self.model_path, self.device = model_path, device
-        self.model_map = {
-            'gpt2' : {
-                'devices': ['cpu'],
-                'type': 'gpt2'
-            },
-            'gptj': {
-                'type': 'gptj',
-                'devices': ['cpu'],
-            },
-            'gpt4allj': {
-                'devices': ['cpu'],
-                'type': 'gptj'
-            },
-            'gpt-neo': {
-                'devices': ['cpu'],
-                'type': 'gpt_neox'
-            },
-            'falcon':{
-                'devices': ['cpu', 'cuda'],
-                'type': 'falcon'
-            },
-            'llama': {
-                'devices': ['cpu', 'cuda', 'metal'],
-                'type': 'llama'
-            },
-            'mpt': {
-                'devices': ['cpu', 'cuda'],
-                'type': 'mpt'
-            },
-            'starcoder': {
-                'devices': ['cpu'],
-                'type': 'gpt_bigcode'
-            },
-            'dolly': {
-                'devices': ['cpu'],
-                'type': 'dolly-v2'
-            },
-            'replit': {
-                'devices': ['cpu'],
-                'type': 'replit'
-            }
-        }
-        self.results = []
-        # check which supported architecture the model path corresponds to
-        _model_name = model_path.split('/')[-1].lower()
-        matched_key_from_map = [key for key in self.model_map if key in _model_name]
-        if not matched_key_from_map and model_type is None:
-            raise ValueError(
-                f"The model {_model_name} does not fall under any of the supported model categories: {list(self.model_map.keys())}. "
-                f"If your model does match one of these architectures, pass it explicitly via the model_type argument."
-            )
-
-        self.model_type = matched_key_from_map[0] if model_type is None else model_type
-
-        # if the selected model does not support the requested device, fall back to its default device (i.e. the first value of the list)
-        self.device = device if device is not None and device in self.model_map[self.model_type]['devices'] else self.model_map[self.model_type]['devices'][0]
-
+        self.results = []
+        self.device = device
+
     def load_model(self):
         # FIXME: Not sure how to get num layers for each model to know how many to fit into VRAM.
         self.model = AutoModelForCausalLM.from_pretrained(
             self.model_path,
-            model_type=self.model_type,
-            gpu_layers=50 if self.device == 'cuda' else 0
+            model_type="llama",
+            gpu_layers=50 if self.device in ["cuda", "metal"] else 0,
         )
         return self
-
+
     def run_model(self, prompt: str, max_tokens: int) -> float:
         start = time.time()
         output = self.model(prompt, max_new_tokens=max_tokens)
@@ -93,20 +45,11 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None:
     for i in range(repetitions):
         logging.info(
             f"Running repetition [{str(i+1).zfill(len(str(repetitions)))}/{repetitions}]"
-            )
+        )
         tokens_per_second = self.run_model(prompt, max_tokens)
         self.results.append(tokens_per_second)
-
-
-
-path = "/home/anindya/Downloads/replit-openorca.ggmlv1.q4_0.bin"
-ben = CTransformersBenchmark(
-    model_path=path, device='cpu'
-).load_model()
-
-ben.benchmark(prompt="hello", max_tokens=3, repetitions=2)
-print(ben.results)
-
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="CTransformers Benchmark.")
     parser.add_argument(
@@ -139,4 +82,30 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None:
         f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} "
         + f"repetitions={args.repetitions} device={args.device}"
     )
-    report = defaultdict(lambda: defaultdict(float))
\ No newline at end of file
+    report = defaultdict(lambda: defaultdict(float))
+    for quantize in ("Q8_0", "Q4_0"):
+        logging.info(f"Running CTransformers benchmark on Llama with {quantize}")
+        llama_ctransformers_bench = LlamaCTransformersBenchmark(
+            f"{args.models_dir}/llama-2-7b-gguf/llama-2-7b.{quantize}.gguf",
+            device=args.device,
+        ).load_model()
+        llama_ctransformers_bench.benchmark(
+            max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions
+        )
+        q = "int8" if quantize == "Q8_0" else "int4"
+        report["llama_ctransformers"][q] = {
+            "mean": np.mean(llama_ctransformers_bench.results),
+            "std": np.std(llama_ctransformers_bench.results),
+        }
+
+    logging.info("Benchmark report")
+    with open(args.log_file, "a") as file:
+        for framework, quantizations in report.items():
+            for quantization, stats in quantizations.items():
+                logging.info(
+                    f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}"
+                )
+                print(
+                    f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}",
+                    file=file,
+                )

From 73ccb4fb6ce456e3e7c4a66499b0dc285f2fa374 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Mon, 27 Nov 2023 10:23:57 +0530
Subject: [PATCH 05/22] Added benchmark bash script to run benchmarking.

---
 bench_ctransformers/bench.sh | 81 +++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index e8df4a46..36c9a7c1 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -15,7 +15,6 @@
 #   -h, --help          Show this help message
 ########################################################################################################
 
-
 set -euo pipefail
 
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
@@ -38,6 +37,7 @@ check_cuda() {
     then
         echo -e "\nUsing CUDA"
         nvcc --version
+        pip install ctransformers[cuda]
     else
        echo -e "\nCUDA is not available."
        exit 1
@@ -51,6 +51,9 @@ check_platform() {
         echo "Running on Linux."
     elif [[ "$platform" == "Darwin" ]]; then
         echo "Running on Mac OS."
+        echo "Installing CTransformers on metal"
+        export CT_METAL=1
+        pip install ctransformers --no-binary ctransformers
     else
         echo "Unknown platform."
         exit 1
@@ -72,3 +75,79 @@ setup() {
     bash "$SCRIPT_DIR"/setup.sh "$1"
 }
 
+run_benchmarks() {
+    local PROMPT="$1"
+    local REPETITIONS="$2"
+    local MAX_TOKENS="$3"
+    local DEVICE="$4"
+    local LOG_FILENAME="$5"
+    local MODELS_DIR="$6"
+
+    python "$SCRIPT_DIR"/bench.py \
+        --prompt "$PROMPT" \
+        --repetitions "$REPETITIONS" \
+        --max_tokens "$MAX_TOKENS" \
+        --log_file "$LOG_FILENAME" \
+        --models_dir "$MODELS_DIR" \
+        --device "$DEVICE"
+}
+
+# Parse command-line arguments
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        -p|--prompt)
+            PROMPT="$2"
+            shift 2
+            ;;
+        -r|--repetitions)
+            REPETITIONS="$2"
+            shift 2
+            ;;
+        -m|--max_tokens)
+            MAX_TOKENS="$2"
+            shift 2
+            ;;
+        -d|--device)
+            DEVICE="$2"
+            case "$DEVICE" in
+                "cuda" | "metal" | "cpu")
+                    ;;
+                *)
+                    echo "Invalid value for --device. Please use 'cuda', 'metal' or 'cpu'."
+                    print_usage
+                    ;;
+            esac
+            if [ "$DEVICE" == "cuda" ]; then
+                check_cuda
+            fi
+            shift 2
+            ;;
+        -lf|--log_file)
+            LOG_FILENAME="$2"
+            shift 2
+            ;;
+        -md|--models_dir)
+            MODELS_DIR="$2"
+            shift 2
+            ;;
+        -h|--help)
+            print_usage
+            ;;
+        *)
+            echo "Unknown option: $1"
+            print_usage
+            ;;
+    esac
+done
+
+# Set default values if not provided
+PROMPT="${PROMPT:-"Explain what is a transformer"}"
+REPETITIONS="${REPETITIONS:-10}"
+MAX_TOKENS="${MAX_TOKENS:-100}"
+DEVICE="${DEVICE:-cpu}"
+LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}"
+MODELS_DIR="${MODELS_DIR:-"./models"}"
+
+check_platform
+check_python
+setup "$DEVICE"
+run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR"

From 995441be8fda25da38cbefec01a8b71e5bd61efd Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Mon, 27 Nov 2023 10:25:12 +0530
Subject: [PATCH 06/22] Added a requirements file for installing CTransformers

---
 bench_ctransformers/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench_ctransformers/requirements.txt b/bench_ctransformers/requirements.txt
index f5d212fa..b06a12c5 100644
--- a/bench_ctransformers/requirements.txt
+++ b/bench_ctransformers/requirements.txt
@@ -1 +1 @@
-ctransformers
\ No newline at end of file
+ctransformers

From 93cbfc8cf6f2109704aa8df78b92f27a4f20cab1 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Tue, 28 Nov 2023 02:03:46 +0530
Subject: [PATCH 07/22] Removing setup.sh for device

---
 bench_ctransformers/bench.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index 36c9a7c1..87973ecc 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -149,5 +149,4 @@ MODELS_DIR="${MODELS_DIR:-"./models"}"
 
 check_platform
 check_python
-setup "$DEVICE"
 run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR"

From 8fcfa949927756d10612a5fe67cc51f0cfb5538b Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Tue, 28 Nov 2023 02:49:12 +0530
Subject: [PATCH 08/22] Added numpy in requirements.txt

---
 bench_ctransformers/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bench_ctransformers/requirements.txt b/bench_ctransformers/requirements.txt
index b06a12c5..1816ee0d 100644
--- a/bench_ctransformers/requirements.txt
+++ b/bench_ctransformers/requirements.txt
@@ -1 +1,2 @@
 ctransformers
+numpy

From d4468ee307bd5e3c6fd82007e2dcceb9079a5611 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Tue, 28 Nov 2023 02:50:07 +0530
Subject: [PATCH 09/22] added custom dependency installation in benchmark
 script for cuda

---
 bench_ctransformers/bench.sh | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index 87973ecc..e1c682a1 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -37,7 +37,7 @@ check_cuda() {
     then
         echo -e "\nUsing CUDA"
         nvcc --version
-        pip install ctransformers[cuda]
+        pip install ctransformers[cuda] numpy
     else
         echo -e "\nCUDA is not available."
         exit 1
@@ -49,6 +49,7 @@ check_platform() {
     platform=$(uname -s)
     if [[ "$platform" == "Linux" ]]; then
         echo "Running on Linux."
+        pip install -r requirements.txt
     elif [[ "$platform" == "Darwin" ]]; then
         echo "Running on Mac OS."
         echo "Installing CTransformers on metal"
@@ -70,11 +71,6 @@ check_python() {
     fi
 }
 
-setup() {
-    echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
-    bash "$SCRIPT_DIR"/setup.sh "$1"
-}
-
 run_benchmarks() {
     local PROMPT="$1"
     local REPETITIONS="$2"

From 7d19c233a9a24887cfbbd4f033b3d52f1793c466 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Thu, 30 Nov 2023 01:41:03 +0530
Subject: [PATCH 10/22] fix: exclude token counting from the time calculation.

Co-authored-by: Nicola Sosio
---
 bench_ctransformers/bench.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bench_ctransformers/bench.py b/bench_ctransformers/bench.py
index 58c66e37..9e82aa8b 100644
--- a/bench_ctransformers/bench.py
+++ b/bench_ctransformers/bench.py
@@ -38,8 +38,9 @@ def load_model(self):
     def run_model(self, prompt: str, max_tokens: int) -> float:
         start = time.time()
         output = self.model(prompt, max_new_tokens=max_tokens)
+        delta = time.time() - start
         tokens = len(self.model.tokenize(output))
-        return tokens / (time.time() - start)
+        return tokens / delta

From c949737d3a2fe1b58193b56154cb15e5000cf352 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Thu, 30 Nov 2023 01:41:22 +0530
Subject: [PATCH 11/22] fix: typo

Co-authored-by: Nicola Sosio
---
 bench_ctransformers/bench.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index e1c682a1..738c40eb 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -2,7 +2,7 @@
 
 ########################################################################################################
 # Script: bench.sh
-# Description: This script runs the llama.cpp llama benchmark.
+# Description: This script runs the ctransformers llama benchmark.
 #
 # Usage: ./bench.sh [OPTIONS]
 # OPTIONS:

From 271d392dc15bb5a616b91e49715300576911025f Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Thu, 30 Nov 2023 02:20:10 +0530
Subject: [PATCH 12/22] Added ctransformers benchmark results for A100 and CPU

---
 README.md | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index c6728bdb..53155af0 100644
--- a/README.md
+++ b/README.md
@@ -73,14 +73,15 @@ CUDA Version: 11.7
 
 Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cuda --prompt 'Explain what is a transformer'`
 
-| Engine      | float32      | float16       | int8          | int4          |
-|-------------|--------------|---------------|---------------|---------------|
-| burn        | 13.12 ± 0.85 | -             | -             | -             |
-| candle      | -            | 36.78 ± 2.17  | -             | -             |
-| llama.cpp   | -            | -             | 84.48 ± 3.76  | 106.76 ± 1.29 |
-| ctranslate  | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
-| tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
-| onnx        | -            | 54.16 ± 3.15  | -             | -             |
+| Engine               | float32      | float16       | int8          | int4          |
+|----------------------|--------------|---------------|---------------|---------------|
+| burn                 | 13.12 ± 0.85 | -             | -             | -             |
+| candle               | -            | 36.78 ± 2.17  | -             | -             |
+| llama.cpp            | -            | -             | 84.48 ± 3.76  | 106.76 ± 1.29 |
+| ctranslate           | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
+| tinygrad             | -            | 20.32 ± 0.06  | -             | -             |
+| onnx                 | -            | 54.16 ± 3.15  | -             | -             |
+| ctransformers        | -            | -             | 81.61 ± 3.66  | 84.51 ± 7.93  |
 
 *(data updated: 23rd November 2023)
 
@@ -108,13 +109,14 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cpu --prompt 'Explain what is a transformer'`
 
 Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device metal --prompt 'Explain what is a transformer'`
 
-| Engine      | float32      | float16      | int8         | int4         |
-|-------------|--------------|--------------|--------------|--------------|
-| burn        | -            | -            | -            | -            |
-| candle      | -            | -            | -            | -            |
-| llama.cpp   | -            | -            | 31.24 ± 7.82 | 46.75 ± 9.55 |
-| ctranslate  | -            | -            | -            | -            |
-| tinygrad    | -            | 29.78 ± 1.18 | -            | -            |
-| onnx        | -            | -            | -            | -            |
+| Engine               | float32      | float16       | int8         | int4         |
+|----------------------|--------------|---------------|--------------|--------------|
+| burn                 | -            | -             | -            | -            |
+| candle               | -            | -             | -            | -            |
+| llama.cpp            | -            | -             | 31.24 ± 7.82 | 46.75 ± 9.55 |
+| ctranslate           | -            | -             | -            | -            |
+| tinygrad             | -            | 29.78 ± 1.18  | -            | -            |
+| onnx                 | -            | -             | -            | -            |
+| ctransformers        | -            | -             | 4.58 ± 0.07  | 7.00 ± 0.23  |
 
 *(data updated: 23rd November 2023)

From 99246edf4dba021907f991665a85ef7720c14026 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Thu, 30 Nov 2023 02:40:54 +0530
Subject: [PATCH 13/22] added latest benchmark info for ctransformers, m2
 (cpu, gpu), a100.
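Each table cell below is the mean ± standard deviation of the per-repetition
tokens/sec list that bench.py collects; a minimal sketch of that aggregation
(with made-up numbers):

    import numpy as np

    results = [13.2, 14.1, 13.7]  # tokens/sec from three repetitions
    print(f"{np.mean(results):.2f} ± {np.std(results):.2f}")  # -> 13.67 ± 0.37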
---
 README.md | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 53155af0..900d77f2 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cuda --prompt 'Explain what is a transformer'`
 | onnx                 | -            | 54.16 ± 3.15  | -             | -             |
 | ctransformers        | -            | -             | 81.61 ± 3.66  | 84.51 ± 7.93  |
 
-*(data updated: 23rd November 2023)
+*(data updated: 30th November 2023)
 
 ### M2 MAX 32GB Inference Bench:
 
@@ -96,14 +96,15 @@ CUDA Version: NA
 
 Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cpu --prompt 'Explain what is a transformer'`
 
-| Engine      | float32      | float16      | int8         | int4         |
-|-------------|--------------|--------------|--------------|--------------|
-| burn        | 0.30 ± 0.09  | -            | -            | -            |
-| candle      | -            | 3.43 ± 0.02  | -            | -            |
-| llama.cpp   | -            | -            | 14.41 ± 1.59 | 20.96 ± 1.94 |
-| ctranslate  | -            | -            | 2.11 ± 0.73  | -            |
-| tinygrad    | -            | 4.21 ± 0.38  | -            | -            |
-| onnx        | -            | -            | -            | -            |
+| Engine               | float32      | float16      | int8         | int4         |
+|----------------------|--------------|--------------|--------------|--------------|
+| burn                 | 0.30 ± 0.09  | -            | -            | -            |
+| candle               | -            | 3.43 ± 0.02  | -            | -            |
+| llama.cpp            | -            | -            | 14.41 ± 1.59 | 20.96 ± 1.94 |
+| ctranslate           | -            | -            | 2.11 ± 0.73  | -            |
+| tinygrad             | -            | 4.21 ± 0.38  | -            | -            |
+| onnx                 | -            | -            | -            | -            |
+| ctransformers        | -            | -            | 13.79 ± 0.50 | 22.93 ± 0.86 |
 
 #### GPU (Metal)
 
@@ -117,6 +118,6 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device metal --prompt 'Explain what is a transformer'`
 | ctranslate           | -            | -             | -            | -            |
 | tinygrad             | -            | 29.78 ± 1.18  | -            | -            |
 | onnx                 | -            | -             | -            | -            |
-| ctransformers        | -            | -             | 4.58 ± 0.07  | 7.00 ± 0.23  |
+| ctransformers        | -            | -             | 21.24 ± 0.81 | 34.08 ± 4.78 |
 
 *(data updated: 23rd November 2023)

From c8c861b9bf1a7677ba84940f321d75f2b184e998 Mon Sep 17 00:00:00 2001
From: GitHub Actions
Date: Fri, 1 Dec 2023 13:00:35 +0000
Subject: [PATCH 14/22] Update placeholder in llama2.md

---
 docs/llama2.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/llama2.md b/docs/llama2.md
index 05a5f259..cf882ed1 100644
--- a/docs/llama2.md
+++ b/docs/llama2.md
@@ -17,7 +17,7 @@
 | tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
 | onnx        | -            | 54.16 ± 3.15  | -             | -             |
 
-*(Data updated: `30th November 2023`)
+*(Data updated: `1st December 2023`)
 
 ## M2 MAX 32GB Inference Bench:
 
@@ -53,4 +53,4 @@
 | tinygrad    | -            | 29.78 ± 1.18  | -            | -            |
 | onnx        | -            | -             | -            | -            |
 
-*(Data updated: `30th November 2023`)
+*(Data updated: `1st December 2023`)

From 964e2cd13a721ac0e7e05d7b3d9059a58c1d3a50 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Fri, 1 Dec 2023 18:57:03 +0530
Subject: [PATCH 15/22] revert default docs to latest changes in main

---
 docs/llama2.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/llama2.md b/docs/llama2.md
index cf882ed1..05a5f259 100644
--- a/docs/llama2.md
+++ b/docs/llama2.md
@@ -17,7 +17,7 @@
 | tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
 | onnx        | -            | 54.16 ± 3.15  | -             | -             |
 
-*(Data updated: `1st December 2023`)
+*(Data updated: `30th November 2023`)
 
 ## M2 MAX 32GB Inference Bench:
 
@@ -53,4 +53,4 @@
 | tinygrad    | -            | 29.78 ± 1.18  | -            | -            |
 | onnx        | -            | -             | -            | -            |
 
-*(Data updated: `1st December 2023`)
+*(Data updated: `30th November 2023`)

From c6516942a6d34ad8273569d0a4b12edab0197174 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 01:35:00 +0530
Subject: [PATCH 16/22] added setup.sh file for installing dependencies for
 ctransformers

---
 bench_ctransformers/setup.sh | 85 ++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100755 bench_ctransformers/setup.sh

diff --git a/bench_ctransformers/setup.sh b/bench_ctransformers/setup.sh
new file mode 100755
index 00000000..08cc5b01
--- /dev/null
+++ b/bench_ctransformers/setup.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+################################################################################
+# Script: setup.sh
+# Description: Automates the setup of a virtual environment and installs project
+# requirements.
+################################################################################
+
+set -euo pipefail
+
+# Function to install CTransformers with CUDA version check
+install_ctransformers_cuda() {
+    CUDA_VERSION=$(nvcc --version | grep "release" | sed -n 's/.*release \(.*\),.*/\1/p')
+
+    if [ -z "$CUDA_VERSION" ]; then
+        echo "CUDA is not installed or not found."
+        exit 1
+    fi
+
+    CUDA_MAJOR=$(echo "$CUDA_VERSION" | cut -d. -f1)
+    CUDA_MINOR=$(echo "$CUDA_VERSION" | cut -d. -f2)
+
+    if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 2 ]; }; then
+        echo "Detected CUDA version >= 12.2"
+        pip install ctransformers[cuda] > /dev/null
+    else
+        echo "Detected CUDA version < 12.2"
+        CMAKE_ARGS="-DCMAKE_CUDA_COMPILER=$(which nvcc)" CT_CUBLAS=1 pip install ctransformers --no-binary ctransformers > /dev/null
+    fi
+}
+
+install_device_specific_ctransformers() {
+    local DEVICE="$1"
+
+    if [ "$#" -ne 1 ]; then
+        echo "Usage: $0 <device>"
+        exit 1
+    fi
+
+    case "$DEVICE" in
+        cuda)
+            echo "Installing CTransformers for CUDA."
+            install_ctransformers_cuda
+            ;;
+        metal)
+            echo "Installing CTransformers for Metal."
+            CT_METAL=1 pip install ctransformers --no-binary ctransformers
+            ;;
+        cpu)
+            echo "Installing CTransformers for CPU."
+            pip install ctransformers > /dev/null
+            ;;
+        *)
+            echo "Unsupported DEVICE: $DEVICE"
+            return 1
+            ;;
+    esac
+}
+
+# Main script starts here.
+
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <device>"
+    exit 1
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+DEVICE="$1"
+VENV_DIR="$SCRIPT_DIR/venv"
+
+# Build and activate the virtual environment.
+
+if [ ! -d "$VENV_DIR" ]; then
+    python3 -m venv "$VENV_DIR"
+    echo "Virtual environment '$VENV_DIR' created."
+    # shellcheck disable=SC1091
+    source "$VENV_DIR/bin/activate"
+    pip install --upgrade pip > /dev/null
+    pip install -r "$SCRIPT_DIR/requirements.txt" --no-cache-dir > /dev/null
+else
+    # shellcheck disable=SC1091
+    source "$VENV_DIR/bin/activate"
+fi
+
+install_device_specific_ctransformers "$DEVICE"

From ae02965e5d8fc52c2550921ab92535720f298745 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 01:36:00 +0530
Subject: [PATCH 17/22] Refactor: benchmark bash file.

- integrated setup.sh file installations.
- removed unnecessary package installation inside bench.sh

---
 bench_ctransformers/bench.sh | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index 738c40eb..37d36003 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -2,7 +2,7 @@
 
 ########################################################################################################
 # Script: bench.sh
-# Description: This script runs the ctransformers llama benchmark.
+# Description: This script runs the llama.cpp llama benchmark.
 #
 # Usage: ./bench.sh [OPTIONS]
 # OPTIONS:
@@ -37,7 +37,6 @@ check_cuda() {
     then
         echo -e "\nUsing CUDA"
         nvcc --version
-        pip install ctransformers[cuda] numpy
     else
         echo -e "\nCUDA is not available."
         exit 1
@@ -49,12 +48,8 @@ check_platform() {
     platform=$(uname -s)
     if [[ "$platform" == "Linux" ]]; then
         echo "Running on Linux."
-        pip install -r requirements.txt
     elif [[ "$platform" == "Darwin" ]]; then
         echo "Running on Mac OS."
-        echo "Installing CTransformers on metal"
-        export CT_METAL=1
-        pip install ctransformers --no-binary ctransformers
     else
         echo "Unknown platform."
         exit 1
@@ -71,6 +66,11 @@ check_python() {
     fi
 }
 
+setup() {
+    echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
+    bash "$SCRIPT_DIR"/setup.sh "$1"
+}
+
 run_benchmarks() {
     local PROMPT="$1"
     local REPETITIONS="$2"
@@ -79,6 +79,8 @@ run_benchmarks() {
     local LOG_FILENAME="$5"
     local MODELS_DIR="$6"
 
+    # shellcheck disable=SC1091
+    source "$SCRIPT_DIR/venv/bin/activate"
     python "$SCRIPT_DIR"/bench.py \
         --prompt "$PROMPT" \
         --repetitions "$REPETITIONS" \
@@ -145,4 +147,5 @@ MODELS_DIR="${MODELS_DIR:-"./models"}"
 
 check_platform
 check_python
+setup "$DEVICE"
 run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR"

From 8ebfd596049624ff6bbb3b9f3ac5fc2c1b6df423 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 01:37:28 +0530
Subject: [PATCH 18/22] removed ctransformers from requirements file

---
 bench_ctransformers/requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bench_ctransformers/requirements.txt b/bench_ctransformers/requirements.txt
index 1816ee0d..24ce15ab 100644
--- a/bench_ctransformers/requirements.txt
+++ b/bench_ctransformers/requirements.txt
@@ -1,2 +1 @@
-ctransformers
 numpy

From de4090935b3fdbe2f45d23ead60d5ac5da4a0c07 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 01:37:58 +0530
Subject: [PATCH 19/22] added ctransformers results inside llama2.md.template

---
 docs/llama2.md.template | 51 ++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/docs/llama2.md.template b/docs/llama2.md.template
index 3233ba09..0a0560b6 100644
--- a/docs/llama2.md.template
+++ b/docs/llama2.md.template
@@ -8,14 +8,15 @@
 - Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cuda --prompt 'Explain what is a transformer'`
 
 **Performance Metrics:**
-| Engine      | float32      | float16       | int8          | int4          |
-|-------------|--------------|---------------|---------------|---------------|
-| burn        | 13.12 ± 0.85 | -             | -             | -             |
-| candle      | -            | 36.78 ± 2.17  | -             | -             |
-| llama.cpp   | -            | -             | 84.48 ± 3.76  | 106.76 ± 1.29 |
-| ctranslate  | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
-| tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
-| onnx        | -            | 54.16 ± 3.15  | -             | -             |
+| Engine               | float32      | float16       | int8          | int4          |
+|----------------------|--------------|---------------|---------------|---------------|
+| burn                 | 13.12 ± 0.85 | -             | -             | -             |
+| candle               | -            | 36.78 ± 2.17  | -             | -             |
+| llama.cpp            | -            | -             | 84.48 ± 3.76  | 106.76 ± 1.29 |
+| ctranslate           | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
+| tinygrad             | -            | 20.32 ± 0.06  | -             | -             |
+| onnx                 | -            | 54.16 ± 3.15  | -             | -             |
+| ctransformers        | -            | -             | 81.61 ± 3.66  | 84.51 ± 7.93  |
 
 *(Data updated: ``)
 
@@ -30,27 +31,29 @@
 - Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cpu --prompt 'Explain what is a transformer'`
 
 **Performance Metrics:**
-| Engine      | float32      | float16      | int8         | int4         |
-|-------------|--------------|--------------|--------------|--------------|
-| burn        | 0.30 ± 0.09  | -            | -            | -            |
-| candle      | -            | 3.43 ± 0.02  | -            | -            |
-| llama.cpp   | -            | -            | 14.41 ± 1.59 | 20.96 ± 1.94 |
-| ctranslate  | -            | -            | 2.11 ± 0.73  | -            |
-| tinygrad    | -            | 4.21 ± 0.38  | -            | -            |
-| onnx        | -            | -            | -            | -            |
+| Engine               | float32      | float16      | int8         | int4         |
+|----------------------|--------------|--------------|--------------|--------------|
+| burn                 | 0.30 ± 0.09  | -            | -            | -            |
+| candle               | -            | 3.43 ± 0.02  | -            | -            |
+| llama.cpp            | -            | -            | 14.41 ± 1.59 | 20.96 ± 1.94 |
+| ctranslate           | -            | -            | 2.11 ± 0.73  | -            |
+| tinygrad             | -            | 4.21 ± 0.38  | -            | -            |
+| onnx                 | -            | -            | -            | -            |
+| ctransformers        | -            | -            | 13.79 ± 0.50 | 22.93 ± 0.86 |
 
 ### GPU (Metal)
 
 **Command:** `./benchmark.sh --repetitions 10 --max_tokens 100 --device metal --prompt 'Explain what is a transformer'`
 
 **Performance Metrics:**
-| Engine      | float32      | float16      | int8         | int4         |
-|-------------|--------------|--------------|--------------|--------------|
-| burn        | -            | -            | -            | -            |
-| candle      | -            | -            | -            | -            |
-| llama.cpp   | -            | -            | 31.24 ± 7.82 | 46.75 ± 9.55 |
-| ctranslate  | -            | -            | -            | -            |
-| tinygrad    | -            | 29.78 ± 1.18 | -            | -            |
-| onnx        | -            | -            | -            | -            |
+| Engine               | float32      | float16       | int8         | int4         |
+|----------------------|--------------|---------------|--------------|--------------|
+| burn                 | -            | -             | -            | -            |
+| candle               | -            | -             | -            | -            |
+| llama.cpp            | -            | -             | 31.24 ± 7.82 | 46.75 ± 9.55 |
+| ctranslate           | -            | -             | -            | -            |
+| tinygrad             | -            | 29.78 ± 1.18  | -            | -            |
+| onnx                 | -            | -             | -            | -            |
+| ctransformers        | -            | -             | 21.24 ± 0.81 | 34.08 ± 4.78 |
 
 *(Data updated: ``)

From 99707b37a847902e24f52bb00e401e95a562d920 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 02:14:57 +0530
Subject: [PATCH 20/22] fix: quiet installation for metal devices

---
 bench_ctransformers/setup.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench_ctransformers/setup.sh b/bench_ctransformers/setup.sh
index 08cc5b01..2f1efe3d 100755
--- a/bench_ctransformers/setup.sh
+++ b/bench_ctransformers/setup.sh
@@ -44,7 +44,7 @@ install_device_specific_ctransformers() {
             ;;
         metal)
             echo "Installing CTransformers for Metal."
-            CT_METAL=1 pip install ctransformers --no-binary ctransformers
+            CT_METAL=1 pip install ctransformers --no-binary ctransformers > /dev/null
             ;;
         cpu)
             echo "Installing CTransformers for CPU."

From 4cf57878bc8435a5caa43f67eb6559dcf602df4d Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 15:58:17 +0530
Subject: [PATCH 21/22] fix: syntax

Co-authored-by: Nicola Sosio
---
 bench_ctransformers/setup.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench_ctransformers/setup.sh b/bench_ctransformers/setup.sh
index 2f1efe3d..0fd16d22 100755
--- a/bench_ctransformers/setup.sh
+++ b/bench_ctransformers/setup.sh
@@ -20,7 +20,7 @@ install_ctransformers_cuda() {
     CUDA_MAJOR=$(echo "$CUDA_VERSION" | cut -d. -f1)
     CUDA_MINOR=$(echo "$CUDA_VERSION" | cut -d. -f2)
 
-    if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 2 ]; }; then
+    if [ "$CUDA_MAJOR" -gt 12 ] || [ "$CUDA_MAJOR" -eq 12 -a "$CUDA_MINOR" -ge 2 ]; then
         echo "Detected CUDA version >= 12.2"
         pip install ctransformers[cuda] > /dev/null
     else

From 0f332b995862a78dff9ccce4b0b3d16a4a186a75 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 16:10:16 +0530
Subject: [PATCH 22/22] Refactor: setup script

- reverted back the conditional statement (see the note below).
- placed install_ctransformers_cuda() function after requirements installation.
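For context on the reverted conditional: combining tests with -a inside a
single [ ] is marked obsolescent by POSIX and flagged by ShellCheck (SC2166),
which is presumably why the grouped form comes back here:

    # obsolescent form introduced in PATCH 21
    if [ "$CUDA_MAJOR" -gt 12 ] || [ "$CUDA_MAJOR" -eq 12 -a "$CUDA_MINOR" -ge 2 ]; then
    # portable form restored below: separate tests joined by &&, braces keep precedence
    if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 2 ]; }; then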
---
 bench_ctransformers/setup.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bench_ctransformers/setup.sh b/bench_ctransformers/setup.sh
index 0fd16d22..7d83fb65 100755
--- a/bench_ctransformers/setup.sh
+++ b/bench_ctransformers/setup.sh
@@ -20,7 +20,7 @@ install_ctransformers_cuda() {
     CUDA_MAJOR=$(echo "$CUDA_VERSION" | cut -d. -f1)
     CUDA_MINOR=$(echo "$CUDA_VERSION" | cut -d. -f2)
 
-    if [ "$CUDA_MAJOR" -gt 12 ] || [ "$CUDA_MAJOR" -eq 12 -a "$CUDA_MINOR" -ge 2 ]; then
+    if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 2 ]; }; then
         echo "Detected CUDA version >= 12.2"
         pip install ctransformers[cuda] > /dev/null
     else
@@ -77,9 +77,8 @@ if [ ! -d "$VENV_DIR" ]; then
     source "$VENV_DIR/bin/activate"
     pip install --upgrade pip > /dev/null
     pip install -r "$SCRIPT_DIR/requirements.txt" --no-cache-dir > /dev/null
+    install_device_specific_ctransformers "$DEVICE"
 else
     # shellcheck disable=SC1091
     source "$VENV_DIR/bin/activate"
 fi
-
-install_device_specific_ctransformers "$DEVICE"
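With the full series applied, a typical end-to-end run (using the defaults
documented in bench.sh and the models layout bench.py expects) looks like:

    cd bench_ctransformers
    ./bench.sh --prompt 'Explain what is a transformer' \
        --repetitions 10 --max_tokens 100 \
        --device cuda --models_dir ./models

setup.sh creates the virtualenv and installs the device-specific ctransformers
build, and bench.py then benchmarks llama-2-7b at Q8_0 and Q4_0, appending the
mean ± std tokens/sec for each quantization to the log file.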