From fd03e395c6930cdd307d8007c2ce9da97619147e Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Fri, 24 Nov 2023 10:00:57 +0530
Subject: [PATCH 01/22] Feat: Adding the integration for CTransformers for
 benchmarks.

---
 bench_ctransformers/bench.py | 142 +++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 bench_ctransformers/bench.py

diff --git a/bench_ctransformers/bench.py b/bench_ctransformers/bench.py
new file mode 100644
index 00000000..17faf740
--- /dev/null
+++ b/bench_ctransformers/bench.py
@@ -0,0 +1,142 @@
+import argparse
+import logging
+import sys
+import time
+from typing import Optional
+from collections import defaultdict
+import numpy as np
+from ctransformers import AutoModelForCausalLM
+
+logging.getLogger('ctransformers').setLevel(logging.ERROR)
+logging.basicConfig(
+    stream=sys.stdout,
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+class CTransformersBenchmark:
+    def __init__(self, model_path: str, device: Optional[str]='cpu', model_type: Optional[str]=None) -> None:
+        self.model_path, self.device = model_path, device
+        self.model_map = {
+            'gpt2' : {
+                'devices': ['cpu'],
+                'type': 'gpt2'
+            },
+            'gptj': {
+                'type': 'gptj',
+                'devices': ['cpu'],
+            },
+            'gpt4allj': {
+                'devices': ['cpu'],
+                'type': 'gptj'
+            },
+            'gpt-neo': {
+                'devices': ['cpu'],
+                'type': 'gpt_neox'
+            },
+            'falcon':{
+                'devices': ['cpu', 'cuda'],
+                'type': 'falcon'
+            },
+            'llama': {
+                'devices': ['cpu', 'cuda', 'metal'],
+                'type': 'llama'
+            },
+            'mpt': {
+                'devices': ['cpu', 'cuda'],
+                'type': 'mpt'
+            },
+            'starcoder': {
+                'devices': ['cpu'],
+                'type': 'gpt_bigcode'
+            },
+            'dolly': {
+                'devices': ['cpu'],
+                'type': 'dolly-v2'
+            },
+            'replit': {
+                'devices': ['cpu'],
+                'type': 'replit'
+            }
+        }
+        self.results = []
+        # check which supported architecture the model path corresponds to
+        _model_name = model_path.split('/')[-1].lower()
+        matched_key_from_map = [key for key in self.model_map if key in _model_name]
+        if not matched_key_from_map and model_type is None:
+            raise ValueError(
+                f"The model {_model_name} does not fall under any of the supported model categories: {list(self.model_map.keys())}. "
+                f"If your model does match one of these architectures, pass it explicitly via the model_type argument."
+            )
+
+        self.model_type = matched_key_from_map[0] if model_type is None else model_type
+
+        # if the selected model does not support the requested device, fall back to its default device (i.e. the first value of the list)
+        self.device = device if device is not None and device in self.model_map[self.model_type]['devices'] else self.model_map[self.model_type]['devices'][0]
+
+    def load_model(self):
+        # FIXME: Not sure how to get num layers for each model to know how many to fit into VRAM.
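+        # One possible approach for the FIXME above (an untested sketch, not wired
+        # into this benchmark): size gpu_layers from free VRAM instead of a
+        # hard-coded value. pynvml is an assumed extra dependency here, and the
+        # per-layer cost is a rough guess for a quantized 7B model:
+        #     import pynvml
+        #     pynvml.nvmlInit()
+        #     handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+        #     free_bytes = pynvml.nvmlDeviceGetMemoryInfo(handle).free
+        #     gpu_layers = int(free_bytes // (150 * 1024 ** 2))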
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_path,
+            model_type=self.model_type,
+            gpu_layers=50 if self.device == 'cuda' else 0
+        )
+        return self
+
+    def run_model(self, prompt: str, max_tokens: int) -> float:
+        start = time.time()
+        output = self.model(prompt, max_new_tokens=max_tokens)
+        tokens = len(self.model.tokenize(output))
+        return tokens / (time.time() - start)
+
+    def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None:
+        for i in range(repetitions):
+            logging.info(
+                f"Running repetition [{str(i+1).zfill(len(str(repetitions)))}/{repetitions}]"
+                )
+            tokens_per_second = self.run_model(prompt, max_tokens)
+            self.results.append(tokens_per_second)
+
+
+
+path = "/home/anindya/Downloads/replit-openorca.ggmlv1.q4_0.bin"
+ben = CTransformersBenchmark(
+    model_path=path, device='cpu'
+).load_model()
+
+ben.benchmark(prompt="hello", max_tokens=3, repetitions=2)
+print(ben.results)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="CTransformers Benchmark.")
+    parser.add_argument(
+        "--prompt",
+        type=str,
+        help="The prompt for the model.",
+    )
+    parser.add_argument("--max_tokens", type=int, help="The maximum number of tokens.")
+    parser.add_argument(
+        "--repetitions",
+        type=int,
+        help="The number of repetitions for the benchmark.",
+    )
+    parser.add_argument(
+        "--device",
+        help="Device to use for the benchmark.",
+    )
+    parser.add_argument(
+        "--log_file",
+        type=str,
+        help="Path to the log file for writing logs (in append mode).",
+    )
+    parser.add_argument(
+        "--models_dir",
+        type=str,
+        help="Path to the models directory.",
+    )
+    args = parser.parse_args()
+    logging.info(
+        f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} "
+        + f"repetitions={args.repetitions} device={args.device}"
+    )
+    report = defaultdict(lambda: defaultdict(float))
\ No newline at end of file

From acec0f6dad396724746945f4d964bbfdcb10a892 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Fri, 24 Nov 2023 10:01:45 +0530
Subject: [PATCH 02/22] Adding sh file to run the benchmarks for CTransformers

---
 bench_ctransformers/bench.sh | 74 ++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 bench_ctransformers/bench.sh

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
new file mode 100644
index 00000000..e8df4a46
--- /dev/null
+++ b/bench_ctransformers/bench.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+########################################################################################################
+# Script: bench.sh
+# Description: This script runs the llama.cpp llama benchmark.
+#
+# Usage: ./bench.sh [OPTIONS]
+# OPTIONS:
+#   -p, --prompt        Prompt for benchmarks (default: 'Explain what is a transformer')
+#   -r, --repetitions   Number of repetitions for benchmarks (default: 10)
+#   -m, --max_tokens    Maximum number of tokens for benchmarks (default: 100)
+#   -d, --device        Device for benchmarks (possible values: 'metal', 'cuda', and 'cpu', default: 'cpu')
+#   -lf, --log_file     Logging file name.
+#   -md, --models_dir   Models directory.
+#   -h, --help          Show this help message
+########################################################################################################
+
+
+set -euo pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+print_usage() {
+    echo "Usage: $0 [OPTIONS]"
+    echo "OPTIONS:"
+    echo "  -p, --prompt        Prompt for benchmarks (default: 'Explain what is a transformer')"
+    echo "  -r, --repetitions   Number of repetitions for benchmarks (default: 10)"
+    echo "  -m, --max_tokens    Maximum number of tokens for benchmarks (default: 100)"
+    echo "  -d, --device        Device for benchmarks (possible values: 'metal', 'cuda', and 'cpu', default: 'cpu')"
+    echo "  -lf, --log_file     Logging file name."
+    echo "  -md, --models_dir   Models directory."
+    echo "  -h, --help          Show this help message"
+    exit 1
+}
+
+check_cuda() {
+    if command -v nvcc &> /dev/null
+    then
+        echo -e "\nUsing CUDA"
+        nvcc --version
+    else
+        echo -e "\nCUDA is not available."
+        exit 1
+    fi
+}
+
+check_platform() {
+    local platform
+    platform=$(uname -s)
+    if [[ "$platform" == "Linux" ]]; then
+        echo "Running on Linux."
+    elif [[ "$platform" == "Darwin" ]]; then
+        echo "Running on Mac OS."
+    else
+        echo "Unknown platform."
+        exit 1
+    fi
+}
+
+check_python() {
+    if command -v python &> /dev/null
+    then
+        echo -e "\nUsing $(python --version)."
+    else
+        echo -e "\nPython does not exist."
+        exit 1
+    fi
+}
+
+setup() {
+    echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
+    bash "$SCRIPT_DIR"/setup.sh "$1"
+}
+

From 88d4ef83612717f88b3d134e015ab81f246ebe78 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Fri, 24 Nov 2023 10:02:04 +0530
Subject: [PATCH 03/22] adding requirements to install dependencies for
 ctransformers

---
 bench_ctransformers/requirements.txt | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 bench_ctransformers/requirements.txt

diff --git a/bench_ctransformers/requirements.txt b/bench_ctransformers/requirements.txt
new file mode 100644
index 00000000..f5d212fa
--- /dev/null
+++ b/bench_ctransformers/requirements.txt
@@ -0,0 +1 @@
+ctransformers
\ No newline at end of file

From 2f65ffa677599d413682d34c55434f49566f12ed Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Mon, 27 Nov 2023 10:05:22 +0530
Subject: [PATCH 04/22] Refactor: Bench CTransformers by removing model_type.

For now, only llama is supported, since it has all three types of device
support and is a standard model.
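A minimal usage sketch of the refactored class (the model path is a
placeholder; the API is exactly what this patch introduces):

    bench = LlamaCTransformersBenchmark(
        "models/llama-2-7b-gguf/llama-2-7b.Q4_0.gguf", device="cuda"
    ).load_model()
    bench.benchmark(prompt="Explain what is a transformer", max_tokens=100, repetitions=10)
    print(bench.results)  # one tokens/sec reading per repetition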
---
 bench_ctransformers/bench.py | 125 +++++++++++++----------------------
 1 file changed, 47 insertions(+), 78 deletions(-)

diff --git a/bench_ctransformers/bench.py b/bench_ctransformers/bench.py
index 17faf740..58c66e37 100644
--- a/bench_ctransformers/bench.py
+++ b/bench_ctransformers/bench.py
@@ -1,88 +1,40 @@
 import argparse
 import logging
 import sys
 import time
-from typing import Optional
 from collections import defaultdict
+from typing import Optional
+
 import numpy as np
 from ctransformers import AutoModelForCausalLM
 
-logging.getLogger('ctransformers').setLevel(logging.ERROR)
+logging.getLogger("ctransformers").setLevel(logging.ERROR)
 logging.basicConfig(
     stream=sys.stdout,
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(message)s",
 )
 
-class CTransformersBenchmark:
-    def __init__(self, model_path: str, device: Optional[str]='cpu', model_type: Optional[str]=None) -> None:
+
+class LlamaCTransformersBenchmark:
+    def __init__(
+        self,
+        model_path: str,
+        device: Optional[str] = "cpu",
+    ) -> None:
         self.model_path, self.device = model_path, device
-        self.model_map = {
-            'gpt2' : {
-                'devices': ['cpu'],
-                'type': 'gpt2'
-            },
-            'gptj': {
-                'type': 'gptj',
-                'devices': ['cpu'],
-            },
-            'gpt4allj': {
-                'devices': ['cpu'],
-                'type': 'gptj'
-            },
-            'gpt-neo': {
-                'devices': ['cpu'],
-                'type': 'gpt_neox'
-            },
-            'falcon':{
-                'devices': ['cpu', 'cuda'],
-                'type': 'falcon'
-            },
-            'llama': {
-                'devices': ['cpu', 'cuda', 'metal'],
-                'type': 'llama'
-            },
-            'mpt': {
-                'devices': ['cpu', 'cuda'],
-                'type': 'mpt'
-            },
-            'starcoder': {
-                'devices': ['cpu'],
-                'type': 'gpt_bigcode'
-            },
-            'dolly': {
-                'devices': ['cpu'],
-                'type': 'dolly-v2'
-            },
-            'replit': {
-                'devices': ['cpu'],
-                'type': 'replit'
-            }
-        }
-        self.results = []
-        # check which supported architecture the model path corresponds to
-        _model_name = model_path.split('/')[-1].lower()
-        matched_key_from_map = [key for key in self.model_map if key in _model_name]
-        if not matched_key_from_map and model_type is None:
-            raise ValueError(
-                f"The model {_model_name} does not fall under any of the supported model categories: {list(self.model_map.keys())}. "
-                f"If your model does match one of these architectures, pass it explicitly via the model_type argument."
-            )
-
-        self.model_type = matched_key_from_map[0] if model_type is None else model_type
-
-        # if the selected model does not support the requested device, fall back to its default device (i.e. the first value of the list)
-        self.device = device if device is not None and device in self.model_map[self.model_type]['devices'] else self.model_map[self.model_type]['devices'][0]
-
+        self.results = []
+        self.device = device
+
     def load_model(self):
         # FIXME: Not sure how to get num layers for each model to know how many to fit into VRAM.
         self.model = AutoModelForCausalLM.from_pretrained(
             self.model_path,
-            model_type=self.model_type,
-            gpu_layers=50 if self.device == 'cuda' else 0
+            model_type="llama",
+            gpu_layers=50 if self.device in ["cuda", "metal"] else 0,
         )
         return self
-
+
     def run_model(self, prompt: str, max_tokens: int) -> float:
         start = time.time()
         output = self.model(prompt, max_new_tokens=max_tokens)
@@ -93,20 +45,11 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None:
     for i in range(repetitions):
         logging.info(
             f"Running repetition [{str(i+1).zfill(len(str(repetitions)))}/{repetitions}]"
-            )
+        )
         tokens_per_second = self.run_model(prompt, max_tokens)
         self.results.append(tokens_per_second)
-
-
-
-path = "/home/anindya/Downloads/replit-openorca.ggmlv1.q4_0.bin"
-ben = CTransformersBenchmark(
-    model_path=path, device='cpu'
-).load_model()
-
-ben.benchmark(prompt="hello", max_tokens=3, repetitions=2)
-print(ben.results)
-
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="CTransformers Benchmark.")
     parser.add_argument(
@@ -139,4 +82,30 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None:
         f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} "
         + f"repetitions={args.repetitions} device={args.device}"
     )
-    report = defaultdict(lambda: defaultdict(float))
\ No newline at end of file
+    report = defaultdict(lambda: defaultdict(float))
+    for quantize in ("Q8_0", "Q4_0"):
+        logging.info(f"Running CTransformers benchmark on Llama with {quantize}")
+        llama_ctransformers_bench = LlamaCTransformersBenchmark(
+            f"{args.models_dir}/llama-2-7b-gguf/llama-2-7b.{quantize}.gguf",
+            device=args.device,
+        ).load_model()
+        llama_ctransformers_bench.benchmark(
+            max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions
+        )
+        q = "int8" if quantize == "Q8_0" else "int4"
+        report["llama_ctransformers"][q] = {
+            "mean": np.mean(llama_ctransformers_bench.results),
+            "std": np.std(llama_ctransformers_bench.results),
+        }
+
+    logging.info("Benchmark report")
+    with open(args.log_file, "a") as file:
+        for framework, quantizations in report.items():
+            for quantization, stats in quantizations.items():
+                logging.info(
+                    f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}"
+                )
+                print(
+                    f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}",
+                    file=file,
+                )

From 73ccb4fb6ce456e3e7c4a66499b0dc285f2fa374 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Mon, 27 Nov 2023 10:23:57 +0530
Subject: [PATCH 05/22] Added benchmark bash script to run benchmarking.

---
 bench_ctransformers/bench.sh | 81 +++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index e8df4a46..36c9a7c1 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -15,7 +15,6 @@
 #   -h, --help          Show this help message
 ########################################################################################################
 
-
 set -euo pipefail
 
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
@@ -38,6 +37,7 @@ check_cuda() {
     then
         echo -e "\nUsing CUDA"
         nvcc --version
+        pip install ctransformers[cuda]
     else
        echo -e "\nCUDA is not available."
        exit 1
@@ -51,6 +51,9 @@ check_platform() {
         echo "Running on Linux."
     elif [[ "$platform" == "Darwin" ]]; then
         echo "Running on Mac OS."
+        echo "Installing CTransformers on metal"
+        export CT_METAL=1
+        pip install ctransformers --no-binary ctransformers
     else
         echo "Unknown platform."
         exit 1
@@ -72,3 +75,79 @@ setup() {
     bash "$SCRIPT_DIR"/setup.sh "$1"
 }
 
+run_benchmarks() {
+    local PROMPT="$1"
+    local REPETITIONS="$2"
+    local MAX_TOKENS="$3"
+    local DEVICE="$4"
+    local LOG_FILENAME="$5"
+    local MODELS_DIR="$6"
+
+    python "$SCRIPT_DIR"/bench.py \
+        --prompt "$PROMPT" \
+        --repetitions "$REPETITIONS" \
+        --max_tokens "$MAX_TOKENS" \
+        --log_file "$LOG_FILENAME" \
+        --models_dir "$MODELS_DIR" \
+        --device "$DEVICE"
+}
+
+# Parse command-line arguments
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        -p|--prompt)
+            PROMPT="$2"
+            shift 2
+            ;;
+        -r|--repetitions)
+            REPETITIONS="$2"
+            shift 2
+            ;;
+        -m|--max_tokens)
+            MAX_TOKENS="$2"
+            shift 2
+            ;;
+        -d|--device)
+            DEVICE="$2"
+            case "$DEVICE" in
+                "cuda" | "metal" | "cpu")
+                    ;;
+                *)
+                    echo "Invalid value for --device. Please use 'cuda', 'metal' or 'cpu'."
+                    print_usage
+                    ;;
+            esac
+            if [ "$DEVICE" == "cuda" ]; then
+                check_cuda
+            fi
+            shift 2
+            ;;
+        -lf|--log_file)
+            LOG_FILENAME="$2"
+            shift 2
+            ;;
+        -md|--models_dir)
+            MODELS_DIR="$2"
+            shift 2
+            ;;
+        -h|--help)
+            print_usage
+            ;;
+        *)
+            echo "Unknown option: $1"
+            print_usage
+            ;;
+    esac
+done
+
+# Set default values if not provided
+PROMPT="${PROMPT:-"Explain what is a transformer"}"
+REPETITIONS="${REPETITIONS:-10}"
+MAX_TOKENS="${MAX_TOKENS:-100}"
+DEVICE="${DEVICE:-cpu}"
+LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}"
+MODELS_DIR="${MODELS_DIR:-"./models"}"
+
+check_platform
+check_python
+setup "$DEVICE"
+run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR"

From 995441be8fda25da38cbefec01a8b71e5bd61efd Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Mon, 27 Nov 2023 10:25:12 +0530
Subject: [PATCH 06/22] Added a requirements file for installing CTransformers

---
 bench_ctransformers/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench_ctransformers/requirements.txt b/bench_ctransformers/requirements.txt
index f5d212fa..b06a12c5 100644
--- a/bench_ctransformers/requirements.txt
+++ b/bench_ctransformers/requirements.txt
@@ -1 +1 @@
-ctransformers
\ No newline at end of file
+ctransformers

From 93cbfc8cf6f2109704aa8df78b92f27a4f20cab1 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Tue, 28 Nov 2023 02:03:46 +0530
Subject: [PATCH 07/22] Removing setup.sh for device

---
 bench_ctransformers/bench.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index 36c9a7c1..87973ecc 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -149,5 +149,4 @@ MODELS_DIR="${MODELS_DIR:-"./models"}"
 
 check_platform
 check_python
-setup "$DEVICE"
 run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR"

From 8fcfa949927756d10612a5fe67cc51f0cfb5538b Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Tue, 28 Nov 2023 02:49:12 +0530
Subject: [PATCH 08/22] Added numpy in requirements.txt

---
 bench_ctransformers/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bench_ctransformers/requirements.txt b/bench_ctransformers/requirements.txt
index b06a12c5..1816ee0d 100644
--- a/bench_ctransformers/requirements.txt
+++ b/bench_ctransformers/requirements.txt
@@ -1 +1,2 @@
 ctransformers
+numpy

From d4468ee307bd5e3c6fd82007e2dcceb9079a5611 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Tue, 28 Nov 2023 02:50:07 +0530
Subject: [PATCH 09/22] added custom dependency installation in benchmark
 script for cuda

---
 bench_ctransformers/bench.sh | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index 87973ecc..e1c682a1 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -37,7 +37,7 @@ check_cuda() {
     then
         echo -e "\nUsing CUDA"
         nvcc --version
-        pip install ctransformers[cuda]
+        pip install ctransformers[cuda] numpy
     else
         echo -e "\nCUDA is not available."
         exit 1
@@ -49,6 +49,7 @@ check_platform() {
     platform=$(uname -s)
     if [[ "$platform" == "Linux" ]]; then
         echo "Running on Linux."
+        pip install -r requirements.txt
     elif [[ "$platform" == "Darwin" ]]; then
         echo "Running on Mac OS."
         echo "Installing CTransformers on metal"
@@ -70,11 +71,6 @@ check_python() {
     fi
 }
 
-setup() {
-    echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
-    bash "$SCRIPT_DIR"/setup.sh "$1"
-}
-
 run_benchmarks() {
     local PROMPT="$1"
     local REPETITIONS="$2"

From 7d19c233a9a24887cfbbd4f033b3d52f1793c466 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Thu, 30 Nov 2023 01:41:03 +0530
Subject: [PATCH 10/22] fix: exclude token counting from the time calculation.

Co-authored-by: Nicola Sosio
---
 bench_ctransformers/bench.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bench_ctransformers/bench.py b/bench_ctransformers/bench.py
index 58c66e37..9e82aa8b 100644
--- a/bench_ctransformers/bench.py
+++ b/bench_ctransformers/bench.py
@@ -38,8 +38,9 @@ def load_model(self):
     def run_model(self, prompt: str, max_tokens: int) -> float:
         start = time.time()
         output = self.model(prompt, max_new_tokens=max_tokens)
+        delta = time.time() - start
         tokens = len(self.model.tokenize(output))
-        return tokens / (time.time() - start)
+        return tokens / delta

From c949737d3a2fe1b58193b56154cb15e5000cf352 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Thu, 30 Nov 2023 01:41:22 +0530
Subject: [PATCH 11/22] fix: typo

Co-authored-by: Nicola Sosio
---
 bench_ctransformers/bench.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index e1c682a1..738c40eb 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -2,7 +2,7 @@
 
 ########################################################################################################
 # Script: bench.sh
-# Description: This script runs the llama.cpp llama benchmark.
+# Description: This script runs the ctransformers llama benchmark.
 #
 # Usage: ./bench.sh [OPTIONS]
 # OPTIONS:

From 271d392dc15bb5a616b91e49715300576911025f Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Thu, 30 Nov 2023 02:20:10 +0530
Subject: [PATCH 12/22] Added ctransformers benchmark results for A100 and CPU

---
 README.md | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index c6728bdb..53155af0 100644
--- a/README.md
+++ b/README.md
@@ -73,14 +73,15 @@ CUDA Version: 11.7
 
 Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cuda --prompt 'Explain what is a transformer'`
 
-| Engine      | float32      | float16       | int8          | int4          |
-|-------------|--------------|---------------|---------------|---------------|
-| burn        | 13.12 ± 0.85 | -             | -             | -             |
-| candle      | -            | 36.78 ± 2.17  | -             | -             |
-| llama.cpp   | -            | -             | 84.48 ± 3.76  | 106.76 ± 1.29 |
-| ctranslate  | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
-| tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
-| onnx        | -            | 54.16 ± 3.15  | -             | -             |
+| Engine               | float32      | float16       | int8          | int4          |
+|----------------------|--------------|---------------|---------------|---------------|
+| burn                 | 13.12 ± 0.85 | -             | -             | -             |
+| candle               | -            | 36.78 ± 2.17  | -             | -             |
+| llama.cpp            | -            | -             | 84.48 ± 3.76  | 106.76 ± 1.29 |
+| ctranslate           | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
+| tinygrad             | -            | 20.32 ± 0.06  | -             | -             |
+| onnx                 | -            | 54.16 ± 3.15  | -             | -             |
+| ctransformers        | -            | -             | 81.61 ± 3.66  | 84.51 ± 7.93  |
 
 *(data updated: 23rd November 2023)
 
@@ -108,13 +109,14 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cpu --prompt 'Explain what is a transformer'`
 
 Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device metal --prompt 'Explain what is a transformer'`
 
-| Engine      | float32      | float16      | int8         | int4         |
-|-------------|--------------|--------------|--------------|--------------|
-| burn        | -            | -            | -            | -            |
-| candle      | -            | -            | -            | -            |
-| llama.cpp   | -            | -            | 31.24 ± 7.82 | 46.75 ± 9.55 |
-| ctranslate  | -            | -            | -            | -            |
-| tinygrad    | -            | 29.78 ± 1.18 | -            | -            |
-| onnx        | -            | -            | -            | -            |
+| Engine               | float32      | float16       | int8         | int4         |
+|----------------------|--------------|---------------|--------------|--------------|
+| burn                 | -            | -             | -            | -            |
+| candle               | -            | -             | -            | -            |
+| llama.cpp            | -            | -             | 31.24 ± 7.82 | 46.75 ± 9.55 |
+| ctranslate           | -            | -             | -            | -            |
+| tinygrad             | -            | 29.78 ± 1.18  | -            | -            |
+| onnx                 | -            | -             | -            | -            |
+| ctransformers        | -            | -             | 4.58 ± 0.07  | 7.00 ± 0.23  |
 
 *(data updated: 23rd November 2023)

From 99246edf4dba021907f991665a85ef7720c14026 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Thu, 30 Nov 2023 02:40:54 +0530
Subject: [PATCH 13/22] added latest benchmark info for ctransformers, m2
 (cpu, gpu), a100.
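Each table cell below is the mean ± standard deviation of the per-repetition
tokens/sec list that bench.py collects; a minimal sketch of that aggregation
(with made-up numbers):

    import numpy as np

    results = [13.2, 14.1, 13.7]  # tokens/sec from three repetitions
    print(f"{np.mean(results):.2f} ± {np.std(results):.2f}")  # -> 13.67 ± 0.37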
---
 README.md | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 53155af0..900d77f2 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cuda --prompt 'Explain what is a transformer'`
 | onnx                 | -            | 54.16 ± 3.15  | -             | -             |
 | ctransformers        | -            | -             | 81.61 ± 3.66  | 84.51 ± 7.93  |
 
-*(data updated: 23rd November 2023)
+*(data updated: 30th November 2023)
 
 ### M2 MAX 32GB Inference Bench:
 
@@ -96,14 +96,15 @@ CUDA Version: NA
 
 Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cpu --prompt 'Explain what is a transformer'`
 
-| Engine      | float32      | float16      | int8         | int4         |
-|-------------|--------------|--------------|--------------|--------------|
-| burn        | 0.30 ± 0.09  | -            | -            | -            |
-| candle      | -            | 3.43 ± 0.02  | -            | -            |
-| llama.cpp   | -            | -            | 14.41 ± 1.59 | 20.96 ± 1.94 |
-| ctranslate  | -            | -            | 2.11 ± 0.73  | -            |
-| tinygrad    | -            | 4.21 ± 0.38  | -            | -            |
-| onnx        | -            | -            | -            | -            |
+| Engine               | float32      | float16      | int8         | int4         |
+|----------------------|--------------|--------------|--------------|--------------|
+| burn                 | 0.30 ± 0.09  | -            | -            | -            |
+| candle               | -            | 3.43 ± 0.02  | -            | -            |
+| llama.cpp            | -            | -            | 14.41 ± 1.59 | 20.96 ± 1.94 |
+| ctranslate           | -            | -            | 2.11 ± 0.73  | -            |
+| tinygrad             | -            | 4.21 ± 0.38  | -            | -            |
+| onnx                 | -            | -            | -            | -            |
+| ctransformers        | -            | -            | 13.79 ± 0.50 | 22.93 ± 0.86 |
 
 #### GPU (Metal)
 
@@ -117,6 +118,6 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device metal --prompt 'Explain what is a transformer'`
 | ctranslate           | -            | -             | -            | -            |
 | tinygrad             | -            | 29.78 ± 1.18  | -            | -            |
 | onnx                 | -            | -             | -            | -            |
-| ctransformers        | -            | -             | 4.58 ± 0.07  | 7.00 ± 0.23  |
+| ctransformers        | -            | -             | 21.24 ± 0.81 | 34.08 ± 4.78 |
 
 *(data updated: 23rd November 2023)

From c8c861b9bf1a7677ba84940f321d75f2b184e998 Mon Sep 17 00:00:00 2001
From: GitHub Actions
Date: Fri, 1 Dec 2023 13:00:35 +0000
Subject: [PATCH 14/22] Update placeholder in llama2.md

---
 docs/llama2.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/llama2.md b/docs/llama2.md
index 05a5f259..cf882ed1 100644
--- a/docs/llama2.md
+++ b/docs/llama2.md
@@ -17,7 +17,7 @@
 | tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
 | onnx        | -            | 54.16 ± 3.15  | -             | -             |
 
-*(Data updated: `30th November 2023`)
+*(Data updated: `1st December 2023`)
 
 ## M2 MAX 32GB Inference Bench:
 
@@ -53,4 +53,4 @@
 | tinygrad    | -            | 29.78 ± 1.18  | -            | -            |
 | onnx        | -            | -             | -            | -            |
 
-*(Data updated: `30th November 2023`)
+*(Data updated: `1st December 2023`)

From 964e2cd13a721ac0e7e05d7b3d9059a58c1d3a50 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Fri, 1 Dec 2023 18:57:03 +0530
Subject: [PATCH 15/22] revert default docs to latest changes in main

---
 docs/llama2.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/llama2.md b/docs/llama2.md
index cf882ed1..05a5f259 100644
--- a/docs/llama2.md
+++ b/docs/llama2.md
@@ -17,7 +17,7 @@
 | tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
 | onnx        | -            | 54.16 ± 3.15  | -             | -             |
 
-*(Data updated: `1st December 2023`)
+*(Data updated: `30th November 2023`)
 
 ## M2 MAX 32GB Inference Bench:
 
@@ -53,4 +53,4 @@
 | tinygrad    | -            | 29.78 ± 1.18  | -            | -            |
 | onnx        | -            | -             | -            | -            |
 
-*(Data updated: `1st December 2023`)
+*(Data updated: `30th November 2023`)

From c6516942a6d34ad8273569d0a4b12edab0197174 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 01:35:00 +0530
Subject: [PATCH 16/22] added setup.sh file for installing dependencies for
 ctransformers

---
 bench_ctransformers/setup.sh | 85 ++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100755 bench_ctransformers/setup.sh

diff --git a/bench_ctransformers/setup.sh b/bench_ctransformers/setup.sh
new file mode 100755
index 00000000..08cc5b01
--- /dev/null
+++ b/bench_ctransformers/setup.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+################################################################################
+# Script: setup.sh
+# Description: Automates the setup of a virtual environment and installs project
+# requirements.
+################################################################################
+
+set -euo pipefail
+
+# Function to install CTransformers with CUDA version check
+install_ctransformers_cuda() {
+    CUDA_VERSION=$(nvcc --version | grep "release" | sed -n 's/.*release \(.*\),.*/\1/p')
+
+    if [ -z "$CUDA_VERSION" ]; then
+        echo "CUDA is not installed or not found."
+        exit 1
+    fi
+
+    CUDA_MAJOR=$(echo "$CUDA_VERSION" | cut -d. -f1)
+    CUDA_MINOR=$(echo "$CUDA_VERSION" | cut -d. -f2)
+
+    if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 2 ]; }; then
+        echo "Detected CUDA version >= 12.2"
+        pip install ctransformers[cuda] > /dev/null
+    else
+        echo "Detected CUDA version < 12.2"
+        CMAKE_ARGS="-DCMAKE_CUDA_COMPILER=$(which nvcc)" CT_CUBLAS=1 pip install ctransformers --no-binary ctransformers > /dev/null
+    fi
+}
+
+install_device_specific_ctransformers() {
+    local DEVICE="$1"
+
+    if [ "$#" -ne 1 ]; then
+        echo "Usage: $0 <device>"
+        exit 1
+    fi
+
+    case "$DEVICE" in
+        cuda)
+            echo "Installing CTransformers for CUDA."
+            install_ctransformers_cuda
+            ;;
+        metal)
+            echo "Installing CTransformers for Metal."
+            CT_METAL=1 pip install ctransformers --no-binary ctransformers
+            ;;
+        cpu)
+            echo "Installing CTransformers for CPU."
+            pip install ctransformers > /dev/null
+            ;;
+        *)
+            echo "Unsupported DEVICE: $DEVICE"
+            return 1
+            ;;
+    esac
+}
+
+# Main script starts here.
+
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <device>"
+    exit 1
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+DEVICE="$1"
+VENV_DIR="$SCRIPT_DIR/venv"
+
+# Build and activate the virtual environment.
+
+if [ ! -d "$VENV_DIR" ]; then
+    python3 -m venv "$VENV_DIR"
+    echo "Virtual environment '$VENV_DIR' created."
+    # shellcheck disable=SC1091
+    source "$VENV_DIR/bin/activate"
+    pip install --upgrade pip > /dev/null
+    pip install -r "$SCRIPT_DIR/requirements.txt" --no-cache-dir > /dev/null
+else
+    # shellcheck disable=SC1091
+    source "$VENV_DIR/bin/activate"
+fi
+
+install_device_specific_ctransformers "$DEVICE"

From ae02965e5d8fc52c2550921ab92535720f298745 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 01:36:00 +0530
Subject: [PATCH 17/22] Refactor: benchmark bash file.

- integrated setup.sh file installations.
- removed unnecessary package installation inside bench.sh

---
 bench_ctransformers/bench.sh | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/bench_ctransformers/bench.sh b/bench_ctransformers/bench.sh
index 738c40eb..37d36003 100644
--- a/bench_ctransformers/bench.sh
+++ b/bench_ctransformers/bench.sh
@@ -2,7 +2,7 @@
 
 ########################################################################################################
 # Script: bench.sh
-# Description: This script runs the ctransformers llama benchmark.
+# Description: This script runs the llama.cpp llama benchmark.
 #
 # Usage: ./bench.sh [OPTIONS]
 # OPTIONS:
@@ -37,7 +37,6 @@ check_cuda() {
     then
         echo -e "\nUsing CUDA"
         nvcc --version
-        pip install ctransformers[cuda] numpy
     else
         echo -e "\nCUDA is not available."
         exit 1
@@ -49,12 +48,8 @@ check_platform() {
     platform=$(uname -s)
     if [[ "$platform" == "Linux" ]]; then
         echo "Running on Linux."
-        pip install -r requirements.txt
     elif [[ "$platform" == "Darwin" ]]; then
         echo "Running on Mac OS."
-        echo "Installing CTransformers on metal"
-        export CT_METAL=1
-        pip install ctransformers --no-binary ctransformers
     else
         echo "Unknown platform."
         exit 1
@@ -71,6 +66,11 @@ check_python() {
     fi
 }
 
+setup() {
+    echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
+    bash "$SCRIPT_DIR"/setup.sh "$1"
+}
+
 run_benchmarks() {
     local PROMPT="$1"
     local REPETITIONS="$2"
@@ -79,6 +79,8 @@ run_benchmarks() {
     local LOG_FILENAME="$5"
     local MODELS_DIR="$6"
 
+    # shellcheck disable=SC1091
+    source "$SCRIPT_DIR/venv/bin/activate"
     python "$SCRIPT_DIR"/bench.py \
         --prompt "$PROMPT" \
         --repetitions "$REPETITIONS" \
@@ -145,4 +147,5 @@ MODELS_DIR="${MODELS_DIR:-"./models"}"
 
 check_platform
 check_python
+setup "$DEVICE"
 run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR"

From 8ebfd596049624ff6bbb3b9f3ac5fc2c1b6df423 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 01:37:28 +0530
Subject: [PATCH 18/22] removed ctransformers from requirements file

---
 bench_ctransformers/requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bench_ctransformers/requirements.txt b/bench_ctransformers/requirements.txt
index 1816ee0d..24ce15ab 100644
--- a/bench_ctransformers/requirements.txt
+++ b/bench_ctransformers/requirements.txt
@@ -1,2 +1 @@
-ctransformers
 numpy

From de4090935b3fdbe2f45d23ead60d5ac5da4a0c07 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 01:37:58 +0530
Subject: [PATCH 19/22] added ctransformers results inside llama2.md.template

---
 docs/llama2.md.template | 51 ++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/docs/llama2.md.template b/docs/llama2.md.template
index 3233ba09..0a0560b6 100644
--- a/docs/llama2.md.template
+++ b/docs/llama2.md.template
@@ -8,14 +8,15 @@
 - Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cuda --prompt 'Explain what is a transformer'`
 
 **Performance Metrics:**
-| Engine      | float32      | float16       | int8          | int4          |
-|-------------|--------------|---------------|---------------|---------------|
-| burn        | 13.12 ± 0.85 | -             | -             | -             |
-| candle      | -            | 36.78 ± 2.17  | -             | -             |
-| llama.cpp   | -            | -             | 84.48 ± 3.76  | 106.76 ± 1.29 |
-| ctranslate  | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
-| tinygrad    | -            | 20.32 ± 0.06  | -             | -             |
-| onnx        | -            | 54.16 ± 3.15  | -             | -             |
+| Engine               | float32      | float16       | int8          | int4          |
+|----------------------|--------------|---------------|---------------|---------------|
+| burn                 | 13.12 ± 0.85 | -             | -             | -             |
+| candle               | -            | 36.78 ± 2.17  | -             | -             |
+| llama.cpp            | -            | -             | 84.48 ± 3.76  | 106.76 ± 1.29 |
+| ctranslate           | -            | 51.38 ± 16.01 | 36.12 ± 11.93 | -             |
+| tinygrad             | -            | 20.32 ± 0.06  | -             | -             |
+| onnx                 | -            | 54.16 ± 3.15  | -             | -             |
+| ctransformers        | -            | -             | 81.61 ± 3.66  | 84.51 ± 7.93  |
 
 *(Data updated: ``)
 
@@ -30,27 +31,29 @@
 - Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cpu --prompt 'Explain what is a transformer'`
 
 **Performance Metrics:**
-| Engine      | float32      | float16      | int8         | int4         |
-|-------------|--------------|--------------|--------------|--------------|
-| burn        | 0.30 ± 0.09  | -            | -            | -            |
-| candle      | -            | 3.43 ± 0.02  | -            | -            |
-| llama.cpp   | -            | -            | 14.41 ± 1.59 | 20.96 ± 1.94 |
-| ctranslate  | -            | -            | 2.11 ± 0.73  | -            |
-| tinygrad    | -            | 4.21 ± 0.38  | -            | -            |
-| onnx        | -            | -            | -            | -            |
+| Engine               | float32      | float16      | int8         | int4         |
+|----------------------|--------------|--------------|--------------|--------------|
+| burn                 | 0.30 ± 0.09  | -            | -            | -            |
+| candle               | -            | 3.43 ± 0.02  | -            | -            |
+| llama.cpp            | -            | -            | 14.41 ± 1.59 | 20.96 ± 1.94 |
+| ctranslate           | -            | -            | 2.11 ± 0.73  | -            |
+| tinygrad             | -            | 4.21 ± 0.38  | -            | -            |
+| onnx                 | -            | -            | -            | -            |
+| ctransformers        | -            | -            | 13.79 ± 0.50 | 22.93 ± 0.86 |
 
 ### GPU (Metal)
 
 **Command:** `./benchmark.sh --repetitions 10 --max_tokens 100 --device metal --prompt 'Explain what is a transformer'`
 
 **Performance Metrics:**
-| Engine      | float32      | float16      | int8         | int4         |
-|-------------|--------------|--------------|--------------|--------------|
-| burn        | -            | -            | -            | -            |
-| candle      | -            | -            | -            | -            |
-| llama.cpp   | -            | -            | 31.24 ± 7.82 | 46.75 ± 9.55 |
-| ctranslate  | -            | -            | -            | -            |
-| tinygrad    | -            | 29.78 ± 1.18 | -            | -            |
-| onnx        | -            | -            | -            | -            |
+| Engine               | float32      | float16       | int8         | int4         |
+|----------------------|--------------|---------------|--------------|--------------|
+| burn                 | -            | -             | -            | -            |
+| candle               | -            | -             | -            | -            |
+| llama.cpp            | -            | -             | 31.24 ± 7.82 | 46.75 ± 9.55 |
+| ctranslate           | -            | -             | -            | -            |
+| tinygrad             | -            | 29.78 ± 1.18  | -            | -            |
+| onnx                 | -            | -             | -            | -            |
+| ctransformers        | -            | -             | 21.24 ± 0.81 | 34.08 ± 4.78 |
 
 *(Data updated: ``)

From 99707b37a847902e24f52bb00e401e95a562d920 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 02:14:57 +0530
Subject: [PATCH 20/22] fix: quiet installation for metal devices

---
 bench_ctransformers/setup.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench_ctransformers/setup.sh b/bench_ctransformers/setup.sh
index 08cc5b01..2f1efe3d 100755
--- a/bench_ctransformers/setup.sh
+++ b/bench_ctransformers/setup.sh
@@ -44,7 +44,7 @@ install_device_specific_ctransformers() {
             ;;
         metal)
             echo "Installing CTransformers for Metal."
-            CT_METAL=1 pip install ctransformers --no-binary ctransformers
+            CT_METAL=1 pip install ctransformers --no-binary ctransformers > /dev/null
             ;;
         cpu)
             echo "Installing CTransformers for CPU."

From 4cf57878bc8435a5caa43f67eb6559dcf602df4d Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 15:58:17 +0530
Subject: [PATCH 21/22] fix: syntax

Co-authored-by: Nicola Sosio
---
 bench_ctransformers/setup.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench_ctransformers/setup.sh b/bench_ctransformers/setup.sh
index 2f1efe3d..0fd16d22 100755
--- a/bench_ctransformers/setup.sh
+++ b/bench_ctransformers/setup.sh
@@ -20,7 +20,7 @@ install_ctransformers_cuda() {
     CUDA_MAJOR=$(echo "$CUDA_VERSION" | cut -d. -f1)
     CUDA_MINOR=$(echo "$CUDA_VERSION" | cut -d. -f2)
 
-    if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 2 ]; }; then
+    if [ "$CUDA_MAJOR" -gt 12 ] || [ "$CUDA_MAJOR" -eq 12 -a "$CUDA_MINOR" -ge 2 ]; then
         echo "Detected CUDA version >= 12.2"
         pip install ctransformers[cuda] > /dev/null
     else

From 0f332b995862a78dff9ccce4b0b3d16a4a186a75 Mon Sep 17 00:00:00 2001
From: Anindyadeep
Date: Sat, 2 Dec 2023 16:10:16 +0530
Subject: [PATCH 22/22] Refactor: setup script

- reverted back the conditional statement (see the note below).
- placed install_ctransformers_cuda() function after requirements installation.
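For context on the reverted conditional: combining tests with -a inside a
single [ ] is marked obsolescent by POSIX and flagged by ShellCheck (SC2166),
which is presumably why the grouped form comes back here:

    # obsolescent form introduced in PATCH 21
    if [ "$CUDA_MAJOR" -gt 12 ] || [ "$CUDA_MAJOR" -eq 12 -a "$CUDA_MINOR" -ge 2 ]; then
    # portable form restored below: separate tests joined by &&, braces keep precedence
    if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 2 ]; }; then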
---
 bench_ctransformers/setup.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bench_ctransformers/setup.sh b/bench_ctransformers/setup.sh
index 0fd16d22..7d83fb65 100755
--- a/bench_ctransformers/setup.sh
+++ b/bench_ctransformers/setup.sh
@@ -20,7 +20,7 @@ install_ctransformers_cuda() {
     CUDA_MAJOR=$(echo "$CUDA_VERSION" | cut -d. -f1)
     CUDA_MINOR=$(echo "$CUDA_VERSION" | cut -d. -f2)
 
-    if [ "$CUDA_MAJOR" -gt 12 ] || [ "$CUDA_MAJOR" -eq 12 -a "$CUDA_MINOR" -ge 2 ]; then
+    if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 2 ]; }; then
         echo "Detected CUDA version >= 12.2"
         pip install ctransformers[cuda] > /dev/null
     else
@@ -77,9 +77,8 @@ if [ ! -d "$VENV_DIR" ]; then
     source "$VENV_DIR/bin/activate"
     pip install --upgrade pip > /dev/null
     pip install -r "$SCRIPT_DIR/requirements.txt" --no-cache-dir > /dev/null
+    install_device_specific_ctransformers "$DEVICE"
 else
     # shellcheck disable=SC1091
     source "$VENV_DIR/bin/activate"
 fi
-
-install_device_specific_ctransformers "$DEVICE"
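With the full series applied, a typical end-to-end run (using the defaults
documented in bench.sh and the models layout bench.py expects) looks like:

    cd bench_ctransformers
    ./bench.sh --prompt 'Explain what is a transformer' \
        --repetitions 10 --max_tokens 100 \
        --device cuda --models_dir ./models

setup.sh creates the virtualenv and installs the device-specific ctransformers
build, and bench.py then benchmarks llama-2-7b at Q8_0 and Q4_0, appending the
mean ± std tokens/sec for each quantization to the log file.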