From e9659c1680a004c97a6b0d3477f1fe554bab70d3 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Tue, 5 Dec 2023 18:14:31 +0530 Subject: [PATCH 01/29] Feat: Benchmark for transformers pytorch. This commit adds the initial script for doing benchmarking on pytorch implementation of LLama by huggingface transformers. Benchmark is done on fp16/32 and bf16 format. --- bench_pytorch/bench.py | 143 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 bench_pytorch/bench.py diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py new file mode 100644 index 00000000..29f4cc00 --- /dev/null +++ b/bench_pytorch/bench.py @@ -0,0 +1,143 @@ +import argparse +import logging +import sys +import time +from collections import defaultdict +from typing import Optional + +import numpy as np +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +logging.getLogger("transformers").setLevel(logging.ERROR) +logging.basicConfig( + stream=sys.stdout, + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + + +class LlamaPyTorchBenchmark: + def __init__( + self, model_path: str, precision: str, device: Optional[str] = "cuda" + ) -> None: + self.model_path = model_path + self.precision = precision + self.results = [] + self.precision_to_dtype_map = { + "fp16": torch.float16, + "fp32": torch.float32, + "bf16": torch.bfloat16, + } + + # some of the conditions where things can not be supported + assert precision in ["bf16", "fp16", "fp32"], ValueError( + "Supported precisions are: p16', 'fp32', 'int8', 'int4'" + ) + assert device in ["cpu", "cuda", "mps"], ValueError( + "Supported devices are: 'cpu', 'cuda', 'mps'" + ) + + if device == "cpu" and precision != "fp32": + raise ValueError( + "When device is set to CPU, fp32 is the only supported precision." 
+ ) + + self.device = "cuda:0" if device == "cuda" else device + # build the params + self.model_args = { + "device_map": self.device, + "torch_dtype": self.precision_to_dtype_map[self.precision], + } + + def load_model(self): + """Loads the model into various formats and device.""" + self.model = AutoModelForCausalLM.from_pretrained( + self.model_path, **self.model_args + ) + self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) + return self + + def run_model(self, prompt: str, max_tokens: int) -> float: + start = time.time() + tokenized_input = self.tokenizer.encode(prompt, return_tensors="pt").to( + self.device + ) + output = ( + self.model.generate(**tokenized_input, max_new_tokens=max_tokens) + .detach() + .cpu() + .numpy() + ) + delta = time.time() - start + return len(output) / delta + + def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None: + for i in range(repetitions): + logging.info( + f"Running repetition [{str(i+1).zfill(len(str(repetitions)))}/{repetitions}]" + ) + tokens_per_second = self.run_model(prompt, max_tokens) + self.results.append(tokens_per_second) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="CTransformers Benchmark.") + parser.add_argument( + "--prompt", + type=str, + help="The prompt for the model.", + ) + parser.add_argument("--max_tokens", type=int, help="The maximum number of tokens.") + parser.add_argument( + "--repetitions", + type=int, + help="The number of repetitions for the benchmark.", + ) + parser.add_argument( + "--device", + help="Device to use for the benchmark.", + ) + parser.add_argument( + "--log_file", + type=str, + help="Path to the log file for writing logs (in append mode).", + ) + parser.add_argument( + "--models_dir", + type=str, + help="Path to the models directory.", + ) + args = parser.parse_args() + logging.info( + f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} " + + f"repetitions={args.repetitions} 
device={args.device}" + ) + report = defaultdict(lambda: defaultdict(float)) + + for precision in ("bf16", "fp16", "fp32") if args.device != "cpu" else ("fp32"): + logging.info( + f"Running Transformer benchmark (pytorch backend) on Llama with precision: {precision}" + ) + llama_transformers_pytorch_benchmark = LlamaPyTorchBenchmark( + model_path=args.model_dir, device=args.device, precision=precision + ).load_model() + llama_transformers_pytorch_benchmark.benchmark( + max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions + ) + + report["llama_transformers_pytorch"][precision] = { + "mean": np.mean(llama_transformers_pytorch_benchmark.results), + "std": np.mean(llama_transformers_pytorch_benchmark.results), + } + logging.info("Benchmark Report") + with open(args.log_file, "a") as file: + for framework, quantizations in report.items(): + for quantization, stats in quantizations.items(): + logging.info( + f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}" + ) + print( + f"{framework}, {quantization}: {stats['mean']:.2f} ± {stats['std']:.2f}", + file=file, + ) From d86788ce19a8b9b74432cf3bf4d085c40165bd75 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Tue, 5 Dec 2023 18:16:09 +0530 Subject: [PATCH 02/29] initial script for doing benchmarking transformers pytorch --- bench_pytorch/bench.sh | 151 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 bench_pytorch/bench.sh diff --git a/bench_pytorch/bench.sh b/bench_pytorch/bench.sh new file mode 100644 index 00000000..37d36003 --- /dev/null +++ b/bench_pytorch/bench.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +######################################################################################################## +# Script: bench.sh +# Description: This script runs the Llama benchmark for PyTorch (Hugging Face transformers). 
+# +# Usage: ./bench.sh [OPTIONS] +# OPTIONS: +# -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer') +# -r, --repetitions Number of repetitions for benchmarks (default: 10) +# -m, --max_tokens Maximum number of tokens for benchmarks (default: 100) +# -d, --device Device for benchmarks (possible values: 'cuda', 'metal', and 'cpu', default: 'cpu') +# -lf, --log_file Logging file name. +# -md, --models_dir Models directory. +# -h, --help Show this help message +######################################################################################################## + +set -euo pipefail + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +print_usage() { + echo "Usage: $0 [OPTIONS]" + echo "OPTIONS:" + echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')" + echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)" + echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)" + echo " -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')" + echo " -lf, --log_file Logging file name." + echo " -md, --models_dir Models directory." + echo " -h, --help Show this help message" + exit 1 +} + +check_cuda() { + if command -v nvcc &> /dev/null + then + echo -e "\nUsing CUDA" + nvcc --version + else + echo -e "\nCUDA is not available." + exit 1 + fi +} + +check_platform() { + local platform + platform=$(uname -s) + if [[ "$platform" == "Linux" ]]; then + echo "Running on Linux." + elif [[ "$platform" == "Darwin" ]]; then + echo "Running on Mac OS." + else + echo "Unknown platform." + exit 1 + fi +} + +check_python() { + if command -v python &> /dev/null + then + echo -e "\nUsing $(python --version)." + else + echo -e "\nPython does not exist." + exit 1 + fi +} + +setup() { + echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." 
+ bash "$SCRIPT_DIR"/setup.sh "$1" +} + +run_benchmarks() { + local PROMPT="$1" + local REPETITIONS="$2" + local MAX_TOKENS="$3" + local DEVICE="$4" + local LOG_FILENAME="$5" + local MODELS_DIR="$6" + + # shellcheck disable=SC1091 + source "$SCRIPT_DIR/venv/bin/activate" + python "$SCRIPT_DIR"/bench.py \ + --prompt "$PROMPT" \ + --repetitions "$REPETITIONS" \ + --max_tokens "$MAX_TOKENS" \ + --log_file "$LOG_FILENAME" \ + --models_dir "$MODELS_DIR" \ + --device "$DEVICE" +} + +# Parse command-line arguments +while [ "$#" -gt 0 ]; do + case "$1" in + -p|--prompt) + PROMPT="$2" + shift 2 + ;; + -r|--repetitions) + REPETITIONS="$2" + shift 2 + ;; + -m|--max_tokens) + MAX_TOKENS="$2" + shift 2 + ;; + -d|--device) + DEVICE="$2" + case "$DEVICE" in + "cuda" | "metal" | "cpu") + ;; + *) + echo "Invalid value for --device. Please use 'cuda', 'gpu' or 'cpu'." + print_usage + ;; + esac + if [ "$DEVICE" == "cuda" ]; then + check_cuda + fi + shift 2 + ;; + -lf|--log_file) + LOG_FILENAME="$2" + shift 2 + ;; + -md|--models_dir) + MODELS_DIR="$2" + shift 2 + ;; + -h|--help) + print_usage + ;; + *) + echo "Unknown option: $1" + print_usage + ;; + esac +done +# Set default values if not provided +PROMPT="${PROMPT:-"Explain what is a transformer"}" +REPETITIONS="${REPETITIONS:-10}" +MAX_TOKENS="${MAX_TOKENS:-100}" +DEVICE="${DEVICE:-'cpu'}" +LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}" +MODELS_DIR="${MODELS_DIR:-"./models"}" + +check_platform +check_python +setup "$DEVICE" +run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR" From f625ef72685e33edb9a094d86a98ff50ca4e7a7e Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Tue, 5 Dec 2023 18:16:33 +0530 Subject: [PATCH 03/29] added the requirements to install for benchmarking transformers pytorch --- bench_pytorch/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 bench_pytorch/requirements.txt diff --git a/bench_pytorch/requirements.txt 
b/bench_pytorch/requirements.txt new file mode 100644 index 00000000..3f76c56b --- /dev/null +++ b/bench_pytorch/requirements.txt @@ -0,0 +1,4 @@ +transformers==4.34.1 +torch==2.0.0 +dataset==2.14.6 +accelerate==0.24.1 From 5e0f43690c182ae25d568782c1cd3af573069d69 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Tue, 5 Dec 2023 18:17:12 +0530 Subject: [PATCH 04/29] added the installation setup sh files --- bench_pytorch/setup.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 bench_pytorch/setup.sh diff --git a/bench_pytorch/setup.sh b/bench_pytorch/setup.sh new file mode 100644 index 00000000..563e0815 --- /dev/null +++ b/bench_pytorch/setup.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +################################################################################ +# Script: setup.sh +# Description: Automates the setup of a virtual environment and installs project +# requirements. +################################################################################ + +set -euo pipefail + +# Main script starts here. + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VENV_DIR="$SCRIPT_DIR/venv" + +if [ ! -d "$VENV_DIR" ]; then + python3 -m venv "$VENV_DIR" + echo "Virtual environment '$VENV_DIR' created." + # shellcheck disable=SC1091 + source "$VENV_DIR/bin/activate" + pip install --upgrade pip > /dev/null + pip install -r "$SCRIPT_DIR/requirements.txt" --no-cache-dir > /dev/null +else + # shellcheck disable=SC1091 + source "$VENV_DIR/bin/activate" +fi From 19ced2da3d4c1a2ba1210058ceb62a08d43af62c Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Tue, 5 Dec 2023 14:18:38 +0000 Subject: [PATCH 05/29] refactor: Fix minor bugs in benchmark python script. 
--- bench_pytorch/bench.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index 29f4cc00..e41c814b 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -32,7 +32,7 @@ def __init__( # some of the conditions where things can not be supported assert precision in ["bf16", "fp16", "fp32"], ValueError( - "Supported precisions are: p16', 'fp32', 'int8', 'int4'" + "Supported precisions are: 'bf16', fp16', 'fp32'" ) assert device in ["cpu", "cuda", "mps"], ValueError( "Supported devices are: 'cpu', 'cuda', 'mps'" @@ -64,7 +64,7 @@ def run_model(self, prompt: str, max_tokens: int) -> float: self.device ) output = ( - self.model.generate(**tokenized_input, max_new_tokens=max_tokens) + self.model.generate(input_ids=tokenized_input, max_new_tokens=max_tokens) .detach() .cpu() .numpy() @@ -79,6 +79,8 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None: ) tokens_per_second = self.run_model(prompt, max_tokens) self.results.append(tokens_per_second) + del self.model + torch.cuda.synchronize() if __name__ == "__main__": @@ -115,12 +117,12 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None: ) report = defaultdict(lambda: defaultdict(float)) - for precision in ("bf16", "fp16", "fp32") if args.device != "cpu" else ("fp32"): + for precision in ("bf16", "fp16", "fp32") if args.device != "cpu" else ("fp32",): logging.info( f"Running Transformer benchmark (pytorch backend) on Llama with precision: {precision}" ) llama_transformers_pytorch_benchmark = LlamaPyTorchBenchmark( - model_path=args.model_dir, device=args.device, precision=precision + model_path=args.models_dir, device=args.device, precision=precision ).load_model() llama_transformers_pytorch_benchmark.benchmark( max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions From 545e1a676a2fe16827963cf6f732dbfbf7f11196 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Tue, 
5 Dec 2023 14:19:47 +0000 Subject: [PATCH 06/29] fix: model_dir path --- bench_pytorch/bench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 bench_pytorch/bench.sh diff --git a/bench_pytorch/bench.sh b/bench_pytorch/bench.sh old mode 100644 new mode 100755 index 37d36003..0518c69a --- a/bench_pytorch/bench.sh +++ b/bench_pytorch/bench.sh @@ -143,7 +143,7 @@ REPETITIONS="${REPETITIONS:-10}" MAX_TOKENS="${MAX_TOKENS:-100}" DEVICE="${DEVICE:-'cpu'}" LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}" -MODELS_DIR="${MODELS_DIR:-"./models"}" +MODELS_DIR="${MODELS_DIR:-"./models/llama-2-7b-hf"}" check_platform check_python From d38f50e8ea9eff4fb3392cf1744ac77e7e926e23 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Tue, 5 Dec 2023 14:20:28 +0000 Subject: [PATCH 07/29] fix: requirements versioning --- bench_pytorch/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench_pytorch/requirements.txt b/bench_pytorch/requirements.txt index 3f76c56b..8068fb56 100644 --- a/bench_pytorch/requirements.txt +++ b/bench_pytorch/requirements.txt @@ -1,4 +1,4 @@ transformers==4.34.1 torch==2.0.0 -dataset==2.14.6 -accelerate==0.24.1 +dataset +accelerate From 769399a5b2a9da63c3d290f11009eed1cc5c706b Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Tue, 5 Dec 2023 14:21:08 +0000 Subject: [PATCH 08/29] minor fixes in setup script --- bench_pytorch/setup.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 bench_pytorch/setup.sh diff --git a/bench_pytorch/setup.sh b/bench_pytorch/setup.sh old mode 100644 new mode 100755 From 08735ec31cf5ef5486c9ed26759cd1b69a99a2be Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Tue, 5 Dec 2023 14:21:29 +0000 Subject: [PATCH 09/29] added transformers for cuda in fp16/32 --- docs/llama2.md.template | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/llama2.md.template b/docs/llama2.md.template 
index 0a0560b6..4ed84d77 100644 --- a/docs/llama2.md.template +++ b/docs/llama2.md.template @@ -8,15 +8,15 @@ - Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cuda --prompt 'Explain what is a transformer'` **Performance Metrics:** -| Engine | float32 | float16 | int8 | int4 | -|----------------------|--------------|---------------|---------------|---------------| -| burn | 13.12 ± 0.85 | - | - | - | -| candle | - | 36.78 ± 2.17 | - | - | -| llama.cpp | - | - | 84.48 ± 3.76 | 106.76 ± 1.29 | -| ctranslate | - | 51.38 ± 16.01 | 36.12 ± 11.93 | - | -| tinygrad | - | 20.32 ± 0.06 | - | - | -| onnx | - | 54.16 ± 3.15 | - | - | -| ctransformers | - | - | 81.61 ± 3.66 | 84.51 ± 7.93 | +| Engine | float32 | float16 | int8 | int4 | +|------------------------------|--------------|---------------|---------------|---------------| +| burn | 13.12 ± 0.85 | - | - | - | +| candle | - | 36.78 ± 2.17 | - | - | +| llama.cpp | - | - | 84.48 ± 3.76 | 106.76 ± 1.29 | +| ctranslate | - | 51.38 ± 16.01 | 36.12 ± 11.93 | - | +| tinygrad | - | 20.32 ± 0.06 | - | - | +| onnx | - | 54.16 ± 3.15 | - | - | +| transformers (pytorch) | 0.44 ± 0.44 | 0.44 ± 0.44 | - | - | *(Data updated: ``) From 027530af09afaa338aa1a2873bf2a901a8b2ec4d Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Wed, 6 Dec 2023 10:37:59 +0000 Subject: [PATCH 10/29] added llama2 results as none for mac devices --- docs/llama2.md.template | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/docs/llama2.md.template b/docs/llama2.md.template index 4ed84d77..fe9eb667 100644 --- a/docs/llama2.md.template +++ b/docs/llama2.md.template @@ -31,29 +31,31 @@ - Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device cpu --prompt 'Explain what is a transformer'` **Performance Metrics:** -| Engine | float32 | float16 | int8 | int4 | -|----------------------|--------------|--------------|--------------|--------------| -| burn | 0.30 ± 0.09 | - | - | - | 
-| candle | - | 3.43 ± 0.02 | - | - | -| llama.cpp | - | - | 14.41 ± 1.59 | 20.96 ± 1.94 | -| ctranslate | - | - | 2.11 ± 0.73 | - | -| tinygrad | - | 4.21 ± 0.38 | - | - | -| onnx | - | - | - | - | -| ctransformers | - | - | 13.79 ± 0.50 | 22.93 ± 0.86 | +| Engine | float32 | float16 | int8 | int4 | +|-----------------------|--------------|--------------|--------------|--------------| +| burn | 0.30 ± 0.09 | - | - | - | +| candle | - | 3.43 ± 0.02 | - | - | +| llama.cpp | - | - | 14.41 ± 1.59 | 20.96 ± 1.94 | +| ctranslate | - | - | 2.11 ± 0.73 | - | +| tinygrad | - | 4.21 ± 0.38 | - | - | +| onnx | - | - | - | - | +| ctransformers | - | - | 13.79 ± 0.50 | 22.93 ± 0.86 | +| transformers (pytorch)| - | - | - | - | ### GPU (Metal) **Command:** `./benchmark.sh --repetitions 10 --max_tokens 100 --device metal --prompt 'Explain what is a transformer'` **Performance Metrics:** -| Engine | float32 | float16 | int8 | int4 | -|----------------------|--------------|---------------|--------------|--------------| -| burn | - | - | - | - | -| candle | - | - | - | - | -| llama.cpp | - | - | 31.24 ± 7.82 | 46.75 ± 9.55 | -| ctranslate | - | - | - | - | -| tinygrad | - | 29.78 ± 1.18 | - | - | -| onnx | - | - | - | - | -| ctransformers | - | - | 21.24 ± 0.81 | 34.08 ± 4.78 | +| Engine | float32 | float16 | int8 | int4 | +|-----------------------|--------------|---------------|--------------|--------------| +| burn | - | - | - | - | +| candle | - | - | - | - | +| llama.cpp | - | - | 31.24 ± 7.82 | 46.75 ± 9.55 | +| ctranslate | - | - | - | - | +| tinygrad | - | 29.78 ± 1.18 | - | - | +| onnx | - | - | - | - | +| ctransformers | - | - | 21.24 ± 0.81 | 34.08 ± 4.78 | +| transformers (pytorch)| - | - | - | - | *(Data updated: ``) From b61612227f6c6e10014b440afa0e9e4accf00277 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Wed, 6 Dec 2023 10:38:48 +0000 Subject: [PATCH 11/29] added a note on benchmarking condition on mac using transformers (pytorch) --- docs/llama2.md | 2 ++ 1 file 
changed, 2 insertions(+) diff --git a/docs/llama2.md b/docs/llama2.md index 1128add4..3963bc8c 100644 --- a/docs/llama2.md +++ b/docs/llama2.md @@ -57,3 +57,5 @@ | ctransformers | - | - | 21.24 ± 0.81 | 34.08 ± 4.78 | *(Data updated: `02th December 2023`) + +*Note: Although benchmarking for pytorch transformers on mac is possible. But, we are not doing it, since it is very much time taking, and so makes it very less significant. From 96c87c516cb2bca98aee2dea060370b7a7d3fa02 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Wed, 6 Dec 2023 22:50:15 +0530 Subject: [PATCH 12/29] fix: change device from mps to metal. Co-authored-by: Nicola Sosio --- bench_pytorch/bench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index e41c814b..9c78682a 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -34,7 +34,7 @@ def __init__( assert precision in ["bf16", "fp16", "fp32"], ValueError( "Supported precisions are: 'bf16', fp16', 'fp32'" ) - assert device in ["cpu", "cuda", "mps"], ValueError( + assert device in ["cpu", "cuda", "metal"], ValueError( "Supported devices are: 'cpu', 'cuda', 'mps'" ) From 6794c1ad93d18d842bf95a1c1a0a6dad875a56b9 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Wed, 6 Dec 2023 22:51:13 +0530 Subject: [PATCH 13/29] change default value from cuda to cpu Co-authored-by: Nicola Sosio --- bench_pytorch/bench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index 9c78682a..e0c89d76 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -19,7 +19,7 @@ class LlamaPyTorchBenchmark: def __init__( - self, model_path: str, precision: str, device: Optional[str] = "cuda" + self, model_path: str, precision: str, device: Optional[str] = "cpu" ) -> None: self.model_path = model_path self.precision = precision From ea405adaf7f95524c5f0e7a73ac1f153a0af6160 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Wed, 
6 Dec 2023 22:52:03 +0530 Subject: [PATCH 14/29] fix: starting timer after tokenizer. Co-authored-by: Nicola Sosio --- bench_pytorch/bench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index e0c89d76..13414ead 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -59,10 +59,10 @@ def load_model(self): return self def run_model(self, prompt: str, max_tokens: int) -> float: - start = time.time() tokenized_input = self.tokenizer.encode(prompt, return_tensors="pt").to( self.device ) + start = time.time() output = ( self.model.generate(input_ids=tokenized_input, max_new_tokens=max_tokens) .detach() From ec8663f9de2f753eb2cf9adcdb54be83cac5f805 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 12:04:48 +0000 Subject: [PATCH 15/29] removed device argument, not required --- bench_pytorch/setup.sh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bench_pytorch/setup.sh b/bench_pytorch/setup.sh index 563e0815..6927184c 100755 --- a/bench_pytorch/setup.sh +++ b/bench_pytorch/setup.sh @@ -9,12 +9,6 @@ set -euo pipefail # Main script starts here. - -if [ "$#" -ne 1 ]; then - echo "Usage: $0 " - exit 1 -fi - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" VENV_DIR="$SCRIPT_DIR/venv" From 9f7b6ac4529354a21f7aac3b73e7c10b29f8de59 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 12:06:11 +0000 Subject: [PATCH 16/29] removed using device as argument for installation --- bench_pytorch/bench.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench_pytorch/bench.sh b/bench_pytorch/bench.sh index 0518c69a..2fa0c702 100755 --- a/bench_pytorch/bench.sh +++ b/bench_pytorch/bench.sh @@ -68,7 +68,7 @@ check_python() { setup() { echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..." 
- bash "$SCRIPT_DIR"/setup.sh "$1" + bash "$SCRIPT_DIR"/setup.sh } run_benchmarks() { @@ -147,5 +147,5 @@ MODELS_DIR="${MODELS_DIR:-"./models/llama-2-7b-hf"}" check_platform check_python -setup "$DEVICE" +setup run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR" From b449e86ad2c0e989af542040fd6c945a0db32a2c Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 12:08:01 +0000 Subject: [PATCH 17/29] changed python3 to python --- bench_pytorch/setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_pytorch/setup.sh b/bench_pytorch/setup.sh index 6927184c..2ed48ba0 100755 --- a/bench_pytorch/setup.sh +++ b/bench_pytorch/setup.sh @@ -13,7 +13,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" VENV_DIR="$SCRIPT_DIR/venv" if [ ! -d "$VENV_DIR" ]; then - python3 -m venv "$VENV_DIR" + python -m venv "$VENV_DIR" echo "Virtual environment '$VENV_DIR' created." # shellcheck disable=SC1091 source "$VENV_DIR/bin/activate" From df607ca52c85d7e6f10e3ed19740150240db9ab1 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 12:10:50 +0000 Subject: [PATCH 18/29] torch synchronize only if device set to cuda --- bench_pytorch/bench.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index 13414ead..0f022905 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -80,7 +80,8 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None: tokens_per_second = self.run_model(prompt, max_tokens) self.results.append(tokens_per_second) del self.model - torch.cuda.synchronize() + if self.device == "cuda": + torch.cuda.synchronize() if __name__ == "__main__": From 27bb50b3b409372195bdc93228ffb89415b61660 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 12:20:49 +0000 Subject: [PATCH 19/29] removed bf16 benchmarking --- bench_pytorch/bench.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index 0f022905..08145800 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -118,7 +118,7 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None: ) report = defaultdict(lambda: defaultdict(float)) - for precision in ("bf16", "fp16", "fp32") if args.device != "cpu" else ("fp32",): + for precision in ("fp16", "fp32") if args.device != "cpu" else ("fp32",): logging.info( f"Running Transformer benchmark (pytorch backend) on Llama with precision: {precision}" ) From 91946d4c2d97fd448c04e8cfe84fad7b579f05e8 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 12:21:11 +0000 Subject: [PATCH 20/29] removed datasets in requirements --- bench_pytorch/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/bench_pytorch/requirements.txt b/bench_pytorch/requirements.txt index 8068fb56..613d23dd 100644 --- a/bench_pytorch/requirements.txt +++ b/bench_pytorch/requirements.txt @@ -1,4 +1,3 @@ transformers==4.34.1 torch==2.0.0 -dataset accelerate From eded8920afdc295c11142c18f1a4dca5e9141a30 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 12:21:35 +0000 Subject: [PATCH 21/29] updated the benchmarking results for fp16 --- docs/llama2.md.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/llama2.md.template b/docs/llama2.md.template index fe9eb667..1e007744 100644 --- a/docs/llama2.md.template +++ b/docs/llama2.md.template @@ -16,7 +16,7 @@ | ctranslate | - | 51.38 ± 16.01 | 36.12 ± 11.93 | - | | tinygrad | - | 20.32 ± 0.06 | - | - | | onnx | - | 54.16 ± 3.15 | - | - | -| transformers (pytorch) | 0.44 ± 0.44 | 0.44 ± 0.44 | - | - | +| transformers (pytorch) | 0.40 ± 0.40 | 0.37 ± 0.37 | - | - | *(Data updated: ``) From aa488f9957e77530edb8dbe9c3318d348cc56d48 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 22:24:54 +0530 Subject: [PATCH 22/29] fix: output to tuple Co-authored-by: Nicola 
Sosio --- bench_pytorch/bench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index 08145800..50700558 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -70,7 +70,7 @@ def run_model(self, prompt: str, max_tokens: int) -> float: .numpy() ) delta = time.time() - start - return len(output) / delta + return len(output[0]) / delta def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None: for i in range(repetitions): From c5d84548601e5c5ffdfd8ae0d443d43a9c029ef8 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 22:25:36 +0530 Subject: [PATCH 23/29] remove Co-authored-by: Nicola Sosio --- bench_pytorch/setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_pytorch/setup.sh b/bench_pytorch/setup.sh index 2ed48ba0..6c231a5f 100755 --- a/bench_pytorch/setup.sh +++ b/bench_pytorch/setup.sh @@ -1,7 +1,7 @@ #!/bin/bash ################################################################################ -# Script: setup.sh +# Script: setup.sh # Description: Automates the setup of a virtual environment and installs project # requirements. ################################################################################ From f2cde7c72801edb8d0f713d1ff82be0ba66f567e Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Thu, 7 Dec 2023 22:25:49 +0530 Subject: [PATCH 24/29] fix: change device from mps to metal. 
Co-authored-by: Nicola Sosio --- bench_pytorch/bench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index 50700558..31a37d28 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -35,7 +35,7 @@ def __init__( "Supported precisions are: 'bf16', fp16', 'fp32'" ) assert device in ["cpu", "cuda", "metal"], ValueError( - "Supported devices are: 'cpu', 'cuda', 'mps'" + "Supported devices are: 'cpu', 'cuda', 'metal'" ) if device == "cpu" and precision != "fp32": From 9c19b860620f0e49bbe3d6cb8f2b7d8c21c9973d Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Fri, 8 Dec 2023 00:23:45 +0530 Subject: [PATCH 25/29] Refactor: Change in models dir path. - in bench.sh: changed from models/llama2-7b-hf to models. - in bench.py: take the models_dir and add llama2-7b-hf by default. --- bench_pytorch/bench.py | 4 +++- bench_pytorch/bench.sh | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index 31a37d28..9aeb306e 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -123,7 +123,9 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None: f"Running Transformer benchmark (pytorch backend) on Llama with precision: {precision}" ) llama_transformers_pytorch_benchmark = LlamaPyTorchBenchmark( - model_path=args.models_dir, device=args.device, precision=precision + model_path=f"{args.models_dir}/llama2-7b-hf", + device=args.device, + precision=precision, ).load_model() llama_transformers_pytorch_benchmark.benchmark( max_tokens=args.max_tokens, prompt=args.prompt, repetitions=args.repetitions diff --git a/bench_pytorch/bench.sh b/bench_pytorch/bench.sh index 2fa0c702..cfd10d13 100755 --- a/bench_pytorch/bench.sh +++ b/bench_pytorch/bench.sh @@ -143,7 +143,7 @@ REPETITIONS="${REPETITIONS:-10}" MAX_TOKENS="${MAX_TOKENS:-100}" DEVICE="${DEVICE:-'cpu'}" LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date 
+'%Y%m%d%H%M%S').log"}" -MODELS_DIR="${MODELS_DIR:-"./models/llama-2-7b-hf"}" +MODELS_DIR="${MODELS_DIR:-"./models/"}" check_platform check_python From d1d18278903055512f51e43eecb037babf037d4b Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Fri, 8 Dec 2023 07:10:13 +0000 Subject: [PATCH 26/29] fix typo in path name --- bench_pytorch/bench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_pytorch/bench.py b/bench_pytorch/bench.py index 9aeb306e..9ae119ba 100644 --- a/bench_pytorch/bench.py +++ b/bench_pytorch/bench.py @@ -123,7 +123,7 @@ def benchmark(self, prompt: str, max_tokens: int, repetitions: int) -> None: f"Running Transformer benchmark (pytorch backend) on Llama with precision: {precision}" ) llama_transformers_pytorch_benchmark = LlamaPyTorchBenchmark( - model_path=f"{args.models_dir}/llama2-7b-hf", + model_path=f"{args.models_dir}/llama-2-7b-hf", device=args.device, precision=precision, ).load_model() From ac503db8c0d4d20d92c3f8fb1077fcace7334be2 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Fri, 8 Dec 2023 07:11:00 +0000 Subject: [PATCH 27/29] fix: models dir path --- bench_pytorch/bench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_pytorch/bench.sh b/bench_pytorch/bench.sh index cfd10d13..876793af 100755 --- a/bench_pytorch/bench.sh +++ b/bench_pytorch/bench.sh @@ -143,7 +143,7 @@ REPETITIONS="${REPETITIONS:-10}" MAX_TOKENS="${MAX_TOKENS:-100}" DEVICE="${DEVICE:-'cpu'}" LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}" -MODELS_DIR="${MODELS_DIR:-"./models/"}" +MODELS_DIR="${MODELS_DIR:-"./models"}" check_platform check_python From cbbb3f25a9e6236ed7949d16323a6b45e8174923 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Fri, 8 Dec 2023 07:14:12 +0000 Subject: [PATCH 28/29] fixed the benchmark scores for pytorch transformers gpu --- docs/llama2.md.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/llama2.md.template b/docs/llama2.md.template index 
1e007744..ef8358ac 100644 --- a/docs/llama2.md.template +++ b/docs/llama2.md.template @@ -16,7 +16,7 @@ | ctranslate | - | 51.38 ± 16.01 | 36.12 ± 11.93 | - | | tinygrad | - | 20.32 ± 0.06 | - | - | | onnx | - | 54.16 ± 3.15 | - | - | -| transformers (pytorch) | 0.40 ± 0.40 | 0.37 ± 0.37 | - | - | +| transformers (pytorch) | 46.44 ± 46.44| 42.56 ± 42.56 | - | - | *(Data updated: ``) From db18c556b9c4599f7d0a56b6771862ebee6a5215 Mon Sep 17 00:00:00 2001 From: Anindyadeep Date: Fri, 8 Dec 2023 14:27:20 +0530 Subject: [PATCH 29/29] replaced pytorch cpu performance note from llama2 to llama2_template --- docs/llama2.md | 2 -- docs/llama2.md.template | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/llama2.md b/docs/llama2.md index 3963bc8c..1128add4 100644 --- a/docs/llama2.md +++ b/docs/llama2.md @@ -57,5 +57,3 @@ | ctransformers | - | - | 21.24 ± 0.81 | 34.08 ± 4.78 | *(Data updated: `02th December 2023`) - -*Note: Although benchmarking for pytorch transformers on mac is possible. But, we are not doing it, since it is very much time taking, and so makes it very less significant. diff --git a/docs/llama2.md.template b/docs/llama2.md.template index 1e007744..c72c4b39 100644 --- a/docs/llama2.md.template +++ b/docs/llama2.md.template @@ -59,3 +59,5 @@ | transformers (pytorch)| - | - | - | - | *(Data updated: ``) + +*Note: Although benchmarking PyTorch transformers on Mac is possible, we are not doing it because it is very time-consuming, which makes the results much less significant.