From 8e0aedf3fff690fad74e4c0d194ba792a27b981b Mon Sep 17 00:00:00 2001 From: nsosio Date: Thu, 16 Nov 2023 12:15:18 +0100 Subject: [PATCH 1/5] added linter --- .pre-commit-config.yaml | 43 +++++++++++ NOTES.md | 2 +- README.md | 2 +- bench.py | 11 +-- benchmark.sh | 10 +-- convert_to_safetensors.py | 7 +- download.sh | 6 +- models.json | 2 +- python_bench/benchmark.py | 3 +- python_bench/ctranslate.py | 2 +- python_bench/llama_cpp.py | 6 +- python_bench/tinygrad.py | 51 ++++++------- requirements.txt | 4 +- rust_bench/llama2-burn/README.md | 8 +- rust_bench/llama2-burn/llama-py/dump.py | 72 ++++++++++-------- rust_bench/llama2-burn/llama-py/dump_model.py | 72 ++++++++++++------ rust_bench/llama2-burn/llama-py/dump_test.py | 26 ++++--- rust_bench/llama2-burn/llama-py/model.py | 49 +++++-------- .../llama2-burn/llama-py/requirements.txt | 2 +- rust_bench/llama2-burn/llama-py/test.py | 73 +++++++++++-------- .../llama2-burn/llama-py/test_tokenizer.py | 4 +- rust_bench/llama2-burn/llama-py/tokenizer.py | 14 ++-- rust_bench/llama2-candle/Cargo.toml | 2 +- setup.cfg | 3 + setup.sh | 4 +- 25 files changed, 286 insertions(+), 192 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 setup.cfg diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..fc065c44 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,43 @@ +default_stages: [commit] + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: end-of-file-fixer + - id: check-toml + - id: check-xml + - id: debug-statements + - id: check-builtin-literals + - id: check-case-conflict + + - repo: https://github.com/asottile/pyupgrade + rev: v3.15.0 + hooks: + - id: pyupgrade + args: [--py311-plus] + + - repo: https://github.com/psf/black + rev: 23.11.0 + hooks: + - id: black + + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + + - repo: https://github.com/PyCQA/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + args: ["--config=setup.cfg"] + additional_dependencies: [flake8-isort] + +ci: + autoupdate_schedule: weekly + skip: [] + submodules: false diff --git a/NOTES.md b/NOTES.md index 92a1ac6f..c04443ee 100644 --- a/NOTES.md +++ b/NOTES.md @@ -14,7 +14,7 @@ Currently working on requirement and understanding different project constraints ## Early Investigation The overall investigation assumes PyTorch as the performance base and all relevant understand should be built in context of that, -the specific benchmark might not be directly comparable to each other but it should provide a rough picture of the state of +the specific benchmark might not be directly comparable to each other but it should provide a rough picture of the state of open source ML framework performance. This generally is to port, add and support new things into burn and other platforms. 
diff --git a/README.md b/README.md index 77294d97..6e12d2cf 100644 --- a/README.md +++ b/README.md @@ -115,4 +115,4 @@ Command: `./benchmark.sh --repetitions 10 --max_tokens 100 --device gpu --prompt | ctranslate | - | - | - | - | | tinygrad | - | 29.78 ± 1.18 | - | - | -*(data updated: 15th November 2023) \ No newline at end of file +*(data updated: 15th November 2023) diff --git a/bench.py b/bench.py index ee9ceb3b..ca316af5 100644 --- a/bench.py +++ b/bench.py @@ -1,12 +1,12 @@ import argparse -from collections import defaultdict import logging import sys +from collections import defaultdict import numpy as np -from python_bench.llama_cpp import LlamaCPPBenchmark from python_bench.ctranslate import CTranslateBenchmark, get_compute_types +from python_bench.llama_cpp import LlamaCPPBenchmark from python_bench.tinygrad import TinyGradBenchmark logging.basicConfig( @@ -53,7 +53,8 @@ args = parser.parse_args() logging.info( - f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} repetitions={args.repetitions} gpu={args.gpu} nvidia={args.gpu}" + f"Running benchmark with: max_tokens={args.max_tokens} prompt={args.prompt} " + + f"repetitions={args.repetitions} gpu={args.gpu} nvidia={args.gpu}" ) report = defaultdict(lambda: defaultdict(float)) for quantize in ("Q8_0", "Q4_0"): @@ -74,7 +75,7 @@ for compute_type in compute_types.intersection({"float16", "int8"}): logging.info(f"Running ctranslate benchmark with {compute_type}") ctranslate_bench = CTranslateBenchmark( - f"./models/llama-2-7b-hf-float16", + "./models/llama-2-7b-hf-float16", gpu=args.gpu, compute_type=compute_type, ).load_model() @@ -86,7 +87,7 @@ "std": np.std(ctranslate_bench.results), } - logging.info(f"Running tinygrad benchmark") + logging.info("Running tinygrad benchmark") tinygrad_bench = TinyGradBenchmark( "./models/llama-2-7b-hf", quantize=False, diff --git a/benchmark.sh b/benchmark.sh index 2e5edb18..d2ca1d3c 100755 --- a/benchmark.sh +++ b/benchmark.sh @@ -2,8 +2,8 @@ ############################################################################################## # Script: run_benchmarks.sh -# Description: This script runs benchmarks for a transformer model using both -# Rust and Python implementations. It provides options to customize the +# Description: This script runs benchmarks for a transformer model using both +# Rust and Python implementations. It provides options to customize the # benchmarks, such as the prompt, repetitions, maximum tokens, device, and NVIDIA flag. 
# # Usage: ./run_benchmarks.sh [OPTIONS] @@ -150,8 +150,8 @@ run_benchmarks() { --prompt "$PROMPT" \ --sample-len $MAX_TOKENS \ --log-file $LOG_FILENAME - fi - + fi + # Set options based on $DEVICE and $USE_NVIDIA [ "$DEVICE" == "gpu" ] && PYTHON_DEVICE="--gpu" [ "$USE_NVIDIA" == true ] && PYTHON_NVIDIA="--nvidia" @@ -235,4 +235,4 @@ check_rust check_jq download_models setup -run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" $USE_NVIDIA "$log_filename" \ No newline at end of file +run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" $USE_NVIDIA "$log_filename" diff --git a/convert_to_safetensors.py b/convert_to_safetensors.py index a4ff3e28..6cd2fddf 100644 --- a/convert_to_safetensors.py +++ b/convert_to_safetensors.py @@ -1,9 +1,8 @@ import argparse -import os import logging -from collections import defaultdict -from typing import List +import os import shutil +from collections import defaultdict import torch from safetensors.torch import load_file, save_file @@ -80,7 +79,7 @@ def convert_file(pt_filename: str, sf_filename: str): raise RuntimeError(f"The output tensors do not match for key {k}") -def convert_multi(input_dir: str, output_dir: str) -> List[str]: +def convert_multi(input_dir: str, output_dir: str) -> list[str]: if os.path.exists(output_dir): logging.warning(f"{output_dir} already exists!") return [] diff --git a/download.sh b/download.sh index b12b5018..85d31847 100755 --- a/download.sh +++ b/download.sh @@ -2,9 +2,9 @@ ################################################################################ # Script: download.sh -# Description: Downloads files from a list of URLs specified in a JSON file. -# The JSON file should contain an array of objects, each with a 'url', 'file', -# and 'folder' property. The script checks if the file already exists before +# Description: Downloads files from a list of URLs specified in a JSON file. +# The JSON file should contain an array of objects, each with a 'url', 'file', +# and 'folder' property. The script checks if the file already exists before # downloading it. 
# # Usage: ./download.sh --models --cache --force-download diff --git a/models.json b/models.json index da726b39..8058fc09 100644 --- a/models.json +++ b/models.json @@ -19,4 +19,4 @@ "file": "llama-2-7b-raw.zip", "folder": "./models/llama-2-7b-raw" } -] \ No newline at end of file +] diff --git a/python_bench/benchmark.py b/python_bench/benchmark.py index ff808bbe..efbbeba2 100644 --- a/python_bench/benchmark.py +++ b/python_bench/benchmark.py @@ -1,6 +1,7 @@ from __future__ import annotations -from abc import ABC, abstractmethod + import logging +from abc import ABC, abstractmethod logger = logging.getLogger(__name__) diff --git a/python_bench/ctranslate.py b/python_bench/ctranslate.py index 5dce718a..37cc10b3 100644 --- a/python_bench/ctranslate.py +++ b/python_bench/ctranslate.py @@ -1,5 +1,5 @@ -import os import logging +import os import time import ctranslate2 diff --git a/python_bench/llama_cpp.py b/python_bench/llama_cpp.py index e5d72e3e..f7c1365c 100644 --- a/python_bench/llama_cpp.py +++ b/python_bench/llama_cpp.py @@ -1,8 +1,10 @@ -import time import logging -from python_bench.benchmark import Benchmark +import time + from llama_cpp import Llama +from python_bench.benchmark import Benchmark + logging.getLogger("llama_cpp").setLevel(logging.ERROR) diff --git a/python_bench/tinygrad.py b/python_bench/tinygrad.py index b8c9e666..b238ffdd 100644 --- a/python_bench/tinygrad.py +++ b/python_bench/tinygrad.py @@ -1,21 +1,16 @@ +import json +import logging import os +import time from pathlib import Path -import json + import numpy as np -from typing import Optional, Tuple, Union -from tinygrad.shape.symbolic import Variable -from tinygrad.jit import TinyJit, JIT_SUPPORTED_DEVICE -from tinygrad.nn.state import safe_load, torch_load, load_state_dict +from tinygrad.helpers import CI, dtypes, getenv +from tinygrad.jit import JIT_SUPPORTED_DEVICE, TinyJit from tinygrad.nn import Embedding, Linear +from tinygrad.nn.state import load_state_dict, safe_load, torch_load +from tinygrad.shape.symbolic import Variable from tinygrad.tensor import Tensor -from tinygrad.helpers import getenv, dtypes, CI -from typing import Optional, Tuple -from pathlib import Path -import json -import time -import numpy as np -from pathlib import Path -import logging from python_bench.benchmark import Benchmark @@ -43,7 +38,7 @@ def complex_mult(A, c, d): return ro.cat(co, dim=-1) -def apply_rotary_emb(xq, xk, freqs_cis) -> Tuple[Tensor, Tensor]: +def apply_rotary_emb(xq, xk, freqs_cis) -> tuple[Tensor, Tensor]: assert ( freqs_cis.shape[1] == xq.shape[1] and freqs_cis.shape[1] == xk.shape[1] ), f"freqs_cis shape mismatch {freqs_cis.shape} xq:{xq.shape} xk:{xk.shape}" @@ -95,9 +90,9 @@ def __init__(self, dim, n_heads, n_kv_heads, linear=Linear): def __call__( self, x: Tensor, - start_pos: Union[Variable, int], + start_pos: Variable | int, freqs_cis: Tensor, - mask: Optional[Tensor], + mask: Tensor | None, ) -> Tensor: xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) xq = xq.reshape(xq.shape[0], xq.shape[1], self.n_heads, self.head_dim) @@ -181,9 +176,9 @@ def __init__( def __call__( self, x: Tensor, - start_pos: Union[Variable, int], + start_pos: Variable | int, freqs_cis: Tensor, - mask: Optional[Tensor], + mask: Tensor | None, ): h = x + self.attention(self.attention_norm(x), start_pos, freqs_cis, mask) return (h + self.feed_forward(self.ffn_norm(h))).realize() @@ -228,7 +223,7 @@ def __init__( self.forward_jit = TinyJit(self.forward) def forward( - self, tokens: Tensor, start_pos: Union[Variable, int], temperature: 
float = 0.0 + self, tokens: Tensor, start_pos: Variable | int, temperature: float = 0.0 ): _bsz, seqlen = tokens.shape freqs_cis = self.freqs_cis.shrink( @@ -513,22 +508,22 @@ def convert_from_huggingface(weights, model): keymap = { "model.embed_tokens.weight": "tok_embeddings.weight", **{ - f"model.layers.{l}.input_layernorm.weight": f"layers.{l}.attention_norm.weight" - for l in range(len(model.layers)) + f"model.layers.{layer}.input_layernorm.weight": f"layers.{layer}.attention_norm.weight" + for layer in range(len(model.layers)) }, **{ - f"model.layers.{l}.self_attn.{x}_proj.weight": f"layers.{l}.attention.w{x}.weight" + f"model.layers.{layer}.self_attn.{x}_proj.weight": f"layers.{layer}.attention.w{x}.weight" for x in ["q", "k", "v", "o"] - for l in range(len(model.layers)) + for layer in range(len(model.layers)) }, **{ - f"model.layers.{l}.post_attention_layernorm.weight": f"layers.{l}.ffn_norm.weight" - for l in range(len(model.layers)) + f"model.layers.{layer}.post_attention_layernorm.weight": f"layers.{layer}.ffn_norm.weight" + for layer in range(len(model.layers)) }, **{ - f"model.layers.{l}.mlp.{x}_proj.weight": f"layers.{l}.feed_forward.w{y}.weight" + f"model.layers.{layer}.mlp.{x}_proj.weight": f"layers.{layer}.feed_forward.w{y}.weight" for x, y in {"gate": "1", "down": "2", "up": "3"}.items() - for l in range(len(model.layers)) + for layer in range(len(model.layers)) }, "model.norm.weight": "norm.weight", "lm_head.weight": "output.weight", @@ -538,7 +533,7 @@ def convert_from_huggingface(weights, model): class AbsmaxQuantizedLinear: def __init__(self, in_features, out_features, bias=False): - assert bias == False + assert not bias self.weight = Tensor.ones(out_features, in_features, dtype=dtypes.int8) self.scale = Tensor.ones(out_features, dtype=dtypes.half) diff --git a/requirements.txt b/requirements.txt index 1526cdbd..b37ebfcc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ llama_cpp_python==0.2.15 sentencepiece==0.1.99 ctranslate2==3.20.0 -huggingface-hub==0.17.3 +huggingface-hub==0.17.3 transformers==4.35.0 torch==2.1.0 # Using fixed commit (a72b3700) for tinygrad to ensure stability in benchmarking. # Helps maintain reproducibility and guards against potential breaking changes. -git+https://github.com/tinygrad/tinygrad.git@a72b370066837af5b4d44eeb5c4fb30aebf5c502 \ No newline at end of file +git+https://github.com/tinygrad/tinygrad.git@a72b370066837af5b4d44eeb5c4fb30aebf5c502 diff --git a/rust_bench/llama2-burn/README.md b/rust_bench/llama2-burn/README.md index 77a3b5ef..09448b06 100644 --- a/rust_bench/llama2-burn/README.md +++ b/rust_bench/llama2-burn/README.md @@ -61,7 +61,7 @@ python3 dump_model.py ``` Example: `python3 dump_model.py llama2-7b-chat tokenizer.model` -3. **Test the Tokenizer**: Finally, run the `test_tokenizer.py` script to load the tokenizer.model file and verify an example encoding and decoding. This script should be run in the same directory as the tokenizer file. Execute this script using the command: +3. **Test the Tokenizer**: Finally, run the `test_tokenizer.py` script to load the tokenizer.model file and verify an example encoding and decoding. This script should be run in the same directory as the tokenizer file. Execute this script using the command: ``` python3 test_tokenizer.py ``` @@ -70,7 +70,7 @@ python3 test_tokenizer.py Inside the 'src/bin' folder, you will find Rust binaries: `convert`, `sample`, and `test`. -1. **Converting Dumped Weights**: The `convert` binary converts dumped weights into burn's model format. 
It saves them for further use. Execute this using the following command: +1. **Converting Dumped Weights**: The `convert` binary converts dumped weights into burn's model format. It saves them for further use. Execute this using the following command: ``` cargo run --bin convert ``` @@ -82,11 +82,11 @@ cargo run --bin test ``` Example: `cargo run --release --bin test tokenizer.model params` -3. **Sampling Text**: The `sample` binary loads the converted burn model file and generates a sample output based on an input prompt. The model can run on either the cpu or gpu. Execute this using the following command: +3. **Sampling Text**: The `sample` binary loads the converted burn model file and generates a sample output based on an input prompt. The model can run on either the cpu or gpu. Execute this using the following command: ``` cargo run --bin sample ``` -Example: +Example: ``` #export TORCH_CUDA_VERSION=cu113 # if running on gpu cargo run --release --bin sample llama2-7b-chat tokenizer.model "Hello, I am " 10 cpu diff --git a/rust_bench/llama2-burn/llama-py/dump.py b/rust_bench/llama2-burn/llama-py/dump.py index 072cd1b2..f758084e 100644 --- a/rust_bench/llama2-burn/llama-py/dump.py +++ b/rust_bench/llama2-burn/llama-py/dump.py @@ -1,73 +1,83 @@ import pathlib -import torch + import numpy as np +import torch -import model def save_scalar(s, name, path): s = np.array([1.0, float(s)]).astype(np.float32) - np.save(pathlib.Path(path, f'{name}.npy'), s) + np.save(pathlib.Path(path, f"{name}.npy"), s) + def save_tensor(tensor, name, path): tensor_numpy = tensor.numpy() tensor_dims = np.array(tensor_numpy.shape) tensor_values = tensor_numpy.flatten() tensor_to_save = np.concatenate((tensor_dims, tensor_values)).astype(np.float32) - np.save(pathlib.Path(path, f'{name}.npy'), tensor_to_save) + np.save(pathlib.Path(path, f"{name}.npy"), tensor_to_save) + def save_linear(linear, path): pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_tensor(linear.weight.t(), 'weight', path) # PyTorch and Tinygrad strangely transpose linear weights so reverse that + save_tensor( + linear.weight.t(), "weight", path + ) # PyTorch and Tinygrad strangely transpose linear weights so reverse that if linear.bias is not None: - save_tensor(linear.bias, 'bias', path) - + save_tensor(linear.bias, "bias", path) def save_rmsnorm(norm, path): pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_tensor(norm.weight, 'weight', path) - save_scalar(norm.eps, 'eps', path) + save_tensor(norm.weight, "weight", path) + save_scalar(norm.eps, "eps", path) + def save_attention(attention, path): pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_linear(attention.wq, pathlib.Path(path, 'wq')) - save_linear(attention.wk, pathlib.Path(path, 'wk')) - save_linear(attention.wv, pathlib.Path(path, 'wv')) - save_linear(attention.wo, pathlib.Path(path, 'wo')) + save_linear(attention.wq, pathlib.Path(path, "wq")) + save_linear(attention.wk, pathlib.Path(path, "wk")) + save_linear(attention.wv, pathlib.Path(path, "wv")) + save_linear(attention.wo, pathlib.Path(path, "wo")) n_kv_head = attention.n_kv_heads n_head = n_kv_head * attention.n_rep save_scalar(n_head, "n_head", path) save_scalar(n_kv_head, "n_kv_head", path) + def save_feedforward(feed_forward, path): pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_linear(feed_forward.w1, pathlib.Path(path, 'w1')) - save_linear(feed_forward.w2, pathlib.Path(path, 'w2')) - save_linear(feed_forward.w3, pathlib.Path(path, 'w3')) + save_linear(feed_forward.w1, 
pathlib.Path(path, "w1")) + save_linear(feed_forward.w2, pathlib.Path(path, "w2")) + save_linear(feed_forward.w3, pathlib.Path(path, "w3")) + def save_embedding(embedding, path): pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_tensor(embedding.weight, 'weight', path) + save_tensor(embedding.weight, "weight", path) + def save_transformer_block(transformer_block, path): pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_attention(transformer_block.attention, pathlib.Path(path, 'attention')) - save_feedforward(transformer_block.feed_forward, pathlib.Path(path, 'feedforward')) - save_rmsnorm(transformer_block.attention_norm, pathlib.Path(path, 'attention_norm')) - save_rmsnorm(transformer_block.ffn_norm, pathlib.Path(path, 'ffn_norm')) + save_attention(transformer_block.attention, pathlib.Path(path, "attention")) + save_feedforward(transformer_block.feed_forward, pathlib.Path(path, "feedforward")) + save_rmsnorm(transformer_block.attention_norm, pathlib.Path(path, "attention_norm")) + save_rmsnorm(transformer_block.ffn_norm, pathlib.Path(path, "ffn_norm")) + def save_transformer(transformer, path): with torch.no_grad(): pathlib.Path(path).mkdir(parents=True, exist_ok=True) - save_scalar(len(transformer.layers), 'n_layer', path) + save_scalar(len(transformer.layers), "n_layer", path) for idx, layer in enumerate(transformer.layers): - save_transformer_block(layer, pathlib.Path(path, f'layer{idx}')) - save_rmsnorm(transformer.norm, pathlib.Path(path, 'norm')) - save_embedding(transformer.tok_embeddings, pathlib.Path(path, 'tok_embeddings')) - save_linear(transformer.output, pathlib.Path(path, 'output')) - save_scalar(10000.0, 'theta', path) - save_scalar(transformer.params.max_seq_len, 'n_ctx', path) - save_scalar(transformer.params.multiple_of, 'multiple_of', path) + save_transformer_block(layer, pathlib.Path(path, f"layer{idx}")) + save_rmsnorm(transformer.norm, pathlib.Path(path, "norm")) + save_embedding(transformer.tok_embeddings, pathlib.Path(path, "tok_embeddings")) + save_linear(transformer.output, pathlib.Path(path, "output")) + save_scalar(10000.0, "theta", path) + save_scalar(transformer.params.max_seq_len, "n_ctx", path) + save_scalar(transformer.params.multiple_of, "multiple_of", path) if transformer.params.ffn_dim_multiplier is not None: - save_scalar(transformer.params.ffn_dim_multiplier, 'ffn_dim_multiplier', path) - #save_tensor(transformer.freqs_cis, 'freqs_cis', path) + save_scalar( + transformer.params.ffn_dim_multiplier, "ffn_dim_multiplier", path + ) + # save_tensor(transformer.freqs_cis, 'freqs_cis', path) diff --git a/rust_bench/llama2-burn/llama-py/dump_model.py b/rust_bench/llama2-burn/llama-py/dump_model.py index 816dcaa6..3cfcad6a 100644 --- a/rust_bench/llama2-burn/llama-py/dump_model.py +++ b/rust_bench/llama2-burn/llama-py/dump_model.py @@ -1,28 +1,31 @@ -import torch -from pathlib import Path -import json import argparse +import json import logging import sys +from pathlib import Path import dump -from model import Transformer, ModelArgs import tokenizer +import torch +from model import ModelArgs, Transformer # Configure logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) logger = logging.getLogger(__name__) + def load_model(model_dir): tok = tokenizer.Tokenizer(model_path=str(model_dir / "tokenizer.model")) checkpoints = sorted((model_dir).glob("*.pth")) if len(checkpoints) == 0: raise 
ValueError(f"No checkpoint files found in {model_dir}") - + weights = [torch.load(filename, map_location="cpu") for filename in checkpoints] - with open(model_dir / "params.json", "r") as f: + with open(model_dir / "params.json") as f: params = json.loads(f.read()) - + model_args: ModelArgs = ModelArgs( max_batch_size=1, **params, @@ -31,7 +34,7 @@ def load_model(model_dir): model = Transformer(model_args) model.load_state_dict(concat_weights(weights), strict=False) model.max_seq_len = model.tok_embeddings.weight.shape[0] - logger.info('Loaded model') + logger.info("Loaded model") return model @@ -41,15 +44,35 @@ def convert(name) -> torch.Tensor: disk_tensors = [model[name] for model in models] if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: return disk_tensors[0] - axis = 1 if name.startswith('tok_embeddings.') or name.endswith('.attention.wo.weight') or name.endswith('.feed_forward.w2.weight') else 0 + axis = ( + 1 + if name.startswith("tok_embeddings.") + or name.endswith(".attention.wo.weight") + or name.endswith(".feed_forward.w2.weight") + else 0 + ) return disk_tensors[0].cat(*disk_tensors[1:], dim=axis) - return {name: convert(name) for name in {name: None for model in models for name in model}} + + return { + name: convert(name) + for name in {name: None for model in models for name in model} + } if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Load and dump transformer model.') - parser.add_argument('--model-dir', type=Path, required=True, help='Path to the directory containing the model checkpoints') - parser.add_argument('--output-dir', type=Path, required=True, help='Path to the directory where to dump the model.') + parser = argparse.ArgumentParser(description="Load and dump transformer model.") + parser.add_argument( + "--model-dir", + type=Path, + required=True, + help="Path to the directory containing the model checkpoints", + ) + parser.add_argument( + "--output-dir", + type=Path, + required=True, + help="Path to the directory where to dump the model.", + ) args = parser.parse_args() @@ -59,21 +82,28 @@ def convert(name) -> torch.Tensor: # Check if the model-dir/params directory already exists params_dir = output_dir / "params" if params_dir.is_dir(): - logger.info(f"The {params_dir} directory already exists. Model dump will not be performed.") + logger.info( + f"The {params_dir} directory already exists. Model dump will not be performed." 
+ ) sys.exit(0) # Check that the model dir contains the required files - if not (model_dir / "params.json").is_file() or not (model_dir / "tokenizer.model").is_file() or not any(model_dir.glob("*.pth")): - logger.error("The model directory must contain params.json, tokenizer.model, and at least one .pth file") + if ( + not (model_dir / "params.json").is_file() + or not (model_dir / "tokenizer.model").is_file() + or not any(model_dir.glob("*.pth")) + ): + logger.error( + "The model directory must contain params.json, tokenizer.model, and at least one .pth file" + ) sys.exit(1) - try: logger.info(f"Loading model from {model_dir}") llama = load_model(model_dir) - logger.info('Dumping model...') + logger.info("Dumping model...") dump.save_transformer(llama, params_dir) - logger.info(f'Dump saved in {params_dir} folder.') + logger.info(f"Dump saved in {params_dir} folder.") except Exception as e: - logger.error(f"An error occurred: {e}") \ No newline at end of file + logger.error(f"An error occurred: {e}") diff --git a/rust_bench/llama2-burn/llama-py/dump_test.py b/rust_bench/llama2-burn/llama-py/dump_test.py index a8b4b3a4..d8a6f73f 100644 --- a/rust_bench/llama2-burn/llama-py/dump_test.py +++ b/rust_bench/llama2-burn/llama-py/dump_test.py @@ -1,7 +1,6 @@ -import torch import dump -import model -from model import Transformer, ModelArgs +import torch +from model import ModelArgs, Transformer if __name__ == "__main__": n_vocab = 10 @@ -14,18 +13,25 @@ norm_eps = 1e-6 max_batch_size = 1 - model_args = ModelArgs(dim=n_state, n_layers=n_layer, n_heads=n_head, n_kv_heads=n_kv_head, - vocab_size=n_vocab, multiple_of=multiple_of, norm_eps=norm_eps, - max_batch_size=max_batch_size) + model_args = ModelArgs( + dim=n_state, + n_layers=n_layer, + n_heads=n_head, + n_kv_heads=n_kv_head, + vocab_size=n_vocab, + multiple_of=multiple_of, + norm_eps=norm_eps, + max_batch_size=max_batch_size, + ) llama = Transformer(model_args) with torch.no_grad(): tokens = torch.tensor([0, 2, 1], dtype=torch.int32).unsqueeze(0) output = llama(tokens, 0) - print(f'Test input {tokens.numpy()}') - print(f'Test output {output.numpy()}') + print(f"Test input {tokens.numpy()}") + print(f"Test output {output.numpy()}") - print('Dumping test model...') + print("Dumping test model...") dump.save_transformer(llama, "params") - print('Dump saved in params folder.') \ No newline at end of file + print("Dump saved in params folder.") diff --git a/rust_bench/llama2-burn/llama-py/model.py b/rust_bench/llama2-burn/llama-py/model.py index d63ff075..562ef0b9 100644 --- a/rust_bench/llama2-burn/llama-py/model.py +++ b/rust_bench/llama2-burn/llama-py/model.py @@ -1,18 +1,16 @@ # This file is adapted from the LLama project: # https://github.com/facebookresearch/llama/blob/main/llama/model.py -# Original LLama code by Facebook AI Research +# Original LLama code by Facebook AI Research # Adapted by Gadersd import math from dataclasses import dataclass -from typing import Any, Optional, Tuple import torch import torch.nn.functional as F from torch import nn from torch.nn import Embedding, Linear -from torch import Tensor @dataclass @@ -20,10 +18,10 @@ class ModelArgs: dim: int = 4096 n_layers: int = 32 n_heads: int = 32 - n_kv_heads: Optional[int] = None + n_kv_heads: int | None = None vocab_size: int = -1 # defined later by tokenizer multiple_of: int = 256 # make SwiGLU hidden layer size multiple of large power of 2 - ffn_dim_multiplier: Optional[float] = None + ffn_dim_multiplier: float | None = None norm_eps: float = 1e-5 max_batch_size: int = 32 
@@ -64,7 +62,7 @@ def apply_rotary_emb( xq: torch.Tensor, xk: torch.Tensor, freqs_cis: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor]: xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2)) xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2)) freqs_cis = reshape_for_broadcast(freqs_cis, xq_) @@ -89,7 +87,7 @@ class Attention(nn.Module): def __init__(self, args: ModelArgs): super().__init__() self.n_kv_heads = args.n_heads if args.n_kv_heads is None else args.n_kv_heads - model_parallel_size = 1#fs_init.get_model_parallel_world_size() + model_parallel_size = 1 # fs_init.get_model_parallel_world_size() self.n_local_heads = args.n_heads // model_parallel_size self.n_local_kv_heads = self.n_kv_heads // model_parallel_size self.n_rep = self.n_local_heads // self.n_local_kv_heads @@ -138,7 +136,7 @@ def forward( x: torch.Tensor, start_pos: int, freqs_cis: torch.Tensor, - mask: Optional[torch.Tensor], + mask: torch.Tensor | None, ): bsz, seqlen, _ = x.shape xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) @@ -152,8 +150,8 @@ def forward( self.cache_k = self.cache_k.to(xq) self.cache_v = self.cache_v.to(xq) - self.cache_k[:bsz, start_pos : start_pos + seqlen] = xk - self.cache_v[:bsz, start_pos : start_pos + seqlen] = xv + self.cache_k[:bsz, start_pos : start_pos + seqlen] = xk # noqa + self.cache_v[:bsz, start_pos : start_pos + seqlen] = xv # noqa keys = self.cache_k[:bsz, : start_pos + seqlen] values = self.cache_v[:bsz, : start_pos + seqlen] @@ -180,7 +178,7 @@ def __init__( dim: int, hidden_dim: int, multiple_of: int, - ffn_dim_multiplier: Optional[float], + ffn_dim_multiplier: float | None, ): super().__init__() hidden_dim = int(2 * hidden_dim / 3) @@ -189,15 +187,9 @@ def __init__( hidden_dim = int(ffn_dim_multiplier * hidden_dim) hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of) - self.w1 = Linear( - dim, hidden_dim, bias=False - ) - self.w2 = Linear( - hidden_dim, dim, bias=False - ) - self.w3 = Linear( - dim, hidden_dim, bias=False - ) + self.w1 = Linear(dim, hidden_dim, bias=False) + self.w2 = Linear(hidden_dim, dim, bias=False) + self.w3 = Linear(dim, hidden_dim, bias=False) def forward(self, x): return self.w2(F.silu(self.w1(x)) * self.w3(x)) @@ -225,7 +217,7 @@ def forward( x: torch.Tensor, start_pos: int, freqs_cis: torch.Tensor, - mask: Optional[torch.Tensor], + mask: torch.Tensor | None, ): h = x + self.attention.forward( self.attention_norm(x), start_pos, freqs_cis, mask @@ -241,18 +233,14 @@ def __init__(self, params: ModelArgs): self.vocab_size = params.vocab_size self.n_layers = params.n_layers - self.tok_embeddings = Embedding( - params.vocab_size, params.dim - ) + self.tok_embeddings = Embedding(params.vocab_size, params.dim) self.layers = torch.nn.ModuleList() for layer_id in range(params.n_layers): self.layers.append(TransformerBlock(layer_id, params)) self.norm = RMSNorm(params.dim, eps=params.norm_eps) - self.output = Linear( - params.dim, params.vocab_size, bias=False - ) + self.output = Linear(params.dim, params.vocab_size, bias=False) self.freqs_cis = precompute_freqs_cis( self.params.dim // self.params.n_heads, self.params.max_seq_len * 2 @@ -262,10 +250,10 @@ def __init__(self, params: ModelArgs): def forward(self, tokens: torch.Tensor, start_pos: int): _bsz, seqlen = tokens.shape h = self.tok_embeddings(tokens) - #print(h.numpy()) - #print(h.shape) + # print(h.numpy()) + # print(h.shape) self.freqs_cis = self.freqs_cis.to(h.device) - freqs_cis = 
self.freqs_cis[start_pos : start_pos + seqlen] + freqs_cis = self.freqs_cis[start_pos : start_pos + seqlen] # noqa mask = None if seqlen > 1: @@ -279,4 +267,3 @@ def forward(self, tokens: torch.Tensor, start_pos: int): h = self.norm(h) output = self.output(h).float() return output - \ No newline at end of file diff --git a/rust_bench/llama2-burn/llama-py/requirements.txt b/rust_bench/llama2-burn/llama-py/requirements.txt index 3c977dc1..4ba53e28 100644 --- a/rust_bench/llama2-burn/llama-py/requirements.txt +++ b/rust_bench/llama2-burn/llama-py/requirements.txt @@ -1,3 +1,3 @@ numpy torch -sentencepiece \ No newline at end of file +sentencepiece diff --git a/rust_bench/llama2-burn/llama-py/test.py b/rust_bench/llama2-burn/llama-py/test.py index 91b6e27e..e8a3facc 100644 --- a/rust_bench/llama2-burn/llama-py/test.py +++ b/rust_bench/llama2-burn/llama-py/test.py @@ -1,23 +1,24 @@ -import torch -from pathlib import Path import json import sys -#from safetensors.torch import load_file +from pathlib import Path -import dump -from model import Transformer, ModelArgs import tokenizer +import torch +from model import ModelArgs, Transformer + +# from safetensors.torch import load_file + def load_model(model_dir, tokenizer_path): tok = tokenizer.Tokenizer(model_path=tokenizer_path) checkpoints = sorted(Path(model_dir).glob("*.pth")) if len(checkpoints) == 0: raise ValueError(f"No checkpoint files found in {model_dir}") - + weights = [torch.load(filename, map_location="cpu") for filename in checkpoints] - with open(Path(model_dir) / "params.json", "r") as f: + with open(Path(model_dir) / "params.json") as f: params = json.loads(f.read()) - + model_args: ModelArgs = ModelArgs( max_batch_size=1, **params, @@ -26,50 +27,62 @@ def load_model(model_dir, tokenizer_path): model = Transformer(model_args) model.load_state_dict(concat_weights(weights), strict=False) model.max_seq_len = model.tok_embeddings.weight.shape[0] - print('Loaded model') + print("Loaded model") return model -# The concat_weights function is adapted from the tinygrad library: +# The concat_weights function is adapted from the tinygrad library: # https://github.com/tinygrad/tinygrad/blob/master/tinygrad/examples/llama.py # Original code by TinyGrad authors # Adapted by [Your Name] def concat_weights(models): - def convert(name) -> torch.Tensor: - disk_tensors = [model[name] for model in models] - if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: - return disk_tensors[0] - axis = 1 if name.startswith('tok_embeddings.') or name.endswith('.attention.wo.weight') or name.endswith('.feed_forward.w2.weight') else 0 - return disk_tensors[0].cat(*disk_tensors[1:], dim=axis) - return {name: convert(name) for name in {name: None for model in models for name in model}} + def convert(name) -> torch.Tensor: + disk_tensors = [model[name] for model in models] + if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: + return disk_tensors[0] + axis = ( + 1 + if name.startswith("tok_embeddings.") + or name.endswith(".attention.wo.weight") + or name.endswith(".feed_forward.w2.weight") + else 0 + ) + return disk_tensors[0].cat(*disk_tensors[1:], dim=axis) + + return { + name: convert(name) + for name in {name: None for model in models for name in model} + } if __name__ == "__main__": if len(sys.argv) != 3: - raise ValueError("You must provide the model_dir and tok_path as command line parameters") + raise ValueError( + "You must provide the model_dir and tok_path as command line parameters" + ) model_dir = sys.argv[1] tokenizer_path = 
sys.argv[2] try: with torch.no_grad(): - tok = tokenizer.Tokenizer(model_path=tokenizer_path) - llama = load_model(model_dir, tokenizer_path) + tok = tokenizer.Tokenizer(model_path=tokenizer_path) + llama = load_model(model_dir, tokenizer_path) - '''tokens = torch.tensor([0, 2, 1]) + """tokens = torch.tensor([0, 2, 1]) out = llama(tokens.unsqueeze(0), 0) - print(out[0, :3, :10].numpy())''' + print(out[0, :3, :10].numpy())""" - tokens = tok.encode("Hello, I am ", True, False) - for i in range(0, 10): - token_tensor = torch.tensor(tokens) - logits = llama(token_tensor.unsqueeze(0), 0) - sample = logits[:, -1, :].argmax(dim=-1).item() - print(f'Sample is {sample} {tok.decode(sample)}') - tokens = tokens + [sample] + tokens = tok.encode("Hello, I am ", True, False) + for i in range(0, 10): + token_tensor = torch.tensor(tokens) + logits = llama(token_tensor.unsqueeze(0), 0) + sample = logits[:, -1, :].argmax(dim=-1).item() + print(f"Sample is {sample} {tok.decode(sample)}") + tokens = tokens + [sample] decoded = tok.decode(tokens) print(f"Sampled output: {decoded}") except Exception as e: - print(f"An error occurred: {e}") \ No newline at end of file + print(f"An error occurred: {e}") diff --git a/rust_bench/llama2-burn/llama-py/test_tokenizer.py b/rust_bench/llama2-burn/llama-py/test_tokenizer.py index 1e9a3a96..1304b823 100644 --- a/rust_bench/llama2-burn/llama-py/test_tokenizer.py +++ b/rust_bench/llama2-burn/llama-py/test_tokenizer.py @@ -1,5 +1,5 @@ import tokenizer - + if __name__ == "__main__": tok = tokenizer.Tokenizer("tokenizer.model") @@ -9,4 +9,4 @@ print(f"Test string: {test_str}") print(f"Encoded tokens: {encoded}") - print(f"Decoded string: {decoded}") \ No newline at end of file + print(f"Decoded string: {decoded}") diff --git a/rust_bench/llama2-burn/llama-py/tokenizer.py b/rust_bench/llama2-burn/llama-py/tokenizer.py index e3c04db5..40b701a8 100644 --- a/rust_bench/llama2-burn/llama-py/tokenizer.py +++ b/rust_bench/llama2-burn/llama-py/tokenizer.py @@ -4,11 +4,13 @@ # Original LLama code by Facebook AI Research # Adapted by Gadersd -from sentencepiece import SentencePieceProcessor -from typing import List import logging + +from sentencepiece import SentencePieceProcessor + logger = logging.getLogger(__name__) + class Tokenizer: def __init__(self, model_path: str): self.sp_model = SentencePieceProcessor(model_file=model_path) @@ -19,9 +21,11 @@ def __init__(self, model_path: str): self.eos_id: int = self.sp_model.eos_id() self.pad_id: int = self.sp_model.pad_id() - logger.info(f'#words: {self.n_words} BOS ID: {self.bos_id} EOS ID: {self.eos_id} PAD ID: {self.pad_id}') + logger.info( + f"#words: {self.n_words} BOS ID: {self.bos_id} EOS ID: {self.eos_id} PAD ID: {self.pad_id}" + ) - def encode(self, s: str, bos: bool, eos: bool) -> List[int]: + def encode(self, s: str, bos: bool, eos: bool) -> list[int]: assert type(s) is str t = self.sp_model.encode(s) if bos: @@ -30,5 +34,5 @@ def encode(self, s: str, bos: bool, eos: bool) -> List[int]: t = t + [self.eos_id] return t - def decode(self, t: List[int]) -> str: + def decode(self, t: list[int]) -> str: return self.sp_model.decode(t) diff --git a/rust_bench/llama2-candle/Cargo.toml b/rust_bench/llama2-candle/Cargo.toml index b6f7fbdb..67da7e93 100644 --- a/rust_bench/llama2-candle/Cargo.toml +++ b/rust_bench/llama2-candle/Cargo.toml @@ -25,4 +25,4 @@ tracing-subscriber = "0.3.7" [features] accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate", "candle-transformers/accelerate"] -cuda = ["candle/cuda", 
"candle-nn/cuda", "candle-transformers/cuda"] \ No newline at end of file +cuda = ["candle/cuda", "candle-nn/cuda", "candle-transformers/cuda"] diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..6aee7c7b --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 120 +exclude = .tox,.git,*/migrations/*,*/static/CACHE/*,docs,node_modules,venv diff --git a/setup.sh b/setup.sh index 5af5f326..d8e7cf6a 100755 --- a/setup.sh +++ b/setup.sh @@ -2,7 +2,7 @@ ################################################################################ # Script: setup_and_convert.sh -# Description: This script automates the setup of a virtual environment, +# Description: This script automates the setup of a virtual environment, # installs project requirements, converts and stores models. ################################################################################ @@ -61,7 +61,7 @@ fi # Check and create llama-2-7b-burn model if [ ! -e "$BURN_MODEL_FOLDER/$BURN_MODEL_NAME.cfg" ]; then check_and_create_directory "$BURN_MODEL_FOLDER" - + if [ ! -d "$BURN_MODEL_FOLDER/params" ]; then create_and_activate_venv echo "Installing requirements for dumping" From 7d43a634ecbc4a261707ac014493f1672c1268b5 Mon Sep 17 00:00:00 2001 From: nsosio Date: Thu, 16 Nov 2023 11:58:01 +0000 Subject: [PATCH 2/5] tinygrad fixes --- .pre-commit-config.yaml | 6 ------ python_bench/tinygrad.py | 11 ++++++----- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fc065c44..af896c6a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,12 +14,6 @@ repos: - id: check-builtin-literals - id: check-case-conflict - - repo: https://github.com/asottile/pyupgrade - rev: v3.15.0 - hooks: - - id: pyupgrade - args: [--py311-plus] - - repo: https://github.com/psf/black rev: 23.11.0 hooks: diff --git a/python_bench/tinygrad.py b/python_bench/tinygrad.py index b238ffdd..15d976e7 100644 --- a/python_bench/tinygrad.py +++ b/python_bench/tinygrad.py @@ -3,6 +3,7 @@ import os import time from pathlib import Path +from typing import Optional, Union import numpy as np from tinygrad.helpers import CI, dtypes, getenv @@ -90,9 +91,9 @@ def __init__(self, dim, n_heads, n_kv_heads, linear=Linear): def __call__( self, x: Tensor, - start_pos: Variable | int, + start_pos: Union[Variable, int], freqs_cis: Tensor, - mask: Tensor | None, + mask: Optional[Tensor], ) -> Tensor: xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) xq = xq.reshape(xq.shape[0], xq.shape[1], self.n_heads, self.head_dim) @@ -176,9 +177,9 @@ def __init__( def __call__( self, x: Tensor, - start_pos: Variable | int, + start_pos: Union[Variable, int], freqs_cis: Tensor, - mask: Tensor | None, + mask: Union[Tensor, None], ): h = x + self.attention(self.attention_norm(x), start_pos, freqs_cis, mask) return (h + self.feed_forward(self.ffn_norm(h))).realize() @@ -223,7 +224,7 @@ def __init__( self.forward_jit = TinyJit(self.forward) def forward( - self, tokens: Tensor, start_pos: Variable | int, temperature: float = 0.0 + self, tokens: Tensor, start_pos: Union[Variable, int], temperature: float = 0.0 ): _bsz, seqlen = tokens.shape freqs_cis = self.freqs_cis.shrink( From 37150babd490db7019b8a90bbea46aa0579dcdcb Mon Sep 17 00:00:00 2001 From: nsosio Date: Thu, 16 Nov 2023 13:04:02 +0100 Subject: [PATCH 3/5] added gha --- .github/workflows/precommit.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/workflows/precommit.yaml diff --git 
a/.github/workflows/precommit.yaml b/.github/workflows/precommit.yaml new file mode 100644 index 00000000..c2f7e71f --- /dev/null +++ b/.github/workflows/precommit.yaml @@ -0,0 +1,14 @@ +name: pre-commit + +on: + pull_request: + push: + branches: [main] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + - uses: pre-commit/action@v3.0.0 From 7cab5e9247f1beb18068e632511708f96bfc8fa9 Mon Sep 17 00:00:00 2001 From: nsosio Date: Thu, 16 Nov 2023 13:06:10 +0100 Subject: [PATCH 4/5] fixes --- .github/workflows/precommit.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/precommit.yaml b/.github/workflows/precommit.yaml index c2f7e71f..f1478189 100644 --- a/.github/workflows/precommit.yaml +++ b/.github/workflows/precommit.yaml @@ -2,6 +2,7 @@ name: pre-commit on: pull_request: + branches: [main] push: branches: [main] From d3281efbbc9e7764afc21297609d1d2f14ac2ded Mon Sep 17 00:00:00 2001 From: nsosio Date: Thu, 16 Nov 2023 13:07:39 +0100 Subject: [PATCH 5/5] fixes --- .github/workflows/precommit.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/precommit.yaml b/.github/workflows/precommit.yaml index f1478189..076db3ba 100644 --- a/.github/workflows/precommit.yaml +++ b/.github/workflows/precommit.yaml @@ -3,8 +3,6 @@ name: pre-commit on: pull_request: branches: [main] - push: - branches: [main] jobs: pre-commit:
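
Note on the tooling introduced in this series: the checks that the pre-commit GitHub Action enforces on pull requests can also be exercised locally before pushing. A minimal sketch, assuming the pre-commit package is available in the project's virtual environment (these patches do not add it to requirements.txt):

    # install the git hooks defined in .pre-commit-config.yaml
    pre-commit install
    # run every configured hook (black, isort, flake8, whitespace fixers, etc.)
    # against the whole repository, mirroring what the CI workflow runs
    pre-commit run --all-files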