From a998d7a6e4814f0142f3a6937c6ef7fb85428323 Mon Sep 17 00:00:00 2001 From: Ramakrishnan Sivakumar Date: Mon, 4 Mar 2024 11:58:16 -0800 Subject: [PATCH 01/11] Fix hang on OEM info read (#128) * enable passwordless sudo * redirect stderr --- src/turnkeyml/common/build.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index 28c55158..74e5bf17 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -558,14 +558,18 @@ def get_system_info(): try: oem_info = ( subprocess.check_output( - "sudo dmidecode -s system-product-name", + "sudo -n dmidecode -s system-product-name", shell=True, + stderr=subprocess.DEVNULL, ) .decode() .strip() .replace("\n", " ") ) info_dict["OEM System"] = oem_info + except subprocess.CalledProcessError: + # This catches the case where sudo requires a password + info_dict["OEM System"] = "Unable to get oem info - password required" except Exception as e: # pylint: disable=broad-except info_dict["Error OEM System"] = str(e) From 15737448633f336ce6fbba15c09c1ef299e84be6 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 11 Mar 2024 10:15:30 -0400 Subject: [PATCH 02/11] Add LLaMA2 models (#130) --- models/transformers/llama2_13b.py | 48 +++++++++++++++++++++++++++++++ models/transformers/llama2_34b.py | 48 +++++++++++++++++++++++++++++++ models/transformers/llama2_70b.py | 48 +++++++++++++++++++++++++++++++ models/transformers/llama2_7b.py | 48 +++++++++++++++++++++++++++++++ src/turnkeyml/common/build.py | 15 ++++++++-- src/turnkeyml/parser.py | 7 +++++ 6 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 models/transformers/llama2_13b.py create mode 100644 models/transformers/llama2_34b.py create mode 100644 models/transformers/llama2_70b.py create mode 100644 models/transformers/llama2_7b.py diff --git a/models/transformers/llama2_13b.py b/models/transformers/llama2_13b.py new file mode 100644 index 00000000..684029a5 --- /dev/null +++ b/models/transformers/llama2_13b.py @@ -0,0 +1,48 @@ +# labels: name::llama2_13b author::transformers task::Generative_AI license::apache-2.0 +from turnkeyml.parser import parse +from transformers import LlamaConfig, LlamaForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length, model_path = parse( + ["pretrained", "batch_size", "max_seq_length", "model_path"] +) + +# Model and input configurations +if pretrained: + if not model_path: + raise ValueError( + "TurnkeyML does not include pretrained weights for LLaMA2 " + "because it has special licensing terms. See for details: " + "https://huggingface.co/docs/transformers/model_doc/llama2" + ) + + model = LlamaForCausalLM.from_pretrained(model_path) +else: + config = LlamaConfig( + architectures=["LlamaForCausalLM"], + hidden_size=5120, + intermediate_size=13824, + max_position_embeddings=4096, + num_attention_heads=40, + num_hidden_layers=40, + num_key_value_heads=40, + pad_token_id=0, + vocab_size=32000, + use_cache=True, + ) + model = LlamaForCausalLM(config) + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. 
+model.generate(**inputs, max_length=max_seq_length + 2) diff --git a/models/transformers/llama2_34b.py b/models/transformers/llama2_34b.py new file mode 100644 index 00000000..a272978e --- /dev/null +++ b/models/transformers/llama2_34b.py @@ -0,0 +1,48 @@ +# labels: name::llama2_34b author::transformers task::Generative_AI license::apache-2.0 +from turnkeyml.parser import parse +from transformers import LlamaConfig, LlamaForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length, model_path = parse( + ["pretrained", "batch_size", "max_seq_length", "model_path"] +) + +# Model and input configurations +if pretrained: + if not model_path: + raise ValueError( + "TurnkeyML does not include pretrained weights for LLaMA2 " + "because it has special licensing terms. See for details: " + "https://huggingface.co/docs/transformers/model_doc/llama2" + ) + + model = LlamaForCausalLM.from_pretrained(model_path) +else: + config = LlamaConfig( + architectures=["LlamaForCausalLM"], + hidden_size=8192, + intermediate_size=22016, + max_position_embeddings=4096, + num_attention_heads=64, + num_hidden_layers=48, + num_key_value_heads=8, + pad_token_id=0, + vocab_size=32000, + use_cache=True, + ) + model = LlamaForCausalLM(config) + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. +model.generate(**inputs, max_length=max_seq_length + 2) diff --git a/models/transformers/llama2_70b.py b/models/transformers/llama2_70b.py new file mode 100644 index 00000000..2ec45987 --- /dev/null +++ b/models/transformers/llama2_70b.py @@ -0,0 +1,48 @@ +# labels: name::llama2_70b author::transformers task::Generative_AI license::apache-2.0 +from turnkeyml.parser import parse +from transformers import LlamaConfig, LlamaForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length, model_path = parse( + ["pretrained", "batch_size", "max_seq_length", "model_path"] +) + +# Model and input configurations +if pretrained: + if not model_path: + raise ValueError( + "TurnkeyML does not include pretrained weights for LLaMA2 " + "because it has special licensing terms. See for details: " + "https://huggingface.co/docs/transformers/model_doc/llama2" + ) + + model = LlamaForCausalLM.from_pretrained(model_path) +else: + config = LlamaConfig( + architectures=["LlamaForCausalLM"], + hidden_size=8192, + intermediate_size=28672, + max_position_embeddings=4096, + num_attention_heads=64, + num_hidden_layers=80, + num_key_value_heads=8, + pad_token_id=0, + vocab_size=32000, + use_cache=True, + ) + model = LlamaForCausalLM(config) + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. 
+model.generate(**inputs, max_length=max_seq_length + 2) diff --git a/models/transformers/llama2_7b.py b/models/transformers/llama2_7b.py new file mode 100644 index 00000000..a4a92e13 --- /dev/null +++ b/models/transformers/llama2_7b.py @@ -0,0 +1,48 @@ +# labels: name::llama2_7b author::transformers task::Generative_AI license::apache-2.0 +from turnkeyml.parser import parse +from transformers import LlamaConfig, LlamaForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length, model_path = parse( + ["pretrained", "batch_size", "max_seq_length", "model_path"] +) + +# Model and input configurations +if pretrained: + if not model_path: + raise ValueError( + "TurnkeyML does not include pretrained weights for LLaMA2 " + "because it has special licensing terms. See for details: " + "https://huggingface.co/docs/transformers/model_doc/llama2" + ) + + model = LlamaForCausalLM.from_pretrained(model_path) +else: + config = LlamaConfig( + architectures=["LlamaForCausalLM"], + hidden_size=4096, + intermediate_size=11008, + max_position_embeddings=4096, + num_attention_heads=32, + num_hidden_layers=32, + num_key_value_heads=32, + pad_token_id=0, + vocab_size=32000, + use_cache=True, + ) + model = LlamaForCausalLM(config) + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. +model.generate(**inputs, max_length=max_seq_length + 2) diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index 74e5bf17..fc0342ba 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -203,9 +203,18 @@ def get_shapes_and_dtypes(inputs: dict): (list, tuple), ): for v, i in zip(value, range(len(value))): - subkey = f"{key}[{i}]" - shapes[subkey] = np.array(v).shape - dtypes[subkey] = np.array(v).dtype.name + if isinstance(v, (list, tuple)): + # Handle nested lists/tuples, for example past_key_values + # in an LLM that has KV-caching enabled + for v2, i2 in zip(v, range(len(v))): + subsubkey = f"{key}[{i}][{i2}]" + shapes[subsubkey] = np.array(v2).shape + dtypes[subsubkey] = np.array(v2).dtype.name + else: + # Handle single list/tuple + subkey = f"{key}[{i}]" + shapes[subkey] = np.array(v).shape + dtypes[subkey] = np.array(v).dtype.name elif torch.is_tensor(value): shapes[key] = np.array(value.detach()).shape dtypes[key] = np.array(value.detach()).dtype.name diff --git a/src/turnkeyml/parser.py b/src/turnkeyml/parser.py index 1cb09855..0ba1b20a 100644 --- a/src/turnkeyml/parser.py +++ b/src/turnkeyml/parser.py @@ -69,6 +69,13 @@ def parse(valid_args: List[str]) -> List[Union[int, float]]: "in_channels": Arg("in_channels", default=1433, type=int), # Pretrained indicates whether pretrained weights should be used on the model "pretrained": Arg("pretrained", default=False, type=bool, action="store_true"), + # Path on the filesystem to a copy of the model + # Useful when models have special licensing terms and we can't directly + # provide access + "model_path": Arg("model_path", default=None, type=str), + # True: use an LLM's KV-cache (ie, token phase). False: disable the + # KV-cache (ie, prefill phase). 
+ "kvcache": Arg("kvcache", default=False, type=bool, action="store_true"), } # Create parser that accepts only the args received as part of valid_args From 034631ba687dae71bad5e457771960731a87b441 Mon Sep 17 00:00:00 2001 From: Ramakrishnan Sivakumar Date: Mon, 11 Mar 2024 12:15:30 -0700 Subject: [PATCH 03/11] add cache warmup for onnxrt (#133) --- src/turnkeyml/run/onnxrt/within_conda.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/turnkeyml/run/onnxrt/within_conda.py b/src/turnkeyml/run/onnxrt/within_conda.py index 27498a94..57434939 100644 --- a/src/turnkeyml/run/onnxrt/within_conda.py +++ b/src/turnkeyml/run/onnxrt/within_conda.py @@ -22,6 +22,15 @@ def run_ort_profile( input_feed = dummy_inputs(sess_input) output_name = onnx_session.get_outputs()[0].name + # Warm the CPU cache by executing a small number of inferences + # Stop after 100 iterations or 15s warming up + warmup_max_duration = 15 + warmup_start_time = time.time() + for _ in range(100): + onnx_session.run([output_name], input_feed) + if time.time() - warmup_start_time > warmup_max_duration: + break + for _ in range(iterations): start = time.perf_counter() onnx_session.run([output_name], input_feed) From 871d11df86987ef6b1ea6de30b1048c8620acb12 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:18:40 -0400 Subject: [PATCH 04/11] Allow BaseRT to benchmark onnx models with external data files (#134) --- src/turnkeyml/run/basert.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/turnkeyml/run/basert.py b/src/turnkeyml/run/basert.py index 468323a1..c399a9fb 100644 --- a/src/turnkeyml/run/basert.py +++ b/src/turnkeyml/run/basert.py @@ -193,6 +193,16 @@ def benchmark(self) -> MeasuredPerformance: os.makedirs(self.local_onnx_dir, exist_ok=True) shutil.copy(model_file, self.local_onnx_file) + # Copy any ONNX external data files present in the onnx build directory + onnx_build_dir = os.path.dirname(model_file) + external_data_files = [ + os.path.join(onnx_build_dir, f) + for f in os.listdir(onnx_build_dir) + if ".onnx" not in f + ] + for f in external_data_files: + shutil.copy(f, os.path.dirname(self.local_onnx_file)) + # Execute benchmarking in hardware if self.requires_docker: _check_docker_install() From cbd3abc173b4bc93d06131f6199d08ef2f6bc143 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:58:57 -0400 Subject: [PATCH 05/11] Add phi2 to transformers corpus (#135) --- models/transformers/phi2.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 models/transformers/phi2.py diff --git a/models/transformers/phi2.py b/models/transformers/phi2.py new file mode 100644 index 00000000..8db9e131 --- /dev/null +++ b/models/transformers/phi2.py @@ -0,0 +1,35 @@ +# labels: name::phi2 author::transformers task::Generative_AI license::mit +from turnkeyml.parser import parse +from transformers import AutoModelForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length = parse( + ["pretrained", "batch_size", "max_seq_length"] +) + +# Model and input configurations +if pretrained: + model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2") +else: + raise ValueError( + "This model is only supported with pretrained weights, try again with --pretrained" + ) + +# Make sure the user's sequence length fits within the model's maximum +assert max_seq_length <= 
model.config.max_position_embeddings + + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. +model.generate(**inputs, max_length=max_seq_length + 2) From 13451b942774b19879964746a735e462ecb0fbb6 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Thu, 14 Mar 2024 15:56:12 -0400 Subject: [PATCH 06/11] Save the exception into error_log when the subprocess is killed (#139) --- src/turnkeyml/cli/spawn.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/turnkeyml/cli/spawn.py b/src/turnkeyml/cli/spawn.py index 279976e3..ca2240cc 100644 --- a/src/turnkeyml/cli/spawn.py +++ b/src/turnkeyml/cli/spawn.py @@ -374,6 +374,9 @@ def run_turnkey( ): stats.save_model_eval_stat(key, evaluation_status.value) + # Save the exception into the error log stat + stats.save_model_eval_stat(filesystem.Keys.ERROR_LOG, str(e)) + except Exception as stats_exception: # pylint: disable=broad-except printing.log_info( "Stats file found, but unable to perform cleanup due to " From 4bfaa361241ebd44c80d5dcc45f15bf7bb84cca2 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Fri, 15 Mar 2024 09:41:13 -0400 Subject: [PATCH 07/11] Save device name stat prior to running stages (#138) --- .../runtime.py | 13 +++---- .../turnkeyml_plugin_example_rt/runtime.py | 6 +-- src/turnkeyml/analyze/script.py | 9 ++++- src/turnkeyml/common/filesystem.py | 4 +- src/turnkeyml/run/basert.py | 6 +-- src/turnkeyml/run/onnxrt/runtime.py | 12 ++++-- src/turnkeyml/run/tensorrt/runtime.py | 38 +++++++++++++------ src/turnkeyml/run/torchrt/runtime.py | 6 +-- src/turnkeyml/version.py | 2 +- 9 files changed, 59 insertions(+), 37 deletions(-) diff --git a/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/runtime.py b/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/runtime.py index ee10c557..db27f679 100644 --- a/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/runtime.py +++ b/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/runtime.py @@ -28,7 +28,7 @@ def __init__( inputs=None, delay_before_benchmarking: str = "0", ): - # Custom runtime args always arive as strings, so we need to convert them + # Custom runtime args always arrive as strings, so we need to convert them # to the appropriate data type here self.delay_before_benchmarking = int(delay_before_benchmarking) @@ -86,7 +86,7 @@ def benchmark(self): return MeasuredPerformance( mean_latency=self.mean_latency, throughput=self.throughput, - device=self.device_name, + device=self.device_name(), device_type=self.device_type, runtime=self.runtime, runtime_version=self.runtime_version, @@ -111,9 +111,6 @@ def throughput(self) -> float: "Queried throughput before self.benchmark() was called" ) - @property - def device_name(self) -> str: - return ( - f"Device Family {self.device_type.family}, Device Part {self.device_type.part}, " - f"Device Configuration {self.device_type.config}" - ) + @staticmethod + def device_name() -> str: + return "Example Device" diff --git a/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py 
b/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py index 2dc117d6..8e91b633 100644 --- a/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py +++ b/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py @@ -57,7 +57,7 @@ def benchmark(self) -> MeasuredPerformance: return MeasuredPerformance( mean_latency=self.mean_latency, throughput=self.throughput, - device=self.device_name, + device=self.device_name(), device_type=self.device_type, runtime=self.runtime, runtime_version=self.runtime_version, @@ -82,6 +82,6 @@ def throughput(self) -> float: "Queried throughput before self.benchmark() was called" ) - @property - def device_name(self) -> str: + @staticmethod + def device_name() -> str: return "the x86 cpu of your dreams" diff --git a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py index f75962f5..fd1f8054 100644 --- a/src/turnkeyml/analyze/script.py +++ b/src/turnkeyml/analyze/script.py @@ -371,6 +371,11 @@ def explore_invocation( fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.NOT_STARTED.value ) + # Save the device name that will be used for the benchmark + stats.save_model_eval_stat( + fs.Keys.DEVICE, runtime_info["RuntimeClass"].device_name() + ) + build_state = None perf = None benchmark_logfile_path = "" @@ -667,7 +672,7 @@ def explore_frame( # Starting in version 2.2.0, torch dynamo added wrappers to callbacks # while tracing frames, which conflicts with TurnkeML's analysis. Here, - # we supress errors caused by those callback wrappers and only raise an + # we suppress errors caused by those callback wrappers and only raise an # error if the compiled model actually tries to execute within TurnkeyML. td = torch._dynamo # pylint: disable=protected-access td.config.suppress_errors = True @@ -800,7 +805,7 @@ def forward_spy(*args, **kwargs): and invocation_info.is_target and (model_info.build_model) ): - # Disable all modifications while we evalute the model + # Disable all modifications while we evaluate the model # This is needed in case a tool called during evaluation wants to # trace the model. There are some scenarios (e.g., ipex.quantization.prepare), # that raise an exception when they encounter forward_spy() diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index ad54b073..b6b339c6 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -332,8 +332,10 @@ class Keys: PERFORMANCE = "performance" # Runtime used for the benchmark RUNTIME = "runtime" - # Device used for the benchmark + # Type of device used for the benchmark (e.g., "x86") DEVICE_TYPE = "device_type" + # Specific device used for the benchmark + DEVICE = "device" # Name of the model MODEL_NAME = "model_name" # References the per-evaluation stats section diff --git a/src/turnkeyml/run/basert.py b/src/turnkeyml/run/basert.py index c399a9fb..b797f99f 100644 --- a/src/turnkeyml/run/basert.py +++ b/src/turnkeyml/run/basert.py @@ -228,7 +228,7 @@ def benchmark(self) -> MeasuredPerformance: return MeasuredPerformance( mean_latency=self.mean_latency, throughput=self.throughput, - device=self.device_name, + device=self.device_name(), device_type=self.device_type, runtime=self.runtime, runtime_version=self.runtime_version, @@ -260,9 +260,9 @@ def throughput(self) -> float: Returns the throughput, in IPS, for the benchmarking run. 
""" - @property + @staticmethod @abstractmethod - def device_name(self) -> str: + def device_name() -> str: """ Returns the full device name for the device used in benchmarking. For example, a benchmark on a `x86` device might have a device name like diff --git a/src/turnkeyml/run/onnxrt/runtime.py b/src/turnkeyml/run/onnxrt/runtime.py index 08421220..f3782e61 100644 --- a/src/turnkeyml/run/onnxrt/runtime.py +++ b/src/turnkeyml/run/onnxrt/runtime.py @@ -5,7 +5,11 @@ import turnkeyml.common.exceptions as exp from turnkeyml.run.onnxrt.execute import ORT_VERSION from turnkeyml.common.filesystem import Stats -from turnkeyml.run.onnxrt.execute import create_conda_env, execute_benchmark +from turnkeyml.run.onnxrt.execute import ( + create_conda_env, + execute_benchmark, + get_cpu_specs, +) import turnkeyml.run.plugin_helpers as plugin_helpers @@ -95,6 +99,6 @@ def mean_latency(self): def throughput(self): return float(self._get_stat("Throughput")) - @property - def device_name(self): - return self._get_stat("CPU Name") + @staticmethod + def device_name() -> str: + return get_cpu_specs()["CPU Name"] diff --git a/src/turnkeyml/run/tensorrt/runtime.py b/src/turnkeyml/run/tensorrt/runtime.py index 83d2e219..e56896af 100644 --- a/src/turnkeyml/run/tensorrt/runtime.py +++ b/src/turnkeyml/run/tensorrt/runtime.py @@ -59,18 +59,24 @@ def __init__( requires_docker=True, ) + self.device_name = self._dynamic_device_name + def _setup(self) -> None: # Check if at least one NVIDIA GPU is available locally - result = subprocess.run( - ["nvidia-smi"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - check=False, - ) - - if "NVIDIA" not in result.stdout or result.returncode == 1: - msg = "No NVIDIA GPUs available on the local machine" + msg = "No NVIDIA GPUs available on the local machine" + try: + result = subprocess.run( + ["nvidia-smi"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + check=False, + ) + + if "NVIDIA" not in result.stdout or result.returncode == 1: + + raise exp.BenchmarkException(msg) + except FileNotFoundError: raise exp.BenchmarkException(msg) def _execute( @@ -143,6 +149,14 @@ def mean_latency(self): def throughput(self): return float(self._get_stat("Throughput").split(" ")[0]) - @property - def device_name(self): + def _dynamic_device_name(self): + # Return the specific Nvidia GPU, which is only known + # after we invoke the TensorRT docker return self._get_stat("Selected Device") + + @staticmethod + def device_name(): + # Return a generic response, since we haven't invoked + # the docker yet. At instantiation time, this method + # is replaced by the dynamic version. 
+ return "Nvidia GPU" diff --git a/src/turnkeyml/run/torchrt/runtime.py b/src/turnkeyml/run/torchrt/runtime.py index 9a24de4f..d604e670 100644 --- a/src/turnkeyml/run/torchrt/runtime.py +++ b/src/turnkeyml/run/torchrt/runtime.py @@ -124,7 +124,7 @@ def _calculate_performance( return MeasuredPerformance( mean_latency=self.mean_latency, throughput=self.throughput, - device=self.device_name, + device=self.device_name(), device_type=self.device_type, runtime=self.runtime, runtime_version=self.runtime_version, @@ -218,6 +218,6 @@ def throughput(self) -> float: "Queried throughput before self.benchmark() was called" ) - @property - def device_name(self) -> str: + @staticmethod + def device_name() -> str: return get_cpu_specs()["CPU Name"] diff --git a/src/turnkeyml/version.py b/src/turnkeyml/version.py index 0b2f79db..8c0d5d5b 100644 --- a/src/turnkeyml/version.py +++ b/src/turnkeyml/version.py @@ -1 +1 @@ -__version__ = "1.1.3" +__version__ = "2.0.0" From c0986fbfe51c59da1b00e37ce462ccea1b038690 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:29:54 -0400 Subject: [PATCH 08/11] Update ignition.py (#142) Fix a typo Signed-off-by: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> --- src/turnkeyml/build/ignition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py index ebf43958..a06d8ddc 100644 --- a/src/turnkeyml/build/ignition.py +++ b/src/turnkeyml/build/ignition.py @@ -117,7 +117,7 @@ def validate_cached_model( msg = ( f"Your build {state.config.build_name} was previously built against " f"turnkey version {state.turnkey_version}, " - f"however you are now using onxxflow version {turnkey_version}. The previous build is " + f"however you are now using turnkey version {turnkey_version}. The previous build is " f"incompatible with this version of turnkey, as indicated by the {out_of_date} " "version number changing. See **docs/versioning.md** for details." ) From 40c27b06aeebc9151b78850275e60e63baeb108f Mon Sep 17 00:00:00 2001 From: Daniel Holanda Date: Wed, 20 Mar 2024 12:45:01 -0700 Subject: [PATCH 09/11] Ensure child subprocesses are killed on timeout (#144) * Ensure child subprocesses are killed on timeout * Suggested Changes --- src/turnkeyml/cli/spawn.py | 18 ++++++++++++------ src/turnkeyml/run/benchmark_build.py | 6 +++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/turnkeyml/cli/spawn.py b/src/turnkeyml/cli/spawn.py index ca2240cc..3dc34014 100644 --- a/src/turnkeyml/cli/spawn.py +++ b/src/turnkeyml/cli/spawn.py @@ -13,6 +13,7 @@ import getpass from typing import List, Optional, Dict, Union from enum import Enum +import psutil import turnkeyml.common.filesystem as filesystem import turnkeyml.common.printing as printing import turnkeyml.common.build as build @@ -22,7 +23,7 @@ class WatchdogTimer(Thread): """ - Run *callback* in *timeout* seconds unless the timer is restarted. + Kill process in *timeout* seconds unless the timer is restarted. This is needed because Popen natively supports streaming output to the terminal, checking that output, and timeouts--but not all 3 at the same time. @@ -31,11 +32,10 @@ class WatchdogTimer(Thread): to stream and check output. 
""" - def __init__(self, timeout, callback, *args, timer=monotonic, **kwargs): + def __init__(self, timeout, pid, timer=monotonic, **kwargs): super().__init__(**kwargs) self.timeout = timeout - self.callback = callback - self.args = args + self.pid = pid self.timer = timer self.cancelled = Event() self.blocked = Lock() @@ -50,7 +50,7 @@ def run(self): with self.blocked: if self.deadline <= self.timer() and not self.cancelled.is_set(): self.timeout_reached = True - return self.callback(*self.args) + return self.kill_process_tree() def restart(self): self.deadline = self.timer() + self.timeout @@ -58,6 +58,12 @@ def restart(self): def cancel(self): self.cancelled.set() + def kill_process_tree(self): + parent = psutil.Process(self.pid) + for child in parent.children(recursive=True): + child.kill() + parent.kill() + def parse_evaluation_id(line: str, current_value: str) -> Optional[str]: """ @@ -284,7 +290,7 @@ def run_turnkey( # Create our own watchdog timer in a thread # This is needed because the `for line in p.stdout` is a blocking # call that is incompatible with Popen's native timeout features - watchdog = WatchdogTimer(timeout, callback=p.kill, daemon=True) + watchdog = WatchdogTimer(timeout, p.pid) watchdog.start() # Print the subprocess's output to the command line as it comes in, diff --git a/src/turnkeyml/run/benchmark_build.py b/src/turnkeyml/run/benchmark_build.py index 24ff6c0b..1c22165b 100644 --- a/src/turnkeyml/run/benchmark_build.py +++ b/src/turnkeyml/run/benchmark_build.py @@ -1,6 +1,7 @@ from typing import Dict, Optional import multiprocessing import traceback +import psutil import turnkeyml.common.build as build import turnkeyml.common.exceptions as exp import turnkeyml.common.filesystem as fs @@ -275,7 +276,10 @@ def benchmark_cache( if p.is_alive(): # Handle the timeout, which is needed if the process is still alive after # waiting `timeout` seconds - p.terminate() + parent = psutil.Process(p.pid) + for child in parent.children(recursive=True): + child.kill() + parent.kill() stats.save_model_eval_stat( fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.TIMEOUT.value ) From 5c59aa169b2cd7dd3494956ad407fdc5cc501df5 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Wed, 20 Mar 2024 18:56:51 -0400 Subject: [PATCH 10/11] Rev version to v2.0.1 (#145) Signed-off-by: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> --- src/turnkeyml/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/turnkeyml/version.py b/src/turnkeyml/version.py index 8c0d5d5b..159d48b8 100644 --- a/src/turnkeyml/version.py +++ b/src/turnkeyml/version.py @@ -1 +1 @@ -__version__ = "2.0.0" +__version__ = "2.0.1" From b814d0b78c6e89da73df56d8fa65add6f8bb475c Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Fri, 22 Mar 2024 19:01:35 -0400 Subject: [PATCH 11/11] Do not set benchmarking error status when a benchmark is skipped (#146) --- src/turnkeyml/run/benchmark_build.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/turnkeyml/run/benchmark_build.py b/src/turnkeyml/run/benchmark_build.py index 1c22165b..27b058b0 100644 --- a/src/turnkeyml/run/benchmark_build.py +++ b/src/turnkeyml/run/benchmark_build.py @@ -22,6 +22,12 @@ def tqdm(iterable, **kwargs): # pylint: disable=unused-argument return iterable +class SkippedBenchmark(Exception): + """ + Indicates that a benchmark was skipped + """ + + class 
Process(multiprocessing.Process): """ Standardized way to make it possible to catch exceptions from a @@ -81,7 +87,7 @@ def benchmark_build( state = build.load_state(cache_dir, build_name) if state.build_status != build.FunctionStatus.SUCCESSFUL: - raise exp.BenchmarkException( + raise SkippedBenchmark( "Only successful builds can be benchmarked with this " f"function, however selected build at {build_name} " f"has state: {state.build_status}" @@ -99,7 +105,7 @@ def benchmark_build( except KeyError as e: # User should never get this far without hitting an actionable error message, # but let's raise an exception just in case. - raise exp.BenchmarkException( + raise SkippedBenchmark( f"Selected runtime is not supported: {selected_runtime}" ) from e @@ -295,9 +301,15 @@ def benchmark_cache( # is not able to conduct any more benchmarking. In this case the program # should exit and the user should follow the suggestion in the exception # message (e.g., restart their computer). - stats.save_model_eval_stat( - fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.ERROR.value - ) + + if isinstance(p.exception[0], SkippedBenchmark): + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.NOT_STARTED.value + ) + else: + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.ERROR.value + ) if isinstance(p.exception[0], exp.HardwareError): stats.save_model_eval_stat(fs.Keys.ERROR_LOG, p.exception[1])
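
Note on the pattern used above: the status handling in PATCH 11 inspects p.exception after the child process finishes, which presumes a multiprocessing.Process subclass that forwards exceptions raised in the child back to the parent over a pipe. The sketch below illustrates that pattern only; the class and attribute names are assumptions for illustration and are not the actual turnkeyml implementation.

# Illustrative sketch (hypothetical names): forwarding child-process
# exceptions to the parent so the caller can branch on the exception type,
# e.g. treating a skipped benchmark differently from a hard error.
import multiprocessing
import traceback


class ExceptionCapturingProcess(multiprocessing.Process):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One-way channel for the child to report a failure to the parent
        self._parent_conn, self._child_conn = multiprocessing.Pipe()
        self._exception = None

    def run(self):
        try:
            # Execute the target function exactly as multiprocessing would
            super().run()
        except Exception as e:  # pylint: disable=broad-except
            # Send both the exception object and the formatted traceback,
            # so the parent can log a readable error message
            self._child_conn.send((e, traceback.format_exc()))

    @property
    def exception(self):
        # Non-blocking check: only receive if the child reported a failure
        if self._parent_conn.poll():
            self._exception = self._parent_conn.recv()
        return self._exception


# Usage sketch: after p.join(), p.exception is either None or a
# (exception_instance, traceback_string) tuple, so the parent can do
# isinstance checks such as the SkippedBenchmark / HardwareError branches
# shown in benchmark_cache() above.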