From a998d7a6e4814f0142f3a6937c6ef7fb85428323 Mon Sep 17 00:00:00 2001 From: Ramakrishnan Sivakumar Date: Mon, 4 Mar 2024 11:58:16 -0800 Subject: [PATCH 01/11] Fix hang on OEM info read (#128) * enable passwordless sudo * redirect stderr --- src/turnkeyml/common/build.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index 28c55158..74e5bf17 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -558,14 +558,18 @@ def get_system_info(): try: oem_info = ( subprocess.check_output( - "sudo dmidecode -s system-product-name", + "sudo -n dmidecode -s system-product-name", shell=True, + stderr=subprocess.DEVNULL, ) .decode() .strip() .replace("\n", " ") ) info_dict["OEM System"] = oem_info + except subprocess.CalledProcessError: + # This catches the case where sudo requires a password + info_dict["OEM System"] = "Unable to get oem info - password required" except Exception as e: # pylint: disable=broad-except info_dict["Error OEM System"] = str(e) From 15737448633f336ce6fbba15c09c1ef299e84be6 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 11 Mar 2024 10:15:30 -0400 Subject: [PATCH 02/11] Add LLaMA2 models (#130) --- models/transformers/llama2_13b.py | 48 +++++++++++++++++++++++++++++++ models/transformers/llama2_34b.py | 48 +++++++++++++++++++++++++++++++ models/transformers/llama2_70b.py | 48 +++++++++++++++++++++++++++++++ models/transformers/llama2_7b.py | 48 +++++++++++++++++++++++++++++++ src/turnkeyml/common/build.py | 15 ++++++++-- src/turnkeyml/parser.py | 7 +++++ 6 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 models/transformers/llama2_13b.py create mode 100644 models/transformers/llama2_34b.py create mode 100644 models/transformers/llama2_70b.py create mode 100644 models/transformers/llama2_7b.py diff --git a/models/transformers/llama2_13b.py b/models/transformers/llama2_13b.py new file mode 100644 index 00000000..684029a5 --- /dev/null +++ b/models/transformers/llama2_13b.py @@ -0,0 +1,48 @@ +# labels: name::llama2_13b author::transformers task::Generative_AI license::apache-2.0 +from turnkeyml.parser import parse +from transformers import LlamaConfig, LlamaForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length, model_path = parse( + ["pretrained", "batch_size", "max_seq_length", "model_path"] +) + +# Model and input configurations +if pretrained: + if not model_path: + raise ValueError( + "TurnkeyML does not include pretrained weights for LLaMA2 " + "because it has special licensing terms. See for details: " + "https://huggingface.co/docs/transformers/model_doc/llama2" + ) + + model = LlamaForCausalLM.from_pretrained(model_path) +else: + config = LlamaConfig( + architectures=["LlamaForCausalLM"], + hidden_size=5120, + intermediate_size=13824, + max_position_embeddings=4096, + num_attention_heads=40, + num_hidden_layers=40, + num_key_value_heads=40, + pad_token_id=0, + vocab_size=32000, + use_cache=True, + ) + model = LlamaForCausalLM(config) + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. 
+model.generate(**inputs, max_length=max_seq_length + 2) diff --git a/models/transformers/llama2_34b.py b/models/transformers/llama2_34b.py new file mode 100644 index 00000000..a272978e --- /dev/null +++ b/models/transformers/llama2_34b.py @@ -0,0 +1,48 @@ +# labels: name::llama2_34b author::transformers task::Generative_AI license::apache-2.0 +from turnkeyml.parser import parse +from transformers import LlamaConfig, LlamaForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length, model_path = parse( + ["pretrained", "batch_size", "max_seq_length", "model_path"] +) + +# Model and input configurations +if pretrained: + if not model_path: + raise ValueError( + "TurnkeyML does not include pretrained weights for LLaMA2 " + "because it has special licensing terms. See for details: " + "https://huggingface.co/docs/transformers/model_doc/llama2" + ) + + model = LlamaForCausalLM.from_pretrained(model_path) +else: + config = LlamaConfig( + architectures=["LlamaForCausalLM"], + hidden_size=8192, + intermediate_size=22016, + max_position_embeddings=4096, + num_attention_heads=64, + num_hidden_layers=48, + num_key_value_heads=8, + pad_token_id=0, + vocab_size=32000, + use_cache=True, + ) + model = LlamaForCausalLM(config) + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. +model.generate(**inputs, max_length=max_seq_length + 2) diff --git a/models/transformers/llama2_70b.py b/models/transformers/llama2_70b.py new file mode 100644 index 00000000..2ec45987 --- /dev/null +++ b/models/transformers/llama2_70b.py @@ -0,0 +1,48 @@ +# labels: name::llama2_70b author::transformers task::Generative_AI license::apache-2.0 +from turnkeyml.parser import parse +from transformers import LlamaConfig, LlamaForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length, model_path = parse( + ["pretrained", "batch_size", "max_seq_length", "model_path"] +) + +# Model and input configurations +if pretrained: + if not model_path: + raise ValueError( + "TurnkeyML does not include pretrained weights for LLaMA2 " + "because it has special licensing terms. See for details: " + "https://huggingface.co/docs/transformers/model_doc/llama2" + ) + + model = LlamaForCausalLM.from_pretrained(model_path) +else: + config = LlamaConfig( + architectures=["LlamaForCausalLM"], + hidden_size=8192, + intermediate_size=28672, + max_position_embeddings=4096, + num_attention_heads=64, + num_hidden_layers=80, + num_key_value_heads=8, + pad_token_id=0, + vocab_size=32000, + use_cache=True, + ) + model = LlamaForCausalLM(config) + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. 
+model.generate(**inputs, max_length=max_seq_length + 2) diff --git a/models/transformers/llama2_7b.py b/models/transformers/llama2_7b.py new file mode 100644 index 00000000..a4a92e13 --- /dev/null +++ b/models/transformers/llama2_7b.py @@ -0,0 +1,48 @@ +# labels: name::llama2_7b author::transformers task::Generative_AI license::apache-2.0 +from turnkeyml.parser import parse +from transformers import LlamaConfig, LlamaForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length, model_path = parse( + ["pretrained", "batch_size", "max_seq_length", "model_path"] +) + +# Model and input configurations +if pretrained: + if not model_path: + raise ValueError( + "TurnkeyML does not include pretrained weights for LLaMA2 " + "because it has special licensing terms. See for details: " + "https://huggingface.co/docs/transformers/model_doc/llama2" + ) + + model = LlamaForCausalLM.from_pretrained(model_path) +else: + config = LlamaConfig( + architectures=["LlamaForCausalLM"], + hidden_size=4096, + intermediate_size=11008, + max_position_embeddings=4096, + num_attention_heads=32, + num_hidden_layers=32, + num_key_value_heads=32, + pad_token_id=0, + vocab_size=32000, + use_cache=True, + ) + model = LlamaForCausalLM(config) + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. +model.generate(**inputs, max_length=max_seq_length + 2) diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py index 74e5bf17..fc0342ba 100644 --- a/src/turnkeyml/common/build.py +++ b/src/turnkeyml/common/build.py @@ -203,9 +203,18 @@ def get_shapes_and_dtypes(inputs: dict): (list, tuple), ): for v, i in zip(value, range(len(value))): - subkey = f"{key}[{i}]" - shapes[subkey] = np.array(v).shape - dtypes[subkey] = np.array(v).dtype.name + if isinstance(v, (list, tuple)): + # Handle nested lists/tuples, for example past_key_values + # in an LLM that has KV-caching enabled + for v2, i2 in zip(v, range(len(v))): + subsubkey = f"{key}[{i}][{i2}]" + shapes[subsubkey] = np.array(v2).shape + dtypes[subsubkey] = np.array(v2).dtype.name + else: + # Handle single list/tuple + subkey = f"{key}[{i}]" + shapes[subkey] = np.array(v).shape + dtypes[subkey] = np.array(v).dtype.name elif torch.is_tensor(value): shapes[key] = np.array(value.detach()).shape dtypes[key] = np.array(value.detach()).dtype.name diff --git a/src/turnkeyml/parser.py b/src/turnkeyml/parser.py index 1cb09855..0ba1b20a 100644 --- a/src/turnkeyml/parser.py +++ b/src/turnkeyml/parser.py @@ -69,6 +69,13 @@ def parse(valid_args: List[str]) -> List[Union[int, float]]: "in_channels": Arg("in_channels", default=1433, type=int), # Pretrained indicates whether pretrained weights should be used on the model "pretrained": Arg("pretrained", default=False, type=bool, action="store_true"), + # Path on the filesystem to a copy of the model + # Useful when models have special licensing terms and we can't directly + # provide access + "model_path": Arg("model_path", default=None, type=str), + # True: use an LLM's KV-cache (ie, token phase). False: disable the + # KV-cache (ie, prefill phase). 
+ "kvcache": Arg("kvcache", default=False, type=bool, action="store_true"), } # Create parser that accepts only the args received as part of valid_args From 034631ba687dae71bad5e457771960731a87b441 Mon Sep 17 00:00:00 2001 From: Ramakrishnan Sivakumar Date: Mon, 11 Mar 2024 12:15:30 -0700 Subject: [PATCH 03/11] add cache warmup for onnxrt (#133) --- src/turnkeyml/run/onnxrt/within_conda.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/turnkeyml/run/onnxrt/within_conda.py b/src/turnkeyml/run/onnxrt/within_conda.py index 27498a94..57434939 100644 --- a/src/turnkeyml/run/onnxrt/within_conda.py +++ b/src/turnkeyml/run/onnxrt/within_conda.py @@ -22,6 +22,15 @@ def run_ort_profile( input_feed = dummy_inputs(sess_input) output_name = onnx_session.get_outputs()[0].name + # Warm the CPU cache by executing a small number of inferences + # Stop after 100 iterations or 15s warming up + warmup_max_duration = 15 + warmup_start_time = time.time() + for _ in range(100): + onnx_session.run([output_name], input_feed) + if time.time() - warmup_start_time > warmup_max_duration: + break + for _ in range(iterations): start = time.perf_counter() onnx_session.run([output_name], input_feed) From 871d11df86987ef6b1ea6de30b1048c8620acb12 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:18:40 -0400 Subject: [PATCH 04/11] Allow BaseRT to benchmark onnx models with external data files (#134) --- src/turnkeyml/run/basert.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/turnkeyml/run/basert.py b/src/turnkeyml/run/basert.py index 468323a1..c399a9fb 100644 --- a/src/turnkeyml/run/basert.py +++ b/src/turnkeyml/run/basert.py @@ -193,6 +193,16 @@ def benchmark(self) -> MeasuredPerformance: os.makedirs(self.local_onnx_dir, exist_ok=True) shutil.copy(model_file, self.local_onnx_file) + # Copy any ONNX external data files present in the onnx build directory + onnx_build_dir = os.path.dirname(model_file) + external_data_files = [ + os.path.join(onnx_build_dir, f) + for f in os.listdir(onnx_build_dir) + if ".onnx" not in f + ] + for f in external_data_files: + shutil.copy(f, os.path.dirname(self.local_onnx_file)) + # Execute benchmarking in hardware if self.requires_docker: _check_docker_install() From cbd3abc173b4bc93d06131f6199d08ef2f6bc143 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:58:57 -0400 Subject: [PATCH 05/11] Add phi2 to transformers corpus (#135) --- models/transformers/phi2.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 models/transformers/phi2.py diff --git a/models/transformers/phi2.py b/models/transformers/phi2.py new file mode 100644 index 00000000..8db9e131 --- /dev/null +++ b/models/transformers/phi2.py @@ -0,0 +1,35 @@ +# labels: name::phi2 author::transformers task::Generative_AI license::mit +from turnkeyml.parser import parse +from transformers import AutoModelForCausalLM +import torch + +torch.manual_seed(0) + +# Parsing command-line arguments +pretrained, batch_size, max_seq_length = parse( + ["pretrained", "batch_size", "max_seq_length"] +) + +# Model and input configurations +if pretrained: + model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2") +else: + raise ValueError( + "This model is only supported with pretrained weights, try again with --pretrained" + ) + +# Make sure the user's sequence length fits within the model's maximum +assert max_seq_length <= 
model.config.max_position_embeddings + + +inputs = { + "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), + "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.float), +} + +# Call model +# Generate two tokens so that we can instrument both the prefill +# and token generation stages. +# The token generation stage is the invocation that has "past_key_values" +# in the input shape. +model.generate(**inputs, max_length=max_seq_length + 2) From 13451b942774b19879964746a735e462ecb0fbb6 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Thu, 14 Mar 2024 15:56:12 -0400 Subject: [PATCH 06/11] Save the exception into error_log when the subprocess is killed (#139) --- src/turnkeyml/cli/spawn.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/turnkeyml/cli/spawn.py b/src/turnkeyml/cli/spawn.py index 279976e3..ca2240cc 100644 --- a/src/turnkeyml/cli/spawn.py +++ b/src/turnkeyml/cli/spawn.py @@ -374,6 +374,9 @@ def run_turnkey( ): stats.save_model_eval_stat(key, evaluation_status.value) + # Save the exception into the error log stat + stats.save_model_eval_stat(filesystem.Keys.ERROR_LOG, str(e)) + except Exception as stats_exception: # pylint: disable=broad-except printing.log_info( "Stats file found, but unable to perform cleanup due to " From 4bfaa361241ebd44c80d5dcc45f15bf7bb84cca2 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Fri, 15 Mar 2024 09:41:13 -0400 Subject: [PATCH 07/11] Save device name stat prior to running stages (#138) --- .../runtime.py | 13 +++---- .../turnkeyml_plugin_example_rt/runtime.py | 6 +-- src/turnkeyml/analyze/script.py | 9 ++++- src/turnkeyml/common/filesystem.py | 4 +- src/turnkeyml/run/basert.py | 6 +-- src/turnkeyml/run/onnxrt/runtime.py | 12 ++++-- src/turnkeyml/run/tensorrt/runtime.py | 38 +++++++++++++------ src/turnkeyml/run/torchrt/runtime.py | 6 +-- src/turnkeyml/version.py | 2 +- 9 files changed, 59 insertions(+), 37 deletions(-) diff --git a/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/runtime.py b/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/runtime.py index ee10c557..db27f679 100644 --- a/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/runtime.py +++ b/examples/cli/plugins/example_combined/turnkeyml_plugin_example_combined/runtime.py @@ -28,7 +28,7 @@ def __init__( inputs=None, delay_before_benchmarking: str = "0", ): - # Custom runtime args always arive as strings, so we need to convert them + # Custom runtime args always arrive as strings, so we need to convert them # to the appropriate data type here self.delay_before_benchmarking = int(delay_before_benchmarking) @@ -86,7 +86,7 @@ def benchmark(self): return MeasuredPerformance( mean_latency=self.mean_latency, throughput=self.throughput, - device=self.device_name, + device=self.device_name(), device_type=self.device_type, runtime=self.runtime, runtime_version=self.runtime_version, @@ -111,9 +111,6 @@ def throughput(self) -> float: "Queried throughput before self.benchmark() was called" ) - @property - def device_name(self) -> str: - return ( - f"Device Family {self.device_type.family}, Device Part {self.device_type.part}, " - f"Device Configuration {self.device_type.config}" - ) + @staticmethod + def device_name() -> str: + return "Example Device" diff --git a/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py 
b/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py index 2dc117d6..8e91b633 100644 --- a/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py +++ b/examples/cli/plugins/example_rt/turnkeyml_plugin_example_rt/runtime.py @@ -57,7 +57,7 @@ def benchmark(self) -> MeasuredPerformance: return MeasuredPerformance( mean_latency=self.mean_latency, throughput=self.throughput, - device=self.device_name, + device=self.device_name(), device_type=self.device_type, runtime=self.runtime, runtime_version=self.runtime_version, @@ -82,6 +82,6 @@ def throughput(self) -> float: "Queried throughput before self.benchmark() was called" ) - @property - def device_name(self) -> str: + @staticmethod + def device_name() -> str: return "the x86 cpu of your dreams" diff --git a/src/turnkeyml/analyze/script.py b/src/turnkeyml/analyze/script.py index f75962f5..fd1f8054 100644 --- a/src/turnkeyml/analyze/script.py +++ b/src/turnkeyml/analyze/script.py @@ -371,6 +371,11 @@ def explore_invocation( fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.NOT_STARTED.value ) + # Save the device name that will be used for the benchmark + stats.save_model_eval_stat( + fs.Keys.DEVICE, runtime_info["RuntimeClass"].device_name() + ) + build_state = None perf = None benchmark_logfile_path = "" @@ -667,7 +672,7 @@ def explore_frame( # Starting in version 2.2.0, torch dynamo added wrappers to callbacks # while tracing frames, which conflicts with TurnkeML's analysis. Here, - # we supress errors caused by those callback wrappers and only raise an + # we suppress errors caused by those callback wrappers and only raise an # error if the compiled model actually tries to execute within TurnkeyML. td = torch._dynamo # pylint: disable=protected-access td.config.suppress_errors = True @@ -800,7 +805,7 @@ def forward_spy(*args, **kwargs): and invocation_info.is_target and (model_info.build_model) ): - # Disable all modifications while we evalute the model + # Disable all modifications while we evaluate the model # This is needed in case a tool called during evaluation wants to # trace the model. There are some scenarios (e.g., ipex.quantization.prepare), # that raise an exception when they encounter forward_spy() diff --git a/src/turnkeyml/common/filesystem.py b/src/turnkeyml/common/filesystem.py index ad54b073..b6b339c6 100644 --- a/src/turnkeyml/common/filesystem.py +++ b/src/turnkeyml/common/filesystem.py @@ -332,8 +332,10 @@ class Keys: PERFORMANCE = "performance" # Runtime used for the benchmark RUNTIME = "runtime" - # Device used for the benchmark + # Type of device used for the benchmark (e.g., "x86") DEVICE_TYPE = "device_type" + # Specific device used for the benchmark + DEVICE = "device" # Name of the model MODEL_NAME = "model_name" # References the per-evaluation stats section diff --git a/src/turnkeyml/run/basert.py b/src/turnkeyml/run/basert.py index c399a9fb..b797f99f 100644 --- a/src/turnkeyml/run/basert.py +++ b/src/turnkeyml/run/basert.py @@ -228,7 +228,7 @@ def benchmark(self) -> MeasuredPerformance: return MeasuredPerformance( mean_latency=self.mean_latency, throughput=self.throughput, - device=self.device_name, + device=self.device_name(), device_type=self.device_type, runtime=self.runtime, runtime_version=self.runtime_version, @@ -260,9 +260,9 @@ def throughput(self) -> float: Returns the throughput, in IPS, for the benchmarking run. 
""" - @property + @staticmethod @abstractmethod - def device_name(self) -> str: + def device_name() -> str: """ Returns the full device name for the device used in benchmarking. For example, a benchmark on a `x86` device might have a device name like diff --git a/src/turnkeyml/run/onnxrt/runtime.py b/src/turnkeyml/run/onnxrt/runtime.py index 08421220..f3782e61 100644 --- a/src/turnkeyml/run/onnxrt/runtime.py +++ b/src/turnkeyml/run/onnxrt/runtime.py @@ -5,7 +5,11 @@ import turnkeyml.common.exceptions as exp from turnkeyml.run.onnxrt.execute import ORT_VERSION from turnkeyml.common.filesystem import Stats -from turnkeyml.run.onnxrt.execute import create_conda_env, execute_benchmark +from turnkeyml.run.onnxrt.execute import ( + create_conda_env, + execute_benchmark, + get_cpu_specs, +) import turnkeyml.run.plugin_helpers as plugin_helpers @@ -95,6 +99,6 @@ def mean_latency(self): def throughput(self): return float(self._get_stat("Throughput")) - @property - def device_name(self): - return self._get_stat("CPU Name") + @staticmethod + def device_name() -> str: + return get_cpu_specs()["CPU Name"] diff --git a/src/turnkeyml/run/tensorrt/runtime.py b/src/turnkeyml/run/tensorrt/runtime.py index 83d2e219..e56896af 100644 --- a/src/turnkeyml/run/tensorrt/runtime.py +++ b/src/turnkeyml/run/tensorrt/runtime.py @@ -59,18 +59,24 @@ def __init__( requires_docker=True, ) + self.device_name = self._dynamic_device_name + def _setup(self) -> None: # Check if at least one NVIDIA GPU is available locally - result = subprocess.run( - ["nvidia-smi"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - check=False, - ) - - if "NVIDIA" not in result.stdout or result.returncode == 1: - msg = "No NVIDIA GPUs available on the local machine" + msg = "No NVIDIA GPUs available on the local machine" + try: + result = subprocess.run( + ["nvidia-smi"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + check=False, + ) + + if "NVIDIA" not in result.stdout or result.returncode == 1: + + raise exp.BenchmarkException(msg) + except FileNotFoundError: raise exp.BenchmarkException(msg) def _execute( @@ -143,6 +149,14 @@ def mean_latency(self): def throughput(self): return float(self._get_stat("Throughput").split(" ")[0]) - @property - def device_name(self): + def _dynamic_device_name(self): + # Return the specific Nvidia GPU, which is only known + # after we invoke the TensorRT docker return self._get_stat("Selected Device") + + @staticmethod + def device_name(): + # Return a generic response, since we haven't invoked + # the docker yet. At instantiation time, this method + # is replaced by the dynamic version. 
+ return "Nvidia GPU" diff --git a/src/turnkeyml/run/torchrt/runtime.py b/src/turnkeyml/run/torchrt/runtime.py index 9a24de4f..d604e670 100644 --- a/src/turnkeyml/run/torchrt/runtime.py +++ b/src/turnkeyml/run/torchrt/runtime.py @@ -124,7 +124,7 @@ def _calculate_performance( return MeasuredPerformance( mean_latency=self.mean_latency, throughput=self.throughput, - device=self.device_name, + device=self.device_name(), device_type=self.device_type, runtime=self.runtime, runtime_version=self.runtime_version, @@ -218,6 +218,6 @@ def throughput(self) -> float: "Queried throughput before self.benchmark() was called" ) - @property - def device_name(self) -> str: + @staticmethod + def device_name() -> str: return get_cpu_specs()["CPU Name"] diff --git a/src/turnkeyml/version.py b/src/turnkeyml/version.py index 0b2f79db..8c0d5d5b 100644 --- a/src/turnkeyml/version.py +++ b/src/turnkeyml/version.py @@ -1 +1 @@ -__version__ = "1.1.3" +__version__ = "2.0.0" From c0986fbfe51c59da1b00e37ce462ccea1b038690 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:29:54 -0400 Subject: [PATCH 08/11] Update ignition.py (#142) Fix a typo Signed-off-by: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> --- src/turnkeyml/build/ignition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py index ebf43958..a06d8ddc 100644 --- a/src/turnkeyml/build/ignition.py +++ b/src/turnkeyml/build/ignition.py @@ -117,7 +117,7 @@ def validate_cached_model( msg = ( f"Your build {state.config.build_name} was previously built against " f"turnkey version {state.turnkey_version}, " - f"however you are now using onxxflow version {turnkey_version}. The previous build is " + f"however you are now using turnkey version {turnkey_version}. The previous build is " f"incompatible with this version of turnkey, as indicated by the {out_of_date} " "version number changing. See **docs/versioning.md** for details." ) From 40c27b06aeebc9151b78850275e60e63baeb108f Mon Sep 17 00:00:00 2001 From: Daniel Holanda Date: Wed, 20 Mar 2024 12:45:01 -0700 Subject: [PATCH 09/11] Ensure child subprocesses are killed on timeout (#144) * Ensure child subprocesses are killed on timeout * Suggested Changes --- src/turnkeyml/cli/spawn.py | 18 ++++++++++++------ src/turnkeyml/run/benchmark_build.py | 6 +++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/turnkeyml/cli/spawn.py b/src/turnkeyml/cli/spawn.py index ca2240cc..3dc34014 100644 --- a/src/turnkeyml/cli/spawn.py +++ b/src/turnkeyml/cli/spawn.py @@ -13,6 +13,7 @@ import getpass from typing import List, Optional, Dict, Union from enum import Enum +import psutil import turnkeyml.common.filesystem as filesystem import turnkeyml.common.printing as printing import turnkeyml.common.build as build @@ -22,7 +23,7 @@ class WatchdogTimer(Thread): """ - Run *callback* in *timeout* seconds unless the timer is restarted. + Kill process in *timeout* seconds unless the timer is restarted. This is needed because Popen natively supports streaming output to the terminal, checking that output, and timeouts--but not all 3 at the same time. @@ -31,11 +32,10 @@ class WatchdogTimer(Thread): to stream and check output. 
""" - def __init__(self, timeout, callback, *args, timer=monotonic, **kwargs): + def __init__(self, timeout, pid, timer=monotonic, **kwargs): super().__init__(**kwargs) self.timeout = timeout - self.callback = callback - self.args = args + self.pid = pid self.timer = timer self.cancelled = Event() self.blocked = Lock() @@ -50,7 +50,7 @@ def run(self): with self.blocked: if self.deadline <= self.timer() and not self.cancelled.is_set(): self.timeout_reached = True - return self.callback(*self.args) + return self.kill_process_tree() def restart(self): self.deadline = self.timer() + self.timeout @@ -58,6 +58,12 @@ def restart(self): def cancel(self): self.cancelled.set() + def kill_process_tree(self): + parent = psutil.Process(self.pid) + for child in parent.children(recursive=True): + child.kill() + parent.kill() + def parse_evaluation_id(line: str, current_value: str) -> Optional[str]: """ @@ -284,7 +290,7 @@ def run_turnkey( # Create our own watchdog timer in a thread # This is needed because the `for line in p.stdout` is a blocking # call that is incompatible with Popen's native timeout features - watchdog = WatchdogTimer(timeout, callback=p.kill, daemon=True) + watchdog = WatchdogTimer(timeout, p.pid) watchdog.start() # Print the subprocess's output to the command line as it comes in, diff --git a/src/turnkeyml/run/benchmark_build.py b/src/turnkeyml/run/benchmark_build.py index 24ff6c0b..1c22165b 100644 --- a/src/turnkeyml/run/benchmark_build.py +++ b/src/turnkeyml/run/benchmark_build.py @@ -1,6 +1,7 @@ from typing import Dict, Optional import multiprocessing import traceback +import psutil import turnkeyml.common.build as build import turnkeyml.common.exceptions as exp import turnkeyml.common.filesystem as fs @@ -275,7 +276,10 @@ def benchmark_cache( if p.is_alive(): # Handle the timeout, which is needed if the process is still alive after # waiting `timeout` seconds - p.terminate() + parent = psutil.Process(p.pid) + for child in parent.children(recursive=True): + child.kill() + parent.kill() stats.save_model_eval_stat( fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.TIMEOUT.value ) From 5c59aa169b2cd7dd3494956ad407fdc5cc501df5 Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Wed, 20 Mar 2024 18:56:51 -0400 Subject: [PATCH 10/11] Rev version to v2.0.1 (#145) Signed-off-by: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> --- src/turnkeyml/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/turnkeyml/version.py b/src/turnkeyml/version.py index 8c0d5d5b..159d48b8 100644 --- a/src/turnkeyml/version.py +++ b/src/turnkeyml/version.py @@ -1 +1 @@ -__version__ = "2.0.0" +__version__ = "2.0.1" From b814d0b78c6e89da73df56d8fa65add6f8bb475c Mon Sep 17 00:00:00 2001 From: Jeremy Fowers <80718789+jeremyfowers@users.noreply.github.com> Date: Fri, 22 Mar 2024 19:01:35 -0400 Subject: [PATCH 11/11] Do not set benchmarking error status when a benchmark is skipped (#146) --- src/turnkeyml/run/benchmark_build.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/turnkeyml/run/benchmark_build.py b/src/turnkeyml/run/benchmark_build.py index 1c22165b..27b058b0 100644 --- a/src/turnkeyml/run/benchmark_build.py +++ b/src/turnkeyml/run/benchmark_build.py @@ -22,6 +22,12 @@ def tqdm(iterable, **kwargs): # pylint: disable=unused-argument return iterable +class SkippedBenchmark(Exception): + """ + Indicates that a benchmark was skipped + """ + + class 
Process(multiprocessing.Process): """ Standardized way to make it possible to catch exceptions from a @@ -81,7 +87,7 @@ def benchmark_build( state = build.load_state(cache_dir, build_name) if state.build_status != build.FunctionStatus.SUCCESSFUL: - raise exp.BenchmarkException( + raise SkippedBenchmark( "Only successful builds can be benchmarked with this " f"function, however selected build at {build_name} " f"has state: {state.build_status}" @@ -99,7 +105,7 @@ def benchmark_build( except KeyError as e: # User should never get this far without hitting an actionable error message, # but let's raise an exception just in case. - raise exp.BenchmarkException( + raise SkippedBenchmark( f"Selected runtime is not supported: {selected_runtime}" ) from e @@ -295,9 +301,15 @@ def benchmark_cache( # is not able to conduct any more benchmarking. In this case the program # should exit and the user should follow the suggestion in the exception # message (e.g., restart their computer). - stats.save_model_eval_stat( - fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.ERROR.value - ) + + if isinstance(p.exception[0], SkippedBenchmark): + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.NOT_STARTED.value + ) + else: + stats.save_model_eval_stat( + fs.Keys.BENCHMARK_STATUS, build.FunctionStatus.ERROR.value + ) if isinstance(p.exception[0], exp.HardwareError): stats.save_model_eval_stat(fs.Keys.ERROR_LOG, p.exception[1])
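
Note on the pattern used above: the status handling in PATCH 11 inspects p.exception after the child process finishes, which presumes a multiprocessing.Process subclass that forwards exceptions raised in the child back to the parent over a pipe. The sketch below illustrates that pattern only; the class and attribute names are assumptions for illustration and are not the actual turnkeyml implementation.

# Illustrative sketch (hypothetical names): forwarding child-process
# exceptions to the parent so the caller can branch on the exception type,
# e.g. treating a skipped benchmark differently from a hard error.
import multiprocessing
import traceback


class ExceptionCapturingProcess(multiprocessing.Process):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One-way channel for the child to report a failure to the parent
        self._parent_conn, self._child_conn = multiprocessing.Pipe()
        self._exception = None

    def run(self):
        try:
            # Execute the target function exactly as multiprocessing would
            super().run()
        except Exception as e:  # pylint: disable=broad-except
            # Send both the exception object and the formatted traceback,
            # so the parent can log a readable error message
            self._child_conn.send((e, traceback.format_exc()))

    @property
    def exception(self):
        # Non-blocking check: only receive if the child reported a failure
        if self._parent_conn.poll():
            self._exception = self._parent_conn.recv()
        return self._exception


# Usage sketch: after p.join(), p.exception is either None or a
# (exception_instance, traceback_string) tuple, so the parent can do
# isinstance checks such as the SkippedBenchmark / HardwareError branches
# shown in benchmark_cache() above.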