From d92cfba21657515516162c79803149834f995e8d Mon Sep 17 00:00:00 2001 From: tgerdes Date: Wed, 27 Mar 2024 17:07:33 -0500 Subject: [PATCH 1/6] Rough draft support for custom intervals --- ...y_parameter_search_run_config_generator.py | 8 ++++ .../perf_analyzer_config_generator.py | 10 ++++- ..._concurrency_sweep_run_config_generator.py | 14 +++++- .../generate/quick_run_config_generator.py | 20 ++++++--- .../objects/config_model_profile_spec.py | 2 +- model_analyzer/perf_analyzer/perf_config.py | 1 + model_analyzer/plots/detailed_plot.py | 44 ++++++++++++------- model_analyzer/record/metrics_manager.py | 6 ++- model_analyzer/result/parameter_search.py | 11 +++-- 9 files changed, 83 insertions(+), 33 deletions(-) diff --git a/model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py b/model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py index b0a217274..0f3ac5588 100755 --- a/model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +++ b/model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py @@ -132,9 +132,11 @@ def _binary_search_over_top_results(self) -> Generator[RunConfig, None, None]: for result in top_results: run_config = deepcopy(result.run_config()) model_parameters = self._get_model_parameters(model_name) + perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name) parameter_search = ParameterSearch( config=self._config, model_parameters=model_parameters, + perf_analyzer_flags=perf_analyzer_flags, skip_parameter_sweep=True, ) for parameter in parameter_search.search_parameters(): @@ -151,6 +153,12 @@ def _get_model_parameters(self, model_name: str) -> Dict: return {} + def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict: + for model in self._models: + if model_name == model.model_name(): + return model.perf_analyzer_flags() + return {} + def _set_parameter( self, run_config: RunConfig, model_parameters: Dict, parameter: int ) -> RunConfig: diff --git a/model_analyzer/config/generate/perf_analyzer_config_generator.py b/model_analyzer/config/generate/perf_analyzer_config_generator.py index 985032564..a60381af8 100755 --- a/model_analyzer/config/generate/perf_analyzer_config_generator.py +++ b/model_analyzer/config/generate/perf_analyzer_config_generator.py @@ -169,10 +169,13 @@ def set_last_results( self._parameter_results.extend(measurement) def _create_parameter_list(self) -> List[int]: + # FIXME 1772 comment this # The two possible parameters are request rate or concurrency # Concurrency is the default and will be used unless the user specifies # request rate, either as a model parameter or a config option - if self._cli_config.is_request_rate_specified(self._model_parameters): + if "request-intervals" in self._perf_analyzer_flags: + return [self._perf_analyzer_flags["request-intervals"]] + elif self._cli_config.is_request_rate_specified(self._model_parameters): return self._create_request_rate_list() else: return self._create_concurrency_list() @@ -207,6 +210,7 @@ def _generate_perf_configs(self) -> None: for params in utils.generate_parameter_combinations( perf_config_non_parameter_values ): + # FIXME 1772 variable name configs_with_concurrency = [] for parameter in self._parameters: new_perf_config = PerfAnalyzerConfig() @@ -217,7 +221,9 @@ def _generate_perf_configs(self) -> None: new_perf_config.update_config(params) - if self._cli_config.is_request_rate_specified(self._model_parameters): + if "request-intervals" 
in self._perf_analyzer_flags: + pass + elif self._cli_config.is_request_rate_specified(self._model_parameters): new_perf_config.update_config({"request-rate-range": parameter}) else: new_perf_config.update_config({"concurrency-range": parameter}) diff --git a/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py b/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py index b7adbef97..fa72ae5f0 100755 --- a/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +++ b/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py @@ -16,7 +16,7 @@ import logging from copy import deepcopy -from typing import Generator, List, Optional +from typing import Dict, Generator, List, Optional from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec from model_analyzer.config.generate.model_variant_name_manager import ( @@ -139,7 +139,10 @@ def _sweep_concurrency_over_top_results(self) -> Generator[RunConfig, None, None for result in top_results: run_config = deepcopy(result.run_config()) - parameter_search = ParameterSearch(self._config) + perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name) + parameter_search = ParameterSearch( + self._config, perf_analyzer_flags=perf_analyzer_flags + ) for concurrency in parameter_search.search_parameters(): run_config = self._set_concurrency(run_config, concurrency) yield run_config @@ -151,3 +154,10 @@ def _set_concurrency(self, run_config: RunConfig, concurrency: int) -> RunConfig perf_config.update_config({"concurrency-range": concurrency}) return run_config + + # FIXME 1772 -- this method is duplicated. Maybe it should be a static method in ModelProfileSpec? + def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict: + for model in self._models: + if model_name == model.model_name(): + return model.perf_analyzer_flags() + return {} diff --git a/model_analyzer/config/generate/quick_run_config_generator.py b/model_analyzer/config/generate/quick_run_config_generator.py index 1704d03ee..d1a7d1e73 100755 --- a/model_analyzer/config/generate/quick_run_config_generator.py +++ b/model_analyzer/config/generate/quick_run_config_generator.py @@ -512,9 +512,14 @@ def _get_next_perf_analyzer_config( perf_analyzer_config.update_config_from_profile_config(model_name, self._config) - concurrency = self._calculate_concurrency(dimension_values) + # FIXME 1772 -- would be cleaner if PerfAnalyzerConfig() initialized bs:1 + perf_config_params = {"batch-size": 1} + + # FIXME 1772 -- use new method in perf_config + if not "request-intervals" in model.perf_analyzer_flags(): + concurrency = self._calculate_concurrency(dimension_values) + perf_config_params["concurrency-range"] = concurrency - perf_config_params = {"batch-size": 1, "concurrency-range": concurrency} perf_analyzer_config.update_config(perf_config_params) perf_analyzer_config.update_config(model.perf_analyzer_flags()) @@ -705,12 +710,13 @@ def _create_default_perf_analyzer_config( model_config.get_field("name"), self._config ) - default_concurrency = self._calculate_default_concurrency(model_config) + # FIXME 1772 see above comments + perf_config_params = {"batch-size": 1} + + if not "request-intervals" in model.perf_analyzer_flags(): + default_concurrency = self._calculate_default_concurrency(model_config) + perf_config_params["concurrency-range"] = default_concurrency - perf_config_params = { - "batch-size": DEFAULT_BATCH_SIZES, - "concurrency-range": 
default_concurrency, - } default_perf_analyzer_config.update_config(perf_config_params) default_perf_analyzer_config.update_config(model.perf_analyzer_flags()) diff --git a/model_analyzer/config/input/objects/config_model_profile_spec.py b/model_analyzer/config/input/objects/config_model_profile_spec.py index d45e68d41..7b1416441 100755 --- a/model_analyzer/config/input/objects/config_model_profile_spec.py +++ b/model_analyzer/config/input/objects/config_model_profile_spec.py @@ -32,7 +32,7 @@ def __init__( weighting=None, parameters=None, model_config_parameters=None, - perf_analyzer_flags=None, + perf_analyzer_flags={}, triton_server_flags=None, triton_server_environment=None, triton_docker_args=None, diff --git a/model_analyzer/perf_analyzer/perf_config.py b/model_analyzer/perf_analyzer/perf_config.py index e9160a44a..ae1293bd0 100755 --- a/model_analyzer/perf_analyzer/perf_config.py +++ b/model_analyzer/perf_analyzer/perf_config.py @@ -273,6 +273,7 @@ def extract_model_specific_parameters(self): "batch-size": self._options["-b"], "concurrency-range": self._args["concurrency-range"], "request-rate-range": self._args["request-rate-range"], + "request-intervals": self._args["request-intervals"], } @classmethod diff --git a/model_analyzer/plots/detailed_plot.py b/model_analyzer/plots/detailed_plot.py index d33b03afc..9198071e1 100755 --- a/model_analyzer/plots/detailed_plot.py +++ b/model_analyzer/plots/detailed_plot.py @@ -89,7 +89,6 @@ def __init__(self, name, title, bar_width=0.5): self._fig.set_figheight(8) self._fig.set_figwidth(12) - self._ax_latency.set_xlabel("Concurrent Client Requests") self._ax_latency.set_ylabel(latency_axis_label) self._ax_throughput.set_ylabel(throughput_axis_label) @@ -144,6 +143,19 @@ def add_run_config_measurement(self, run_config_measurement): ] ) + # FIXME 1772 -- clean this up?? 
+ if ( + "request-intervals" in run_config_measurement.model_specific_pa_params()[0] + and run_config_measurement.model_specific_pa_params()[0][ + "request-intervals" + ] + ): + self._data["request-intervals"].append( + run_config_measurement.model_specific_pa_params()[0][ + "request-intervals" + ] + ) + self._data["perf_throughput"].append( run_config_measurement.get_non_gpu_metric_value(tag="perf_throughput") ) @@ -164,25 +176,28 @@ def plot_data(self): on this plot's Axes object """ - # Need to change the default x-axis plot title for request rates - if "request_rate" in self._data and self._data["request_rate"][0]: + # Update the x-axis plot title + if "request-intervals" in self._data and self._data["request-intervals"][0]: + self._ax_latency.set_xlabel("Request Intervals File") + sort_indices_key = "request-intervals" + elif "request_rate" in self._data and self._data["request_rate"][0]: self._ax_latency.set_xlabel("Client Request Rate") - - # Sort the data by request rate or concurrency - if "request_rate" in self._data and self._data["request_rate"][0]: - sort_indices = list( - zip(*sorted(enumerate(self._data["request_rate"]), key=lambda x: x[1])) - )[0] + sort_indices_key = "request_rate" else: - sort_indices = list( - zip(*sorted(enumerate(self._data["concurrency"]), key=lambda x: x[1])) - )[0] + self._ax_latency.set_xlabel("Concurrent Client Requests") + sort_indices_key = "concurrency" + + sort_indices = list( + zip(*sorted(enumerate(self._data[sort_indices_key]), key=lambda x: x[1])) + )[0] sorted_data = { key: [data_list[i] for i in sort_indices] for key, data_list in self._data.items() } + sorted_data["indices"] = list(map(str, sorted_data[sort_indices_key])) + # Plot latency breakdown bars labels = dict( zip( @@ -197,11 +212,6 @@ def plot_data(self): ) bottoms = None - if "request_rate" in self._data: - sorted_data["indices"] = list(map(str, sorted_data["request_rate"])) - else: - sorted_data["indices"] = list(map(str, sorted_data["concurrency"])) - # Plot latency breakdown with concurrency casted as string to make uniform x for metric, label in labels.items(): self._ax_latency.bar( diff --git a/model_analyzer/record/metrics_manager.py b/model_analyzer/record/metrics_manager.py index 581cae88b..5a76b43aa 100755 --- a/model_analyzer/record/metrics_manager.py +++ b/model_analyzer/record/metrics_manager.py @@ -753,7 +753,11 @@ def _get_triton_metrics_gpus(self): def _print_run_config_info(self, run_config): for model_run_config in run_config.model_run_configs(): perf_config = model_run_config.perf_config() - if perf_config["request-rate-range"]: + if perf_config["request-intervals"]: + logger.info( + f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-intervals={perf_config['request-intervals']}" + ) + elif perf_config["request-rate-range"]: logger.info( f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}" ) diff --git a/model_analyzer/result/parameter_search.py b/model_analyzer/result/parameter_search.py index e716a5b7d..cf87b6cd7 100755 --- a/model_analyzer/result/parameter_search.py +++ b/model_analyzer/result/parameter_search.py @@ -37,6 +37,8 @@ class ParameterSearch: - Will sweep from by powers of two from min to max parameter - If the user specifies a constraint, the algorithm will perform a binary search around the boundary if the constraint is violated + - Will not sweep at all if custom stimulus is provided by 
the user (via the + "request-intervals" perf analyzer flag) Invariant: It is necessary for the user to add new measurements as they are taken """ @@ -45,6 +47,7 @@ def __init__( self, config: ConfigCommandProfile, model_parameters: dict = {}, + perf_analyzer_flags: dict = {}, skip_parameter_sweep: bool = False, ) -> None: """ @@ -59,6 +62,7 @@ def __init__( self._parameter_is_request_rate = config.is_request_rate_specified( model_parameters ) + self._inference_load_is_custom = "request-intervals" in perf_analyzer_flags if self._parameter_is_request_rate: self._min_parameter_index = int( @@ -98,10 +102,11 @@ def search_parameters(self) -> Generator[int, None, None]: a binary parameter search around the point where the constraint violated """ - yield from self._perform_parameter_sweep() + if not self._inference_load_is_custom: + yield from self._perform_parameter_sweep() - if self._was_constraint_violated(): - yield from self._perform_binary_parameter_search() + if self._was_constraint_violated(): + yield from self._perform_binary_parameter_search() def _perform_parameter_sweep(self) -> Generator[int, None, None]: for parameter in ( From 8c0c70e17426984f603867361bc58bcb84ec75be Mon Sep 17 00:00:00 2001 From: tgerdes Date: Wed, 27 Mar 2024 17:50:12 -0500 Subject: [PATCH 2/6] default batch size of 1 so it doesn't need to be specified every time --- .../generate/quick_run_config_generator.py | 16 ++++------------ model_analyzer/perf_analyzer/perf_config.py | 4 +++- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/model_analyzer/config/generate/quick_run_config_generator.py b/model_analyzer/config/generate/quick_run_config_generator.py index d1a7d1e73..0ea631eb4 100755 --- a/model_analyzer/config/generate/quick_run_config_generator.py +++ b/model_analyzer/config/generate/quick_run_config_generator.py @@ -512,15 +512,11 @@ def _get_next_perf_analyzer_config( perf_analyzer_config.update_config_from_profile_config(model_name, self._config) - # FIXME 1772 -- would be cleaner if PerfAnalyzerConfig() initialized bs:1 - perf_config_params = {"batch-size": 1} - # FIXME 1772 -- use new method in perf_config if not "request-intervals" in model.perf_analyzer_flags(): concurrency = self._calculate_concurrency(dimension_values) - perf_config_params["concurrency-range"] = concurrency - - perf_analyzer_config.update_config(perf_config_params) + perf_config_params = {"concurrency-range": concurrency} + perf_analyzer_config.update_config(perf_config_params) perf_analyzer_config.update_config(model.perf_analyzer_flags()) return perf_analyzer_config @@ -710,14 +706,10 @@ def _create_default_perf_analyzer_config( model_config.get_field("name"), self._config ) - # FIXME 1772 see above comments - perf_config_params = {"batch-size": 1} - if not "request-intervals" in model.perf_analyzer_flags(): default_concurrency = self._calculate_default_concurrency(model_config) - perf_config_params["concurrency-range"] = default_concurrency - - default_perf_analyzer_config.update_config(perf_config_params) + perf_config_params = {"concurrency-range": default_concurrency} + default_perf_analyzer_config.update_config(perf_config_params) default_perf_analyzer_config.update_config(model.perf_analyzer_flags()) diff --git a/model_analyzer/perf_analyzer/perf_config.py b/model_analyzer/perf_analyzer/perf_config.py index ae1293bd0..6fcbbf44c 100755 --- a/model_analyzer/perf_analyzer/perf_config.py +++ b/model_analyzer/perf_analyzer/perf_config.py @@ -106,7 +106,9 @@ def __init__(self): self._options = { "-m": None, "-x": 
None, - "-b": None, + # Default to batch size of 1. This would be handled by PA if unspecified, + # but we want to be explicit so we can properly print/track values + "-b": 1, "-u": None, "-i": None, "-f": None, From 0ebae086f9d874a640ae8c4784d2ddfc3c600076 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Wed, 27 Mar 2024 18:05:35 -0500 Subject: [PATCH 3/6] fix variable name --- .../config/generate/perf_analyzer_config_generator.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/model_analyzer/config/generate/perf_analyzer_config_generator.py b/model_analyzer/config/generate/perf_analyzer_config_generator.py index a60381af8..dae2a6c3d 100755 --- a/model_analyzer/config/generate/perf_analyzer_config_generator.py +++ b/model_analyzer/config/generate/perf_analyzer_config_generator.py @@ -210,8 +210,7 @@ def _generate_perf_configs(self) -> None: for params in utils.generate_parameter_combinations( perf_config_non_parameter_values ): - # FIXME 1772 variable name - configs_with_concurrency = [] + configs_with_inference_load = [] for parameter in self._parameters: new_perf_config = PerfAnalyzerConfig() @@ -231,8 +230,8 @@ def _generate_perf_configs(self) -> None: # User provided flags can override the search parameters new_perf_config.update_config(self._perf_analyzer_flags) - configs_with_concurrency.append(new_perf_config) - self._configs.append(configs_with_concurrency) + configs_with_inference_load.append(new_perf_config) + self._configs.append(configs_with_inference_load) def _create_non_parameter_perf_config_values(self) -> dict: perf_config_values = { From 71e7a901f6a64aade045ee5fb3b321c3b6e2caca Mon Sep 17 00:00:00 2001 From: tgerdes Date: Sat, 30 Mar 2024 08:20:40 -0500 Subject: [PATCH 4/6] Clean up code around load args --- .../config/generate/model_profile_spec.py | 10 +++ .../generate/quick_run_config_generator.py | 3 +- model_analyzer/perf_analyzer/perf_config.py | 17 +++++ model_analyzer/plots/detailed_plot.py | 65 ++++++++----------- 4 files changed, 55 insertions(+), 40 deletions(-) diff --git a/model_analyzer/config/generate/model_profile_spec.py b/model_analyzer/config/generate/model_profile_spec.py index 7dfb95649..00077bac4 100755 --- a/model_analyzer/config/generate/model_profile_spec.py +++ b/model_analyzer/config/generate/model_profile_spec.py @@ -22,6 +22,7 @@ ConfigModelProfileSpec, ) from model_analyzer.device.gpu_device import GPUDevice +from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig from model_analyzer.triton.client.client import TritonClient from model_analyzer.triton.model.model_config import ModelConfig @@ -72,3 +73,12 @@ def supports_dynamic_batching(self) -> bool: def is_ensemble(self) -> bool: """Returns true if the model is an ensemble""" return "ensemble_scheduling" in self._default_model_config + + def is_load_specified(self) -> bool: + """ + Returns true if the model's PA config has specified any of the + inference load args (such as concurrency). 
Else returns false + """ + load_args = PerfAnalyzerConfig.get_inference_load_args() + pa_flags = self.perf_analyzer_flags() + return any(e in pa_flags for e in load_args) diff --git a/model_analyzer/config/generate/quick_run_config_generator.py b/model_analyzer/config/generate/quick_run_config_generator.py index 0ea631eb4..73f8a20ff 100755 --- a/model_analyzer/config/generate/quick_run_config_generator.py +++ b/model_analyzer/config/generate/quick_run_config_generator.py @@ -512,8 +512,7 @@ def _get_next_perf_analyzer_config( perf_analyzer_config.update_config_from_profile_config(model_name, self._config) - # FIXME 1772 -- use new method in perf_config - if not "request-intervals" in model.perf_analyzer_flags(): + if not model.is_load_specified(): concurrency = self._calculate_concurrency(dimension_values) perf_config_params = {"concurrency-range": concurrency} perf_analyzer_config.update_config(perf_config_params) diff --git a/model_analyzer/perf_analyzer/perf_config.py b/model_analyzer/perf_analyzer/perf_config.py index 6fcbbf44c..71bb7bd0b 100755 --- a/model_analyzer/perf_analyzer/perf_config.py +++ b/model_analyzer/perf_analyzer/perf_config.py @@ -96,6 +96,13 @@ class PerfAnalyzerConfig: "collect-metrics", ] + # Only one of these args can be sent to PA, as each one controls the inference load in a different way + inference_load_args = [ + "concurrency-range", + "request-rate-range", + "request-intervals", + ] + def __init__(self): """ Construct a PerfAnalyzerConfig @@ -160,6 +167,16 @@ def additive_keys(cls): return cls.additive_args[:] + @classmethod + def get_inference_load_args(cls): + """ + Returns + ------- + list of str + The Perf Analyzer args that control the inference load + """ + return cls.inference_load_args + def update_config(self, params=None): """ Allows setting values from a params dict diff --git a/model_analyzer/plots/detailed_plot.py b/model_analyzer/plots/detailed_plot.py index 9198071e1..42589ce6f 100755 --- a/model_analyzer/plots/detailed_plot.py +++ b/model_analyzer/plots/detailed_plot.py @@ -22,6 +22,7 @@ from matplotlib import patches as mpatches from model_analyzer.constants import LOGGER_NAME +from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig from model_analyzer.record.metrics_manager import MetricsManager logging.getLogger("matplotlib").setLevel(logging.ERROR) @@ -119,42 +120,15 @@ def add_run_config_measurement(self, run_config_measurement): """ # TODO-TMA-568: This needs to be updated because there will be multiple model configs - if ( - "concurrency-range" in run_config_measurement.model_specific_pa_params()[0] - and run_config_measurement.model_specific_pa_params()[0][ - "concurrency-range" - ] - ): - self._data["concurrency"].append( - run_config_measurement.model_specific_pa_params()[0][ - "concurrency-range" - ] - ) - - if ( - "request-rate-range" in run_config_measurement.model_specific_pa_params()[0] - and run_config_measurement.model_specific_pa_params()[0][ - "request-rate-range" - ] - ): - self._data["request_rate"].append( - run_config_measurement.model_specific_pa_params()[0][ - "request-rate-range" - ] - ) - - # FIXME 1772 -- clean this up?? 
- if ( - "request-intervals" in run_config_measurement.model_specific_pa_params()[0] - and run_config_measurement.model_specific_pa_params()[0][ - "request-intervals" - ] - ): - self._data["request-intervals"].append( - run_config_measurement.model_specific_pa_params()[0][ - "request-intervals" - ] - ) + for load_arg in PerfAnalyzerConfig.get_inference_load_args(): + if ( + load_arg in run_config_measurement.model_specific_pa_params()[0] + and run_config_measurement.model_specific_pa_params()[0][load_arg] + ): + data_key = self._get_data_key_from_load_arg(load_arg) + self._data[data_key].append( + run_config_measurement.model_specific_pa_params()[0][load_arg] + ) self._data["perf_throughput"].append( run_config_measurement.get_non_gpu_metric_value(tag="perf_throughput") @@ -177,9 +151,9 @@ def plot_data(self): """ # Update the x-axis plot title - if "request-intervals" in self._data and self._data["request-intervals"][0]: + if "request_intervals" in self._data and self._data["request_intervals"][0]: self._ax_latency.set_xlabel("Request Intervals File") - sort_indices_key = "request-intervals" + sort_indices_key = "request_intervals" elif "request_rate" in self._data and self._data["request_rate"][0]: self._ax_latency.set_xlabel("Client Request Rate") sort_indices_key = "request_rate" @@ -274,3 +248,18 @@ def save(self, filepath): """ self._fig.savefig(os.path.join(filepath, self._name)) + + def _get_data_key_from_load_arg(self, load_arg): + """ + Gets the key into _data corresponding with the input load arg + + For example, the load arg "request-rate-range" has the key "request_rate" + """ + # Check if '-range' exists at the end of the input string and remove it + if load_arg.endswith("-range"): + load_arg = load_arg[:-6] + + # Replace any '-' with '_' in the remaining string + data_key = load_arg.replace("-", "_") + + return data_key From 6b3a1999711d67c60c3d5317b12017680d4be214 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Sat, 30 Mar 2024 08:39:50 -0500 Subject: [PATCH 5/6] fix unit tests --- tests/test_perf_analyzer.py | 14 ++++++++------ tests/test_results.py | 18 +++++++++--------- tests/test_run_config.py | 2 +- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/tests/test_perf_analyzer.py b/tests/test_perf_analyzer.py index e95f0d4a1..488cc71e3 100755 --- a/tests/test_perf_analyzer.py +++ b/tests/test_perf_analyzer.py @@ -132,7 +132,7 @@ def test_perf_analyzer_config(self): def test_perf_analyzer_boolean_args(self): """Test that only positive boolean args get added""" - expected_cli_str = "-m test_model --measurement-interval=1000 --binary-search --measurement-request-count=50" + expected_cli_str = "-m test_model -b 1 --measurement-interval=1000 --binary-search --measurement-request-count=50" self.config["async"] = "False" self.config["binary-search"] = "True" @@ -141,7 +141,7 @@ def test_perf_analyzer_boolean_args(self): def test_perf_analyzer_additive_args(self): shape = ["name1:1,2,3", "name2:4,5,6"] - expected_cli_str = "-m test_model --measurement-interval=1000 --shape=name1:1,2,3 --shape=name2:4,5,6 --measurement-request-count=50" + expected_cli_str = "-m test_model -b 1 --measurement-interval=1000 --shape=name1:1,2,3 --shape=name2:4,5,6 --measurement-request-count=50" self.config["shape"] = shape[:] @@ -149,7 +149,7 @@ def test_perf_analyzer_additive_args(self): self.assertEqual(self.config.to_cli_string(), expected_cli_str) shape = "name1:1,2,3" - expected_cli_str = "-m test_model --measurement-interval=1000 --shape=name1:1,2,3 --measurement-request-count=50" + 
expected_cli_str = "-m test_model -b 1 --measurement-interval=1000 --shape=name1:1,2,3 --measurement-request-count=50" self.config["shape"] = shape self.assertEqual(self.config.to_cli_string(), expected_cli_str) @@ -177,7 +177,7 @@ def test_perf_analyzer_ssl_args(self): ssl_https_private_key_file = "h" expected_cli_str = ( - f"-m test_model --measurement-interval=1000 --measurement-request-count=50 --ssl-grpc-use-ssl " + f"-m test_model -b 1 --measurement-interval=1000 --measurement-request-count=50 --ssl-grpc-use-ssl " f"--ssl-grpc-root-certifications-file=a --ssl-grpc-private-key-file=b --ssl-grpc-certificate-chain-file=c " f"--ssl-https-verify-peer=1 --ssl-https-verify-host=2 --ssl-https-ca-certificates-file=d --ssl-https-client-certificate-type=e " f"--ssl-https-client-certificate-file=f --ssl-https-private-key-type=g --ssl-https-private-key-file=h" @@ -241,7 +241,7 @@ def test_perf_analyzer_ssl_args(self): self.config["ssl-grpc-use-ssl"] = ssl_grpc_use_ssl self.assertEqual(self.config["ssl-grpc-use-ssl"], ssl_grpc_use_ssl) expected_cli_str = ( - f"-m test_model --measurement-interval=1000 --measurement-request-count=50 " + f"-m test_model -b 1 --measurement-interval=1000 --measurement-request-count=50 " f"--ssl-grpc-root-certifications-file=a --ssl-grpc-private-key-file=b --ssl-grpc-certificate-chain-file=c " f"--ssl-https-verify-peer=1 --ssl-https-verify-host=2 --ssl-https-ca-certificates-file=d --ssl-https-client-certificate-type=e " f"--ssl-https-client-certificate-file=f --ssl-https-private-key-type=g --ssl-https-private-key-file=h" @@ -651,6 +651,8 @@ def test_get_cmd_single_model(self): "perf_analyzer", "-m", "test_model", + "-b", + "1", "--measurement-interval", "1000", "--measurement-request-count", @@ -688,7 +690,7 @@ def test_get_cmd_multi_model(self): expected_cmd = [ 'mpiexec', '--allow-run-as-root', '--tag-output', '-n', '1', 'perf_analyzer', '--enable-mpi', - '-m', 'MultiModel1', + '-m', 'MultiModel1', '-b', '1', '--measurement-interval', '1000', '--measurement-request-count', '50', ':', '-n', '1', 'perf_analyzer', '--enable-mpi', diff --git a/tests/test_results.py b/tests/test_results.py index 2fc45162b..5bdf6b11a 100755 --- a/tests/test_results.py +++ b/tests/test_results.py @@ -179,23 +179,23 @@ def _construct_results(self): self._measurements = [] self._measurements.append( { - "model_config_0 -m key_A": "1", - "model_config_0 -m key_B": "2", - "model_config_0 -m key_C": "3", + "model_config_0 -m key_A -b 1": "1", + "model_config_0 -m key_B -b 1": "2", + "model_config_0 -m key_C -b 1": "3", } ) self._measurements.append( { - "model_config_1 -m key_D": "4", - "model_config_1 -m key_E": "5", - "model_config_1 -m key_F": "6", + "model_config_1 -m key_D -b 1": "4", + "model_config_1 -m key_E -b 1": "5", + "model_config_1 -m key_F -b 1": "6", } ) self._measurements.append( { - "model_config_2 -m key_G": "7", - "model_config_2 -m key_H": "8", - "model_config_2 -m key_I": "9", + "model_config_2 -m key_G -b 1": "7", + "model_config_2 -m key_H -b 1": "8", + "model_config_2 -m key_I -b 1": "9", } ) diff --git a/tests/test_run_config.py b/tests/test_run_config.py index 03dc53c9f..4aaea7e63 100755 --- a/tests/test_run_config.py +++ b/tests/test_run_config.py @@ -91,7 +91,7 @@ def test_representation_mrc_removal(self): "model1", ModelConfigVariant(MagicMock(), "model1_config_0"), pc ) - expected_representation = "model1_config_0 -m TestModel1" + expected_representation = "model1_config_0 -m TestModel1 -b 1" self.assertEqual(mrc.representation(), expected_representation) def 
test_cpu_only(self): From 57735dce26d0823667948eaba93cf808af4034fa Mon Sep 17 00:00:00 2001 From: tgerdes Date: Sat, 30 Mar 2024 16:15:56 -0500 Subject: [PATCH 6/6] update comments --- .../config/generate/perf_analyzer_config_generator.py | 7 +++---- .../quick_plus_concurrency_sweep_run_config_generator.py | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/model_analyzer/config/generate/perf_analyzer_config_generator.py b/model_analyzer/config/generate/perf_analyzer_config_generator.py index dae2a6c3d..ceabd0361 100755 --- a/model_analyzer/config/generate/perf_analyzer_config_generator.py +++ b/model_analyzer/config/generate/perf_analyzer_config_generator.py @@ -169,10 +169,9 @@ def set_last_results( self._parameter_results.extend(measurement) def _create_parameter_list(self) -> List[int]: - # FIXME 1772 comment this - # The two possible parameters are request rate or concurrency - # Concurrency is the default and will be used unless the user specifies - # request rate, either as a model parameter or a config option + # Determines the inference load (concurrency or request-rate or request-intervals) + # and creates the list of values to use. If nothing is specified by the user, then + # concurrency will be used. if "request-intervals" in self._perf_analyzer_flags: return [self._perf_analyzer_flags["request-intervals"]] elif self._cli_config.is_request_rate_specified(self._model_parameters): diff --git a/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py b/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py index fa72ae5f0..1a0c50cd7 100755 --- a/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +++ b/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py @@ -155,7 +155,6 @@ def _set_concurrency(self, run_config: RunConfig, concurrency: int) -> RunConfig return run_config - # FIXME 1772 -- this method is duplicated. Maybe it should be a static method in ModelProfileSpec? def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict: for model in self._models: if model_name == model.model_name():
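
Taken together, the series hinges on one check: if a model's perf_analyzer_flags already name an inference-load argument, Model Analyzer stops generating its own concurrency or request-rate values and passes the user's flag (for example "request-intervals") through to perf_analyzer unchanged. Below is a minimal, self-contained sketch of that check; the standalone is_load_specified helper and INFERENCE_LOAD_ARGS constant are illustrative stand-ins for the patched ModelProfileSpec.is_load_specified() and PerfAnalyzerConfig.inference_load_args, and "custom_intervals.txt" is a hypothetical intervals file, not one referenced by the patches.

    # Sketch only -- mirrors the load-arg detection added in patch 4, outside the real modules.
    # Any one of these perf_analyzer flags means the user controls the inference load,
    # so the automatic concurrency sweep is skipped.
    INFERENCE_LOAD_ARGS = ["concurrency-range", "request-rate-range", "request-intervals"]


    def is_load_specified(perf_analyzer_flags: dict) -> bool:
        """Return True if the user's flags already pick an inference load."""
        return any(arg in perf_analyzer_flags for arg in INFERENCE_LOAD_ARGS)


    # A custom intervals file suppresses the generated sweep...
    assert is_load_specified({"request-intervals": "custom_intervals.txt"})
    # ...while unrelated flags leave the default concurrency sweep in place.
    assert not is_load_specified({"shape": "INPUT0:1,3"})

In user terms this means a profile config that sets request-intervals under a model's perf_analyzer_flags runs each configuration once with that stimulus file, instead of sweeping concurrency by powers of two, and the detailed plots label their x-axis "Request Intervals File" accordingly.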