Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for custom intervals #814

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,11 @@ def _binary_search_over_top_results(self) -> Generator[RunConfig, None, None]:
for result in top_results:
run_config = deepcopy(result.run_config())
model_parameters = self._get_model_parameters(model_name)
perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name)
parameter_search = ParameterSearch(
config=self._config,
model_parameters=model_parameters,
perf_analyzer_flags=perf_analyzer_flags,
skip_parameter_sweep=True,
)
for parameter in parameter_search.search_parameters():
Expand All @@ -151,6 +153,12 @@ def _get_model_parameters(self, model_name: str) -> Dict:

return {}

def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict:
for model in self._models:
if model_name == model.model_name():
return model.perf_analyzer_flags()
return {}

def _set_parameter(
self, run_config: RunConfig, model_parameters: Dict, parameter: int
) -> RunConfig:
Expand Down
10 changes: 10 additions & 0 deletions model_analyzer/config/generate/model_profile_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
ConfigModelProfileSpec,
)
from model_analyzer.device.gpu_device import GPUDevice
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
from model_analyzer.triton.client.client import TritonClient
from model_analyzer.triton.model.model_config import ModelConfig

Expand Down Expand Up @@ -72,3 +73,12 @@ def supports_dynamic_batching(self) -> bool:
def is_ensemble(self) -> bool:
"""Returns true if the model is an ensemble"""
return "ensemble_scheduling" in self._default_model_config

def is_load_specified(self) -> bool:
    """
    Reports whether the model's PA config names at least one of the
    inference load args (such as concurrency).

    Returns
    -------
    bool
        True if any arg from PerfAnalyzerConfig.get_inference_load_args()
        is present in this model's perf analyzer flags, False otherwise
    """
    candidate_args = PerfAnalyzerConfig.get_inference_load_args()
    user_flags = self.perf_analyzer_flags()

    for load_arg in candidate_args:
        if load_arg in user_flags:
            return True
    return False
20 changes: 12 additions & 8 deletions model_analyzer/config/generate/perf_analyzer_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,12 @@ def set_last_results(
self._parameter_results.extend(measurement)

def _create_parameter_list(self) -> List[int]:
# The two possible parameters are request rate or concurrency
# Concurrency is the default and will be used unless the user specifies
# request rate, either as a model parameter or a config option
if self._cli_config.is_request_rate_specified(self._model_parameters):
# Determines the inference load (concurrency or request-rate or request-intervals)
# and creates the list of values to use. If nothing is specified by the user, then
# concurrency will be used.
if "request-intervals" in self._perf_analyzer_flags:
tgerdesnv marked this conversation as resolved.
Show resolved Hide resolved
return [self._perf_analyzer_flags["request-intervals"]]
elif self._cli_config.is_request_rate_specified(self._model_parameters):
return self._create_request_rate_list()
else:
return self._create_concurrency_list()
Expand Down Expand Up @@ -207,7 +209,7 @@ def _generate_perf_configs(self) -> None:
for params in utils.generate_parameter_combinations(
perf_config_non_parameter_values
):
configs_with_concurrency = []
configs_with_inference_load = []
for parameter in self._parameters:
new_perf_config = PerfAnalyzerConfig()

Expand All @@ -217,16 +219,18 @@ def _generate_perf_configs(self) -> None:

new_perf_config.update_config(params)

if self._cli_config.is_request_rate_specified(self._model_parameters):
if "request-intervals" in self._perf_analyzer_flags:
pass
elif self._cli_config.is_request_rate_specified(self._model_parameters):
new_perf_config.update_config({"request-rate-range": parameter})
else:
new_perf_config.update_config({"concurrency-range": parameter})

# User provided flags can override the search parameters
new_perf_config.update_config(self._perf_analyzer_flags)

configs_with_concurrency.append(new_perf_config)
self._configs.append(configs_with_concurrency)
configs_with_inference_load.append(new_perf_config)
self._configs.append(configs_with_inference_load)

def _create_non_parameter_perf_config_values(self) -> dict:
perf_config_values = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import logging
from copy import deepcopy
from typing import Generator, List, Optional
from typing import Dict, Generator, List, Optional

from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
from model_analyzer.config.generate.model_variant_name_manager import (
Expand Down Expand Up @@ -139,7 +139,10 @@ def _sweep_concurrency_over_top_results(self) -> Generator[RunConfig, None, None

for result in top_results:
run_config = deepcopy(result.run_config())
parameter_search = ParameterSearch(self._config)
perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name)
parameter_search = ParameterSearch(
self._config, perf_analyzer_flags=perf_analyzer_flags
)
for concurrency in parameter_search.search_parameters():
run_config = self._set_concurrency(run_config, concurrency)
yield run_config
Expand All @@ -151,3 +154,9 @@ def _set_concurrency(self, run_config: RunConfig, concurrency: int) -> RunConfig
perf_config.update_config({"concurrency-range": concurrency})

return run_config

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is duplicated (also in brute search). Maybe this should be a static method in ModelProfileSpec?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is still duplicated. I didn't clean it up yet. Both classes implement ConfigGeneratorInterface. You could create a base class with common code if you want.

def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict:
Fixed Show fixed Hide fixed
for model in self._models:
if model_name == model.model_name():
return model.perf_analyzer_flags()
return {}
19 changes: 8 additions & 11 deletions model_analyzer/config/generate/quick_run_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,10 +512,10 @@ def _get_next_perf_analyzer_config(

perf_analyzer_config.update_config_from_profile_config(model_name, self._config)

concurrency = self._calculate_concurrency(dimension_values)

perf_config_params = {"batch-size": 1, "concurrency-range": concurrency}
perf_analyzer_config.update_config(perf_config_params)
if not model.is_load_specified():
concurrency = self._calculate_concurrency(dimension_values)
perf_config_params = {"concurrency-range": concurrency}
perf_analyzer_config.update_config(perf_config_params)

perf_analyzer_config.update_config(model.perf_analyzer_flags())
return perf_analyzer_config
Expand Down Expand Up @@ -705,13 +705,10 @@ def _create_default_perf_analyzer_config(
model_config.get_field("name"), self._config
)

default_concurrency = self._calculate_default_concurrency(model_config)

perf_config_params = {
"batch-size": DEFAULT_BATCH_SIZES,
"concurrency-range": default_concurrency,
}
default_perf_analyzer_config.update_config(perf_config_params)
if not "request-intervals" in model.perf_analyzer_flags():
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing a self-review: I'm wondering if this condition should be `if not model.is_load_specified():`, just like line 515.

default_concurrency = self._calculate_default_concurrency(model_config)
perf_config_params = {"concurrency-range": default_concurrency}
default_perf_analyzer_config.update_config(perf_config_params)

default_perf_analyzer_config.update_config(model.perf_analyzer_flags())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(
weighting=None,
parameters=None,
model_config_parameters=None,
perf_analyzer_flags=None,
perf_analyzer_flags={},
triton_server_flags=None,
triton_server_environment=None,
triton_docker_args=None,
Expand Down
22 changes: 21 additions & 1 deletion model_analyzer/perf_analyzer/perf_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ class PerfAnalyzerConfig:
"collect-metrics",
]

# Only one of these args can be sent to PA, as each one controls the inference load in a different way
inference_load_args = [
"concurrency-range",
"request-rate-range",
"request-intervals",
]

def __init__(self):
"""
Construct a PerfAnalyzerConfig
Expand All @@ -106,7 +113,9 @@ def __init__(self):
self._options = {
"-m": None,
"-x": None,
"-b": None,
# Default to batch size of 1. This would be handled by PA if unspecified,
# but we want to be explicit so we can properly print/track values
"-b": 1,
"-u": None,
"-i": None,
"-f": None,
Expand Down Expand Up @@ -158,6 +167,16 @@ def additive_keys(cls):

return cls.additive_args[:]

@classmethod
def get_inference_load_args(cls):
    """
    Gets the Perf Analyzer args that control the inference load.
    Only one of these args can be sent to PA at a time.

    Returns
    -------
    list of str
        A copy of the Perf Analyzer args that control the inference load
    """
    # Return a copy (as additive_keys() does) so that a caller mutating
    # the result cannot corrupt the shared class-level list
    return cls.inference_load_args[:]

def update_config(self, params=None):
"""
Allows setting values from a params dict
Expand Down Expand Up @@ -273,6 +292,7 @@ def extract_model_specific_parameters(self):
"batch-size": self._options["-b"],
"concurrency-range": self._args["concurrency-range"],
"request-rate-range": self._args["request-rate-range"],
"request-intervals": self._args["request-intervals"],
}

@classmethod
Expand Down
79 changes: 39 additions & 40 deletions model_analyzer/plots/detailed_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from matplotlib import patches as mpatches

from model_analyzer.constants import LOGGER_NAME
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
from model_analyzer.record.metrics_manager import MetricsManager

logging.getLogger("matplotlib").setLevel(logging.ERROR)
Expand Down Expand Up @@ -89,7 +90,6 @@ def __init__(self, name, title, bar_width=0.5):
self._fig.set_figheight(8)
self._fig.set_figwidth(12)

self._ax_latency.set_xlabel("Concurrent Client Requests")
self._ax_latency.set_ylabel(latency_axis_label)
self._ax_throughput.set_ylabel(throughput_axis_label)

Expand Down Expand Up @@ -120,29 +120,15 @@ def add_run_config_measurement(self, run_config_measurement):
"""

# TODO-TMA-568: This needs to be updated because there will be multiple model configs
if (
"concurrency-range" in run_config_measurement.model_specific_pa_params()[0]
and run_config_measurement.model_specific_pa_params()[0][
"concurrency-range"
]
):
self._data["concurrency"].append(
run_config_measurement.model_specific_pa_params()[0][
"concurrency-range"
]
)

if (
"request-rate-range" in run_config_measurement.model_specific_pa_params()[0]
and run_config_measurement.model_specific_pa_params()[0][
"request-rate-range"
]
):
self._data["request_rate"].append(
run_config_measurement.model_specific_pa_params()[0][
"request-rate-range"
]
)
for load_arg in PerfAnalyzerConfig.get_inference_load_args():
if (
load_arg in run_config_measurement.model_specific_pa_params()[0]
and run_config_measurement.model_specific_pa_params()[0][load_arg]
):
data_key = self._get_data_key_from_load_arg(load_arg)
self._data[data_key].append(
run_config_measurement.model_specific_pa_params()[0][load_arg]
)

self._data["perf_throughput"].append(
run_config_measurement.get_non_gpu_metric_value(tag="perf_throughput")
Expand All @@ -164,25 +150,28 @@ def plot_data(self):
on this plot's Axes object
"""

# Need to change the default x-axis plot title for request rates
if "request_rate" in self._data and self._data["request_rate"][0]:
# Update the x-axis plot title
if "request_intervals" in self._data and self._data["request_intervals"][0]:
self._ax_latency.set_xlabel("Request Intervals File")
sort_indices_key = "request_intervals"
elif "request_rate" in self._data and self._data["request_rate"][0]:
self._ax_latency.set_xlabel("Client Request Rate")

# Sort the data by request rate or concurrency
if "request_rate" in self._data and self._data["request_rate"][0]:
sort_indices = list(
zip(*sorted(enumerate(self._data["request_rate"]), key=lambda x: x[1]))
)[0]
sort_indices_key = "request_rate"
else:
sort_indices = list(
zip(*sorted(enumerate(self._data["concurrency"]), key=lambda x: x[1]))
)[0]
self._ax_latency.set_xlabel("Concurrent Client Requests")
sort_indices_key = "concurrency"

sort_indices = list(
zip(*sorted(enumerate(self._data[sort_indices_key]), key=lambda x: x[1]))
)[0]

sorted_data = {
key: [data_list[i] for i in sort_indices]
for key, data_list in self._data.items()
}

sorted_data["indices"] = list(map(str, sorted_data[sort_indices_key]))

# Plot latency breakdown bars
labels = dict(
zip(
Expand All @@ -197,11 +186,6 @@ def plot_data(self):
)
bottoms = None

if "request_rate" in self._data:
sorted_data["indices"] = list(map(str, sorted_data["request_rate"]))
else:
sorted_data["indices"] = list(map(str, sorted_data["concurrency"]))

# Plot latency breakdown with the inference load values cast to strings to make a uniform x-axis
for metric, label in labels.items():
self._ax_latency.bar(
Expand Down Expand Up @@ -264,3 +248,18 @@ def save(self, filepath):
"""

self._fig.savefig(os.path.join(filepath, self._name))

def _get_data_key_from_load_arg(self, load_arg):
"""
Gets the key into _data corresponding with the input load arg

For example, the load arg "request-rate-range" has the key "request_rate"
"""
# Check if '-range' exists at the end of the input string and remove it
if load_arg.endswith("-range"):
load_arg = load_arg[:-6]

# Replace any '-' with '_' in the remaining string
data_key = load_arg.replace("-", "_")

return data_key
6 changes: 5 additions & 1 deletion model_analyzer/record/metrics_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,11 @@ def _get_triton_metrics_gpus(self):
def _print_run_config_info(self, run_config):
for model_run_config in run_config.model_run_configs():
perf_config = model_run_config.perf_config()
if perf_config["request-rate-range"]:
if perf_config["request-intervals"]:
logger.info(
f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-intervals={perf_config['request-intervals']}"
)
elif perf_config["request-rate-range"]:
logger.info(
f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}"
)
Expand Down
11 changes: 8 additions & 3 deletions model_analyzer/result/parameter_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class ParameterSearch:
- Will sweep by powers of two from min to max parameter
- If the user specifies a constraint, the algorithm will perform a binary search
around the boundary if the constraint is violated
- Will not sweep at all if custom stimulus is provided by the user (via the
"request-intervals" perf analyzer flag)

Invariant: It is necessary for the user to add new measurements as they are taken
"""
Expand All @@ -45,6 +47,7 @@ def __init__(
self,
config: ConfigCommandProfile,
model_parameters: dict = {},
perf_analyzer_flags: dict = {},
skip_parameter_sweep: bool = False,
) -> None:
"""
Expand All @@ -59,6 +62,7 @@ def __init__(
self._parameter_is_request_rate = config.is_request_rate_specified(
model_parameters
)
self._inference_load_is_custom = "request-intervals" in perf_analyzer_flags

if self._parameter_is_request_rate:
self._min_parameter_index = int(
Expand Down Expand Up @@ -98,10 +102,11 @@ def search_parameters(self) -> Generator[int, None, None]:
a binary parameter search around the point where the constraint
was violated
"""
yield from self._perform_parameter_sweep()
if not self._inference_load_is_custom:
yield from self._perform_parameter_sweep()

if self._was_constraint_violated():
yield from self._perform_binary_parameter_search()
if self._was_constraint_violated():
yield from self._perform_binary_parameter_search()

def _perform_parameter_sweep(self) -> Generator[int, None, None]:
for parameter in (
Expand Down
Loading
Loading