Merge branch 'main' into feature/damian/llm-harness
dbogunowicz authored Nov 3, 2023
2 parents edead87 + 02348f2 commit cea51e1
Showing 3 changed files with 63 additions and 43 deletions.
65 changes: 40 additions & 25 deletions src/deepsparse/benchmark/benchmark_model.py
@@ -364,37 +364,52 @@ def benchmark_model(
     model_path = model_to_path(model_path)

     cached_outputs = None
-    if not disable_kv_cache_overrides and has_model_kv_cache(model_path):
-        if not sequence_length:
-            sequence_length = infer_sequence_length(model_path)
-        if input_ids_length > sequence_length:
+    if has_model_kv_cache(model_path):
+        if not disable_kv_cache_overrides:
+            if not sequence_length:
+                sequence_length = infer_sequence_length(model_path)
+            if input_ids_length > sequence_length:
+                raise ValueError(
+                    f"input_ids_length: {input_ids_length} "
+                    f"must be less than sequence_length: {sequence_length}"
+                )
+
+            _LOGGER.info(
+                "Found model with KV cache support. "
+                "Benchmarking the autoregressive model with "
+                f"input_ids_length: {input_ids_length} and "
+                f"sequence length: {sequence_length}."
+            )
+
+            (
+                model_path,
+                cached_outputs,
+                _,
+            ) = overwrite_onnx_model_inputs_for_kv_cache_models(
+                onnx_file_path=model_path,
+                input_ids_length=input_ids_length,
+                sequence_length=sequence_length,
+                batch_size=batch_size,
+            )
+
+        if internal_kv_cache and engine != DEEPSPARSE_ENGINE:
             raise ValueError(
-                f"input_ids_length: {input_ids_length} "
-                f"must be less than sequence_length: {sequence_length}"
+                "Attempting to benchmark a model using engine: "
+                f"{engine} and internal_kv_cache set to True. "
+                "The use of internal_kv_cache is only "
+                f"supported for the engine: {DEEPSPARSE_ENGINE}. "
+                f"To disable the use of the internal_kv_cache, "
+                f"set the flag: --no-internal-kv-cache"
             )

         _LOGGER.info(
-            "Found model with KV cache support. "
-            "Benchmarking the autoregressive model with "
-            f"input_ids_length: {input_ids_length} and "
-            f"sequence length: {sequence_length}."
+            f"Benchmarking Engine: {engine} with "
+            f"{'internal' if internal_kv_cache else 'external'} KV cache management"
         )
-
-        model_path, cached_outs, _ = overwrite_onnx_model_inputs_for_kv_cache_models(
-            onnx_file_path=model_path,
-            input_ids_length=input_ids_length,
-            sequence_length=sequence_length,
-            batch_size=batch_size,
-        )
-
-        if internal_kv_cache:
-            _LOGGER.info(
-                "Benchmarking DeepSparse Engine with internal KV Cache management"
-            )
-            cached_outputs = cached_outs
+    else:
+        input_ids_length = None
+        sequence_length = None
+        internal_kv_cache = False

     num_streams = parse_num_streams(num_streams, num_cores, scenario)
@@ -407,7 +422,7 @@ def benchmark_model(
             num_streams=num_streams,
             scheduler=scheduler,
             input_shapes=input_shapes,
-            cached_outputs=cached_outputs,
+            cached_outputs=cached_outputs if internal_kv_cache else None,
         )
     elif engine == ORT_ENGINE:
         model = ORTEngine(
@@ -450,7 +465,7 @@ def benchmark_model(
         seconds_to_run=time,
         seconds_to_warmup=warmup_time,
         num_streams=num_streams,
-        internal_kv_cache=cached_outputs,
+        internal_kv_cache=internal_kv_cache,
     )
     export_dict = {
         "engine": str(model),
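Taken together, the benchmark_model changes do three things: gate all KV-cache handling on has_model_kv_cache, reject internal_kv_cache up front for any engine other than DeepSparse, and neutralize the sequence settings for models without a cache. A minimal standalone sketch of the resulting control flow (illustrative only, not part of the commit; the value of DEEPSPARSE_ENGINE is an assumption):

# Illustrative sketch only -- mirrors the merged gating logic, not the real API.
DEEPSPARSE_ENGINE = "deepsparse"  # assumed value of the engine constant

def resolve_kv_cache_settings(
    has_kv_cache: bool,
    internal_kv_cache: bool,
    engine: str,
    input_ids_length: int,
    sequence_length: int,
):
    """Return (input_ids_length, sequence_length, internal_kv_cache) after gating."""
    if not has_kv_cache:
        # No KV cache: neutralize all autoregressive settings,
        # as the new else-branch does.
        return None, None, False
    if internal_kv_cache and engine != DEEPSPARSE_ENGINE:
        # Internal cache management is only supported by the DeepSparse engine.
        raise ValueError(
            f"internal_kv_cache is only supported for engine: {DEEPSPARSE_ENGINE}"
        )
    if input_ids_length > sequence_length:
        raise ValueError(
            f"input_ids_length: {input_ids_length} "
            f"must be less than sequence_length: {sequence_length}"
        )
    return input_ids_length, sequence_length, internal_kv_cache

print(resolve_kv_cache_settings(False, True, "onnxruntime", 1, 2048))  # (None, None, False)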
26 changes: 8 additions & 18 deletions src/deepsparse/transformers/helpers.py
@@ -28,8 +28,11 @@
 from onnx import ModelProto

 from deepsparse.log import get_main_logger
-from deepsparse.utils.onnx import _MODEL_DIR_ONNX_NAME, truncate_onnx_model
-from sparsezoo import Model
+from deepsparse.utils.onnx import (
+    _MODEL_DIR_ONNX_NAME,
+    model_to_path,
+    truncate_onnx_model,
+)
 from sparsezoo.utils import save_onnx
@@ -71,22 +74,9 @@ def get_deployment_path(model_path: str) -> Tuple[str, str]:
         )
         return model_path, os.path.join(model_path, _MODEL_DIR_ONNX_NAME)

-    elif model_path.startswith("zoo:"):
-        zoo_model = Model(model_path)
-        deployment_path = zoo_model.deployment_directory_path
-        return deployment_path, os.path.join(deployment_path, _MODEL_DIR_ONNX_NAME)
-    elif model_path.startswith("hf:"):
-        from huggingface_hub import snapshot_download
-
-        deployment_path = snapshot_download(repo_id=model_path.replace("hf:", "", 1))
-        onnx_path = os.path.join(deployment_path, _MODEL_DIR_ONNX_NAME)
-        if not os.path.isfile(onnx_path):
-            raise ValueError(
-                f"{_MODEL_DIR_ONNX_NAME} not found in transformers model directory "
-                f"{deployment_path}. Be sure that an export of the model is written to "
-                f"{onnx_path}"
-            )
-        return deployment_path, onnx_path
+    elif model_path.startswith("zoo:") or model_path.startswith("hf:"):
+        onnx_model_path = model_to_path(model_path)
+        return os.path.dirname(onnx_model_path), onnx_model_path
     else:
         raise ValueError(
             f"model_path {model_path} is not a valid file, directory, or zoo stub"
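After this change both stub schemes funnel through model_to_path, so get_deployment_path only needs the directory of the resolved ONNX file. A hedged usage sketch (the stub strings below are placeholders, not verified models; substitute real SparseZoo or Hugging Face stubs):

# Hypothetical usage; both stubs are placeholders and will not resolve as written.
from deepsparse.transformers.helpers import get_deployment_path

for stub in ("zoo:some/model/stub", "hf:some-org/some-onnx-repo"):
    deployment_dir, onnx_file = get_deployment_path(stub)
    print(deployment_dir, onnx_file)  # onnx_file ends in model.onnx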
15 changes: 15 additions & 0 deletions src/deepsparse/utils/onnx.py
@@ -138,6 +138,21 @@ def model_to_path(model: Union[str, Model, File]) -> str:
         # get the downloaded_path -- will auto download if not on local system
         model = model.path

+    if isinstance(model, str) and model.startswith("hf:"):
+        # load Hugging Face model from stub
+        from huggingface_hub import snapshot_download
+
+        deployment_path = snapshot_download(repo_id=model.replace("hf:", "", 1))
+        onnx_path = os.path.join(deployment_path, _MODEL_DIR_ONNX_NAME)
+        if not os.path.isfile(onnx_path):
+            raise ValueError(
+                f"Could not find the ONNX model file '{_MODEL_DIR_ONNX_NAME}' in the "
+                f"Hugging Face Hub repository located at {deployment_path}. Please "
+                f"ensure the model has been correctly exported to ONNX format and "
+                f"exists in the repository."
+            )
+        return onnx_path
+
     if not isinstance(model, str):
         raise ValueError("unsupported type for model: {}".format(type(model)))
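The new branch makes any "hf:"-prefixed string resolve to a local model.onnx fetched with huggingface_hub's snapshot_download. A sketch, assuming huggingface_hub is installed and the repository (a placeholder id here) actually contains a model.onnx export:

# Hypothetical usage; the repo id is a placeholder.
from deepsparse.utils.onnx import model_to_path

onnx_path = model_to_path("hf:some-org/some-onnx-model")
print(onnx_path)  # .../<snapshot-dir>/model.onnx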
