This repository has been archived by the owner on Apr 8, 2024. It is now read-only.

Commit: add onnx inference
majercakdavid committed Oct 17, 2022
1 parent 84154e0 commit b449733
Showing 4 changed files with 25 additions and 29 deletions.
4 changes: 4 additions & 0 deletions conf/experiments/benchmarks/lightgbm-inferencing.yaml
@@ -14,6 +14,9 @@ experiment:
name: "lightgbm_inferencing_dev"
description: "something interesting to say about this"

run:
submit: true

lightgbm_inferencing_config:
# name of your particular benchmark
benchmark_name: "benchmark-inferencing-20211216.1" # need to be provided at runtime!
@@ -80,6 +83,7 @@ lightgbm_inferencing_config:
- framework: lightgbm_c_api # v3.2.1 with C API prediction
build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
- framework: lightgbm_ray # ray implementation
- framework: lightgbm_ort # ONNX RT implementation
- framework: treelite_python # v1.3.0

# to use custom_win_cli, you need to compile your own binaries
4 changes: 4 additions & 0 deletions conf/experiments/lightgbm-inferencing.yaml
@@ -14,6 +14,9 @@ experiment:
name: "lightgbm_inferencing_dev"
description: "something interesting to say about this"

run:
submit: true

lightgbm_inferencing_config:
# name of your particular benchmark
benchmark_name: "benchmark-dev" # override this with a unique name
@@ -36,6 +39,7 @@ lightgbm_inferencing_config:
- framework: lightgbm_c_api # v3.2.1 with C API prediction
build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
- framework: lightgbm_ray # ray implementation
- framework: lightgbm_ort # ONNX RT implementation
- framework: treelite_python # v1.3.0

# to use custom_win_cli, you need to compile your own binaries
6 changes: 6 additions & 0 deletions src/scripts/inferencing/lightgbm_ort/default.dockerfile
@@ -3,6 +3,8 @@ LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1"

ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm

ARG lightgbm_version="3.3.0"

# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
python=3.8 pip=20.2.4
@@ -25,3 +27,7 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \
'onnxruntime==1.12.1' \
'onnxmltools==1.11.1' \
'onnxconverter-common==1.12.2'

# install lightgbm and pin protobuf
RUN pip install lightgbm==${lightgbm_version} && \
    pip install 'protobuf==3.20'
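
As a quick sanity check of the resulting image, the pinned stack can be imported and its versions printed. This is a minimal sketch, not part of the commit, and it assumes the pins above (lightgbm 3.3.0 via ${lightgbm_version}, onnxmltools 1.11.1, onnxruntime 1.12.1) are unchanged.

# Minimal sanity check for the pinned inference stack (run inside the image).
# Expected versions are the ones pinned in this dockerfile; adjust if the pins change.
import lightgbm
import onnxmltools
import onnxruntime

print(lightgbm.__version__)     # expected 3.3.0 (from ${lightgbm_version})
print(onnxmltools.__version__)  # expected 1.11.1
print(onnxruntime.__version__)  # expected 1.12.1
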
40 changes: 11 additions & 29 deletions src/scripts/inferencing/lightgbm_ort/score.py
@@ -148,7 +148,7 @@ def run(self, args, logger, metrics_logger, unknown_args):
onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types)

logger.info(f"Creating inference session")
sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString(), sess)
sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString())

# capture data shape as property
metrics_logger.set_properties(
@@ -157,45 +157,27 @@
)

logger.info(f"Running .predict()")

batch_start_time = time.monotonic()
sessionml.run(
predictions_array = sessionml.run(
[sessionml.get_outputs()[0].name],
{sessionml.get_inputs()[0].name: inference_raw_data},
)
)[0]
prediction_time = time.monotonic() - batch_start_time
metrics_logger.log_metric("time_inferencing", prediction_time)

# TODO: Discuss alternative?
# onnxml_time = timeit.timeit(
# "sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data} )",
# lambda: sessionml.run(
# [sessionml.get_outputs()[0].name],
# {sessionml.get_inputs()[0].name: inference_raw_data},
# ),
# number=10,
# setup="from __main__ import sessionml, inference_raw_data",
# )
onnxml_time = timeit.timeit(
lambda: sessionml.run(
[sessionml.get_outputs()[0].name],
{sessionml.get_inputs()[0].name: inference_raw_data},
),
number=10,
)
print(
"LGBM->ONNXML (CPU): {}".format(
num_classes, max_depth, n_estimators, onnxml_time
)
)

booster.num_trees
batch_start_time = time.monotonic()
predictions_array = booster.predict(
data=inference_raw_data,
num_threads=args.num_threads,
predict_disable_shape_check=bool(args.predict_disable_shape_check),
)
prediction_time = time.monotonic() - batch_start_time
metrics_logger.log_metric("time_inferencing", prediction_time)

# use helper to log latency with the right metric names
metrics_logger.log_inferencing_latencies(
[prediction_time], # only one big batch
batch_length=inference_data.num_data(),
batch_length=len(inference_raw_data),
factor_to_usecs=1000000.0, # values are in seconds
)

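A minimal, self-contained sketch of the pattern the new score.py path follows: convert a trained LightGBM booster to ONNX with onnxmltools, build an ort.InferenceSession from the serialized model (without the stray second argument this commit removes), run one large batch, keep only the first output, and time the call with time.monotonic. The feature count, random data, and training parameters below are illustrative only and are not taken from the benchmark.

import time

import lightgbm as lgb
import numpy as np
import onnxruntime as ort
from onnxmltools import convert_lightgbm
from onnxmltools.convert.common.data_types import FloatTensorType

NUM_FEATURES = 40  # illustrative value, not the benchmark's

# train a small booster on random data
X = np.random.rand(1000, NUM_FEATURES).astype(np.float32)
y = np.random.rand(1000)
booster = lgb.train(
    {"objective": "regression", "verbosity": -1},
    lgb.Dataset(X, label=y),
    num_boost_round=10,
)

# convert to ONNX; the declared input type must match the inference dtype (float32 here)
onnx_input_types = [("input", FloatTensorType([None, NUM_FEATURES]))]
onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types)

# create the inference session from the serialized model
sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString())

# run one big batch; .run() returns a list of outputs, so keep only the first one
inference_raw_data = np.random.rand(10000, NUM_FEATURES).astype(np.float32)
batch_start_time = time.monotonic()
predictions_array = sessionml.run(
    [sessionml.get_outputs()[0].name],
    {sessionml.get_inputs()[0].name: inference_raw_data},
)[0]
prediction_time = time.monotonic() - batch_start_time

# per-row latency in microseconds, matching factor_to_usecs=1000000.0 above
print(f"{prediction_time * 1_000_000.0 / len(inference_raw_data):.2f} usecs per row")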