This repository has been archived by the owner on Apr 8, 2024. It is now read-only.

Commit: add onnx inference
majercakdavid committed Oct 17, 2022
1 parent 84154e0 commit b449733
Showing 4 changed files with 25 additions and 29 deletions.
4 changes: 4 additions & 0 deletions conf/experiments/benchmarks/lightgbm-inferencing.yaml
@@ -14,6 +14,9 @@ experiment:
name: "lightgbm_inferencing_dev"
description: "something interesting to say about this"

run:
submit: true

lightgbm_inferencing_config:
# name of your particular benchmark
benchmark_name: "benchmark-inferencing-20211216.1" # need to be provided at runtime!
@@ -80,6 +83,7 @@ lightgbm_inferencing_config:
- framework: lightgbm_c_api # v3.2.1 with C API prediction
build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
- framework: lightgbm_ray # ray implementation
- framework: lightgbm_ort # ONNX RT implementation
- framework: treelite_python # v1.3.0

# to use custom_win_cli, you need to compile your own binaries
4 changes: 4 additions & 0 deletions conf/experiments/lightgbm-inferencing.yaml
@@ -14,6 +14,9 @@ experiment:
name: "lightgbm_inferencing_dev"
description: "something interesting to say about this"

run:
submit: true

lightgbm_inferencing_config:
# name of your particular benchmark
benchmark_name: "benchmark-dev" # override this with a unique name
@@ -36,6 +39,7 @@ lightgbm_inferencing_config:
- framework: lightgbm_c_api # v3.2.1 with C API prediction
build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
- framework: lightgbm_ray # ray implementation
- framework: lightgbm_ort # ONNX RT implementation
- framework: treelite_python # v1.3.0

# to use custom_win_cli, you need to compile your own binaries
6 changes: 6 additions & 0 deletions src/scripts/inferencing/lightgbm_ort/default.dockerfile
@@ -3,6 +3,8 @@ LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1"

ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm

ARG lightgbm_version="3.3.0"

# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
python=3.8 pip=20.2.4
@@ -25,3 +27,7 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \
'onnxruntime==1.12.1' \
'onnxmltools==1.11.1' \
'onnxconverter-common==1.12.2'

# install lightgbm and pin protobuf
RUN pip install lightgbm==${lightgbm_version} && \
    pip install 'protobuf==3.20'
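
As a quick sanity check of the resulting image, the pinned stack can be imported and its versions printed. This is a minimal sketch, not part of the commit, and it assumes the pins above (lightgbm 3.3.0 via ${lightgbm_version}, onnxmltools 1.11.1, onnxruntime 1.12.1) are unchanged.

# Minimal sanity check for the pinned inference stack (run inside the image).
# Expected versions are the ones pinned in this dockerfile; adjust if the pins change.
import lightgbm
import onnxmltools
import onnxruntime

print(lightgbm.__version__)     # expected 3.3.0 (from ${lightgbm_version})
print(onnxmltools.__version__)  # expected 1.11.1
print(onnxruntime.__version__)  # expected 1.12.1
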
40 changes: 11 additions & 29 deletions src/scripts/inferencing/lightgbm_ort/score.py
@@ -148,7 +148,7 @@ def run(self, args, logger, metrics_logger, unknown_args):
onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types)

logger.info(f"Creating inference session")
sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString(), sess)
sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString())

# capture data shape as property
metrics_logger.set_properties(
@@ -157,45 +157,27 @@
)

logger.info(f"Running .predict()")

batch_start_time = time.monotonic()
sessionml.run(
predictions_array = sessionml.run(
[sessionml.get_outputs()[0].name],
{sessionml.get_inputs()[0].name: inference_raw_data},
)
)[0]
prediction_time = time.monotonic() - batch_start_time
metrics_logger.log_metric("time_inferencing", prediction_time)

# TODO: Discuss alternative?
# onnxml_time = timeit.timeit(
# "sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data} )",
# lambda: sessionml.run(
# [sessionml.get_outputs()[0].name],
# {sessionml.get_inputs()[0].name: inference_raw_data},
# ),
# number=10,
# setup="from __main__ import sessionml, inference_raw_data",
# )
onnxml_time = timeit.timeit(
lambda: sessionml.run(
[sessionml.get_outputs()[0].name],
{sessionml.get_inputs()[0].name: inference_raw_data},
),
number=10,
)
print(
"LGBM->ONNXML (CPU): {}".format(
num_classes, max_depth, n_estimators, onnxml_time
)
)

booster.num_trees
batch_start_time = time.monotonic()
predictions_array = booster.predict(
data=inference_raw_data,
num_threads=args.num_threads,
predict_disable_shape_check=bool(args.predict_disable_shape_check),
)
prediction_time = time.monotonic() - batch_start_time
metrics_logger.log_metric("time_inferencing", prediction_time)

# use helper to log latency with the right metric names
metrics_logger.log_inferencing_latencies(
[prediction_time], # only one big batch
batch_length=inference_data.num_data(),
batch_length=len(inference_raw_data),
factor_to_usecs=1000000.0, # values are in seconds
)

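A minimal, self-contained sketch of the pattern the new score.py path follows: convert a trained LightGBM booster to ONNX with onnxmltools, build an ort.InferenceSession from the serialized model (without the stray second argument this commit removes), run one large batch, keep only the first output, and time the call with time.monotonic. The feature count, random data, and training parameters below are illustrative only and are not taken from the benchmark.

import time

import lightgbm as lgb
import numpy as np
import onnxruntime as ort
from onnxmltools import convert_lightgbm
from onnxmltools.convert.common.data_types import FloatTensorType

NUM_FEATURES = 40  # illustrative value, not the benchmark's

# train a small booster on random data
X = np.random.rand(1000, NUM_FEATURES).astype(np.float32)
y = np.random.rand(1000)
booster = lgb.train(
    {"objective": "regression", "verbosity": -1},
    lgb.Dataset(X, label=y),
    num_boost_round=10,
)

# convert to ONNX; the declared input type must match the inference dtype (float32 here)
onnx_input_types = [("input", FloatTensorType([None, NUM_FEATURES]))]
onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types)

# create the inference session from the serialized model
sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString())

# run one big batch; .run() returns a list of outputs, so keep only the first one
inference_raw_data = np.random.rand(10000, NUM_FEATURES).astype(np.float32)
batch_start_time = time.monotonic()
predictions_array = sessionml.run(
    [sessionml.get_outputs()[0].name],
    {sessionml.get_inputs()[0].name: inference_raw_data},
)[0]
prediction_time = time.monotonic() - batch_start_time

# per-row latency in microseconds, matching factor_to_usecs=1000000.0 above
print(f"{prediction_time * 1_000_000.0 / len(inference_raw_data):.2f} usecs per row")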