From 7b1b22ace65aed19b43388ab054ecc65b3a36de1 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Mon, 13 Nov 2023 16:22:57 -0800
Subject: [PATCH] Add L0_pytorch_python_runtime

---
 qa/L0_pytorch_python_runtime/infer.py     | 148 +++++++++++++++++++++
 qa/L0_pytorch_python_runtime/test.sh      | 147 +++++++++++++++++++++
 qa/L0_pytorch_python_runtime/unit_test.py | 152 ++++++++++++++++++++++
 3 files changed, 447 insertions(+)
 create mode 100755 qa/L0_pytorch_python_runtime/infer.py
 create mode 100755 qa/L0_pytorch_python_runtime/test.sh
 create mode 100755 qa/L0_pytorch_python_runtime/unit_test.py

diff --git a/qa/L0_pytorch_python_runtime/infer.py b/qa/L0_pytorch_python_runtime/infer.py
new file mode 100755
index 00000000000..32b3f80abf7
--- /dev/null
+++ b/qa/L0_pytorch_python_runtime/infer.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
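+
+# This client exercises the PyTorch Python-based runtime end to end: an
+# addsub model served without a parameter file, a neuralnet model served
+# with one, and a set of parallel single-sample requests that fills a
+# dynamic batch.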
+
+import concurrent.futures
+import json
+import sys
+
+import numpy as np
+import tritonclient.http as httpclient
+from tritonclient.utils import *
+
+
+def infer_model_without_parameter_file():
+    model_name = "addsub"
+    shape = [4]
+
+    with httpclient.InferenceServerClient("localhost:8000") as client:
+        input0_data = np.random.rand(*shape).astype(np.float32)
+        input1_data = np.random.rand(*shape).astype(np.float32)
+        inputs = [
+            httpclient.InferInput(
+                "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
+            ),
+            httpclient.InferInput(
+                "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
+            ),
+        ]
+
+        inputs[0].set_data_from_numpy(input0_data)
+        inputs[1].set_data_from_numpy(input1_data)
+
+        outputs = [
+            httpclient.InferRequestedOutput("OUTPUT0"),
+            httpclient.InferRequestedOutput("OUTPUT1"),
+        ]
+
+        response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)
+
+        result = response.get_response()
+        output0_data = response.as_numpy("OUTPUT0")
+        output1_data = response.as_numpy("OUTPUT1")
+
+        print(
+            "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
+                input0_data, input1_data, output0_data
+            )
+        )
+        print(
+            "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
+                input0_data, input1_data, output1_data
+            )
+        )
+
+        if not np.allclose(input0_data + input1_data, output0_data):
+            print(model_name + " error: incorrect sum")
+            return False
+
+        if not np.allclose(input0_data - input1_data, output1_data):
+            print(model_name + " error: incorrect difference")
+            return False
+
+        print("PASS: " + model_name)
+        return True
+
+
+def infer_model_with_parameter_file(batch_size, data_offset=0):
+    model_name = "neuralnet"
+    test_data_file = "neuralnet_test_data.json"
+    np_dtype = np.single
+
+    # prepare input data
+    with open(test_data_file) as f:
+        test_data = json.load(f)
+    input_data = np.array(test_data["input_data"], dtype=np_dtype)
+    input_data = input_data[data_offset : (data_offset + batch_size)]
+    labels = test_data["labels"][data_offset : (data_offset + batch_size)]
+
+    # inference
+    with httpclient.InferenceServerClient("localhost:8000") as client:
+        inputs = [
+            httpclient.InferInput(
+                "INPUT", input_data.shape, np_to_triton_dtype(input_data.dtype)
+            )
+        ]
+        inputs[0].set_data_from_numpy(input_data)
+
+        response = client.infer(model_name, inputs, request_id=str(1))
+        result = response.get_response()
+        output_data = response.as_numpy("OUTPUT")
+        output_data_max = np.max(output_data, axis=1)
+
+        print("Inference result: " + str(output_data))
+        print("Inference result (max): " + str(output_data_max))
+        print("Expected result: " + str(labels))
+
+        if not np.all(np.isclose(output_data_max, labels, atol=8)):
+            print(model_name + " error: incorrect result")
+            return False
+
+        print("PASS: " + model_name)
+        return True
+
+
+def parallel_infer_a_full_dynamic_batch(max_batch_size):
+    batch_size = 1
+    success = True
+    with concurrent.futures.ThreadPoolExecutor() as pool:
+        threads = []
+        for i in range(max_batch_size // batch_size):
+            t = pool.submit(infer_model_with_parameter_file, batch_size, i)
+            threads.append(t)
+        for t in threads:
+            success &= t.result()
+    return success
+
+
+if __name__ == "__main__":
+    success = infer_model_without_parameter_file()
+    success &= infer_model_with_parameter_file(batch_size=4)
+    success &= parallel_infer_a_full_dynamic_batch(max_batch_size=8)
+    if not success:
+        sys.exit(1)
+    sys.exit(0)
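
Note: infer.py expects neuralnet_test_data.json to hold two parallel arrays, "input_data" and "labels", both indexed along the batch dimension; test.sh (below) moves this file out of the neuralnet model directory before serving. For illustration only, a minimal sketch that writes a structurally compatible file; the [8, 4] input shape and zero labels here are placeholders, as the real data ships with the model in the QA model store:

    import json

    import numpy as np

    # Placeholder shape and labels: 8 samples to cover the max dynamic
    # batch exercised by infer.py; the real file comes with the model.
    input_data = np.random.rand(8, 4).astype(np.float32)
    labels = [0.0] * 8

    with open("neuralnet_test_data.json", "w") as f:
        json.dump({"input_data": input_data.tolist(), "labels": labels}, f)
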
diff --git a/qa/L0_pytorch_python_runtime/test.sh b/qa/L0_pytorch_python_runtime/test.sh
new file mode 100755
index 00000000000..80a215599a3
--- /dev/null
+++ b/qa/L0_pytorch_python_runtime/test.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
+if [ "$#" -ge 1 ]; then
+    REPO_VERSION=$1
+fi
+if [ -z "$REPO_VERSION" ]; then
+    echo -e "Repository version must be specified"
+    echo -e "\n***\n*** Test Failed\n***"
+    exit 1
+fi
+if [ ! -z "$TEST_REPO_ARCH" ]; then
+    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
+fi
+
+export CUDA_VISIBLE_DEVICES=0
+
+DATA_DIR=/data/inferenceserver/${REPO_VERSION}
+IMAGE_DIR="/opt/tritonserver/qa/images"
+SERVER=/opt/tritonserver/bin/tritonserver
+IMAGE_CLIENT="/opt/tritonserver/qa/clients/image_client.py"
+BACKENDS="/opt/tritonserver/backends"
+source ../common/util.sh
+
+rm -f *.log
+RET=0
+
+#
+# Unit tests
+#
+rm -rf py_runtime_exec_env py_runtime_exec_env.tar.gz py_runtime.py
+cp $BACKENDS/pytorch/model.py py_runtime.py
+cp $BACKENDS/pytorch/pb_exec_env_model.py.tar.gz py_runtime_exec_env.tar.gz
+mkdir py_runtime_exec_env && tar -xzf py_runtime_exec_env.tar.gz -C py_runtime_exec_env
+
+set +e
+
+UNIT_TEST_ENV="source py_runtime_exec_env/bin/activate && exec env LD_LIBRARY_PATH=`pwd`/py_runtime_exec_env/lib:$LD_LIBRARY_PATH"
+UNIT_TEST_LOG="./unit_test.log"
+bash -c "$UNIT_TEST_ENV python3 unit_test.py" > $UNIT_TEST_LOG 2>&1
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** Failed PyTorch Python-based runtime unit test\n***"
+    cat $UNIT_TEST_LOG
+    RET=1
+fi
+
+set -e
+
+#
+# End-to-end inference tests
+#
+rm -rf models && mkdir models
+cp -r $DATA_DIR/pytorch_model_store/* models
+cp -r $DATA_DIR/libtorch_model_store/resnet50_libtorch models && \
+    sed -i "/platform/d" models/resnet50_libtorch/config.pbtxt && \
+    echo "backend: \"pytorch\"" >> models/resnet50_libtorch/config.pbtxt && \
+    echo "runtime: \"model.py\"" >> models/resnet50_libtorch/config.pbtxt && \
+    echo "instance_group: [{ kind: KIND_MODEL }]" >> models/resnet50_libtorch/config.pbtxt
+mv models/neuralnet/1/test_data.json neuralnet_test_data.json
+
+SERVER_ARGS="--model-repository=models --log-verbose=1"
+SERVER_LOG="./infer.server.log"
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    RET=1
+else
+    set +e
+
+    # Check correct model instance initialization
+    EXPECTED_LOG_MSGS=(
+        'Loading '"'"'resnet50_libtorch'"'"' as TorchScript'
+        'Torch parallelism settings for '"'"'addsub'"'"': NUM_THREADS = 1; NUM_INTEROP_THREADS = 1;'
+        'Torch parallelism settings for '"'"'neuralnet'"'"': NUM_THREADS = 4; NUM_INTEROP_THREADS = 2;'
+        'Torch parallelism settings for '"'"'resnet50_libtorch'"'"': NUM_THREADS = 1; NUM_INTEROP_THREADS = 1;'
+        ''"'"'torch.compile'"'"' optional parameter(s) for '"'"'addsub'"'"': {'"'"'disable'"'"': True}'
+        ''"'"'torch.compile'"'"' optional parameter(s) for '"'"'neuralnet'"'"': {}'
+        ''"'"'torch.compile'"'"' optional parameter(s) for '"'"'resnet50_libtorch'"'"': {}'
+    )
+    for EXPECTED_LOG_MSG in "${EXPECTED_LOG_MSGS[@]}"; do
+        grep "$EXPECTED_LOG_MSG" $SERVER_LOG
+        if [ $? -ne 0 ]; then
+            echo -e "\n***\n*** Cannot find \"$EXPECTED_LOG_MSG\" in the server log\n***"
+            cat $SERVER_LOG
+            RET=1
+        fi
+    done
+
+    # Run inference on the TorchScript model
+    CLIENT_LOG="./infer.torchscript.log"
+    python $IMAGE_CLIENT -m "resnet50_libtorch" -s INCEPTION -c 1 -b 2 "$IMAGE_DIR/vulture.jpeg" > $CLIENT_LOG 2>&1
+    if [ $? -ne 0 ]; then
+        echo -e "\n***\n*** Failed to run inference on TorchScript model\n***"
+        cat $CLIENT_LOG
+        RET=1
+    fi
+
+    # Run inference on the PyTorch models
+    CLIENT_LOG="./infer.pytorch.log"
+    python infer.py > $CLIENT_LOG 2>&1
+    if [ $? -ne 0 ]; then
+        echo -e "\n***\n*** Failed to run inference on PyTorch models\n***"
+        cat $CLIENT_LOG
+        RET=1
+    fi
+
+    set -e
+
+    kill $SERVER_PID
+    wait $SERVER_PID
+fi
+
+#
+# Print result and exit
+#
+if [ $RET -eq 0 ]; then
+    echo -e "\n***\n*** Test Passed\n***"
+else
+    echo -e "\n***\n*** Test FAILED\n***"
+fi
+exit $RET
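
Note: the sed/echo sequence above re-targets the TorchScript model at the backend's Python-based runtime: it deletes the 'platform' field and appends backend, runtime, and instance-group settings, so the tail of models/resnet50_libtorch/config.pbtxt ends up as follows (the rest of the config ships with the model store and is not shown):

    backend: "pytorch"
    runtime: "model.py"
    instance_group: [{ kind: KIND_MODEL }]
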
diff --git a/qa/L0_pytorch_python_runtime/unit_test.py b/qa/L0_pytorch_python_runtime/unit_test.py
new file mode 100755
index 00000000000..55e55f1fc60
--- /dev/null
+++ b/qa/L0_pytorch_python_runtime/unit_test.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import unittest
+import unittest.mock
+
+import torch
+
+# Stub out the in-server utils module so py_runtime can be imported
+# outside a running Triton server.
+sys.modules["triton_python_backend_utils"] = unittest.mock.MagicMock()
+from py_runtime import _gather_torch_tensors, _scatter_torch_tensors
+
+
+class PyTorchPythonBackendRuntimeUnittest(unittest.TestCase):
+    # _gather_scatter_cases: [(tensors_scatter, tensors_gather, sections), ...]
+    # tensors_scatter: [an_infer_request, ...]
+    # an_infer_request: [a_torch_tensor_with_batch_dimension, ...]
+    # tensors_gather: [a_torch_tensor_gathering_all_requests, ...]
+    # sections: [batch_size_of_the_corresponding_infer_request, ...]
+    _gather_scatter_cases = [
+        # shape [batch=1, 1]
+        ([[torch.tensor([[1]])]], [torch.tensor([[1]])], [1]),
+        # shape [batch=1, 2]
+        ([[torch.tensor([[1, 2]])]], [torch.tensor([[1, 2]])], [1]),
+        # shape [batch=1, 2, 4]
+        (
+            [[torch.arange(8).reshape(1, 2, 4)]],
+            [torch.arange(8).reshape(1, 2, 4)],
+            [1],
+        ),
+        # shape [batch=3, 1]
+        ([[torch.arange(3).reshape(3, 1)]], [torch.arange(3).reshape(3, 1)], [3]),
+        # shapes ([batch=1, 1], [batch=1, 2])
+        (
+            [[torch.tensor([[1]]), torch.tensor([[2, 3]])]],
+            [torch.tensor([[1]]), torch.tensor([[2, 3]])],
+            [1],
+        ),
+        # scatter shape [batch=1, 1] x 2 -> gather shape [batch=2, 1]
+        (
+            [[torch.tensor([[1]])], [torch.tensor([[2]])]],
+            [torch.tensor([[1], [2]])],
+            [1, 1],
+        ),
+        # scatter shape [batch=1, 2, 1] x 3 -> gather shape [batch=3, 2, 1]
+        (
+            [[torch.tensor([[[i], [i + 3]]])] for i in range(3)],
+            [torch.tensor([[[0], [3]], [[1], [4]], [[2], [5]]])],
+            [1, 1, 1],
+        ),
+        # scatter shape [batch=1, 1] & [batch=2, 1] -> gather shape [batch=3, 1]
+        (
+            [[torch.tensor([[1]])], [torch.tensor([[2], [3]])]],
+            [torch.tensor([[1], [2], [3]])],
+            [1, 2],
+        ),
+        # scatter shape [batch=3, 1, 1] & [batch=1, 1, 1] & [batch=2, 1, 1]
+        # -> gather shape [batch=6, 1, 1]
+        (
+            [
+                [torch.tensor([[[0]], [[1]], [[2]]])],
+                [torch.tensor([[[3]]])],
+                [torch.tensor([[[4]], [[5]]])],
+            ],
+            [torch.arange(6).reshape(6, 1, 1)],
+            [3, 1, 2],
+        ),
+        # scatter shapes ([batch=3, 1, 1], [batch=3, 2]) & ([batch=2, 1, 1], [batch=2, 2])
+        # -> gather shapes ([batch=5, 1, 1], [batch=5, 2])
+        (
+            [
+                [
+                    torch.tensor([[[0]], [[1]], [[2]]]),
+                    torch.tensor([[5, 6], [7, 8], [9, 10]]),
+                ],
+                [torch.tensor([[[3]], [[4]]]), torch.tensor([[11, 12], [13, 14]])],
+            ],
+            [
+                torch.arange(5).reshape(5, 1, 1),
+                torch.arange(start=5, end=15).reshape(5, 2),
+            ],
+            [3, 2],
+        ),
+    ]
+
+    def test_gather_torch_tensors(self):
+        for (
+            tensors_scatter,
+            expected_tensors_gather,
+            expected_sections,
+        ) in self._gather_scatter_cases:
+            tensors_gather, sections = _gather_torch_tensors(tensors_scatter)
+
+            self.assertIsInstance(tensors_gather, list)
+            self.assertEqual(len(tensors_gather), len(expected_tensors_gather))
+            for j in range(len(expected_tensors_gather)):
+                expected_tensor = expected_tensors_gather[j]
+                tensor = tensors_gather[j]
+                self.assertIsInstance(tensor, torch.Tensor)
+                self.assertTrue(torch.equal(tensor, expected_tensor))
+
+            self.assertIsInstance(sections, list)
+            self.assertEqual(len(sections), len(expected_sections))
+            for i in range(len(expected_sections)):
+                expected_section = expected_sections[i]
+                section = sections[i]
+                self.assertIsInstance(section, int)
+                self.assertEqual(section, expected_section)
+
+    def test_scatter_torch_tensors(self):
+        for (
+            expected_tensors_scatter,
+            tensors_gather,
+            sections,
+        ) in self._gather_scatter_cases:
+            tensors_scatter = _scatter_torch_tensors(tensors_gather, sections)
+            self.assertIsInstance(tensors_scatter, list)
+            self.assertEqual(len(tensors_scatter), len(expected_tensors_scatter))
+            for i in range(len(expected_tensors_scatter)):
+                expected_tensors = expected_tensors_scatter[i]
+                tensors = tensors_scatter[i]
+                self.assertIsInstance(tensors, list)
+                self.assertEqual(len(tensors), len(expected_tensors))
+                for j in range(len(expected_tensors)):
+                    expected_tensor = expected_tensors[j]
+                    tensor = tensors[j]
+                    self.assertIsInstance(tensor, torch.Tensor)
+                    self.assertTrue(torch.equal(tensor, expected_tensor))
+
+
+if __name__ == "__main__":
+    unittest.main()
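
Note on the helpers under test: _gather_torch_tensors concatenates the per-request tensors along the batch dimension (dim 0) and returns each request's batch size in sections, and _scatter_torch_tensors splits batched tensors back into per-request groups. The sketch below is a minimal reference implementation written for this note that satisfies the round-trip cases above; the actual runtime code lives in the backend's model.py and may differ:

    from typing import List, Tuple

    import torch


    def gather_torch_tensors(
        tensors_scatter: List[List[torch.Tensor]],
    ) -> Tuple[List[torch.Tensor], List[int]]:
        # One section per request: the size of its batch dimension.
        sections = [request[0].shape[0] for request in tensors_scatter]
        # Concatenate the j-th tensor of every request along the batch dim.
        num_tensors = len(tensors_scatter[0])
        tensors_gather = [
            torch.cat([request[j] for request in tensors_scatter], dim=0)
            for j in range(num_tensors)
        ]
        return tensors_gather, sections


    def scatter_torch_tensors(
        tensors_gather: List[torch.Tensor], sections: List[int]
    ) -> List[List[torch.Tensor]]:
        # Split each gathered tensor into per-request chunks along dim 0 ...
        splits = [torch.split(tensor, sections, dim=0) for tensor in tensors_gather]
        # ... then regroup the chunks by request.
        return [[split[i] for split in splits] for i in range(len(sections))]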