From 7b1b22ace65aed19b43388ab054ecc65b3a36de1 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Mon, 13 Nov 2023 16:22:57 -0800
Subject: [PATCH] Add L0_pytorch_python_runtime

---
 qa/L0_pytorch_python_runtime/infer.py     | 148 +++++++++++++++++++++
 qa/L0_pytorch_python_runtime/test.sh      | 147 +++++++++++++++++++++
 qa/L0_pytorch_python_runtime/unit_test.py | 152 ++++++++++++++++++++++
 3 files changed, 447 insertions(+)
 create mode 100755 qa/L0_pytorch_python_runtime/infer.py
 create mode 100755 qa/L0_pytorch_python_runtime/test.sh
 create mode 100755 qa/L0_pytorch_python_runtime/unit_test.py

diff --git a/qa/L0_pytorch_python_runtime/infer.py b/qa/L0_pytorch_python_runtime/infer.py
new file mode 100755
index 00000000000..32b3f80abf7
--- /dev/null
+++ b/qa/L0_pytorch_python_runtime/infer.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
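+
+# This client exercises the PyTorch Python-based runtime end to end: an
+# addsub model served without a parameter file, a neuralnet model served
+# with one, and a set of parallel single-sample requests that fills a
+# dynamic batch.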
+
+import concurrent.futures
+import json
+import sys
+
+import numpy as np
+import tritonclient.http as httpclient
+from tritonclient.utils import *
+
+
+def infer_model_without_parameter_file():
+    model_name = "addsub"
+    shape = [4]
+
+    with httpclient.InferenceServerClient("localhost:8000") as client:
+        input0_data = np.random.rand(*shape).astype(np.float32)
+        input1_data = np.random.rand(*shape).astype(np.float32)
+        inputs = [
+            httpclient.InferInput(
+                "INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
+            ),
+            httpclient.InferInput(
+                "INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
+            ),
+        ]
+
+        inputs[0].set_data_from_numpy(input0_data)
+        inputs[1].set_data_from_numpy(input1_data)
+
+        outputs = [
+            httpclient.InferRequestedOutput("OUTPUT0"),
+            httpclient.InferRequestedOutput("OUTPUT1"),
+        ]
+
+        response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)
+
+        result = response.get_response()
+        output0_data = response.as_numpy("OUTPUT0")
+        output1_data = response.as_numpy("OUTPUT1")
+
+        print(
+            "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
+                input0_data, input1_data, output0_data
+            )
+        )
+        print(
+            "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
+                input0_data, input1_data, output1_data
+            )
+        )
+
+        if not np.allclose(input0_data + input1_data, output0_data):
+            print(model_name + " error: incorrect sum")
+            return False
+
+        if not np.allclose(input0_data - input1_data, output1_data):
+            print(model_name + " error: incorrect difference")
+            return False
+
+        print("PASS: " + model_name)
+        return True
+
+
+def infer_model_with_parameter_file(batch_size, data_offset=0):
+    model_name = "neuralnet"
+    test_data_file = "neuralnet_test_data.json"
+    np_dtype = np.single
+
+    # prepare input data
+    with open(test_data_file) as f:
+        test_data = json.load(f)
+    input_data = np.array(test_data["input_data"], dtype=np_dtype)
+    input_data = input_data[data_offset : (data_offset + batch_size)]
+    labels = test_data["labels"][data_offset : (data_offset + batch_size)]
+
+    # inference
+    with httpclient.InferenceServerClient("localhost:8000") as client:
+        inputs = [
+            httpclient.InferInput(
+                "INPUT", input_data.shape, np_to_triton_dtype(input_data.dtype)
+            )
+        ]
+        inputs[0].set_data_from_numpy(input_data)
+
+        response = client.infer(model_name, inputs, request_id=str(1))
+        result = response.get_response()
+        output_data = response.as_numpy("OUTPUT")
+        output_data_max = np.max(output_data, axis=1)
+
+        print("Inference result: " + str(output_data))
+        print("Inference result (max): " + str(output_data_max))
+        print("Expected result: " + str(labels))
+
+        if not np.all(np.isclose(output_data_max, labels, atol=8)):
+            print(model_name + " error: incorrect result")
+            return False
+
+        print("PASS: " + model_name)
+        return True
+
+
+def parallel_infer_a_full_dynamic_batch(max_batch_size):
+    batch_size = 1
+    success = True
+    with concurrent.futures.ThreadPoolExecutor() as pool:
+        threads = []
+        for i in range(max_batch_size // batch_size):
+            t = pool.submit(infer_model_with_parameter_file, batch_size, i)
+            threads.append(t)
+        for t in threads:
+            success &= t.result()
+    return success
+
+
+if __name__ == "__main__":
+    success = infer_model_without_parameter_file()
+    success &= infer_model_with_parameter_file(batch_size=4)
+    success &= parallel_infer_a_full_dynamic_batch(max_batch_size=8)
+    if not success:
+        sys.exit(1)
+    sys.exit(0)
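
Note: infer.py expects neuralnet_test_data.json to hold two parallel arrays, "input_data" and "labels", both indexed along the batch dimension; test.sh (below) moves this file out of the neuralnet model directory before serving. For illustration only, a minimal sketch that writes a structurally compatible file; the [8, 4] input shape and zero labels here are placeholders, as the real data ships with the model in the QA model store:

    import json

    import numpy as np

    # Placeholder shape and labels: 8 samples to cover the max dynamic
    # batch exercised by infer.py; the real file comes with the model.
    input_data = np.random.rand(8, 4).astype(np.float32)
    labels = [0.0] * 8

    with open("neuralnet_test_data.json", "w") as f:
        json.dump({"input_data": input_data.tolist(), "labels": labels}, f)
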
diff --git a/qa/L0_pytorch_python_runtime/test.sh b/qa/L0_pytorch_python_runtime/test.sh
new file mode 100755
index 00000000000..80a215599a3
--- /dev/null
+++ b/qa/L0_pytorch_python_runtime/test.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
+if [ "$#" -ge 1 ]; then
+    REPO_VERSION=$1
+fi
+if [ -z "$REPO_VERSION" ]; then
+    echo -e "Repository version must be specified"
+    echo -e "\n***\n*** Test Failed\n***"
+    exit 1
+fi
+if [ ! -z "$TEST_REPO_ARCH" ]; then
+    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
+fi
+
+export CUDA_VISIBLE_DEVICES=0
+
+DATA_DIR=/data/inferenceserver/${REPO_VERSION}
+IMAGE_DIR="/opt/tritonserver/qa/images"
+SERVER=/opt/tritonserver/bin/tritonserver
+IMAGE_CLIENT="/opt/tritonserver/qa/clients/image_client.py"
+BACKENDS="/opt/tritonserver/backends"
+source ../common/util.sh
+
+rm -f *.log
+RET=0
+
+#
+# Unit tests
+#
+rm -rf py_runtime_exec_env py_runtime_exec_env.tar.gz py_runtime.py
+cp $BACKENDS/pytorch/model.py py_runtime.py
+cp $BACKENDS/pytorch/pb_exec_env_model.py.tar.gz py_runtime_exec_env.tar.gz
+mkdir py_runtime_exec_env && tar -xzf py_runtime_exec_env.tar.gz -C py_runtime_exec_env
+
+set +e
+
+UNIT_TEST_ENV="source py_runtime_exec_env/bin/activate && exec env LD_LIBRARY_PATH=`pwd`/py_runtime_exec_env/lib:$LD_LIBRARY_PATH"
+UNIT_TEST_LOG="./unit_test.log"
+bash -c "$UNIT_TEST_ENV python3 unit_test.py" > $UNIT_TEST_LOG 2>&1
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** Failed PyTorch Python-based runtime unit test\n***"
+    cat $UNIT_TEST_LOG
+    RET=1
+fi
+
+set -e
+
+#
+# End-to-end inference tests
+#
+rm -rf models && mkdir models
+cp -r $DATA_DIR/pytorch_model_store/* models
+cp -r $DATA_DIR/libtorch_model_store/resnet50_libtorch models && \
+    sed -i "/platform/d" models/resnet50_libtorch/config.pbtxt && \
+    echo "backend: \"pytorch\"" >> models/resnet50_libtorch/config.pbtxt && \
+    echo "runtime: \"model.py\"" >> models/resnet50_libtorch/config.pbtxt && \
+    echo "instance_group: [{ kind: KIND_MODEL }]" >> models/resnet50_libtorch/config.pbtxt
+mv models/neuralnet/1/test_data.json neuralnet_test_data.json
+
+SERVER_ARGS="--model-repository=models --log-verbose=1"
+SERVER_LOG="./infer.server.log"
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    RET=1
+else
+    set +e
+
+    # Check correct model instance initialization
+    EXPECTED_LOG_MSGS=(
+        'Loading '"'"'resnet50_libtorch'"'"' as TorchScript'
+        'Torch parallelism settings for '"'"'addsub'"'"': NUM_THREADS = 1; NUM_INTEROP_THREADS = 1;'
+        'Torch parallelism settings for '"'"'neuralnet'"'"': NUM_THREADS = 4; NUM_INTEROP_THREADS = 2;'
+        'Torch parallelism settings for '"'"'resnet50_libtorch'"'"': NUM_THREADS = 1; NUM_INTEROP_THREADS = 1;'
+        ''"'"'torch.compile'"'"' optional parameter(s) for '"'"'addsub'"'"': {'"'"'disable'"'"': True}'
+        ''"'"'torch.compile'"'"' optional parameter(s) for '"'"'neuralnet'"'"': {}'
+        ''"'"'torch.compile'"'"' optional parameter(s) for '"'"'resnet50_libtorch'"'"': {}'
+    )
+    for EXPECTED_LOG_MSG in "${EXPECTED_LOG_MSGS[@]}"; do
+        grep "$EXPECTED_LOG_MSG" $SERVER_LOG
+        if [ $? -ne 0 ]; then
+            echo -e "\n***\n*** Cannot find \"$EXPECTED_LOG_MSG\" in the server log\n***"
+            cat $SERVER_LOG
+            RET=1
+        fi
+    done
+
+    # Run inference on the TorchScript model
+    CLIENT_LOG="./infer.torchscript.log"
+    python $IMAGE_CLIENT -m "resnet50_libtorch" -s INCEPTION -c 1 -b 2 "$IMAGE_DIR/vulture.jpeg" > $CLIENT_LOG 2>&1
+    if [ $? -ne 0 ]; then
+        echo -e "\n***\n*** Failed to run inference on TorchScript model\n***"
+        cat $CLIENT_LOG
+        RET=1
+    fi
+
+    # Run inference on the PyTorch models
+    CLIENT_LOG="./infer.pytorch.log"
+    python infer.py > $CLIENT_LOG 2>&1
+    if [ $? -ne 0 ]; then
+        echo -e "\n***\n*** Failed to run inference on PyTorch models\n***"
+        cat $CLIENT_LOG
+        RET=1
+    fi
+
+    set -e
+
+    kill $SERVER_PID
+    wait $SERVER_PID
+fi
+
+#
+# Print result and exit
+#
+if [ $RET -eq 0 ]; then
+    echo -e "\n***\n*** Test Passed\n***"
+else
+    echo -e "\n***\n*** Test FAILED\n***"
+fi
+exit $RET
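
Note: the sed/echo sequence above re-targets the TorchScript model at the backend's Python-based runtime: it deletes the 'platform' field and appends backend, runtime, and instance-group settings, so the tail of models/resnet50_libtorch/config.pbtxt ends up as follows (the rest of the config ships with the model store and is not shown):

    backend: "pytorch"
    runtime: "model.py"
    instance_group: [{ kind: KIND_MODEL }]
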
diff --git a/qa/L0_pytorch_python_runtime/unit_test.py b/qa/L0_pytorch_python_runtime/unit_test.py
new file mode 100755
index 00000000000..55e55f1fc60
--- /dev/null
+++ b/qa/L0_pytorch_python_runtime/unit_test.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import unittest
+import unittest.mock
+
+import torch
+
+# Stub out the in-server utils module so py_runtime can be imported
+# outside a running Triton server.
+sys.modules["triton_python_backend_utils"] = unittest.mock.MagicMock()
+from py_runtime import _gather_torch_tensors, _scatter_torch_tensors
+
+
+class PyTorchPythonBackendRuntimeUnittest(unittest.TestCase):
+    # _gather_scatter_cases: [(tensors_scatter, tensors_gather, sections), ...]
+    # tensors_scatter: [an_infer_request, ...]
+    # an_infer_request: [a_torch_tensor_with_batch_dimension, ...]
+    # tensors_gather: [a_torch_tensor_gathering_all_requests, ...]
+    # sections: [batch_size_of_the_corresponding_infer_request, ...]
+    _gather_scatter_cases = [
+        # shape [batch=1, 1]
+        ([[torch.tensor([[1]])]], [torch.tensor([[1]])], [1]),
+        # shape [batch=1, 2]
+        ([[torch.tensor([[1, 2]])]], [torch.tensor([[1, 2]])], [1]),
+        # shape [batch=1, 2, 4]
+        (
+            [[torch.arange(8).reshape(1, 2, 4)]],
+            [torch.arange(8).reshape(1, 2, 4)],
+            [1],
+        ),
+        # shape [batch=3, 1]
+        ([[torch.arange(3).reshape(3, 1)]], [torch.arange(3).reshape(3, 1)], [3]),
+        # shapes ([batch=1, 1], [batch=1, 2])
+        (
+            [[torch.tensor([[1]]), torch.tensor([[2, 3]])]],
+            [torch.tensor([[1]]), torch.tensor([[2, 3]])],
+            [1],
+        ),
+        # scatter shape [batch=1, 1] x 2 -> gather shape [batch=2, 1]
+        (
+            [[torch.tensor([[1]])], [torch.tensor([[2]])]],
+            [torch.tensor([[1], [2]])],
+            [1, 1],
+        ),
+        # scatter shape [batch=1, 2, 1] x 3 -> gather shape [batch=3, 2, 1]
+        (
+            [[torch.tensor([[[i], [i + 3]]])] for i in range(3)],
+            [torch.tensor([[[0], [3]], [[1], [4]], [[2], [5]]])],
+            [1, 1, 1],
+        ),
+        # scatter shape [batch=1, 1] & [batch=2, 1] -> gather shape [batch=3, 1]
+        (
+            [[torch.tensor([[1]])], [torch.tensor([[2], [3]])]],
+            [torch.tensor([[1], [2], [3]])],
+            [1, 2],
+        ),
+        # scatter shape [batch=3, 1, 1] & [batch=1, 1, 1] & [batch=2, 1, 1]
+        # -> gather shape [batch=6, 1, 1]
+        (
+            [
+                [torch.tensor([[[0]], [[1]], [[2]]])],
+                [torch.tensor([[[3]]])],
+                [torch.tensor([[[4]], [[5]]])],
+            ],
+            [torch.arange(6).reshape(6, 1, 1)],
+            [3, 1, 2],
+        ),
+        # scatter shapes ([batch=3, 1, 1], [batch=3, 2]) & ([batch=2, 1, 1], [batch=2, 2])
+        # -> gather shapes ([batch=5, 1, 1], [batch=5, 2])
+        (
+            [
+                [
+                    torch.tensor([[[0]], [[1]], [[2]]]),
+                    torch.tensor([[5, 6], [7, 8], [9, 10]]),
+                ],
+                [torch.tensor([[[3]], [[4]]]), torch.tensor([[11, 12], [13, 14]])],
+            ],
+            [
+                torch.arange(5).reshape(5, 1, 1),
+                torch.arange(start=5, end=15).reshape(5, 2),
+            ],
+            [3, 2],
+        ),
+    ]
+
+    def test_gather_torch_tensors(self):
+        for (
+            tensors_scatter,
+            expected_tensors_gather,
+            expected_sections,
+        ) in self._gather_scatter_cases:
+            tensors_gather, sections = _gather_torch_tensors(tensors_scatter)
+
+            self.assertIsInstance(tensors_gather, list)
+            self.assertEqual(len(tensors_gather), len(expected_tensors_gather))
+            for j in range(len(expected_tensors_gather)):
+                expected_tensor = expected_tensors_gather[j]
+                tensor = tensors_gather[j]
+                self.assertIsInstance(tensor, torch.Tensor)
+                self.assertTrue(torch.equal(tensor, expected_tensor))
+
+            self.assertIsInstance(sections, list)
+            self.assertEqual(len(sections), len(expected_sections))
+            for i in range(len(expected_sections)):
+                expected_section = expected_sections[i]
+                section = sections[i]
+                self.assertIsInstance(section, int)
+                self.assertEqual(section, expected_section)
+
+    def test_scatter_torch_tensors(self):
+        for (
+            expected_tensors_scatter,
+            tensors_gather,
+            sections,
+        ) in self._gather_scatter_cases:
+            tensors_scatter = _scatter_torch_tensors(tensors_gather, sections)
+            self.assertIsInstance(tensors_scatter, list)
+            self.assertEqual(len(tensors_scatter), len(expected_tensors_scatter))
+            for i in range(len(expected_tensors_scatter)):
+                expected_tensors = expected_tensors_scatter[i]
+                tensors = tensors_scatter[i]
+                self.assertIsInstance(tensors, list)
+                self.assertEqual(len(tensors), len(expected_tensors))
+                for j in range(len(expected_tensors)):
+                    expected_tensor = expected_tensors[j]
+                    tensor = tensors[j]
+                    self.assertIsInstance(tensor, torch.Tensor)
+                    self.assertTrue(torch.equal(tensor, expected_tensor))
+
+
+if __name__ == "__main__":
+    unittest.main()
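
Note on the helpers under test: _gather_torch_tensors concatenates the per-request tensors along the batch dimension (dim 0) and returns each request's batch size in sections, and _scatter_torch_tensors splits batched tensors back into per-request groups. The sketch below is a minimal reference implementation written for this note that satisfies the round-trip cases above; the actual runtime code lives in the backend's model.py and may differ:

    from typing import List, Tuple

    import torch


    def gather_torch_tensors(
        tensors_scatter: List[List[torch.Tensor]],
    ) -> Tuple[List[torch.Tensor], List[int]]:
        # One section per request: the size of its batch dimension.
        sections = [request[0].shape[0] for request in tensors_scatter]
        # Concatenate the j-th tensor of every request along the batch dim.
        num_tensors = len(tensors_scatter[0])
        tensors_gather = [
            torch.cat([request[j] for request in tensors_scatter], dim=0)
            for j in range(num_tensors)
        ]
        return tensors_gather, sections


    def scatter_torch_tensors(
        tensors_gather: List[torch.Tensor], sections: List[int]
    ) -> List[List[torch.Tensor]]:
        # Split each gathered tensor into per-request chunks along dim 0 ...
        splits = [torch.split(tensor, sections, dim=0) for tensor in tensors_gather]
        # ... then regroup the chunks by request.
        return [[split[i] for split in splits] for i in range(len(sections))]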