From 9c4aeddb1a4c7f10a3a0fc7dc866f5f9dcccd629 Mon Sep 17 00:00:00 2001 From: Krishnan Prashanth Date: Mon, 23 Sep 2024 17:24:11 -0700 Subject: [PATCH] Condensed ensemble model testing into existing test function --- .../vllm_backend/models/add_sub/1/model.py | 141 ------------------ .../vllm_backend/models/add_sub/config.pbtxt | 59 -------- .../models/ensemble_model/config.pbtxt | 59 -------- .../models/vllm_invalid_1/1/model.json | 6 - .../models/vllm_invalid_1/config.pbtxt | 37 ----- .../models/vllm_invalid_2/1/model.json | 6 - .../models/vllm_invalid_2/config.pbtxt | 37 ----- .../models/vllm_load_test/1/model.json | 6 - .../models/vllm_load_test/config.pbtxt | 37 ----- .../vllm_backend/models/vllm_opt/1/model.json | 6 - .../vllm_backend/models/vllm_opt/config.pbtxt | 37 ----- 11 files changed, 431 deletions(-) delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/add_sub/1/model.py delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/add_sub/config.pbtxt delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/ensemble_model/config.pbtxt delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_1/1/model.json delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_1/config.pbtxt delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_2/1/model.json delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_2/config.pbtxt delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/vllm_load_test/1/model.json delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/vllm_load_test/config.pbtxt delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/vllm_opt/1/model.json delete mode 100644 ci/L0_backend_vllm/vllm_backend/models/vllm_opt/config.pbtxt diff --git a/ci/L0_backend_vllm/vllm_backend/models/add_sub/1/model.py b/ci/L0_backend_vllm/vllm_backend/models/add_sub/1/model.py deleted file mode 100644 index f416e79d..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/add_sub/1/model.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import json - -# triton_python_backend_utils is available in every Triton Python model. You -# need to use this module to create inference requests and responses. It also -# contains some utility functions for extracting information from model_config -# and converting Triton input/output types to numpy types. -import triton_python_backend_utils as pb_utils - - -class TritonPythonModel: - """Your Python model must use the same class name. Every Python model - that is created must have "TritonPythonModel" as the class name. - """ - - def initialize(self, args): - """`initialize` is called only once when the model is being loaded. - Implementing `initialize` function is optional. This function allows - the model to initialize any state associated with this model. - - Parameters - ---------- - args : dict - Both keys and values are strings. The dictionary keys and values are: - * model_config: A JSON string containing the model configuration - * model_instance_kind: A string containing model instance kind - * model_instance_device_id: A string containing model instance device ID - * model_repository: Model repository path - * model_version: Model version - * model_name: Model name - """ - - # You must parse model_config. JSON string is not parsed here - self.model_config = model_config = json.loads(args["model_config"]) - - # Get OUTPUT0 configuration - output0_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT0") - - # Get OUTPUT1 configuration - output1_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT1") - - # Convert Triton types to numpy types - self.output0_dtype = pb_utils.triton_string_to_numpy( - output0_config["data_type"] - ) - self.output1_dtype = pb_utils.triton_string_to_numpy( - output1_config["data_type"] - ) - - def execute(self, requests): - """`execute` MUST be implemented in every Python model. `execute` - function receives a list of pb_utils.InferenceRequest as the only - argument. This function is called when an inference request is made - for this model. Depending on the batching configuration (e.g. Dynamic - Batching) used, `requests` may contain multiple requests. Every - Python model, must create one pb_utils.InferenceResponse for every - pb_utils.InferenceRequest in `requests`. If there is an error, you can - set the error argument when creating a pb_utils.InferenceResponse - - Parameters - ---------- - requests : list - A list of pb_utils.InferenceRequest - - Returns - ------- - list - A list of pb_utils.InferenceResponse. The length of this list must - be the same as `requests` - """ - - output0_dtype = self.output0_dtype - output1_dtype = self.output1_dtype - - responses = [] - - # Every Python backend must iterate over everyone of the requests - # and create a pb_utils.InferenceResponse for each of them. - for request in requests: - # Get INPUT0 - in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") - # Get INPUT1 - in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") - - out_0, out_1 = ( - in_0.as_numpy() + in_1.as_numpy(), - in_0.as_numpy() - in_1.as_numpy(), - ) - - # Create output tensors. You need pb_utils.Tensor - # objects to create pb_utils.InferenceResponse. - out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype)) - out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype)) - - # Create InferenceResponse. You can set an error here in case - # there was a problem with handling this inference request. - # Below is an example of how you can set errors in inference - # response: - # - # pb_utils.InferenceResponse( - # output_tensors=..., TritonError("An error occurred")) - inference_response = pb_utils.InferenceResponse( - output_tensors=[out_tensor_0, out_tensor_1] - ) - responses.append(inference_response) - - # You should return a list of pb_utils.InferenceResponse. Length - # of this list must match the length of `requests` list. - return responses - - def finalize(self): - """`finalize` is called only once when the model is being unloaded. - Implementing `finalize` function is OPTIONAL. This function allows - the model to perform any necessary clean ups before exit. - """ - print("Cleaning up...") diff --git a/ci/L0_backend_vllm/vllm_backend/models/add_sub/config.pbtxt b/ci/L0_backend_vllm/vllm_backend/models/add_sub/config.pbtxt deleted file mode 100644 index 0a932770..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/add_sub/config.pbtxt +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -name: "add_sub" -backend: "python" - -input [ - { - name: "INPUT0" - data_type: TYPE_FP32 - dims: [ 4 ] - } -] -input [ - { - name: "INPUT1" - data_type: TYPE_FP32 - dims: [ 4 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_FP32 - dims: [ 4 ] - } -] -output [ - { - name: "OUTPUT1" - data_type: TYPE_FP32 - dims: [ 4 ] - } -] - -instance_group [{ kind: KIND_CPU }] diff --git a/ci/L0_backend_vllm/vllm_backend/models/ensemble_model/config.pbtxt b/ci/L0_backend_vllm/vllm_backend/models/ensemble_model/config.pbtxt deleted file mode 100644 index 07977d0d..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/ensemble_model/config.pbtxt +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -name: "ensemble_model" -platform: "ensemble" -max_batch_size: 1 -input [ - { - name: "text_input" - data_type: TYPE_STRING - dims: [ -1 ] - } -] -output [ - { - name: "text_output" - data_type: TYPE_STRING - dims: [ -1 ] - } -] -ensemble_scheduling { - step [ - { - model_name: "vllm_opt" - model_version: -1 - input_map { - key: "text_input" - value: "text_input" - } - output_map { - key: "text_output" - value: "text_output" - } - } - ] -} \ No newline at end of file diff --git a/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_1/1/model.json b/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_1/1/model.json deleted file mode 100644 index c67b3d19..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_1/1/model.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "model":"facebook/opt-125m", - "invalid_attribute": true, - "gpu_memory_utilization": 0.5, - "enforce_eager": true -} diff --git a/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_1/config.pbtxt b/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_1/config.pbtxt deleted file mode 100644 index b5a6c1ae..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_1/config.pbtxt +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# Note: You do not need to change any fields in this configuration. - -backend: "vllm" - -# The usage of device is deferred to the vLLM engine -instance_group [ - { - count: 1 - kind: KIND_MODEL - } -] diff --git a/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_2/1/model.json b/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_2/1/model.json deleted file mode 100644 index 7418f17f..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_2/1/model.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "model":"invalid_model", - "disable_log_requests": true, - "gpu_memory_utilization": 0.5, - "enforce_eager": true -} diff --git a/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_2/config.pbtxt b/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_2/config.pbtxt deleted file mode 100644 index b5a6c1ae..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/vllm_invalid_2/config.pbtxt +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# Note: You do not need to change any fields in this configuration. - -backend: "vllm" - -# The usage of device is deferred to the vLLM engine -instance_group [ - { - count: 1 - kind: KIND_MODEL - } -] diff --git a/ci/L0_backend_vllm/vllm_backend/models/vllm_load_test/1/model.json b/ci/L0_backend_vllm/vllm_backend/models/vllm_load_test/1/model.json deleted file mode 100644 index 8fa8e151..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/vllm_load_test/1/model.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "model":"facebook/opt-125m", - "disable_log_requests": true, - "gpu_memory_utilization": 0.4, - "enforce_eager": true -} diff --git a/ci/L0_backend_vllm/vllm_backend/models/vllm_load_test/config.pbtxt b/ci/L0_backend_vllm/vllm_backend/models/vllm_load_test/config.pbtxt deleted file mode 100644 index b5a6c1ae..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/vllm_load_test/config.pbtxt +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# Note: You do not need to change any fields in this configuration. - -backend: "vllm" - -# The usage of device is deferred to the vLLM engine -instance_group [ - { - count: 1 - kind: KIND_MODEL - } -] diff --git a/ci/L0_backend_vllm/vllm_backend/models/vllm_opt/1/model.json b/ci/L0_backend_vllm/vllm_backend/models/vllm_opt/1/model.json deleted file mode 100644 index 8fa8e151..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/vllm_opt/1/model.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "model":"facebook/opt-125m", - "disable_log_requests": true, - "gpu_memory_utilization": 0.4, - "enforce_eager": true -} diff --git a/ci/L0_backend_vllm/vllm_backend/models/vllm_opt/config.pbtxt b/ci/L0_backend_vllm/vllm_backend/models/vllm_opt/config.pbtxt deleted file mode 100644 index b5a6c1ae..00000000 --- a/ci/L0_backend_vllm/vllm_backend/models/vllm_opt/config.pbtxt +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# Note: You do not need to change any fields in this configuration. - -backend: "vllm" - -# The usage of device is deferred to the vLLM engine -instance_group [ - { - count: 1 - kind: KIND_MODEL - } -]