From 29099dfe119a283319711313cdb597915c05994c Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Wed, 6 Nov 2024 16:48:52 -0800
Subject: [PATCH] Rename output_* to return_*

---
 .../additional_outputs_test.py                  | 60 +++++++++----------
 docs/additional_outputs.md                      |  8 +--
 src/model.py                                    | 20 +++----
 3 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/ci/L0_additional_outputs_vllm/additional_outputs_test.py b/ci/L0_additional_outputs_vllm/additional_outputs_test.py
index 08cfc91e..a8dfb24d 100644
--- a/ci/L0_additional_outputs_vllm/additional_outputs_test.py
+++ b/ci/L0_additional_outputs_vllm/additional_outputs_test.py
@@ -42,9 +42,9 @@ def _get_inputs(
         prompt,
         stream=True,
         sampling_parameters=None,
-        output_finish_reason=None,
-        output_cumulative_logprob=None,
-        output_num_token_ids=None,
+        return_finish_reason=None,
+        return_cumulative_logprob=None,
+        return_num_token_ids=None,
     ):
         inputs = []
 
@@ -64,21 +64,21 @@ def _get_inputs(
             )
         )
 
-        if output_finish_reason is not None:
-            inputs.append(grpcclient.InferInput("output_finish_reason", [1], "BOOL"))
-            inputs[-1].set_data_from_numpy(np.array([output_finish_reason], dtype=bool))
+        if return_finish_reason is not None:
+            inputs.append(grpcclient.InferInput("return_finish_reason", [1], "BOOL"))
+            inputs[-1].set_data_from_numpy(np.array([return_finish_reason], dtype=bool))
 
-        if output_cumulative_logprob is not None:
+        if return_cumulative_logprob is not None:
             inputs.append(
-                grpcclient.InferInput("output_cumulative_logprob", [1], "BOOL")
+                grpcclient.InferInput("return_cumulative_logprob", [1], "BOOL")
             )
             inputs[-1].set_data_from_numpy(
-                np.array([output_cumulative_logprob], dtype=bool)
+                np.array([return_cumulative_logprob], dtype=bool)
             )
 
-        if output_num_token_ids is not None:
-            inputs.append(grpcclient.InferInput("output_num_token_ids", [1], "BOOL"))
-            inputs[-1].set_data_from_numpy(np.array([output_num_token_ids], dtype=bool))
+        if return_num_token_ids is not None:
+            inputs.append(grpcclient.InferInput("return_num_token_ids", [1], "BOOL"))
+            inputs[-1].set_data_from_numpy(np.array([return_num_token_ids], dtype=bool))
 
         return inputs
 
@@ -104,12 +104,12 @@ def _assert_text_output_valid(self):
         assert len(text_output) > 0, "output is empty"
         assert text_output.count(" ") > 4, "output is not a sentence"
 
-    def _assert_finish_reason(self, output_finish_reason):
+    def _assert_finish_reason(self, return_finish_reason):
         for i in range(len(self._responses)):
             result, error = self._responses[i]["result"], self._responses[i]["error"]
             assert error is None
             finish_reason_np = result.as_numpy(name="finish_reason")
-            if output_finish_reason is None or output_finish_reason == False:
+            if return_finish_reason is None or return_finish_reason == False:
                 assert finish_reason_np is None
                 continue
             finish_reason = finish_reason_np[0].decode("utf-8")
@@ -118,25 +118,25 @@ def _assert_finish_reason(self, output_finish_reason):
             else:
                 assert finish_reason == "length"
 
-    def _assert_cumulative_logprob(self, output_cumulative_logprob):
+    def _assert_cumulative_logprob(self, return_cumulative_logprob):
         prev_cumulative_logprob = 0.0
         for response in self._responses:
             result, error = response["result"], response["error"]
             assert error is None
             cumulative_logprob_np = result.as_numpy(name="cumulative_logprob")
-            if output_cumulative_logprob is None or output_cumulative_logprob == False:
+            if return_cumulative_logprob is None or return_cumulative_logprob == False:
                 assert cumulative_logprob_np is None
                 continue
             cumulative_logprob = cumulative_logprob_np[0].astype(float)
             assert cumulative_logprob != prev_cumulative_logprob
             prev_cumulative_logprob = cumulative_logprob
 
-    def _assert_num_token_ids(self, output_num_token_ids):
+    def _assert_num_token_ids(self, return_num_token_ids):
         for response in self._responses:
             result, error = response["result"], response["error"]
             assert error is None
             num_token_ids_np = result.as_numpy(name="num_token_ids")
-            if output_num_token_ids is None or output_num_token_ids == False:
+            if return_num_token_ids is None or return_num_token_ids == False:
                 assert num_token_ids_np is None
                 continue
             num_token_ids = num_token_ids_np[0].astype(int)
@@ -160,26 +160,26 @@ def _assert_num_token_ids(self, output_num_token_ids):
             assert num_token_ids >= 0
 
     @pytest.mark.parametrize("stream", [True, False])
-    @pytest.mark.parametrize("output_finish_reason", [None, True, False])
-    @pytest.mark.parametrize("output_cumulative_logprob", [None, True, False])
-    @pytest.mark.parametrize("output_num_token_ids", [None, True, False])
+    @pytest.mark.parametrize("return_finish_reason", [None, True, False])
+    @pytest.mark.parametrize("return_cumulative_logprob", [None, True, False])
+    @pytest.mark.parametrize("return_num_token_ids", [None, True, False])
     def test_additional_outputs(
         self,
         stream,
-        output_finish_reason,
-        output_cumulative_logprob,
-        output_num_token_ids,
+        return_finish_reason,
+        return_cumulative_logprob,
+        return_num_token_ids,
     ):
         inputs = self._get_inputs(
             self._prompt,
             stream=stream,
             sampling_parameters=self._sampling_parameters,
-            output_finish_reason=output_finish_reason,
-            output_cumulative_logprob=output_cumulative_logprob,
-            output_num_token_ids=output_num_token_ids,
+            return_finish_reason=return_finish_reason,
+            return_cumulative_logprob=return_cumulative_logprob,
+            return_num_token_ids=return_num_token_ids,
         )
         self._llm_infer(inputs)
         self._assert_text_output_valid()
-        self._assert_finish_reason(output_finish_reason)
-        self._assert_cumulative_logprob(output_cumulative_logprob)
-        self._assert_num_token_ids(output_num_token_ids)
+        self._assert_finish_reason(return_finish_reason)
+        self._assert_cumulative_logprob(return_cumulative_logprob)
+        self._assert_num_token_ids(return_num_token_ids)
diff --git a/docs/additional_outputs.md b/docs/additional_outputs.md
index c874435e..dcca0dc4 100644
--- a/docs/additional_outputs.md
+++ b/docs/additional_outputs.md
@@ -43,7 +43,7 @@ The reason why the sequence is finished. See
 [here](https://github.com/vllm-project/vllm/blob/v0.6.3.post1/vllm/outputs.py#L26)
 for more details.
 
-To enable, set `output_finish_reason` input tensor to `True`. The reason will be
+To enable, set `return_finish_reason` input tensor to `True`. The reason will be
 sent as a string on the `finish_reason` output tensor.
 
 Supported since r24.11.
@@ -54,7 +54,7 @@ The cumulative log probability of the generated output text. See
 [here](https://github.com/vllm-project/vllm/blob/v0.6.3.post1/vllm/outputs.py#L22)
 for more details.
 
-To enable, set `output_cumulative_logprob` input tensor to `True`. The floating
+To enable, set `return_cumulative_logprob` input tensor to `True`. The floating
 point value will be sent on the `cumulative_logprob` output tensor.
 
 Supported since r24.11.
@@ -68,7 +68,7 @@ presumed to be zero. See
 [here](https://github.com/vllm-project/vllm/blob/v0.6.3.post1/vllm/outputs.py#L21)
 for more details on the token IDs of the generated output text.
 
-To enable, set `output_num_token_ids` input tensor to `True`. The unsigned
+To enable, set `return_num_token_ids` input tensor to `True`. The unsigned
 integer value will be sent on the `num_token_ids` output tensor.
 
 Supported since r24.11.
 
@@ -88,7 +88,7 @@ inputs[-1].set_data_from_numpy(
     np.array(["example prompt".encode("utf-8")], dtype=np.object_)
 )
 
-inputs.append(grpcclient.InferInput("output_finish_reason", [1], "BOOL"))
+inputs.append(grpcclient.InferInput("return_finish_reason", [1], "BOOL"))
 inputs[-1].set_data_from_numpy(np.array([True], dtype=bool))
 
 def callback(result, error):
diff --git a/src/model.py b/src/model.py
index 0b4a6759..dfaebf61 100644
--- a/src/model.py
+++ b/src/model.py
@@ -89,19 +89,19 @@ def _auto_complete_inputs_and_outputs(auto_complete_model_config):
                 "optional": True,
             },
             {
-                "name": "output_finish_reason",
+                "name": "return_finish_reason",
                 "data_type": "TYPE_BOOL",
                 "dims": [1],
                 "optional": True,
             },
             {
-                "name": "output_cumulative_logprob",
+                "name": "return_cumulative_logprob",
                 "data_type": "TYPE_BOOL",
                 "dims": [1],
                 "optional": True,
             },
             {
-                "name": "output_num_token_ids",
+                "name": "return_num_token_ids",
                 "data_type": "TYPE_BOOL",
                 "dims": [1],
                 "optional": True,
@@ -348,11 +348,11 @@ def _get_input_tensors(self, request):
         else:
             parameters = request.parameters()
 
-        # output_finish_reason, output_cumulative_logprob, output_num_token_ids
+        # return_finish_reason, return_cumulative_logprob, return_num_token_ids
         additional_outputs = {
-            "output_finish_reason": None,
-            "output_cumulative_logprob": None,
-            "output_num_token_ids": None,
+            "return_finish_reason": None,
+            "return_cumulative_logprob": None,
+            "return_num_token_ids": None,
         }
         for tensor_name in additional_outputs.keys():
             tensor = pb_utils.get_input_tensor_by_name(request, tensor_name)
@@ -445,7 +445,7 @@ def _create_response(
         )
 
         # finish_reason
-        if additional_outputs["output_finish_reason"]:
+        if additional_outputs["return_finish_reason"]:
             finish_reason = [
                 str(output.finish_reason) for output in request_output.outputs
             ]
@@ -456,7 +456,7 @@ def _create_response(
             )
 
         # cumulative_logprob
-        if additional_outputs["output_cumulative_logprob"]:
+        if additional_outputs["return_cumulative_logprob"]:
             cumulative_logprob = [
                 output.cumulative_logprob for output in request_output.outputs
             ]
@@ -468,7 +468,7 @@ def _create_response(
             )
 
         # num_token_ids
-        if additional_outputs["output_num_token_ids"]:
+        if additional_outputs["return_num_token_ids"]:
             if prev_request_output is None:
                 # this is the first response
                 prev_lens = [0] * len(request_output.outputs)
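
For quick reference, the renamed interface can be exercised end to end with a short gRPC client. The sketch below is illustrative only and is not part of the patch: the model name `vllm_model` and the endpoint `localhost:8001` are assumed placeholders, while the tensor names match the docs and test changes above.

```python
import queue

import numpy as np
import tritonclient.grpc as grpcclient

MODEL_NAME = "vllm_model"  # placeholder: your vLLM model name
SERVER_URL = "localhost:8001"  # placeholder: your Triton gRPC endpoint

inputs = []

# Required prompt and streaming flag.
inputs.append(grpcclient.InferInput("text_input", [1], "BYTES"))
inputs[-1].set_data_from_numpy(
    np.array(["example prompt".encode("utf-8")], dtype=np.object_)
)
inputs.append(grpcclient.InferInput("stream", [1], "BOOL"))
inputs[-1].set_data_from_numpy(np.array([True], dtype=bool))

# Opt in to all three additional outputs via the renamed return_* tensors.
for flag in (
    "return_finish_reason",
    "return_cumulative_logprob",
    "return_num_token_ids",
):
    inputs.append(grpcclient.InferInput(flag, [1], "BOOL"))
    inputs[-1].set_data_from_numpy(np.array([True], dtype=bool))

responses = queue.Queue()

def callback(result, error):
    # Collect streamed responses; each may carry the additional outputs.
    responses.put((result, error))

client = grpcclient.InferenceServerClient(SERVER_URL)
client.start_stream(callback=callback)
client.async_stream_infer(MODEL_NAME, inputs=inputs)
client.stop_stream()  # blocks until the response stream is closed

while not responses.empty():
    result, error = responses.get()
    assert error is None
    print(result.as_numpy(name="text_output"))
    print(result.as_numpy(name="finish_reason"))
    print(result.as_numpy(name="cumulative_logprob"))
    print(result.as_numpy(name="num_token_ids"))
```

As the updated test asserts, a `return_*` tensor that is omitted or set to `False` leaves the corresponding output tensor absent, so clients only receive the additional outputs they explicitly request.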