Skip to content

Commit

Permalink
[Bug]: When applying continue_final_message for the OpenAI server, the "echo…
Browse files Browse the repository at this point in the history
…":false is ignored

FIX: #10111

Signed-off-by: chaunceyjiang <[email protected]>
  • Loading branch information
chaunceyjiang committed Nov 12, 2024
1 parent 8a4358e commit 06c3776
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
25 changes: 19 additions & 6 deletions tests/entrypoints/openai/test_chat_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,33 @@

# Define models, templates, and their corresponding expected outputs
MODEL_TEMPLATE_GENERATON_OUTPUT = [
("facebook/opt-125m", chatml_jinja_path, True, False, """<|im_start|>user
("facebook/opt-125m", chatml_jinja_path, True, False, False, False,
"""<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of<|im_end|>
<|im_start|>assistant
"""),
("facebook/opt-125m", chatml_jinja_path, False, False, """<|im_start|>user
("facebook/opt-125m", chatml_jinja_path, False, False, False,
"""<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of"""),
("facebook/opt-125m", chatml_jinja_path, False, True, """<|im_start|>user
("facebook/opt-125m", chatml_jinja_path, False, True, False,
"""<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of<|im_end|>
<|im_start|>assistant
The capital of"""),
("facebook/opt-125m", chatml_jinja_path, False, True, True,
"""<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
Expand Down Expand Up @@ -85,10 +97,10 @@ def test_no_load_chat_template_literallike():


@pytest.mark.parametrize(
"model,template,add_generation_prompt,continue_final_message,expected_output",
"model,template,add_generation_prompt,continue_final_message,echo,expected_output",
MODEL_TEMPLATE_GENERATON_OUTPUT)
def test_get_gen_prompt(model, template, add_generation_prompt,
continue_final_message, expected_output):
continue_final_message, echo, expected_output):
# Initialize the tokenizer
tokenizer = get_tokenizer(tokenizer_name=model)
template_content = load_chat_template(chat_template=template)
Expand All @@ -97,9 +109,10 @@ def test_get_gen_prompt(model, template, add_generation_prompt,
mock_request = ChatCompletionRequest(
model=model,
messages=TEST_MESSAGES + [ASSISTANT_MESSAGE_TO_CONTINUE]
if continue_final_message else TEST_MESSAGES,
if continue_final_message and echo else TEST_MESSAGES,
add_generation_prompt=add_generation_prompt,
continue_final_message=continue_final_message,
echo=echo,
)

# Call the function and get the result
Expand Down
4 changes: 2 additions & 2 deletions vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ async def chat_completion_stream_generator(

# Send response to echo the input portion of the
# last message
if request.echo or request.continue_final_message:
if request.echo:
last_msg_content: Union[str, List[Dict[str, str]]] = ""
if conversation and "content" in conversation[
-1] and conversation[-1].get("role") == role:
Expand Down Expand Up @@ -682,7 +682,7 @@ async def chat_completion_full_generator(
stop_reason=output.stop_reason)
choices.append(choice_data)

if request.echo or request.continue_final_message:
if request.echo:
last_msg_content: Union[str, List[Dict[str, str]]] = ""
if conversation and "content" in conversation[-1] and conversation[
-1].get("role") == role:
Expand Down

0 comments on commit 06c3776

Please sign in to comment.