From a1e17c41977b88a9640806e2d21d4d48a2cee197 Mon Sep 17 00:00:00 2001
From: "rshaw@neuralmagic.com" <rshaw@neuralmagic.com>
Date: Mon, 23 Dec 2024 19:18:48 +0000
Subject: [PATCH] updated

---
 vllm/v1/engine/async_llm.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 7f2a597bbdd97..3601085801a4c 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -89,7 +89,7 @@ def __init__(
         to_engine_core_path = get_open_zmq_ipc_path()
 
         # Detokenizer (background process).
-        self.detokenizer = MPDetokenizerClient(
+        self.detokenizer_client = MPDetokenizerClient(
             from_engine_core_path=from_engine_core_path,
             to_engine_core_path=to_engine_core_path,
             tokenizer_name=vllm_config.model_config.tokenizer,
@@ -99,7 +99,7 @@ def __init__(
         )
 
         # EngineCore (background process).
-        self.engine_core = MPEngineCoreClient(
+        self.engine_core_client = MPEngineCoreClient(
             input_path=to_engine_core_path,
             output_path=from_engine_core_path,
             vllm_config=vllm_config,
@@ -148,11 +148,11 @@ def shutdown(self):
         if output_handler := getattr(self, "output_hander", None):
             output_handler.cancel()
 
-        if engine_core := getattr(self, "engine_core", None):
-            engine_core.shutdown()
+        if engine_core_client := getattr(self, "engine_core_client", None):
+            engine_core_client.shutdown()
 
-        if detokenizer := getattr(self, "detokenizer", None):
-            detokenizer.shutdown()
+        if detokenizer_client := getattr(self, "detokenizer_client", None):
+            detokenizer_client.shutdown()
 
     @classmethod
     def _get_executor_cls(cls, vllm_config: VllmConfig) -> Type[Executor]:
@@ -190,7 +190,7 @@ async def add_request(
             self.rid_to_queue[request_id] = asyncio.Queue()
 
         # 3) Send to Detokenizer (which forwards to EngineCore).
-        await self.detokenizer.input_socket.send_pyobj(engine_request)
+        await self.detokenizer_client.input_socket.send_pyobj(engine_request)
 
         return self.rid_to_queue[request_id]
 
@@ -272,7 +272,7 @@ async def output_handler_loop(self):
             # Note: use socket directly to avoid calling await multiple
             # times, which causes too much task switching at high QPS.
             outputs: List[RequestOutput] = []
-            outputs = await self.detokenizer.output_socket.recv_pyobj()
+            outputs = await self.detokenizer_client.output_socket.recv_pyobj()
 
             for out in outputs:
                 # Note: it is possible that a request was aborted
@@ -286,7 +286,7 @@ async def abort(self, request_id: str):
         """Abort request if the client cancels the request."""
 
         # Send abort to Detokenizer (which will fwd to EngineCore)
-        await self.detokenizer.input_socket.send_pyobj(
+        await self.detokenizer_client.input_socket.send_pyobj(
             EngineAbortRequest([request_id]))
 
         # Remove from request output queues.
@@ -336,10 +336,10 @@ async def check_health(self) -> None:
         logger.debug("Called check_health.")
 
     async def start_profile(self) -> None:
-        await self.engine_core.profile_async(True)
+        await self.engine_core_client.profile_async(True)
 
     async def stop_profile(self) -> None:
-        await self.engine_core.profile_async(False)
+        await self.engine_core_client.profile_async(False)
 
     @property
     def is_running(self) -> bool: