Skip to content

Commit

Permalink
updated
Browse files Browse the repository at this point in the history
  • Loading branch information
robertgshaw2-redhat committed Dec 23, 2024
1 parent 40c5cd5 commit a1e17c4
Showing 1 changed file with 11 additions and 11 deletions.
22 changes: 11 additions & 11 deletions vllm/v1/engine/async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def __init__(
to_engine_core_path = get_open_zmq_ipc_path()

# Detokenizer (background process).
self.detokenizer = MPDetokenizerClient(
self.detokenizer_client = MPDetokenizerClient(
from_engine_core_path=from_engine_core_path,
to_engine_core_path=to_engine_core_path,
tokenizer_name=vllm_config.model_config.tokenizer,
Expand All @@ -99,7 +99,7 @@ def __init__(
)

# EngineCore (background process).
self.engine_core = MPEngineCoreClient(
self.engine_core_client = MPEngineCoreClient(
input_path=to_engine_core_path,
output_path=from_engine_core_path,
vllm_config=vllm_config,
Expand Down Expand Up @@ -148,11 +148,11 @@ def shutdown(self):
if output_handler := getattr(self, "output_hander", None):
output_handler.cancel()

if engine_core := getattr(self, "engine_core", None):
engine_core.shutdown()
if engine_core_client := getattr(self, "engine_core_client", None):
engine_core_client.shutdown()

if detokenizer := getattr(self, "detokenizer", None):
detokenizer.shutdown()
if detokenizer_client := getattr(self, "detokenizer_client", None):
detokenizer_client.shutdown()

@classmethod
def _get_executor_cls(cls, vllm_config: VllmConfig) -> Type[Executor]:
Expand Down Expand Up @@ -190,7 +190,7 @@ async def add_request(
self.rid_to_queue[request_id] = asyncio.Queue()

# 3) Send to Detokenizer (which forwards to EngineCore).
await self.detokenizer.input_socket.send_pyobj(engine_request)
await self.detokenizer_client.input_socket.send_pyobj(engine_request)

return self.rid_to_queue[request_id]

Expand Down Expand Up @@ -272,7 +272,7 @@ async def output_handler_loop(self):
# Note: use socket directly to avoid calling await multiple
# times, which causes too much task switching at high QPS.
outputs: List[RequestOutput] = []
outputs = await self.detokenizer.output_socket.recv_pyobj()
outputs = await self.detokenizer_client.output_socket.recv_pyobj()

for out in outputs:
# Note: it is possible that a request was aborted
Expand All @@ -286,7 +286,7 @@ async def abort(self, request_id: str):
"""Abort request if the client cancels the request."""

# Send abort to Detokenizer (which will fwd to EngineCore)
await self.detokenizer.input_socket.send_pyobj(
await self.detokenizer_client.input_socket.send_pyobj(
EngineAbortRequest([request_id]))

# Remove from request output queues.
Expand Down Expand Up @@ -336,10 +336,10 @@ async def check_health(self) -> None:
logger.debug("Called check_health.")

async def start_profile(self) -> None:
await self.engine_core.profile_async(True)
await self.engine_core_client.profile_async(True)

async def stop_profile(self) -> None:
await self.engine_core.profile_async(False)
await self.engine_core_client.profile_async(False)

@property
def is_running(self) -> bool:
Expand Down

0 comments on commit a1e17c4

Please sign in to comment.