Skip to content

Commit

Permalink
update comment
Browse files Browse the repository at this point in the history
  • Loading branch information
robertgshaw2-redhat committed Dec 23, 2024
1 parent 91aceba commit 87e7ebd
Showing 1 changed file with 13 additions and 19 deletions.
32 changes: 13 additions & 19 deletions vllm/v1/engine/async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,32 +257,26 @@ async def generate(
yield out

# Client request cancellation is handled through calling
# task.cancel() on generate. So if we get this error, we
# need to abort the request.
# task.cancel() on generate. So we call abort if canceled.
except asyncio.CancelledError:
await self.abort(request_id)
raise

async def output_handler_loop(self):
"""Background loop: pulls from Detokenizer and pushes to Queues."""

try:
while True:
# Note: use socket directly to avoid calling await multiple
# times, which causes too much task switching at high QPS.
outputs: List[RequestOutput] = []
outputs = await self.detokenizer.output_socket.recv_pyobj()

for out in outputs:
# Note: it is possible that a request was aborted
# due to client cancellation while EngineCoreOutputs
# are still flowing, so we just ignore.
if out.request_id in self.rid_to_queue:
self.rid_to_queue[out.request_id].put_nowait(out)

except asyncio.CancelledError:
logger.info("Shutting down output_handler_loop")
raise
while True:
# Note: use socket directly to avoid calling await multiple
# times, which causes too much task switching at high QPS.
outputs: List[RequestOutput] = []
outputs = await self.detokenizer.output_socket.recv_pyobj()

for out in outputs:
# Note: it is possible that a request was aborted
# due to client cancellation while EngineCoreOutputs
# are still flowing, so we just ignore.
if out.request_id in self.rid_to_queue:
self.rid_to_queue[out.request_id].put_nowait(out)


async def abort(self, request_id: str):
Expand Down

0 comments on commit 87e7ebd

Please sign in to comment.