-
-
Notifications
You must be signed in to change notification settings - Fork 5.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Frontend][V1] Online serving performance improvements #12287
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
import re | ||
import time | ||
from argparse import Namespace | ||
from typing import Any, Dict, List, Literal, Optional, Union | ||
from typing import Any, ClassVar, Dict, List, Literal, Optional, Set, Union | ||
|
||
import torch | ||
from pydantic import BaseModel, ConfigDict, Field, model_validator | ||
|
@@ -42,23 +42,31 @@ class OpenAIBaseModel(BaseModel): | |
# OpenAI API does allow extra fields | ||
model_config = ConfigDict(extra="allow") | ||
|
||
# Cache class field names | ||
field_names: ClassVar[Optional[Set[str]]] = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was noticeable overhead creating this set every time one of these objects is instantiated. |
||
|
||
@model_validator(mode="before") | ||
@classmethod | ||
def __log_extra_fields__(cls, data): | ||
if isinstance(data, dict): | ||
|
||
field_names = cls.field_names | ||
if field_names is None: | ||
if not isinstance(data, dict): | ||
return data | ||
# Get all class field names and their potential aliases | ||
field_names = set() | ||
for field_name, field in cls.model_fields.items(): | ||
field_names.add(field_name) | ||
if hasattr(field, 'alias') and field.alias: | ||
field_names.add(field.alias) | ||
|
||
# Compare against both field names and aliases | ||
extra_fields = data.keys() - field_names | ||
if extra_fields: | ||
logger.warning( | ||
"The following fields were present in the request " | ||
"but ignored: %s", extra_fields) | ||
if alias := getattr(field, 'alias', None): | ||
field_names.add(alias) | ||
cls.field_names = field_names | ||
|
||
# Compare against both field names and aliases | ||
if any(k not in field_names for k in data): | ||
logger.warning( | ||
"The following fields were present in the request " | ||
"but ignored: %s", | ||
data.keys() - field_names) | ||
return data | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -101,6 +101,7 @@ def add_request( | |
def process_outputs( | ||
self, | ||
engine_core_outputs: List[EngineCoreOutput], | ||
iteration_stats: Optional[IterationStats] = None, | ||
) -> OutputProcessorOutput: | ||
""" | ||
Process the EngineCoreOutputs: | ||
|
@@ -133,7 +134,8 @@ def process_outputs( | |
|
||
request_outputs: List[RequestOutput] = [] | ||
reqs_to_abort: List[str] = [] | ||
iteration_stats = IterationStats(self.log_stats) | ||
if not iteration_stats: | ||
iteration_stats = IterationStats(self.log_stats) | ||
Comment on lines
-136
to
+138
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why pass in an iteration stats just to override it? Is the arg more like a bool? |
||
for engine_core_output in engine_core_outputs: | ||
req_id = engine_core_output.request_id | ||
req_state = self.request_states.get(req_id) | ||
|
@@ -175,8 +177,8 @@ def process_outputs( | |
iteration_stats=iteration_stats, | ||
) | ||
|
||
@staticmethod | ||
def _make_request_output( | ||
self, | ||
request_state: RequestState, | ||
detokenizer_output: Optional[DetokenizerOutput], | ||
) -> Optional[RequestOutput]: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,6 +64,12 @@ def __init__( | |
# recomputing. | ||
self._kv_block_hashes: List[BlockHashType] = [] | ||
|
||
# Read-only views | ||
# Prevent directly appending to the these lists since | ||
# they should also be updated simultaneously. | ||
self.output_token_ids = ConstantList(self._output_token_ids) | ||
self.all_token_ids = ConstantList(self._all_token_ids) | ||
|
||
@classmethod | ||
def from_engine_core_request(cls, request: EngineCoreRequest) -> "Request": | ||
return cls( | ||
|
@@ -79,18 +85,6 @@ def from_engine_core_request(cls, request: EngineCoreRequest) -> "Request": | |
lora_request=request.lora_request, | ||
) | ||
|
||
@property | ||
def output_token_ids(self) -> ConstantList[int]: | ||
# Prevent directly appending to the output_token_ids since | ||
# all_token_ids should also be updated simultaneously. | ||
return ConstantList(self._output_token_ids) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Avoid constructing these objects every time the properties are accessed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice catch! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I actually thought properties were cached after the first call, nice call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That would involve the use of |
||
|
||
@property | ||
def all_token_ids(self) -> ConstantList[int]: | ||
# Prevent directly appending to the all_token_ids since | ||
# output_token_ids should also be updated simultaneously | ||
return ConstantList(self._all_token_ids) | ||
|
||
def append_output_token_ids( | ||
self, | ||
token_ids: Union[int, List[int]], | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to call unfreeze at some point?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, this is mostly static stuff that will be around for the lifetime of the process anyhow.
https://www.rippling.com/blog/the-garbage-collector-fights-back