Skip to content

Commit

Permalink
comment the case of spec decode
Browse files Browse the repository at this point in the history
Signed-off-by: youkaichao <[email protected]>
  • Loading branch information
youkaichao committed Jan 22, 2025
1 parent 2463a79 commit 72488c2
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 9 deletions.
5 changes: 0 additions & 5 deletions vllm/attention/backends/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,6 @@ def make_metadata(cls, *args, **kwargs) -> "AttentionMetadata":
def get_builder_cls() -> Type["AttentionMetadataBuilder"]:
raise NotImplementedError

@classmethod
def make_metadata_builder(cls, *args,
**kwargs) -> "AttentionMetadataBuilder":
return cls.get_builder_cls()(*args, **kwargs)

@staticmethod
@abstractmethod
def get_kv_cache_shape(
Expand Down
6 changes: 4 additions & 2 deletions vllm/worker/cpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,10 @@ def __init__(self,
self.device = self.runner.device
self.multi_modal_input_mapper = self.runner.multi_modal_input_mapper
self.enable_lora = self.runner.lora_config is not None
self.att_metadata_builder = self.runner.attn_backend.get_builder_cls()(
self)
if self.runner.attn_backend is not None:
# spec decode (e.g. Medusa) does not have an attention backend
attn_backend = self.runner.attn_backend
self.att_metadata_builder = attn_backend.get_builder_cls()(self)

self.prepare(finished_requests_ids)

Expand Down
6 changes: 4 additions & 2 deletions vllm/worker/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,8 +460,10 @@ def __init__(self,
self.decode_only = True

# Attention metadata inputs.
self.attn_metadata_builder = self.attn_backend.make_metadata_builder(
weakref.proxy(self))
if self.attn_backend is not None:
# spec decode (e.g. Medusa) does not have an attention backend
self.attn_metadata_builder = self.attn_backend.get_builder_cls()(
weakref.proxy(self))

# Engine/Model configurations.
self.chunked_prefill_enabled = (
Expand Down

0 comments on commit 72488c2

Please sign in to comment.