Use property
Signed-off-by: wangxiyuan <[email protected]>
wangxiyuan committed Jan 14, 2025
1 parent 39684a0 commit a50f429
Showing 15 changed files with 5 additions and 29 deletions.
2 changes: 0 additions & 2 deletions tests/worker/test_model_input.py
@@ -16,8 +16,6 @@

 class MockAttentionBackend(AttentionBackend):

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         raise NotImplementedError

2 changes: 1 addition & 1 deletion vllm/attention/backends/abstract.py
@@ -34,7 +34,7 @@ class AttentionBackend(ABC):
     # For some attention backends, we allocate an output tensor before
     # calling the custom op. When piecewise cudagraph is enabled, this
     # makes sure the output tensor is allocated inside the cudagraph.
-    use_output: bool
+    accept_output_buffer: bool = False

     @staticmethod
     @abstractmethod

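For reference, the new flag follows a simple pattern: the base class declares accept_output_buffer with a default of False, and only backends that can write into a caller-provided output buffer override it. A minimal sketch of that pattern (the real classes carry many abstract methods that are omitted here):

from abc import ABC


class AttentionBackend(ABC):
    # Backends that can write attention results into a pre-allocated,
    # caller-provided buffer opt in by overriding this class attribute.
    accept_output_buffer: bool = False


class FlashAttentionBackend(AttentionBackend):
    # FlashAttention can fill a buffer allocated by the caller.
    accept_output_buffer: bool = True


class TorchSDPABackend(AttentionBackend):
    # Backends that allocate their own output simply inherit the default.
    pass


# The flag is read from the class; no instance or method call is needed.
assert FlashAttentionBackend.accept_output_buffer is True
assert TorchSDPABackend.accept_output_buffer is False
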
2 changes: 0 additions & 2 deletions vllm/attention/backends/blocksparse_attn.py
@@ -87,8 +87,6 @@ def __post_init__(self):

 class BlocksparseFlashAttentionBackend(AttentionBackend):

-    use_output: bool = True
-
     @staticmethod
     def get_name() -> str:
         # For attention layer compatibility

2 changes: 1 addition & 1 deletion vllm/attention/backends/flash_attn.py
@@ -29,7 +29,7 @@

 class FlashAttentionBackend(AttentionBackend):

-    use_output: bool = True
+    accept_output_buffer: bool = True

     @staticmethod
     def get_supported_head_sizes() -> List[int]:

2 changes: 0 additions & 2 deletions vllm/attention/backends/flashinfer.py
@@ -40,8 +40,6 @@

 class FlashInferBackend(AttentionBackend):

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         return "FLASHINFER"

2 changes: 0 additions & 2 deletions vllm/attention/backends/hpu_attn.py
@@ -22,8 +22,6 @@

 class HPUAttentionBackend(AttentionBackend):

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         return "HPU_ATTN"

2 changes: 0 additions & 2 deletions vllm/attention/backends/ipex_attn.py
@@ -17,8 +17,6 @@

 class IpexAttnBackend(AttentionBackend):

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         return "IPEX"

2 changes: 0 additions & 2 deletions vllm/attention/backends/openvino.py
@@ -37,8 +37,6 @@ def create_roi_tensor(

 class OpenVINOAttentionBackend(AttentionBackend):

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         return "OPENVINO"

2 changes: 0 additions & 2 deletions vllm/attention/backends/pallas.py
@@ -11,8 +11,6 @@

 class PallasAttentionBackend(AttentionBackend):

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         return "PALLAS"

2 changes: 0 additions & 2 deletions vllm/attention/backends/placeholder_attn.py
@@ -21,8 +21,6 @@
 class PlaceholderAttentionBackend(AttentionBackend):
     """Placeholder backend for when no attention is needed."""

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         return "NO_ATTENTION"

2 changes: 0 additions & 2 deletions vllm/attention/backends/rocm_flash_attn.py
@@ -29,8 +29,6 @@

 class ROCmFlashAttentionBackend(AttentionBackend):

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         return "ROCM_FLASH"

2 changes: 0 additions & 2 deletions vllm/attention/backends/torch_sdpa.py
@@ -22,8 +22,6 @@

 class TorchSDPABackend(AttentionBackend):

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         return "TORCH_SDPA"

2 changes: 0 additions & 2 deletions vllm/attention/backends/xformers.py
@@ -24,8 +24,6 @@

 class XFormersBackend(AttentionBackend):

-    use_output: bool = False
-
     @staticmethod
     def get_name() -> str:
         return "XFORMERS"

2 changes: 1 addition & 1 deletion vllm/attention/layer.py
@@ -110,7 +110,7 @@ def __init__(
         self.use_direct_call = not current_platform.is_cuda_alike(
         ) and not current_platform.is_cpu()

-        self.use_output = attn_backend.use_output
+        self.use_output = attn_backend.accept_output_buffer
         compilation_config = get_current_vllm_config().compilation_config
         if prefix in compilation_config.static_forward_context:
             raise ValueError(f"Duplicate layer name: {prefix}")

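The renamed attribute feeds self.use_output in the layer, which still answers the same question: should the layer pre-allocate the output tensor before calling into the backend? The sketch below is illustrative only; the self.impl.run hook is a hypothetical stand-in for the backend call and is not part of this diff:

import torch


class AttentionLayerSketch:
    """Hypothetical, simplified stand-in for the layer in vllm/attention/layer.py."""

    def __init__(self, impl, accept_output_buffer: bool):
        self.impl = impl  # hypothetical wrapper around the backend kernel
        self.use_output = accept_output_buffer

    def forward(self, query: torch.Tensor, key: torch.Tensor,
                value: torch.Tensor) -> torch.Tensor:
        if self.use_output:
            # Allocate the result up front so that, with piecewise cudagraph
            # enabled, the output tensor is created inside the captured graph.
            output = torch.empty_like(query)
            self.impl.run(query, key, value, output=output)
            return output
        # Backends that did not opt in allocate and return their own output.
        return self.impl.run(query, key, value)
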
6 changes: 2 additions & 4 deletions vllm/v1/attention/backends/flash_attn.py
@@ -15,6 +15,8 @@

 class FlashAttentionBackend(AttentionBackend):

+    accept_output_buffer: bool = True
+
     @staticmethod
     def get_supported_head_sizes() -> List[int]:
         return [32, 64, 96, 128, 160, 192, 224, 256]
@@ -46,10 +48,6 @@ def get_kv_cache_shape(
     def use_cascade_attention(*args, **kwargs) -> bool:
         return use_cascade_attention(*args, **kwargs)

-    @staticmethod
-    def use_output():
-        return True
-

 @dataclass
 class FlashAttentionMetadata:

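In the v1 backend the same information was previously exposed as a use_output() staticmethod; this commit folds it into the shared class attribute so every backend is queried the same way. A small before/after sketch with illustrative class names:

class _OldStyleBackend:
    # Before this commit, the v1 backend answered via a staticmethod.
    @staticmethod
    def use_output() -> bool:
        return True


class _NewStyleBackend:
    # After: a plain class attribute, with the default inherited from
    # AttentionBackend when a backend does not override it.
    accept_output_buffer: bool = True


# Both spellings answer the same question, but the attribute needs no call.
assert _OldStyleBackend.use_output() == _NewStyleBackend.accept_output_buffer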
