
Commit

Merge branch 'habana_main' into dev/mfylcek/sampler-aware_batch_size_padding
mfylcek authored Jan 22, 2025
2 parents e61dd86 + 1df1c2c commit 1a93fdc
Showing 2 changed files with 15 additions and 11 deletions.
vllm/config.py: 3 changes (2 additions, 1 deletion)
@@ -608,8 +608,9 @@ def _verify_cuda_graph(self) -> None:
                                          self.max_model_len)
 
         MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']
+        from vllm.platforms import current_platform
         if (self.hf_config.model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
-                and not self.enforce_eager):
+                and not self.enforce_eager and not current_platform.is_hpu()):
             logger.warning(
                 "CUDA graph is not supported for %s yet, fallback to the eager "
                 "mode.", self.hf_config.model_type)
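
The net effect of the config.py change: on HPU back ends, models listed in MODEL_NOT_SUPPORT_CUDA_GRAPH (deepseek_v3, mllama) are no longer warned about and forced into eager mode, since the CUDA-graph limitation does not apply there. Below is a minimal sketch of the resulting guard, pulled out of ModelConfig._verify_cuda_graph for illustration; the helper name falls_back_to_eager is hypothetical, not part of vLLM.

# Sketch only: standalone approximation of the guard after this commit.
# The real check lives inside ModelConfig._verify_cuda_graph and reads
# self.hf_config.model_type / self.enforce_eager.
from vllm.platforms import current_platform

MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']


def falls_back_to_eager(model_type: str, enforce_eager: bool) -> bool:
    """Return True when vLLM would warn and force eager mode."""
    return (model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
            and not enforce_eager
            and not current_platform.is_hpu())  # new: HPU skips the fallback
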
vllm/model_executor/layers/fused_moe/layer.py: 23 changes (13 additions, 10 deletions)
@@ -160,16 +160,19 @@ def forward_cuda(
             topk_ids=topk_ids,
             inplace=True)
 
-    def forward_hpu(self,
-                    layer: torch.nn.Module,
-                    x: torch.Tensor,
-                    use_grouped_topk: bool,
-                    top_k: int,
-                    router_logits: torch.Tensor,
-                    renormalize: bool,
-                    topk_group: Optional[int] = None,
-                    num_expert_group: Optional[int] = None,
-                    custom_routing_function: Optional[Callable] = None):
+    def forward_hpu(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        use_grouped_topk: bool,
+        top_k: int,
+        router_logits: torch.Tensor,
+        renormalize: bool,
+        topk_group: Optional[int] = None,
+        num_expert_group: Optional[int] = None,
+        custom_routing_function: Optional[Callable] = None,
+        **kwargs,
+    ):
         assert not use_grouped_topk, 'use_grouped_topk must be False on HPU'
         assert num_expert_group is None, ('num_expert_group is '
                                           'not supported on HPU')
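
The layer.py change reflows the forward_hpu signature and adds **kwargs, so the HPU path tolerates keyword arguments that the shared MoE dispatch code may pass for other back ends instead of raising TypeError. A minimal, self-contained sketch of that pattern follows; forward_demo and the extra scoring_func argument are illustrative only, not the actual vLLM call site.

# Sketch only: why **kwargs keeps a backend-specific method compatible
# when the common caller starts passing new keyword arguments.
from typing import Callable, Optional

import torch


def forward_demo(
    x: torch.Tensor,
    top_k: int,
    renormalize: bool,
    topk_group: Optional[int] = None,
    num_expert_group: Optional[int] = None,
    custom_routing_function: Optional[Callable] = None,
    **kwargs,  # absorbs arguments meant for other back ends
):
    # A real implementation would route tokens to experts; here we just
    # ignore the unknown kwargs and return a dummy top-k selection.
    del kwargs
    return x.topk(top_k, dim=-1)


# Without **kwargs this call would raise TypeError, because scoring_func
# is not a named parameter of forward_demo.
values, indices = forward_demo(torch.randn(2, 8), top_k=2,
                               renormalize=True, scoring_func="softmax")
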

