
Commit

Merge branch 'habana_main' into dev/mfylcek/sampler-aware_batch_size_padding
mfylcek authored Jan 22, 2025
2 parents e61dd86 + 1df1c2c commit 1a93fdc
Showing 2 changed files with 15 additions and 11 deletions.
vllm/config.py: 3 changes (2 additions, 1 deletion)
@@ -608,8 +608,9 @@ def _verify_cuda_graph(self) -> None:
                                          self.max_model_len)
 
         MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']
+        from vllm.platforms import current_platform
         if (self.hf_config.model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
-                and not self.enforce_eager):
+                and not self.enforce_eager and not current_platform.is_hpu()):
             logger.warning(
                 "CUDA graph is not supported for %s yet, fallback to the eager "
                 "mode.", self.hf_config.model_type)
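
The net effect of the config.py change: on HPU back ends, models listed in MODEL_NOT_SUPPORT_CUDA_GRAPH (deepseek_v3, mllama) are no longer warned about and forced into eager mode, since the CUDA-graph limitation does not apply there. Below is a minimal sketch of the resulting guard, pulled out of ModelConfig._verify_cuda_graph for illustration; the helper name falls_back_to_eager is hypothetical, not part of vLLM.

# Sketch only: standalone approximation of the guard after this commit.
# The real check lives inside ModelConfig._verify_cuda_graph and reads
# self.hf_config.model_type / self.enforce_eager.
from vllm.platforms import current_platform

MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']


def falls_back_to_eager(model_type: str, enforce_eager: bool) -> bool:
    """Return True when vLLM would warn and force eager mode."""
    return (model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
            and not enforce_eager
            and not current_platform.is_hpu())  # new: HPU skips the fallback
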
vllm/model_executor/layers/fused_moe/layer.py: 23 changes (13 additions, 10 deletions)
@@ -160,16 +160,19 @@ def forward_cuda(
             topk_ids=topk_ids,
             inplace=True)
 
-    def forward_hpu(self,
-                    layer: torch.nn.Module,
-                    x: torch.Tensor,
-                    use_grouped_topk: bool,
-                    top_k: int,
-                    router_logits: torch.Tensor,
-                    renormalize: bool,
-                    topk_group: Optional[int] = None,
-                    num_expert_group: Optional[int] = None,
-                    custom_routing_function: Optional[Callable] = None):
+    def forward_hpu(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        use_grouped_topk: bool,
+        top_k: int,
+        router_logits: torch.Tensor,
+        renormalize: bool,
+        topk_group: Optional[int] = None,
+        num_expert_group: Optional[int] = None,
+        custom_routing_function: Optional[Callable] = None,
+        **kwargs,
+    ):
         assert not use_grouped_topk, 'use_grouped_topk must be False on HPU'
         assert num_expert_group is None, ('num_expert_group is '
                                           'not supported on HPU')
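
The layer.py change reflows the forward_hpu signature and adds **kwargs, so the HPU path tolerates keyword arguments that the shared MoE dispatch code may pass for other back ends instead of raising TypeError. A minimal, self-contained sketch of that pattern follows; forward_demo and the extra scoring_func argument are illustrative only, not the actual vLLM call site.

# Sketch only: why **kwargs keeps a backend-specific method compatible
# when the common caller starts passing new keyword arguments.
from typing import Callable, Optional

import torch


def forward_demo(
    x: torch.Tensor,
    top_k: int,
    renormalize: bool,
    topk_group: Optional[int] = None,
    num_expert_group: Optional[int] = None,
    custom_routing_function: Optional[Callable] = None,
    **kwargs,  # absorbs arguments meant for other back ends
):
    # A real implementation would route tokens to experts; here we just
    # ignore the unknown kwargs and return a dummy top-k selection.
    del kwargs
    return x.topk(top_k, dim=-1)


# Without **kwargs this call would raise TypeError, because scoring_func
# is not a named parameter of forward_demo.
values, indices = forward_demo(torch.randn(2, 8), top_k=2,
                               renormalize=True, scoring_func="softmax")
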

