
Add compile_one_hot flag condition
afierka-intel committed Jan 22, 2025
1 parent 29568f8 commit 28c228e
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion vllm/worker/hpu_model_runner.py
@@ -305,7 +305,10 @@ def _set_block_mapping(self, metadata, batch_size, device, dtype):
         attn_bias = (torch.zeros_like(mask, dtype=dtype).masked_fill_(
             mask, -math.inf))
 
-        if not is_fake_hpu():
+        if (not is_fake_hpu() and htorch.utils.internal.is_lazy()) \
+                or "compile_one_hot" in enabled_flags():
+            # Use one_hot directly on HPU in lazy mode, or under torch.compile
+            # when the build is >= 20.0.0.370.
             block_mapping = torch.nn.functional.one_hot(metadata.block_groups,
                                                         num_classes=batch_size)
         else:
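For context, a minimal standalone sketch (plain PyTorch on CPU) of what the guarded one_hot call computes; the block_groups values and batch_size below are illustrative assumptions, not taken from the commit.

import torch

# Hypothetical inputs for illustration only (not from the commit):
# block_groups[i] is the index of the sequence that owns block i.
block_groups = torch.tensor([0, 0, 1, 2, 2, 2])
batch_size = 3

# The guarded path in the diff: one_hot expands the group indices into a
# [num_blocks, batch_size] 0/1 mapping with a single 1 per row, placed in
# the column of the owning sequence.
block_mapping = torch.nn.functional.one_hot(block_groups,
                                            num_classes=batch_size)
print(block_mapping.shape)  # torch.Size([6, 3])
print(block_mapping[2])     # tensor([0, 1, 0])

# Note: one_hot raises an error for negative class values, so padded
# (negative) block_groups entries are assumed to be handled on the
# fallback (else) path, which is not shown in this hunk.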
