diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py
index fd0c40e803f54..7c3666eca50f3 100755
--- a/vllm/worker/hpu_model_runner.py
+++ b/vllm/worker/hpu_model_runner.py
@@ -305,7 +305,10 @@ def _set_block_mapping(self, metadata, batch_size, device, dtype):
         attn_bias = (torch.zeros_like(mask, dtype=dtype).masked_fill_(
             mask, -math.inf))
 
-        if not is_fake_hpu():
+        if (not is_fake_hpu() and htorch.utils.internal.is_lazy()) \
+                or "compile_one_hot" in enabled_flags():
+            # Use one_hot directly on HPU in lazy mode, or in t.compile mode
+            # when the build is >= 20.0.0.370
             block_mapping = torch.nn.functional.one_hot(metadata.block_groups,
                                                         num_classes=batch_size)
         else:
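
For illustration only (not part of the patch): a minimal, runnable sketch of what the one_hot call above computes. Each entry of block_groups names the sequence that owns a KV-cache block, and one_hot expands it into a block-to-sequence mapping with one row per block and one column per sequence. The tensor values below are made up for the example.

import torch

# Hypothetical example values: four cache blocks owned by three sequences.
block_groups = torch.tensor([0, 0, 1, 2], dtype=torch.long)
batch_size = 3

# Same call as in the hunk above.
block_mapping = torch.nn.functional.one_hot(block_groups,
                                            num_classes=batch_size)
print(block_mapping)
# tensor([[1, 0, 0],
#         [1, 0, 0],
#         [0, 1, 0],
#         [0, 0, 1]])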