
Add compile_one_hot flag condition
afierka-intel committed Jan 22, 2025
1 parent 29568f8 commit 28c228e
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion vllm/worker/hpu_model_runner.py
@@ -305,7 +305,10 @@ def _set_block_mapping(self, metadata, batch_size, device, dtype):
         attn_bias = (torch.zeros_like(mask, dtype=dtype).masked_fill_(
             mask, -math.inf))
 
-        if not is_fake_hpu():
+        if (not is_fake_hpu() and htorch.utils.internal.is_lazy()) \
+                or "compile_one_hot" in enabled_flags():
+            # Use one_hot directly on HPU in lazy mode, or under torch.compile
+            # when the build is >= 20.0.0.370.
             block_mapping = torch.nn.functional.one_hot(metadata.block_groups,
                                                         num_classes=batch_size)
         else:
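For context, a minimal standalone sketch (plain PyTorch on CPU) of what the guarded one_hot call computes; the block_groups values and batch_size below are illustrative assumptions, not taken from the commit.

import torch

# Hypothetical inputs for illustration only (not from the commit):
# block_groups[i] is the index of the sequence that owns block i.
block_groups = torch.tensor([0, 0, 1, 2, 2, 2])
batch_size = 3

# The guarded path in the diff: one_hot expands the group indices into a
# [num_blocks, batch_size] 0/1 mapping with a single 1 per row, placed in
# the column of the owning sequence.
block_mapping = torch.nn.functional.one_hot(block_groups,
                                            num_classes=batch_size)
print(block_mapping.shape)  # torch.Size([6, 3])
print(block_mapping[2])     # tensor([0, 1, 0])

# Note: one_hot raises an error for negative class values, so padded
# (negative) block_groups entries are assumed to be handled on the
# fallback (else) path, which is not shown in this hunk.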
