diff --git a/vllm/hpu/cache_ops.py b/vllm/hpu/cache_ops.py
index 98f109accea06..9042924f68b3d 100644
--- a/vllm/hpu/cache_ops.py
+++ b/vllm/hpu/cache_ops.py
@@ -5,6 +5,8 @@
 # LICENSE file in the root directory of this source tree.
 ###############################################################################
 
+import math
+
 import habana_frameworks.torch as htorch
 import torch
 
@@ -30,8 +32,7 @@ def reshape_and_cache(key,
     # lots of padding, or are doing warmup.
     # This loop is a workaround for this issue. Please remove it
     # once key_cache.index_put_(indices, offsets), key) works.
-    num_kv_cache_passes = torch.div(num_slots_requested,
-                                    num_slots_available).ceil().int().item()
+    num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available)
     for i in range(num_kv_cache_passes):
         start_idx = i * num_slots_available
         end_idx = (i + 1) * num_slots_available
@@ -58,8 +59,7 @@ def prepare_to_cache(cache, slot_mapping):
     # lots of padding, or are doing warmup.
     # This loop is a workaround for this issue. Please remove it
     # once key_cache.index_put_(indices, offsets), key) works.
-    num_kv_cache_passes = torch.div(num_slots_requested,
-                                    num_slots_available).ceil().int().item()
+    num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available)
 
     return num_kv_cache_passes, num_slots_available, indices, offsets
 
diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py
index 3ae3c8c8f712c..5d4387dbb9f48 100644
--- a/vllm/model_executor/models/gpt_bigcode.py
+++ b/vllm/model_executor/models/gpt_bigcode.py
@@ -44,6 +44,8 @@
 
 from .interfaces import SupportsLoRA
 
+is_hpu = current_platform.is_hpu()
+
 
 class GPTBigCodeAttention(nn.Module):
 
@@ -225,13 +227,13 @@ def forward(
         position_embeds = self.wpe(position_ids)
         hidden_states = inputs_embeds + position_embeds
 
-        if current_platform.is_hpu():
+        if is_hpu:
             import habana_frameworks.torch as htorch
             htorch.core.mark_step()
         for i in range(len(self.h)):
             layer = self.h[i]
             hidden_states = layer(hidden_states, kv_caches[i], attn_metadata)
-            if current_platform.is_hpu():
+            if is_hpu:
                 htorch.core.mark_step()
 
         hidden_states = self.ln_f(hidden_states)
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index d659d0a3f1127..51716b12513d8 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -55,6 +55,8 @@
 from .interfaces import SupportsLoRA
 from .utils import PPMissingLayer, is_pp_missing_parameter, make_layers
 
+is_hpu = current_platform.is_hpu()
+
 
 class LlamaMLP(nn.Module):
 
@@ -318,7 +320,7 @@ def forward(
             hidden_states = intermediate_tensors["hidden_states"]
             residual = intermediate_tensors["residual"]
 
-        if current_platform.is_hpu():
+        if is_hpu:
            import habana_frameworks.torch as htorch
            htorch.core.mark_step()
         for i in range(self.start_layer, self.end_layer):
@@ -330,7 +332,7 @@
                 attn_metadata,
                 residual,
             )
-        if current_platform.is_hpu():
+        if is_hpu:
             htorch.core.mark_step()
 
         if not get_pp_group().is_last_rank:
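
Note (not part of the patch): a minimal, self-contained sketch of the two host-overhead patterns the diff applies. The helper names `_detect_hpu` and `num_kv_cache_passes` and the sample sizes are illustrative only, not vLLM API, and the sketch assumes a non-HPU host. The pass count is computed with `math.ceil` on plain Python ints, so no intermediate tensor has to be built just to read an integer back out of it, and the platform check is evaluated once at import time instead of on every `forward()` call.

import math

# Hypothetical stand-in for vllm.platforms.current_platform.is_hpu();
# evaluated once at import time so per-call code can reuse the cached result.
def _detect_hpu() -> bool:
    return False  # assume a non-HPU host in this sketch

is_hpu = _detect_hpu()

def num_kv_cache_passes(num_slots_requested: int, num_slots_available: int) -> int:
    # Ceiling division on Python ints; plays the same role as the removed
    # torch.div(...).ceil().int().item() chain for the integer sizes used here,
    # without creating a scalar tensor.
    return math.ceil(num_slots_requested / num_slots_available)

if __name__ == "__main__":
    print(num_kv_cache_passes(10000, 4096))  # -> 3 passes
    print("running on HPU:", is_hpu)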