From 941963fddbf25f58f9c0bce08d45f840e7d635cb Mon Sep 17 00:00:00 2001 From: yuwenzho Date: Thu, 29 Aug 2024 08:32:58 +0300 Subject: [PATCH] Drop redundant int() around math.ceil in vllm/hpu/cache_ops.py Signed-off-by: yuwenzho --- vllm/hpu/cache_ops.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/hpu/cache_ops.py b/vllm/hpu/cache_ops.py index 2f8f387eb6529..facc01751dc5c 100644 --- a/vllm/hpu/cache_ops.py +++ b/vllm/hpu/cache_ops.py @@ -30,8 +30,7 @@ def reshape_and_cache(key, # lots of padding, or are doing warmup. # This loop is a workaround for this issue. Please remove it # once key_cache.index_put_(indices, offsets), key) works. - num_kv_cache_passes = int( - math.ceil(num_slots_requested / num_slots_available)) + num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available) for i in range(num_kv_cache_passes): start_idx = i * num_slots_available end_idx = (i + 1) * num_slots_available @@ -58,8 +57,7 @@ def prepare_to_cache(cache, slot_mapping): # lots of padding, or are doing warmup. # This loop is a workaround for this issue. Please remove it # once key_cache.index_put_(indices, offsets), key) works. - num_kv_cache_passes = int( - math.ceil(num_slots_requested / num_slots_available)) + num_kv_cache_passes = math.ceil(num_slots_requested / num_slots_available) return num_kv_cache_passes, num_slots_available, indices, offsets