diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
index 3af52757ded86..4f12a6a253282 100644
--- a/vllm/v1/core/kv_cache_manager.py
+++ b/vllm/v1/core/kv_cache_manager.py
@@ -172,9 +172,13 @@ def __init__(
         self.num_preallocate_tokens = num_preallocate_tokens
         self.num_preallocate_blocks = cdiv(num_preallocate_tokens, block_size)
 
+        # A Block pool of all kv-cache blocks.
         self.block_pool: List[KVCacheBlock] = [
             KVCacheBlock(idx) for idx in range(num_gpu_blocks)
         ]
+        # Free block queue that constructs and manipulates a doubly linked
+        # list of free blocks (including eviction candidates when caching is
+        # enabled).
         self.free_block_queue = FreeKVCacheBlockQueue(self.block_pool)
 
         # {block_hash: {block ID: block}}. A cached block is
@@ -249,7 +253,7 @@ def append_slots(
             # slots, but we cannot allocate new blocks due to the limit.
             return None
 
-        # Assign token IDs to already allocated blocks.
+        # When caching is enabled, assign token IDs to already allocated blocks.
         new_token_ids = None
         parent_block_id = None
         if self.enable_caching:
@@ -343,11 +347,21 @@ def allocate_slots(
             num_required_blocks + self.num_preallocate_blocks,
             self.free_block_queue.num_free_blocks -
             num_evictable_computed_blocks)
 
-        # Get the token IDs for the blocks being allocated for hashing.
-        # Note that we expect this function to be called only once per
-        # request, so we must have all new token IDs in the prompt.
+        num_computed_tokens = len(computed_block_ids) * self.block_size
+
+        # When caching is enabled, get the new token IDs and the parent block
+        # ID to generate cache keys.
+        new_token_ids = None
+        parent_block_id = None
         if self.enable_caching:
+            # Touch the computed blocks to make sure they won't be evicted.
+            self._touch(computed_block_ids)
+
+            # Get the token IDs for the blocks being allocated for hashing.
+            # Note that we expect allocate_slots to be called only once per
+            # new request, so num_computed_tokens + num_tokens must be less
+            # than or equal to the total number of tokens in the prompt.
             new_token_ids = request.prompt_token_ids[
                 num_computed_tokens:num_computed_tokens + num_tokens]
             if not new_token_ids:
@@ -356,15 +370,10 @@ def allocate_slots(
                     f"#prompt_tokens={len(request.prompt_token_ids)} < "
                     f"#computed_tokens={num_computed_tokens}")
 
-            # Touch the computed blocks to make sure they won't be evicted.
-            self._touch(computed_block_ids)
-
             # Get the parent block ID to construct the block chain.
             parent_block_id = computed_block_ids[
                 -1] if computed_block_ids else None
-        else:
-            new_token_ids = None
-            parent_block_id = None
+
         new_blocks = self._get_new_blocks(num_new_blocks, new_token_ids,
                                           parent_block_id)
         new_block_ids = [blk.block_id for blk in new_blocks]
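# Illustration only: a minimal, standalone sketch of the block-count and
# token-slicing arithmetic that the allocate_slots hunks above rely on.
# The names below (block_size, computed_block_ids, prompt_token_ids,
# num_tokens, num_preallocate_tokens) mirror the diff for readability, but
# this is simplified example code with made-up values, not the vLLM API.

def cdiv(a: int, b: int) -> int:
    # Ceiling division, as used for num_preallocate_blocks in the diff.
    return -(-a // b)

block_size = 16
num_preallocate_tokens = 64
num_preallocate_blocks = cdiv(num_preallocate_tokens, block_size)  # 4

# Suppose 2 full blocks were already computed via prefix caching and the
# request brings 20 new prompt tokens.
computed_block_ids = [7, 8]
prompt_token_ids = list(range(52))  # 32 cached tokens + 20 new ones
num_tokens = 20

# Tokens covered by the computed (cached) blocks; computed blocks are full.
num_computed_tokens = len(computed_block_ids) * block_size  # 32

# New blocks required to hold the incoming tokens.
num_required_blocks = cdiv(num_tokens, block_size)  # 2

# Token IDs used to hash the newly allocated blocks when caching is enabled.
new_token_ids = prompt_token_ids[
    num_computed_tokens:num_computed_tokens + num_tokens]
assert len(new_token_ids) == num_tokens

# The parent of the new block chain is the last computed block, if any.
parent_block_id = computed_block_ids[-1] if computed_block_ids else None  # 8

print(num_preallocate_blocks, num_required_blocks, parent_block_id)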