diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index 10bd6e5e59970..fe90cebe68f88 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -68,10 +68,10 @@ def __init__(self, blocks: List[KVCacheBlock]) -> None: # Initialize the doubly linked list of free blocks. self.free_list_head = blocks[0] self.free_list_tail = blocks[-1] - for i in range(self.size): + for i in range(self.num_free_blocks): if i > 0: blocks[i].prev_free_block = blocks[i - 1] - if i < self.size - 1: + if i < self.num_free_blocks - 1: blocks[i].next_free_block = blocks[i + 1] def popleft(self) -> KVCacheBlock: diff --git a/vllm/v1/core/scheduler.py b/vllm/v1/core/scheduler.py index 45bc82bc50f5f..299e2fb763f22 100644 --- a/vllm/v1/core/scheduler.py +++ b/vllm/v1/core/scheduler.py @@ -210,7 +210,6 @@ def schedule(self) -> "SchedulerOutput": ) self.finished_req_ids = set() - self.kv_cache_manager.async_remove_touched_blocks() return scheduler_output def _make_running_request_data(