diff --git a/vllm/v1/executor/uniproc_executor.py b/vllm/v1/executor/uniproc_executor.py
index a9adc0114f76d..c63d7a4c47c15 100644
--- a/vllm/v1/executor/uniproc_executor.py
+++ b/vllm/v1/executor/uniproc_executor.py
@@ -70,6 +70,7 @@ def execute_model(
         scheduler_output,
     ) -> ModelRunnerOutput:
         output = self.worker.execute_model(scheduler_output)
+        assert output is not None
         return output
 
     def profile(self, is_start: bool = True):
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index e6feaee972a35..81b247e07ef4a 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -200,7 +200,7 @@ def compile_or_warm_up_model(self) -> None:
     def execute_model(
         self,
         scheduler_output: "SchedulerOutput",
-    ) -> ModelRunnerOutput:
+    ) -> Optional[ModelRunnerOutput]:
         output = self.model_runner.execute_model(scheduler_output)
         return output if self.rank == 0 else None
 