diff --git a/vllm/worker/cpu_model_runner.py b/vllm/worker/cpu_model_runner.py
index 9f45387b55e19..7348096f90b22 100644
--- a/vllm/worker/cpu_model_runner.py
+++ b/vllm/worker/cpu_model_runner.py
@@ -483,7 +483,8 @@ def __init__(
         # Set after load_model.
         self.lora_manager: Optional[LRUCacheWorkerLoRAManager] = None
 
-        self.builder = self._builder_cls(weakref.proxy(self))
+        if hasattr(self, "_builder_cls"):
+            self.builder = self._builder_cls(weakref.proxy(self))
 
     def load_model(self) -> None:
         self.model = get_model(vllm_config=self.vllm_config)
diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index 5825ad21e572b..ca4bba1b9b4f8 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -1101,7 +1101,8 @@ def __init__(
             SamplingMetadataCache() \
             if self.parallel_config.pipeline_parallel_size == 1 else None
 
-        self.builder = self._builder_cls(weakref.proxy(self))
+        if hasattr(self, "_builder_cls"):
+            self.builder = self._builder_cls(weakref.proxy(self))
 
     def load_model(self) -> None:
         logger.info("Starting to load model %s...", self.model_config.model)
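A minimal sketch of the guard pattern this diff introduces: the base runner only constructs `self.builder` when the concrete class actually defines `_builder_cls`, so runner subclasses without a builder class no longer fail in `__init__`. The class names below (`BaseRunner`, `GPURunner`, `PoolingRunner`, `DummyBuilder`) are illustrative stand-ins, not vLLM's real classes.

```python
import weakref


class DummyBuilder:
    """Illustrative builder; holds a weak proxy so it does not keep the runner alive."""

    def __init__(self, runner):
        self.runner = runner


class BaseRunner:
    def __init__(self):
        # Mirrors the guarded assignment in the diff: skip builder
        # construction entirely when the subclass defines no _builder_cls.
        if hasattr(self, "_builder_cls"):
            self.builder = self._builder_cls(weakref.proxy(self))


class GPURunner(BaseRunner):
    _builder_cls = DummyBuilder


class PoolingRunner(BaseRunner):
    # No _builder_cls: constructing this runner must not raise.
    pass


if __name__ == "__main__":
    assert isinstance(GPURunner().builder, DummyBuilder)
    assert not hasattr(PoolingRunner(), "builder")
```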