diff --git a/vllm/model_executor/model_loader/tensorizer.py b/vllm/model_executor/model_loader/tensorizer.py
index c48b287ed181a..3fd668765a1b1 100644
--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@@ -19,6 +19,7 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     VocabParallelEmbedding)
+from vllm.plugins import set_current_vllm_config
 from vllm.utils import FlexibleArgumentParser
 
 tensorizer_error_msg = None
@@ -284,7 +285,8 @@ def _init_model(self):
         model_args = self.tensorizer_config.hf_config
         model_args.torch_dtype = self.tensorizer_config.dtype
         assert self.tensorizer_config.model_class is not None
-        with no_init_or_tensor():
+        # TODO: Do we need to consider old-style model class?
+        with no_init_or_tensor(), set_current_vllm_config(self.vllm_config):
             return self.tensorizer_config.model_class(
                 vllm_config=self.vllm_config, )
 
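
The change wraps model construction in `set_current_vllm_config(self.vllm_config)` so that code executed inside the model's `__init__` can look up the active `VllmConfig` instead of having it threaded through every constructor. The sketch below illustrates that general pattern only; `FakeVllmConfig`, `set_current_config`, and `ToyModel` are hypothetical names for this sketch and are not vLLM's actual implementation, whose real entry point here is `vllm.plugins.set_current_vllm_config` as shown in the diff.

```python
# Minimal sketch (assumed, not vLLM's implementation) of a "current config"
# context manager: the config is installed in context-local state for the
# duration of the `with` block, and objects built inside it can read it back.
from contextlib import contextmanager
from contextvars import ContextVar
from dataclasses import dataclass
from typing import Iterator, Optional


@dataclass
class FakeVllmConfig:
    """Hypothetical stand-in for a vLLM config object, used only in this sketch."""
    model: str = "facebook/opt-125m"


_current_config: ContextVar[Optional[FakeVllmConfig]] = ContextVar(
    "_current_config", default=None)


@contextmanager
def set_current_config(config: FakeVllmConfig) -> Iterator[None]:
    """Make `config` visible to code running inside the `with` block."""
    token = _current_config.set(config)
    try:
        yield
    finally:
        _current_config.reset(token)


class ToyModel:
    def __init__(self) -> None:
        # A nested layer can fetch the active config at construction time
        # rather than requiring the caller to pass it down explicitly.
        self.config = _current_config.get()


if __name__ == "__main__":
    cfg = FakeVllmConfig()
    with set_current_config(cfg):
        model = ToyModel()
    assert model.config is cfg
```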