From 3ebd7e40c584d2498eda9480f0baf03cb269a7a2 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Sat, 23 Nov 2024 18:07:25 +0000 Subject: [PATCH] Done Signed-off-by: Jee Jee Li --- vllm/model_executor/model_loader/tensorizer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/model_loader/tensorizer.py b/vllm/model_executor/model_loader/tensorizer.py index c48b287ed181a..3fd668765a1b1 100644 --- a/vllm/model_executor/model_loader/tensorizer.py +++ b/vllm/model_executor/model_loader/tensorizer.py @@ -19,6 +19,7 @@ from vllm.logger import init_logger from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) +from vllm.plugins import set_current_vllm_config from vllm.utils import FlexibleArgumentParser tensorizer_error_msg = None @@ -284,7 +285,8 @@ def _init_model(self): model_args = self.tensorizer_config.hf_config model_args.torch_dtype = self.tensorizer_config.dtype assert self.tensorizer_config.model_class is not None - with no_init_or_tensor(): + # TODO: Do we need to consider old-style model class? + with no_init_or_tensor(), set_current_vllm_config(self.vllm_config): return self.tensorizer_config.model_class( vllm_config=self.vllm_config, )