diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/decoder_only/vision_language/test_models.py index 163752e9fe06e..1ab42f8c126f8 100644 --- a/tests/models/decoder_only/vision_language/test_models.py +++ b/tests/models/decoder_only/vision_language/test_models.py @@ -34,6 +34,7 @@ "dtype": "half", "max_tokens": 5, "tensor_parallel_size": 2, + "model_kwargs": {"device_map": "auto"}, "image_size_factors": [(.25, 0.5, 1.0)], "distributed_executor_backend": ( "ray", diff --git a/tests/models/decoder_only/vision_language/vlm_utils/types.py b/tests/models/decoder_only/vision_language/vlm_utils/types.py index fd18c7c8346f0..8459476dc2d07 100644 --- a/tests/models/decoder_only/vision_language/vlm_utils/types.py +++ b/tests/models/decoder_only/vision_language/vlm_utils/types.py @@ -158,6 +158,7 @@ def get_non_parametrized_runner_kwargs(self): "max_model_len": self.max_model_len, "max_num_seqs": self.max_num_seqs, "task": self.task, + "tensor_parallel_size": self.tensor_parallel_size, "hf_output_post_proc": self.hf_output_post_proc, "vllm_output_post_proc": self.vllm_output_post_proc, "auto_cls": self.auto_cls,