diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst
index cdcea70c6cb7d..5a474043078db 100644
--- a/docs/source/models/supported_models.rst
+++ b/docs/source/models/supported_models.rst
@@ -540,7 +540,7 @@ Text Generation
     - Qwen2-VL
     - T + I\ :sup:`E+` + V\ :sup:`+`
     - :code:`Qwen/Qwen2-VL-2B-Instruct`, :code:`Qwen/Qwen2-VL-7B-Instruct`, :code:`Qwen/Qwen2-VL-72B-Instruct`, etc.
-    -
+    - ✅︎
     - ✅︎
   * - :code:`UltravoxModel`
     - Ultravox
diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index e30b84e8dd44c..fad9137d0dcc5 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -40,7 +40,7 @@
 from vllm.attention import AttentionMetadata
 from vllm.attention.selector import _Backend
-from vllm.config import CacheConfig, MultiModalConfig
+from vllm.config import CacheConfig, LoRAConfig, MultiModalConfig
 from vllm.distributed import get_pp_group, parallel_state
 from vllm.distributed import utils as dist_utils
 from vllm.inputs import (INPUT_REGISTRY, DecoderOnlyInputs, DummyData,
                          InputContext, token_inputs)
@@ -65,7 +65,7 @@
 from vllm.transformers_utils.config import uses_mrope
 from vllm.transformers_utils.processor import cached_get_processor

-from .interfaces import SupportsMultiModal, SupportsPP
+from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsPP
 from .utils import (PPMissingLayer, get_vit_attn_backend,
                     is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory)
@@ -927,13 +927,37 @@ def input_processor_for_qwen2_vl(
 @INPUT_REGISTRY.register_dummy_data(dummy_data_for_qwen2_vl)
 @INPUT_REGISTRY.register_input_processor(input_processor_for_qwen2_vl)
 class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
-                                      SupportsPP):
+                                      SupportsLoRA, SupportsPP):
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    # LoRA specific attributes
+    # TODO Support LoRA for the visual encoder in the future.
+    supported_lora_modules = [
+        "qkv_proj",
+        "o_proj",
+        "gate_up_proj",
+        "down_proj",
+    ]
+    embedding_modules = {}
+    embedding_padding_modules = []

     def __init__(self,
                  config: Qwen2VLConfig,
                  multimodal_config: MultiModalConfig,
                  cache_config: Optional[CacheConfig] = None,
-                 quant_config: Optional[QuantizationConfig] = None) -> None:
+                 quant_config: Optional[QuantizationConfig] = None,
+                 lora_config: Optional[LoRAConfig] = None) -> None:
+
         super().__init__()
         assert not cache_config.enable_prefix_caching, \
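
---

Not part of the diff itself, but for reviewers who want to exercise the change: below is a minimal sketch of applying a LoRA adapter to Qwen2-VL through the offline `LLM` entrypoint. The adapter name and path are placeholders, not real artifacts. Note that per the `TODO` in the diff, only the language-model projections (`qkv_proj`, `o_proj`, `gate_up_proj`, `down_proj`) are LoRA-targeted here, so an adapter trained against the visual encoder will not apply.

```python
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Hypothetical local adapter path; any Qwen2-VL LoRA trained on the
# language-model projections (q/k/v/o, gate/up/down) should work.
ADAPTER_PATH = "/path/to/qwen2_vl_lora_adapter"

llm = LLM(
    model="Qwen/Qwen2-VL-7B-Instruct",
    enable_lora=True,   # required to load LoRA adapters at runtime
    max_lora_rank=16,   # must be >= the adapter's rank
)

# LoRARequest takes a unique name, an integer adapter ID, and the path.
outputs = llm.generate(
    "Describe Qwen2-VL in one sentence.",
    SamplingParams(temperature=0.0, max_tokens=64),
    lora_request=LoRARequest("qwen2_vl_adapter", 1, ADAPTER_PATH),
)
print(outputs[0].outputs[0].text)
```

Because the base-model `q_proj`/`k_proj`/`v_proj` and `gate_proj`/`up_proj` layers are fused in vLLM, the `packed_modules_mapping` added above is what lets per-module LoRA weights from a standard PEFT checkpoint be stacked onto the fused `qkv_proj` and `gate_up_proj` layers at load time.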