From 81db7fc45befd90c50617dc965219862cdae3bee Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Thu, 19 Dec 2024 02:16:40 +0800 Subject: [PATCH] [Bugfix] Fix broken phi3-v mm_processor_kwargs tests (#11263) Signed-off-by: Isotr0py <2037008807@qq.com> Signed-off-by: Sage Moore --- .../mm_processor_kwargs/test_phi3v.py | 12 +++++------- vllm/model_executor/models/phi3v.py | 17 ++++++++++++----- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tests/models/decoder_only/vision_language/mm_processor_kwargs/test_phi3v.py b/tests/models/decoder_only/vision_language/mm_processor_kwargs/test_phi3v.py index ce8ac8d8e0ceb..f95cee277f4e6 100644 --- a/tests/models/decoder_only/vision_language/mm_processor_kwargs/test_phi3v.py +++ b/tests/models/decoder_only/vision_language/mm_processor_kwargs/test_phi3v.py @@ -58,16 +58,14 @@ def test_max_tokens_override(get_max_phi3v_image_tokens, model: str, @pytest.mark.parametrize("model", models) @pytest.mark.parametrize( - "num_crops,expected_toks_per_img,num_imgs", + "num_crops,expected_toks_per_img", [ - (4, 757, 1), - (4, 757, 2), - (16, 1921, 1), - (16, 1921, 2), + (4, 757), + (16, 1921), # the default num_crops of phi-3.5-vision is 4 - (None, 757, 2), - (None, 757, 2), + (None, 757), ]) +@pytest.mark.parametrize("num_imgs", [1, 2]) def test_processor_override(processor_for_phi3v, image_assets: _ImageAssets, model: str, num_crops: Optional[int], expected_toks_per_img: int, num_imgs: int): diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py index 7ab06768ae612..b19329a57a8cf 100644 --- a/vllm/model_executor/models/phi3v.py +++ b/vllm/model_executor/models/phi3v.py @@ -302,11 +302,18 @@ def add_image_newline(self, image_features_hd): return image_features_hd_newline -def get_max_phi3v_image_tokens(ctx: InputContext) -> int: - processor = ctx.get_hf_processor() - image_processor = processor.image_processor # type: ignore - - return image_processor.calc_num_image_tokens_from_image_size( +def get_max_phi3v_image_tokens( + ctx: InputContext, + *, + num_crops: Optional[int] = None, +) -> int: + mm_processor_kwargs = {} + if num_crops: + mm_processor_kwargs["num_crops"] = num_crops + + processor = ctx.get_hf_processor(**mm_processor_kwargs) + + return processor.calc_num_image_tokens_from_image_size( width=MAX_IMAGE_FEATURE_SIZE_WIDTH, height=MAX_IMAGE_FEATURE_SIZE_HEIGHT, )