diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py
index aacff87df6d79..853257c5ad71f 100644
--- a/vllm/inputs/preprocess.py
+++ b/vllm/inputs/preprocess.py
@@ -10,7 +10,7 @@
 from vllm.multimodal.processing import MultiModalDataDict, MultiModalInputsV2
 from vllm.prompt_adapter.request import PromptAdapterRequest
 from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup
-from vllm.utils import print_warning_once
+from vllm.utils import print_info_once, print_warning_once
 
 from .data import (DecoderOnlyInputs, EncoderDecoderInputs, ProcessorInputs,
                    PromptType, SingletonInputs, SingletonPrompt, token_inputs)
@@ -212,7 +212,7 @@ def _can_process_multimodal(self) -> bool:
         # updated to use the new multi-modal processor
         can_process_multimodal = self.mm_registry.has_processor(model_config)
         if not can_process_multimodal:
-            logger.info(
+            print_info_once(
                 "Your model uses the legacy input pipeline instead of the new "
                 "multi-modal processor. Please note that the legacy pipeline "
                 "will be removed in a future release. For more details, see: "
diff --git a/vllm/utils.py b/vllm/utils.py
index cb2ad43a2ae8d..424e7d0947790 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -705,6 +705,12 @@ def create_kv_caches_with_random(
     return key_caches, value_caches
 
 
+@lru_cache
+def print_info_once(msg: str) -> None:
+    # Set the stacklevel to 2 to print the caller's line info
+    logger.info(msg, stacklevel=2)
+
+
 @lru_cache
 def print_warning_once(msg: str) -> None:
     # Set the stacklevel to 2 to print the caller's line info