Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
Signed-off-by: NickLucche <[email protected]>
  • Loading branch information
NickLucche committed Jan 20, 2025
1 parent 1191485 commit 5389ab6
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 46 deletions.
10 changes: 10 additions & 0 deletions tests/models/decoder_only/language/test_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,20 @@ def gguf_model(self):
gguf_filename="starcoder2-3b.Q6_K.gguf",
)

# Regression config: exercises the VocabParallelEmbedding sharding issue.
DOLPHIN_CONFIG = GGUFTestConfig(
    original_model="cognitivecomputations/TinyDolphin-2.8-1.1b",
    gguf_repo="tsunemoto/TinyDolphin-2.8-1.1b-GGUF",
    gguf_filename="tinydolphin-2.8-1.1b.Q6_K.gguf",
)

# Configs run by the GGUF comparison test below.
MODELS = [
    LLAMA_CONFIG,
    QWEN2_CONFIG,
    PHI3_CONFIG,
    GPT2_CONFIG,
    STABLELM_CONFIG,
    DOLPHIN_CONFIG,  # trailing comma added for consistency with other entries
    # STARCODER_CONFIG, # broken
]

Expand Down Expand Up @@ -107,6 +115,7 @@ def test_models(

# Run unquantized model.
with vllm_runner(model_name=model.original_model,
enforce_eager=True, # faster tests
dtype=dtype,
max_model_len=MAX_MODEL_LEN,
tensor_parallel_size=tp_size) as original_model:
Expand All @@ -115,6 +124,7 @@ def test_models(

# Run gguf model.
with vllm_runner(model_name=model.gguf_model,
enforce_eager=True,
tokenizer_name=model.original_model,
dtype=dtype,
max_model_len=MAX_MODEL_LEN,
Expand Down
42 changes: 0 additions & 42 deletions tests/models/decoder_only/language/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,45 +87,3 @@ def print_model(model):
name_0="hf",
name_1="vllm",
)

@pytest.mark.parametrize(
    "model",
    [
        # Regression case for the VocabParallelEmbedding crash under TP.
        pytest.param("cognitivecomputations/TinyDolphin-2.8-1.1b"),
    ])
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("tp", [2])
@pytest.mark.parametrize("max_tokens", [32])
@pytest.mark.parametrize("num_logprobs", [5])
def test_tp_models(
    hf_runner,
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
    tp: int,
    max_tokens: int,
    num_logprobs: int,
) -> None:
    """Compare greedy logprobs between HF and vLLM with tensor parallelism.

    Runs the same prompts through the HF reference implementation and
    through vLLM sharded across ``tp`` devices, then checks that the
    top logprobs stay close.
    """
    with hf_runner(model, dtype=dtype) as hf_model:
        reference_outputs = hf_model.generate_greedy_logprobs_limit(
            example_prompts, max_tokens, num_logprobs)

    with vllm_runner(model, dtype=dtype,
                     tensor_parallel_size=tp) as vllm_model:
        candidate_outputs = vllm_model.generate_greedy_logprobs(
            example_prompts, max_tokens, num_logprobs)

        # Also verify that the model's extra_repr prints without error.
        vllm_model.apply_model(print)

    check_logprobs_close(
        outputs_0_lst=reference_outputs,
        outputs_1_lst=candidate_outputs,
        name_0="hf",
        name_1="vllm",
    )
2 changes: 1 addition & 1 deletion vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def __init__(self,
self.max_logprobs = max_logprobs
self.disable_sliding_window = disable_sliding_window
self.skip_tokenizer_init = skip_tokenizer_init
# breakpoint()

hf_config = get_config(self.model, trust_remote_code, revision,
code_revision, config_format)

Expand Down
4 changes: 1 addition & 3 deletions vllm/transformers_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def get_config(
token=HF_TOKEN,
**kwargs,
)
# config_dict["model_type"] = "granite"

# Use custom model class if it's in our registry
model_type = config_dict.get("model_type")
if model_type in _CONFIG_REGISTRY:
Expand All @@ -228,7 +228,6 @@ def get_config(
token=HF_TOKEN,
**kwargs,
)
# config.model_type = 'granite'
except ValueError as e:
if (not trust_remote_code
and "requires you to execute the configuration file"
Expand All @@ -253,7 +252,6 @@ def get_config(
if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
raise RuntimeError(
f"Can't get gguf config for {config.model_type}.")
# model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES['granite']
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
config.update({"architectures": [model_type]})

Expand Down

0 comments on commit 5389ab6

Please sign in to comment.