Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
Signed-off-by: NickLucche <[email protected]>
  • Loading branch information
NickLucche committed Jan 20, 2025
1 parent 1191485 commit 5389ab6
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 46 deletions.
10 changes: 10 additions & 0 deletions tests/models/decoder_only/language/test_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,20 @@ def gguf_model(self):
gguf_filename="starcoder2-3b.Q6_K.gguf",
)

# Regression config: exercises the VocabParallelEmbedding sharding issue.
DOLPHIN_CONFIG = GGUFTestConfig(
    original_model="cognitivecomputations/TinyDolphin-2.8-1.1b",
    gguf_repo="tsunemoto/TinyDolphin-2.8-1.1b-GGUF",
    gguf_filename="tinydolphin-2.8-1.1b.Q6_K.gguf",
)

# Configs run by the GGUF comparison test below.
MODELS = [
    LLAMA_CONFIG,
    QWEN2_CONFIG,
    PHI3_CONFIG,
    GPT2_CONFIG,
    STABLELM_CONFIG,
    DOLPHIN_CONFIG,  # trailing comma added for consistency with other entries
    # STARCODER_CONFIG, # broken
]

Expand Down Expand Up @@ -107,6 +115,7 @@ def test_models(

# Run unquantized model.
with vllm_runner(model_name=model.original_model,
enforce_eager=True, # faster tests
dtype=dtype,
max_model_len=MAX_MODEL_LEN,
tensor_parallel_size=tp_size) as original_model:
Expand All @@ -115,6 +124,7 @@ def test_models(

# Run gguf model.
with vllm_runner(model_name=model.gguf_model,
enforce_eager=True,
tokenizer_name=model.original_model,
dtype=dtype,
max_model_len=MAX_MODEL_LEN,
Expand Down
42 changes: 0 additions & 42 deletions tests/models/decoder_only/language/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,45 +87,3 @@ def print_model(model):
name_0="hf",
name_1="vllm",
)

@pytest.mark.parametrize(
    "model",
    [
        # Regression case for the VocabParallelEmbedding crash under TP.
        pytest.param("cognitivecomputations/TinyDolphin-2.8-1.1b"),
    ])
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("tp", [2])
@pytest.mark.parametrize("max_tokens", [32])
@pytest.mark.parametrize("num_logprobs", [5])
def test_tp_models(
    hf_runner,
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
    tp: int,
    max_tokens: int,
    num_logprobs: int,
) -> None:
    """Compare greedy logprobs between HF and vLLM with tensor parallelism.

    Runs the same prompts through the HF reference implementation and
    through vLLM sharded across ``tp`` devices, then checks that the
    top logprobs stay close.
    """
    with hf_runner(model, dtype=dtype) as hf_model:
        reference_outputs = hf_model.generate_greedy_logprobs_limit(
            example_prompts, max_tokens, num_logprobs)

    with vllm_runner(model, dtype=dtype,
                     tensor_parallel_size=tp) as vllm_model:
        candidate_outputs = vllm_model.generate_greedy_logprobs(
            example_prompts, max_tokens, num_logprobs)

        # Also verify that the model's extra_repr prints without error.
        vllm_model.apply_model(print)

    check_logprobs_close(
        outputs_0_lst=reference_outputs,
        outputs_1_lst=candidate_outputs,
        name_0="hf",
        name_1="vllm",
    )
2 changes: 1 addition & 1 deletion vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def __init__(self,
self.max_logprobs = max_logprobs
self.disable_sliding_window = disable_sliding_window
self.skip_tokenizer_init = skip_tokenizer_init
# breakpoint()

hf_config = get_config(self.model, trust_remote_code, revision,
code_revision, config_format)

Expand Down
4 changes: 1 addition & 3 deletions vllm/transformers_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def get_config(
token=HF_TOKEN,
**kwargs,
)
# config_dict["model_type"] = "granite"

# Use custom model class if it's in our registry
model_type = config_dict.get("model_type")
if model_type in _CONFIG_REGISTRY:
Expand All @@ -228,7 +228,6 @@ def get_config(
token=HF_TOKEN,
**kwargs,
)
# config.model_type = 'granite'
except ValueError as e:
if (not trust_remote_code
and "requires you to execute the configuration file"
Expand All @@ -253,7 +252,6 @@ def get_config(
if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
raise RuntimeError(
f"Can't get gguf config for {config.model_type}.")
# model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES['granite']
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
config.update({"architectures": [model_type]})

Expand Down

0 comments on commit 5389ab6

Please sign in to comment.