Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vllm test (#168) #169

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions scripts/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,52 @@ pipeline
. preflight_qeff/bin/activate
export TOKENIZERS_PARALLELISM=false
export QEFF_HOME=$PWD
pytest tests -m "not cli and not on_qaic" -n auto --junitxml=tests/tests_log1.xml &
pytest tests -m "not cli and on_qaic" -n 4 --junitxml=tests/tests_log2.xml &
pytest tests -m "not cli and not on_qaic" --ignore tests/vllm -n auto --junitxml=tests/tests_log1.xml &
pytest tests -m "not cli and on_qaic" --ignore tests/vllm -n 4 --junitxml=tests/tests_log2.xml &
wait
pytest tests -m cli --junitxml=tests/tests_log3.xml
pytest tests -m cli --ignore tests/vllm --junitxml=tests/tests_log3.xml
junitparser merge tests/tests_log1.xml tests/tests_log2.xml tests/tests_log3.xml tests/tests_log.xml
deactivate
exit
'''
}
}
}


// Stage: build and install vLLM from source inside the preflight_qeff virtualenv.
// The shell step clones the upstream vLLM repository, checks out tag v0.6.0,
// applies the patch at /opt/qti-aic/integrations/vllm/qaic_vllm.patch, exports
// VLLM_TARGET_DEVICE="qaic", and then runs an editable pip install.
// NOTE(review): the clone targets ./vllm relative to the step's working
// directory; presumably the workspace is clean at this point -- confirm, since
// `git clone` refuses to clone into an existing non-empty directory.
stage('Install vLLM')
{
steps
{
sh '''
. preflight_qeff/bin/activate
git clone https://github.com/vllm-project/vllm.git
cd vllm
git checkout v0.6.0
git apply /opt/qti-aic/integrations/vllm/qaic_vllm.patch
export VLLM_TARGET_DEVICE="qaic"
pip install -e .
'''
}
}


// Stage: run the vLLM test suite (tests/vllm) inside the preflight_qeff virtualenv.
// The shell step is wrapped in a 660-minute timeout. It writes the JUnit report
// to tests/tests_log4.xml and then merges tests_log1..4.xml into
// tests/tests_log.xml, so the combined report also contains the vLLM results.
// NOTE(review): the merge expects tests_log1.xml, tests_log2.xml and
// tests_log3.xml to already exist; they are presumably produced by the earlier
// pytest stage -- confirm the stage ordering.
stage('vLLM Test')
{
steps
{

timeout(time: 660, unit: 'MINUTES') {
sh '''
. preflight_qeff/bin/activate
pytest --disable-warnings -s -v tests/vllm --junitxml=tests/tests_log4.xml
junitparser merge tests/tests_log1.xml tests/tests_log2.xml tests/tests_log3.xml tests/tests_log4.xml tests/tests_log.xml
deactivate
exit
'''
}
}
}
}
post
{
Expand Down
100 changes: 100 additions & 0 deletions tests/vllm/test_qaic_output_consistency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------

import random

import pytest
from vllm import LLM, SamplingParams

# Models to test (each entry becomes one parametrized test case).
test_models = [
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]

# Constants for configuration; each one is forwarded to the vLLM ``LLM``
# constructor by the test in this file.
SEQ_LEN = 128  # passed as max_seq_len_to_capture
CTX_LEN = 256  # passed as max_model_len
DECODE_BSZ = 4  # passed as max_num_seqs
# Backward-compatible alias for the original, misspelled constant name so that
# existing references to DECOE_BSZ keep working.
DECOE_BSZ = DECODE_BSZ
DTYPE = "mxfp6"  # passed as quantization
KV_DTYPE = "mxint8"  # passed as kv_cache_dtype
DEVICE_GROUP = [0]  # passed as device_group


@pytest.mark.parametrize("model_name", test_models)
def test_output_consistency(model_name):
    """Check that vLLM produces consistent text for identical prompts.

    1) Single prompt test: the same prompt is submitted five times in one
       ``generate`` call and all five completions must be identical.
    2) Multiple prompt test: a list of prompts is shuffled and generated
       five times; every prompt must yield the same text in every round,
       whatever position it occupies in the batch.

    Parameters
    ----------
    model_name : string
        Huggingface model card name.
    """
    num_repeats = 5

    # temperature=0.0 so that repeated generations of one prompt are comparable.
    sampling_params = SamplingParams(temperature=0.0, max_tokens=None)

    # Creating LLM Object
    qllm = LLM(
        model=model_name,
        device_group=DEVICE_GROUP,
        max_num_seqs=DECOE_BSZ,
        max_model_len=CTX_LEN,
        max_seq_len_to_capture=SEQ_LEN,
        quantization=DTYPE,
        kv_cache_dtype=KV_DTYPE,
        device="qaic",
    )

    # Single prompt test: one batch holding `num_repeats` copies of the prompt.
    single_prompt = ["My name is"]
    single_outputs = qllm.generate(single_prompt * num_repeats, sampling_params)
    single_texts = [op.outputs[0].text for op in single_outputs]

    # Multiple prompt test.
    # NOTE: every entry needs its own trailing comma; adjacent string literals
    # without a comma are silently concatenated into a single prompt.
    # "How many people died in World War II" is listed twice; both occurrences
    # collect into the same bucket below.
    prompts = [
        "My name is",
        "How to eat mangosteen?",
        "How many people died in World War II",
        "Hello ",
        "Who is the president of United States",
        "Who is the president of India",
        "When it snowfalls in San Diego",
        "In which country yamana river flows",
        "How many people died in World War II",
        "Thy youth is proud livery, so gazed on now",
        "Will be a tattered weed, of small worth held:",
        "Then being asked where all thy beauty lies",
        "Where all the treasure of thy lusty days",
        "To say, within thine own deep-sunken eyes",
        "Where is Statue of Liberty located?",
    ]

    # prompt -> list of "prompt + completion" strings, one per occurrence per round.
    results_by_prompt = {prompt: [] for prompt in prompts}

    for _ in range(num_repeats):
        # Shuffle in place so each prompt lands at a different batch position.
        random.shuffle(prompts)
        outputs = qllm.generate(prompts, sampling_params)

        # Check the count in every round, before pairing, so a missing or
        # extra output cannot silently misalign prompts and completions.
        assert len(prompts) == len(outputs), (
            "Number of generated outputs does not match the number of input prompts!!"
        )

        # Assumes generate() returns outputs in the same order as the inputs
        # (the positional pairing the original index-based loop relied on).
        for prompt, op in zip(prompts, outputs):
            results_by_prompt[prompt].append(prompt + op.outputs[0].text)

    # Assertion to check the consistency of single prompt.
    assert len(set(single_texts)) == 1, "Outputs from different slots for same prompt does not match!!"

    # Assertion to check multiple prompts.
    for texts in results_by_prompt.values():
        assert len(set(texts)) == 1, "Outputs from different slots for same prompt does not match!!"
Loading