From 4872ae39bee2f0482db75bb943ef1547749af92c Mon Sep 17 00:00:00 2001 From: Onkar Chougule <168134249+ochougul@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:11:14 +0530 Subject: [PATCH 1/2] Vllm test (#168) * [VLLM] vllm install and test added. Signed-off-by: Mahesh Balasubramanian * lint checked Signed-off-by: Mahesh Balasubramanian * Changes made according to the compliance Signed-off-by: Mahesh Balasubramanian * Updated the copyrights Signed-off-by: Mahesh Balasubramanian * Updated test and Jenkins file according to the comments Signed-off-by: Mahesh Balasubramanian --------- Signed-off-by: Mahesh Balasubramanian Co-authored-by: Mahesh Balasubramanian Signed-off-by: Onkar Chougule --- scripts/Jenkinsfile | 37 +++++++- tests/vllm/test_qaic_output_consistency.py | 102 +++++++++++++++++++++ 2 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 tests/vllm/test_qaic_output_consistency.py diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile index ce2e66780..5b081a5c6 100644 --- a/scripts/Jenkinsfile +++ b/scripts/Jenkinsfile @@ -48,6 +48,41 @@ pipeline } } } + + + stage('Install vLLM') + { + steps + { + sh ''' + . preflight_qeff/bin/activate + git clone https://github.com/vllm-project/vllm.git + cd vllm + git checkout v0.6.0 + git apply /opt/qti-aic/integrations/vllm/qaic_vllm.patch + export VLLM_TARGET_DEVICE="qaic" + pip install -e . + ''' + } + } + + + stage('vLLM Test') + { + steps + { + + timeout(time: 660, unit: 'MINUTES') { + sh ''' + . 
preflight_qeff/bin/activate + pytest --disable-warnings -s -v tests/vllm --junitxml=tests/tests_log4.xml + junitparser merge tests/tests_log1.xml tests/tests_log2.xml tests/tests_log3.xml tests/tests_log4.xml tests/tests_log.xml + deactivate + exit + ''' + } + } + } } post { @@ -59,4 +94,4 @@ pipeline } } -} +} \ No newline at end of file diff --git a/tests/vllm/test_qaic_output_consistency.py b/tests/vllm/test_qaic_output_consistency.py new file mode 100644 index 000000000..e4c2b1a6a --- /dev/null +++ b/tests/vllm/test_qaic_output_consistency.py @@ -0,0 +1,102 @@ +# ----------------------------------------------------------------------------- +# +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# ----------------------------------------------------------------------------- + +import random + +import pytest +from vllm import LLM, SamplingParams + +# Model to test +test_models = [ + "TinyLlama/TinyLlama-1.1B-Chat-v1.0", +] + +# Constants for configuration +SEQ_LEN = 128 +CTX_LEN = 256 +DECOE_BSZ = 4 +DTYPE = "mxfp6" +KV_DTYPE = "mxint8" +DEVICE_GROUP = [0] + + +@pytest.mark.parametrize("model_name", test_models) +def test_output_consistency(model_name): + """This pytest function is used to check the consistency of vLLM. + 1) Single prompt test to check if the output generated in 5 different + runs yields the same results + 2) Multiple prompt check to test if multiple prompts yield same results + if run in different slots. + + Parameters + ---------- + model_name : string + Huggingface model card name. 
+ """ + sampling_params = SamplingParams(temperature=0.0, max_tokens=None) + + # Creating LLM Object + qllm = LLM( + model=model_name, + device_group=DEVICE_GROUP, + max_num_seqs=DECOE_BSZ, + max_model_len=CTX_LEN, + max_seq_len_to_capture=SEQ_LEN, + quantization=DTYPE, + kv_cache_dtype=KV_DTYPE, + device="qaic", + ) + + # Single prompt test + prompt1 = ["My name is"] + + output1 = qllm.generate(prompt1 * 5, sampling_params) + + check_output1 = [] + for i, op in enumerate(output1): + check_output1.append(op.outputs[0].text) + + + # Multiple prompt test + outputDict = dict() + prompt2 = [ + "My name is", + "How to eat mangosteen?", + "How many people died in World War II", + "Hello ", + "Who is the president of United States", + "Who is the president of India", + "When it snowfalls in San Diego", + "In which country yamana river flows", + "How many people died in World War II", + "Thy youth is proud livery, so gazed on now", + "Will be a tattered weed, of small worth held:" "Then being asked where all thy beauty lies", + "Where all the treasure of thy lusty days", + "To say, within thine own deep-sunken eyes", + "Where is Statue of Liberty located?", + ] + + for p in prompt2: + outputDict[p] = [] + + for _ in range(5): + random.shuffle(prompt2) + output2 = qllm.generate(prompt2, sampling_params) + for i, op in enumerate(output2): + generated_text = op.outputs[0].text + outputDict[prompt2[i]].append(str(prompt2[i] + generated_text)) + + + # Assertion to check the consistency of single prompt. + assert len(set(check_output1)) == 1, "Outputs from different slots for same prompt does not match!!" + + # Assertion to check multiple prompts. + for key in outputDict.keys(): + assert len(set(outputDict[key])) == 1, "Outputs from different slots for same prompt does not match!!" + + # Assertion to check if any prompts are missed. + assert len(prompt2) == len(output2), "Number of Generated Tokens do not match the number of valid inputs!!" 
\ No newline at end of file From 4fdc2a6b949ddf41726d4d1bdb90423f44769474 Mon Sep 17 00:00:00 2001 From: Onkar Chougule Date: Thu, 7 Nov 2024 17:17:14 +0530 Subject: [PATCH 2/2] Exclude tests/vllm from the existing pytest runs in Jenkinsfile and run formatter Signed-off-by: Onkar Chougule --- scripts/Jenkinsfile | 8 ++++---- tests/vllm/test_qaic_output_consistency.py | 4 +--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile index 5b081a5c6..a522002e1 100644 --- a/scripts/Jenkinsfile +++ b/scripts/Jenkinsfile @@ -37,10 +37,10 @@ pipeline . preflight_qeff/bin/activate export TOKENIZERS_PARALLELISM=false export QEFF_HOME=$PWD - pytest tests -m "not cli and not on_qaic" -n auto --junitxml=tests/tests_log1.xml & - pytest tests -m "not cli and on_qaic" -n 4 --junitxml=tests/tests_log2.xml & + pytest tests -m "not cli and not on_qaic" --ignore tests/vllm -n auto --junitxml=tests/tests_log1.xml & + pytest tests -m "not cli and on_qaic" --ignore tests/vllm -n 4 --junitxml=tests/tests_log2.xml & wait - pytest tests -m cli --junitxml=tests/tests_log3.xml + pytest tests -m cli --ignore tests/vllm --junitxml=tests/tests_log3.xml junitparser merge tests/tests_log1.xml tests/tests_log2.xml tests/tests_log3.xml tests/tests_log.xml deactivate exit @@ -94,4 +94,4 @@ pipeline } } -} \ No newline at end of file +} diff --git a/tests/vllm/test_qaic_output_consistency.py b/tests/vllm/test_qaic_output_consistency.py index e4c2b1a6a..00cd5765a 100644 --- a/tests/vllm/test_qaic_output_consistency.py +++ b/tests/vllm/test_qaic_output_consistency.py @@ -60,7 +60,6 @@ def test_output_consistency(model_name): for i, op in enumerate(output1): check_output1.append(op.outputs[0].text) - # Multiple prompt test outputDict = dict() prompt2 = [ @@ -90,7 +89,6 @@ def test_output_consistency(model_name): generated_text = op.outputs[0].text outputDict[prompt2[i]].append(str(prompt2[i] + generated_text)) - # Assertion to check the consistency of single prompt. 
assert len(set(check_output1)) == 1, "Outputs from different slots for same prompt does not match!!" @@ -99,4 +97,4 @@ def test_output_consistency(model_name): assert len(set(outputDict[key])) == 1, "Outputs from different slots for same prompt does not match!!" # Assertion to check if any prompts are missed. - assert len(prompt2) == len(output2), "Number of Generated Tokens do not match the number of valid inputs!!" \ No newline at end of file + assert len(prompt2) == len(output2), "Number of Generated Tokens do not match the number of valid inputs!!"