Skip to content

Commit

Permalink
Reduce model len
Browse files Browse the repository at this point in the history
Signed-off-by: Sumit Vij <[email protected]>
  • Loading branch information
thedebugger committed Jan 22, 2025
1 parent dd9a03f commit d247036
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions tests/lora/test_ultravox.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import shutil
import torch
from os import path
from tempfile import TemporaryDirectory
from typing import List, Tuple

import torch
from huggingface_hub import snapshot_download
from safetensors.torch import load_file, save_file
from transformers import AutoTokenizer
Expand Down Expand Up @@ -63,8 +63,8 @@ def test_ultravox_lora(vllm_runner):
"""
TODO: Train an Ultravox LoRA instead of using a Llama LoRA.
"""
#Check if set_default_device fixes the CI failure. Other lora tests set device to cuda
#which might be causing device mismatch in CI
# Check if set_default_device fixes the CI failure. Other lora tests set
# device to cuda which might be causing device mismatch in CI
torch.set_default_device("cpu")

llama3_1_8b_chess_lora = llama3_1_8b_chess_lora_path()
Expand All @@ -79,6 +79,7 @@ def test_ultravox_lora(vllm_runner):
max_loras=4,
max_lora_rank=128,
dtype="bfloat16",
max_model_len=256,
) as vllm_model:
ultravox_outputs: List[Tuple[
List[int], str]] = vllm_model.generate_greedy(
Expand All @@ -100,6 +101,7 @@ def test_ultravox_lora(vllm_runner):
max_loras=4,
max_lora_rank=128,
dtype="bfloat16",
max_model_len=256,
) as vllm_model:
llama_outputs_no_lora: List[Tuple[List[int], str]] = (
vllm_model.generate_greedy(
Expand Down

0 comments on commit d247036

Please sign in to comment.