Skip to content

Commit

Permalink
Test setting cpu as a default device
Browse files Browse the repository at this point in the history
Signed-off-by: Sumit Vij <[email protected]>

Reduce model len

Signed-off-by: Sumit Vij <[email protected]>
  • Loading branch information
thedebugger committed Jan 22, 2025
1 parent f483d9a commit 1195ad8
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions tests/lora/test_ultravox.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from tempfile import TemporaryDirectory
from typing import List, Tuple

import torch
from huggingface_hub import snapshot_download
from safetensors.torch import load_file, save_file
from transformers import AutoTokenizer
Expand Down Expand Up @@ -62,6 +63,10 @@ def test_ultravox_lora(vllm_runner):
"""
TODO: Train an Ultravox LoRA instead of using a Llama LoRA.
"""
# Check whether set_default_device fixes the CI failure. Other LoRA tests set
# the device to CUDA, which might be causing a device mismatch in CI.
torch.set_default_device("cpu")

llama3_1_8b_chess_lora = llama3_1_8b_chess_lora_path()
with TemporaryDirectory() as temp_ultravox_lora_dir:
llama3_1_8b_ultravox_chess_lora = mk_llama3_1_8b_ultravox_chess_lora(
Expand All @@ -74,7 +79,7 @@ def test_ultravox_lora(vllm_runner):
max_loras=4,
max_lora_rank=128,
dtype="bfloat16",
max_model_len=4096,
max_model_len=256,
) as vllm_model:
ultravox_outputs: List[Tuple[
List[int], str]] = vllm_model.generate_greedy(
Expand All @@ -96,7 +101,7 @@ def test_ultravox_lora(vllm_runner):
max_loras=4,
max_lora_rank=128,
dtype="bfloat16",
max_model_len=4096,
max_model_len=256,
) as vllm_model:
llama_outputs_no_lora: List[Tuple[List[int], str]] = (
vllm_model.generate_greedy(
Expand Down

0 comments on commit 1195ad8

Please sign in to comment.