diff --git a/tests/lora/test_ultravox.py b/tests/lora/test_ultravox.py
index e0049180710c3..19d8721cd13f1 100644
--- a/tests/lora/test_ultravox.py
+++ b/tests/lora/test_ultravox.py
@@ -3,6 +3,7 @@
 from tempfile import TemporaryDirectory
 from typing import List, Tuple
 
+import torch
 from huggingface_hub import snapshot_download
 from safetensors.torch import load_file, save_file
 from transformers import AutoTokenizer
@@ -62,6 +63,10 @@ def test_ultravox_lora(vllm_runner):
     """
     TODO: Train an Ultravox LoRA instead of using a Llama LoRA.
     """
+    # Check if set_default_device fixes the CI failure. Other lora tests set
+    # device to cuda which might be causing device mismatch in CI
+    torch.set_default_device("cpu")
+
     llama3_1_8b_chess_lora = llama3_1_8b_chess_lora_path()
     with TemporaryDirectory() as temp_ultravox_lora_dir:
         llama3_1_8b_ultravox_chess_lora = mk_llama3_1_8b_ultravox_chess_lora(
@@ -74,7 +79,7 @@ def test_ultravox_lora(vllm_runner):
             max_loras=4,
             max_lora_rank=128,
             dtype="bfloat16",
-            max_model_len=4096,
+            max_model_len=256,
         ) as vllm_model:
             ultravox_outputs: List[Tuple[
                 List[int], str]] = vllm_model.generate_greedy(
@@ -96,7 +101,7 @@ def test_ultravox_lora(vllm_runner):
             max_loras=4,
             max_lora_rank=128,
             dtype="bfloat16",
-            max_model_len=4096,
+            max_model_len=256,
         ) as vllm_model:
             llama_outputs_no_lora: List[Tuple[List[int], str]] = (
                 vllm_model.generate_greedy(