[V1] Multiprocessing Tensor Parallel Support for v1 #9856
Changes from 52 commits
@@ -26,6 +26,14 @@
TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4")


@pytest.fixture(autouse=True)
def v1(run_with_both_engines):
    # Simple autouse wrapper to run both engines for each test
    # This can be promoted up to conftest.py to run for every
    # test in a package
    pass


def test_vllm_gc_ed():
    """Verify vllm instance is GC'ed when it is deleted"""
    llm = LLM("facebook/opt-125m")
@@ -36,6 +44,7 @@ def test_vllm_gc_ed():
    assert weak_llm() is None


@pytest.mark.skip_v1
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("backend", ["FLASH_ATTN", "XFORMERS", "FLASHINFER"])
@pytest.mark.parametrize("dtype", ["half"])
@@ -118,6 +127,11 @@ def test_models_distributed(
    if attention_backend:
        os.environ["VLLM_ATTENTION_BACKEND"] = attention_backend

    # Import VLLM_USE_V1 dynamically to handle patching
    from vllm.envs import VLLM_USE_V1
    if VLLM_USE_V1 and distributed_executor_backend != "mp":
        pytest.skip(f"Skip {distributed_executor_backend} for V1")

    dtype = "half"
    max_tokens = 5
@@ -143,6 +157,7 @@ def test_models_distributed(
    )


@pytest.mark.skip_v1
Review comment: Why is this skipped?

Reply: This test fails on V1 but I don't know why. It's not related to this PR as it's not running TP and fails on current main (I just enabled it in #10864).
def test_model_with_failure(vllm_runner) -> None:
    try:
        with patch("vllm.model_executor.models.opt.OPTForCausalLM.forward",

@@ -169,6 +184,7 @@ def test_model_with_failure(vllm_runner) -> None:
        os.remove(filename)


@pytest.mark.skip_v1
def test_failure_with_async_out_proc(vllm_runner) -> None:

    filename = None
Review comment: What does this dynamic patching mean? envs.VLLM_USE_V1 should read the latest env var value.
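As an illustrative aside (not code from this PR, and assuming only that vllm.envs.VLLM_USE_V1 can be patched as a module attribute, as the fixture discussion below implies), the difference the reviewer is pointing at is early binding versus a lookup at call time:

# Hypothetical sketch, not part of the diff above.
from unittest.mock import patch

import vllm.envs as envs

# Bound once, when this line executes; a later patch of vllm.envs.VLLM_USE_V1
# is not visible through this name.
EARLY_VALUE = envs.VLLM_USE_V1


def read_inside_test() -> bool:
    # Re-imported at call time, so it observes the currently patched value.
    from vllm.envs import VLLM_USE_V1
    return VLLM_USE_V1


with patch("vllm.envs.VLLM_USE_V1", True):
    assert read_inside_test() is True  # sees the patch applied by the fixture
    # EARLY_VALUE still holds whatever was read before the patch.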
Reply: I moved the import here so that the VLLM_USE_V1 check works when we are using the run_with_both_engines pytest fixture during testing (see vllm/tests/conftest.py, lines 112 to 126 at bf0e382). Please LMK if you have a better idea!
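The conftest.py lines referenced above are not shown in this view. As a rough sketch of the mechanism being described (assuming the fixture is parametrized over V1 on and off, honors the skip_v1 marker, and patches vllm.envs.VLLM_USE_V1; the real fixture may differ), it would look roughly like:

# Hypothetical sketch of a run_with_both_engines-style fixture; the actual
# implementation lives in vllm/tests/conftest.py (lines 112 to 126 at bf0e382).
from unittest.mock import patch

import pytest


@pytest.fixture(params=[True, False], ids=["v1", "v0"])
def run_with_both_engines(request):
    """Run the requesting test twice: once with V1 enabled, once disabled."""
    use_v1 = request.param
    if use_v1 and request.node.get_closest_marker("skip_v1"):
        pytest.skip("test is marked skip_v1")
    # Patch the module attribute so code that re-reads vllm.envs.VLLM_USE_V1
    # at call time (as the diff above does) sees the per-parametrization value.
    with patch("vllm.envs.VLLM_USE_V1", use_v1):
        yield

With the autouse v1 wrapper from the first hunk depending on such a fixture, every test in the module is collected twice, and the dynamic import inside test_models_distributed picks up whichever value the active parametrization patched in.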