
[ci/lint] Add back default arg for pre-commit #12279

Merged · 6 commits · Jan 22, 2025
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yml
@@ -16,4 +16,4 @@ jobs:
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
     - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
       with:
-        extra_args: --hook-stage manual
+        extra_args: --all-files --hook-stage manual
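pre-commit/action forwards extra_args verbatim to `pre-commit run`, and the action's default for that input is `--all-files`; overriding it with only `--hook-stage manual` had silently dropped the all-files behavior, which this change restores. With `--all-files` back, every hook runs over the whole repository rather than only the files changed in a pull request — which is why the remaining hunks below are formatting and lint cleanups in files this PR does not otherwise touch.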
17 changes: 7 additions & 10 deletions tests/models/decoder_only/language/test_gguf.py
@@ -74,11 +74,7 @@ def gguf_model(self):
 )

 MODELS = [
-    LLAMA_CONFIG,
-    QWEN2_CONFIG,
-    PHI3_CONFIG,
-    GPT2_CONFIG,
-    STABLELM_CONFIG,
+    LLAMA_CONFIG, QWEN2_CONFIG, PHI3_CONFIG, GPT2_CONFIG, STABLELM_CONFIG,
     DOLPHIN_CONFIG
     # STARCODER_CONFIG, # broken
 ]
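A behavior-neutral reflow: the repository's formatter (presumably yapf, which the pre-commit config ran at the time) packs the list onto fewer lines once the hooks are applied to all files.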
@@ -114,11 +110,12 @@ def test_models(
         messages, tokenize=False, add_generation_prompt=True)

     # Run unquantized model.
-    with vllm_runner(model_name=model.original_model,
-                     enforce_eager=True,  # faster tests
-                     dtype=dtype,
-                     max_model_len=MAX_MODEL_LEN,
-                     tensor_parallel_size=tp_size) as original_model:
+    with vllm_runner(
+            model_name=model.original_model,
+            enforce_eager=True,  # faster tests
+            dtype=dtype,
+            max_model_len=MAX_MODEL_LEN,
+            tensor_parallel_size=tp_size) as original_model:
         original_outputs = original_model.generate_greedy_logprobs(
             example_prompts[:-1], max_tokens, num_logprobs)

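Another pure-indentation change: the hanging indent moves to the formatter's preferred style. The test itself is unchanged — it still runs the unquantized baseline model to collect greedy logprobs for comparison against the GGUF-quantized variant.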
2 changes: 1 addition & 1 deletion vllm/model_executor/models/paligemma.py
@@ -147,7 +147,7 @@ class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal,
             "up_proj",
         ],
     }
-
+
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
         config = vllm_config.model_config.hf_config
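The one-line change in this file is invisible in rendered text: a blank line replaces a blank line, so the removed version presumably carried trailing whitespace that a whitespace hook stripped once it ran repo-wide.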
8 changes: 3 additions & 5 deletions vllm/model_executor/models/siglip.py
@@ -348,12 +348,10 @@ def __init__(
         if quant_config and quant_config.get_name() == "bitsandbytes":
             quantizable = True
         else:
-            # For other quantization, we require the hidden size to be a
+            # For other quantization, we require the hidden size to be a
             # multiple of 64
-            quantizable = (
-                config.hidden_size % 64 == 0
-                and config.intermediate_size % 64 == 0
-            )
+            quantizable = (config.hidden_size % 64 == 0
+                           and config.intermediate_size % 64 == 0)
         self.fc1 = ColumnParallelLinear(
             config.hidden_size,
             config.intermediate_size,
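The apparently duplicated comment line in the raw diff is a whitespace-only change (presumably trailing whitespace on the removed line); the substantive edit is the expression reflow, which keeps the logic intact: bitsandbytes can quantize tensors of any shape, while the other quantization schemes handled here require both hidden_size and intermediate_size to be multiples of 64 — presumably because their kernels operate on fixed-size blocks.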
2 changes: 1 addition & 1 deletion vllm/platforms/__init__.py
@@ -101,7 +101,7 @@ def cpu_platform_plugin() -> Optional[str]:
     try:
         from importlib.metadata import version
         is_cpu = "cpu" in version("vllm")
-        if is_cpu == False:
+        if not is_cpu:
             import platform
             is_cpu = platform.machine().lower().startswith("arm")

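`version()` returns a plain str, so the membership test already yields a bool; `not is_cpu` is the idiomatic negation, and `is_cpu == False` is exactly the pattern pycodestyle's E712 check flags — plausibly one of the lints that started firing once `--all-files` returned. A self-contained sketch of the same detection pattern (hedged: `looks_like_cpu_build` is a made-up name, not vLLM's API; the CPU wheel is assumed to carry "cpu" in its version string, as the original check implies):

from importlib.metadata import PackageNotFoundError, version


def looks_like_cpu_build(package: str = "vllm") -> bool:
    # Hypothetical helper mirroring cpu_platform_plugin()'s detection.
    try:
        is_cpu = "cpu" in version(package)  # e.g. a "+cpu" version suffix
    except PackageNotFoundError:
        return False  # package not installed at all
    if not is_cpu:
        import platform
        # The original code falls back to checking for an ARM machine.
        is_cpu = platform.machine().lower().startswith("arm")
    return is_cpu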
24 changes: 13 additions & 11 deletions vllm/v1/stats/common.py
@@ -10,10 +10,11 @@
 from vllm.sampling_params import SamplingParams


-class RequestStatsUpdate(msgspec.Struct,
-                         array_like=True,
-                         omit_defaults=True,
-                         gc=False):
+class RequestStatsUpdate(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     An update to the request stats.

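These msgspec class keywords are worth unpacking: array_like=True encodes instances as positional arrays instead of field-name maps (smaller and faster on the wire), omit_defaults=True drops fields that still hold their default value, and gc=False exempts instances from the cyclic garbage collector, which is safe only when instances cannot form reference cycles. The added `# type: ignore` quiets type checkers that do not understand msgspec's class-keyword syntax. A minimal standalone sketch with hypothetical fields (not vLLM's actual schema):

import msgspec


class Update(msgspec.Struct, array_like=True, omit_defaults=True, gc=False):
    request_id: str
    num_new_tokens: int = 0


buf = msgspec.msgpack.encode(Update("req-1"))
# Encoded as a compact msgpack array; num_new_tokens is omitted from the
# payload because it still equals its default.
decoded = msgspec.msgpack.decode(buf, type=Update)
assert decoded == Update("req-1", 0)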
@@ -341,16 +342,16 @@ def update_from(self, update: "RequestStatsUpdate"):
             self.queued_ts_s = ts
         elif update.type == RequestStatsUpdate.Type.PREFILLING:
             self.prefill_start_ts_s_lst.append(ts)
-            self.num_cached_tokens = update.num_cached_tokens
-            self.num_computed_tokens = update.num_computed_tokens
+            self.num_cached_tokens = update.num_cached_tokens or 0
+            self.num_computed_tokens = update.num_computed_tokens or 0
         elif update.type == RequestStatsUpdate.Type.PREEMPTED:
             self._reset_for_preemption(ts)
         elif update.type == RequestStatsUpdate.Type.DECODING:
             self.decoding_ts_s_lst.append(ts)
         elif update.type == RequestStatsUpdate.Type.DETOKENIZED:
             self._record_detokenized_output(
                 ts,
-                update.num_new_tokens,
+                update.num_new_tokens or 0,
             )
         elif update.type == RequestStatsUpdate.Type.FINISHED:
             self.finished_ts_s = ts
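The `or 0` additions coerce the update's optional counters to concrete ints before they are stored, so a missing value cannot propagate None into later arithmetic. One caveat worth knowing (a general Python note, not something this diff changes): `x or 0` also maps a literal 0 to 0, which is harmless for token counts but would be wrong wherever 0 and None must stay distinct. A tiny sketch:

from typing import Optional


def as_count(value: Optional[int]) -> int:
    # `value or 0` collapses None (and 0) to 0 -- fine for counters.
    # Use `0 if value is None else value` when 0 must stay distinct.
    return value or 0


assert as_count(None) == 0
assert as_count(7) == 7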
@@ -425,10 +426,11 @@ class EngineCoreProcessStats:
     output_queue_size: Optional[int] = None


-class EngineCoreStatsSnapshot(msgspec.Struct,
-                              array_like=True,
-                              omit_defaults=True,
-                              gc=False):
+class EngineCoreStatsSnapshot(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     A snapshot of the EngineCore's current stats over a period of time.
     """