[ci/lint] Add back default arg for pre-commit (#12279)
Signed-off-by: kevin <[email protected]>
khluu authored Jan 22, 2025
1 parent df76e5a commit 64ea24d
Showing 6 changed files with 26 additions and 29 deletions.
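A note on the scope of this change: pre-commit/action's own default for extra_args is --all-files, and setting extra_args in the workflow replaces that default rather than appending to it, so the earlier override had quietly stopped the hooks from checking the whole tree. The Python changes in the remaining five files are, presumably, the formatting and lint fixes the re-enabled hooks then demanded.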
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yml
@@ -16,4 +16,4 @@ jobs:
       - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
       - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
         with:
-          extra_args: --hook-stage manual
+          extra_args: --all-files --hook-stage manual
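With the default restored, CI again runs every configured hook, including the manual-stage ones, over the entire repository; the equivalent local check is pre-commit run --all-files --hook-stage manual.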
17 changes: 7 additions & 10 deletions tests/models/decoder_only/language/test_gguf.py
@@ -74,11 +74,7 @@ def gguf_model(self):
 )
 
 MODELS = [
-    LLAMA_CONFIG,
-    QWEN2_CONFIG,
-    PHI3_CONFIG,
-    GPT2_CONFIG,
-    STABLELM_CONFIG,
+    LLAMA_CONFIG, QWEN2_CONFIG, PHI3_CONFIG, GPT2_CONFIG, STABLELM_CONFIG,
     DOLPHIN_CONFIG
     # STARCODER_CONFIG, # broken
 ]
@@ -114,11 +110,12 @@ def test_models(
         messages, tokenize=False, add_generation_prompt=True)
 
     # Run unquantized model.
-    with vllm_runner(model_name=model.original_model,
-                     enforce_eager=True,  # faster tests
-                     dtype=dtype,
-                     max_model_len=MAX_MODEL_LEN,
-                     tensor_parallel_size=tp_size) as original_model:
+    with vllm_runner(
+            model_name=model.original_model,
+            enforce_eager=True,  # faster tests
+            dtype=dtype,
+            max_model_len=MAX_MODEL_LEN,
+            tensor_parallel_size=tp_size) as original_model:
         original_outputs = original_model.generate_greedy_logprobs(
             example_prompts[:-1], max_tokens, num_logprobs)

2 changes: 1 addition & 1 deletion vllm/model_executor/models/paligemma.py
@@ -147,7 +147,7 @@ class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal,
"up_proj",
],
}

def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
config = vllm_config.model_config.hf_config
8 changes: 3 additions & 5 deletions vllm/model_executor/models/siglip.py
@@ -348,12 +348,10 @@ def __init__(
         if quant_config and quant_config.get_name() == "bitsandbytes":
             quantizable = True
         else:
-            # For other quantization, we require the hidden size to be a
+            # For other quantization, we require the hidden size to be a
             # multiple of 64
-            quantizable = (
-                config.hidden_size % 64 == 0
-                and config.intermediate_size % 64 == 0
-            )
+            quantizable = (config.hidden_size % 64 == 0
+                           and config.intermediate_size % 64 == 0)
         self.fc1 = ColumnParallelLinear(
             config.hidden_size,
             config.intermediate_size,
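The reflowed condition is behavior-preserving. Restated as a standalone predicate (the function name and signature below are illustrative, not vLLM API), the rule this hunk encodes is:

from typing import Optional

def is_quantizable(hidden_size: int, intermediate_size: int,
                   quant_method: Optional[str]) -> bool:
    # bitsandbytes can quantize layers of any shape; other quantization
    # schemes are assumed to need both dimensions divisible by 64.
    if quant_method == "bitsandbytes":
        return True
    return hidden_size % 64 == 0 and intermediate_size % 64 == 0

For instance, with SigLIP-so400m's sizes (hidden 1152, intermediate 4304), is_quantizable(1152, 4304, "gptq") is False because 4304 % 64 == 16, while is_quantizable(1152, 4304, "bitsandbytes") is True.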
2 changes: 1 addition & 1 deletion vllm/platforms/__init__.py
@@ -101,7 +101,7 @@ def cpu_platform_plugin() -> Optional[str]:
     try:
         from importlib.metadata import version
         is_cpu = "cpu" in version("vllm")
-        if is_cpu == False:
+        if not is_cpu:
             import platform
             is_cpu = platform.machine().lower().startswith("arm")

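This is a standard PEP 8 cleanup (pycodestyle E712): equality comparison against a boolean literal. For a genuine bool like is_cpu the two spellings are equivalent, but not x and x == False diverge for other falsy values, which is why the idiom is preferred:

>>> not ""        # truthiness: empty string is falsy
True
>>> "" == False   # equality: an empty string is not False
False
>>> 0 == False    # but 0 compares equal to False
True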
24 changes: 13 additions & 11 deletions vllm/v1/stats/common.py
@@ -10,10 +10,11 @@
 from vllm.sampling_params import SamplingParams
 
 
-class RequestStatsUpdate(msgspec.Struct,
-                         array_like=True,
-                         omit_defaults=True,
-                         gc=False):
+class RequestStatsUpdate(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     An update to the request stats.
@@ -341,16 +342,16 @@ def update_from(self, update: "RequestStatsUpdate"):
             self.queued_ts_s = ts
         elif update.type == RequestStatsUpdate.Type.PREFILLING:
             self.prefill_start_ts_s_lst.append(ts)
-            self.num_cached_tokens = update.num_cached_tokens
-            self.num_computed_tokens = update.num_computed_tokens
+            self.num_cached_tokens = update.num_cached_tokens or 0
+            self.num_computed_tokens = update.num_computed_tokens or 0
         elif update.type == RequestStatsUpdate.Type.PREEMPTED:
             self._reset_for_preemption(ts)
         elif update.type == RequestStatsUpdate.Type.DECODING:
             self.decoding_ts_s_lst.append(ts)
         elif update.type == RequestStatsUpdate.Type.DETOKENIZED:
             self._record_detokenized_output(
                 ts,
-                update.num_new_tokens,
+                update.num_new_tokens or 0,
             )
         elif update.type == RequestStatsUpdate.Type.FINISHED:
             self.finished_ts_s = ts
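The added or 0 guards coerce None to 0: the counters on the update message are presumably Optional (their declarations sit outside this hunk), while the accumulated stats fields must stay plain ints. The idiom relies on or short-circuiting to its right operand whenever the left one is falsy:

>>> None or 0
0
>>> 7 or 0
7

One subtlety: or treats a genuine 0 the same as None, which is harmless here since both collapse to 0.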
@@ -425,10 +426,11 @@ class EngineCoreProcessStats:
     output_queue_size: Optional[int] = None
 
 
-class EngineCoreStatsSnapshot(msgspec.Struct,
-                              array_like=True,
-                              omit_defaults=True,
-                              gc=False):
+class EngineCoreStatsSnapshot(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     A snapshot of the EngineCore's current stats over a period of time.
     """
