format

neuralmagic · Jun 25, 2024 · 811d3a6 · 811d3a6 · github-actions · Jun 25, 2024
1 parent a8f701a
commit 811d3a6
Show file tree

Hide file tree

Showing 3 changed files with 2 additions and 5 deletions.
diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
@@ -9,7 +9,7 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
 
 # Install requirements
 $python_executable -m pip install wheel packaging
-$python_executable -m pip install -r requirements-cuda.txt -r requirements-build.txt
+$python_executable -m pip install -r requirements-cuda.txt
 
 # Limit the number of parallel jobs to avoid OOM
 export MAX_JOBS=1

diff --git a/format.sh b/format.sh
@@ -121,10 +121,9 @@ CODESPELL_EXCLUDES=(
     '--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**'
 )
 
-
 # check spelling of specified files
 spell_check() {
-    codespell "$@ ${CODESPELL_EXCLUDES[@]}"
+    codespell "$@"
 }
 
 spell_check_all(){
@@ -157,7 +156,6 @@ elif [[ "$1" == '--all' ]]; then
    spell_check_all
 else
    # Check spelling only of the files that changed in last commit.
-   echo "${CODESPELL_EXCLUDES[@]}"
    spell_check_changed
 fi
 echo 'vLLM codespell: Done'

diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
@@ -61,7 +61,6 @@ def _get_quantization_config(
                 f"{model_config.dtype} is not supported for quantization "
                 f"method {model_config.quantization}. Supported dtypes: "
                 f"{supported_dtypes}")
-
         return quant_config
 
     elif model_config.sparsity is not None:
Benchmark suite	Current: `811d3a6`	Previous: `9b2e107`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.4794327626874733` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`952.1021808719897` tokens/s
Benchmark suite	Current: `811d3a6`	Previous: `9b2e107`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.4780561923390345` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`951.5735778581892` tokens/s