diff --git a/benchmarks/profiling/benchmark_latency.py b/benchmarks/profiling/benchmark_latency.py
index b1d1c5602d98d..7a6deb570794d 100644
--- a/benchmarks/profiling/benchmark_latency.py
+++ b/benchmarks/profiling/benchmark_latency.py
@@ -81,7 +81,7 @@ def get_profiling_context(profile_result_dir: Optional[str] = None):
         top_p=1.0,
         ignore_eos=True,
         max_tokens=args.output_len,
-        detokenize = False,
+        detokenize=False,
     )
     print(sampling_params)
     dummy_prompt_token_ids = np.random.randint(10000,
diff --git a/benchmarks/profiling/benchmark_throughput.py b/benchmarks/profiling/benchmark_throughput.py
index 22a4b1028f778..0e9dc248af2eb 100644
--- a/benchmarks/profiling/benchmark_throughput.py
+++ b/benchmarks/profiling/benchmark_throughput.py
@@ -135,7 +135,7 @@ def get_profiling_context(profile_dir: Optional[str] = None):
             top_p=1.0,
             ignore_eos=True,
             max_tokens=output_len,
-            detokenize = False,
+            detokenize=False,
         ))

     if args.profile_torch or args.profile_rpd:
@@ -172,7 +172,7 @@ async def run_vllm_async(
             top_p=1.0,
             ignore_eos=True,
             max_tokens=output_len,
-            detokenize = False,
+            detokenize=False,
         ))

     generators = []
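
All three hunks apply the same style fix: PEP 8 calls for no spaces around "=" in keyword arguments. For context, a minimal sketch of the resulting SamplingParams call (assuming vLLM is installed; n=1, temperature=1.0, and max_tokens=128 are illustrative stand-ins for the benchmark's arguments, not values from the diff):

    from vllm import SamplingParams

    sampling_params = SamplingParams(
        n=1,                # illustrative; the scripts pass args.n
        temperature=1.0,
        top_p=1.0,
        ignore_eos=True,    # generate exactly max_tokens tokens
        max_tokens=128,     # stand-in for args.output_len / output_len
        detokenize=False,   # skip token-to-text conversion during profiling
    )

Setting detokenize=False here keeps the benchmarks focused on generation throughput and latency, since detokenization work would otherwise be included in the measured time.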