Set detokenize=False for benchmarks

ROCm · Jan 17, 2025 · 87d256a
1 parent c566cd9
commit 87d256a
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 0 deletions.
diff --git a/benchmarks/profiling/benchmark_latency.py b/benchmarks/profiling/benchmark_latency.py
@@ -81,6 +81,7 @@ def get_profiling_context(profile_result_dir: Optional[str] = None):
         top_p=1.0,
         ignore_eos=True,
         max_tokens=args.output_len,
+        detokenize = False,
     )
     print(sampling_params)
     dummy_prompt_token_ids = np.random.randint(10000,

diff --git a/benchmarks/profiling/benchmark_throughput.py b/benchmarks/profiling/benchmark_throughput.py
@@ -135,6 +135,7 @@ def get_profiling_context(profile_dir: Optional[str] = None):
                 top_p=1.0,
                 ignore_eos=True,
                 max_tokens=output_len,
+                detokenize = False,
             ))
 
     if args.profile_torch or args.profile_rpd:
@@ -171,6 +172,7 @@ async def run_vllm_async(
                     top_p=1.0,
                     ignore_eos=True,
                     max_tokens=output_len,
+                    detokenize = False,
                 ))
 
         generators = []