From aed074860a46536faf77bacd76d02efccbaf4a5d Mon Sep 17 00:00:00 2001
From: Simon Mo <simon.mo@hey.com>
Date: Thu, 21 Nov 2024 18:27:20 -0800
Subject: [PATCH] [Benchmark] Add new H100 machine  (#10547)

---
 .../benchmark-pipeline.yaml                   | 39 ++++++++++---------
 .../convert-results-json-to-markdown.py       | 13 +++++--
 2 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/.buildkite/nightly-benchmarks/benchmark-pipeline.yaml b/.buildkite/nightly-benchmarks/benchmark-pipeline.yaml
index 5c069b38b2d7d..3db77d5f16022 100644
--- a/.buildkite/nightly-benchmarks/benchmark-pipeline.yaml
+++ b/.buildkite/nightly-benchmarks/benchmark-pipeline.yaml
@@ -13,6 +13,7 @@ steps:
   - wait
 
   - label: "A100"
+    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
     agents:
       queue: A100
     plugins:
@@ -45,6 +46,7 @@ steps:
               medium: Memory
 
   - label: "H200"
+    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
     agents:
       queue: H200
     plugins:
@@ -63,21 +65,22 @@ steps:
         - VLLM_USAGE_SOURCE
         - HF_TOKEN
 
-
-  # - label: "H100"
-  #   agents:
-  #     queue: H100
-  #   plugins:
-  #   - docker#v5.11.0:
-  #       image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
-  #       command:
-  #       - bash
-  #       - .buildkite/nightly-benchmarks/run-benchmarks-suite.sh
-  #       mount-buildkite-agent: true
-  #       propagate-environment: true
-  #       ipc: host
-  #       gpus: all
-  #       environment:
-  #       - VLLM_USAGE_SOURCE
-  #       - HF_TOKEN
-
+  - label: "H100"
+    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
+    agents:
+      queue: H100
+    plugins:
+    - docker#v5.12.0:
+        image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+        command:
+        - bash
+        - .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
+        mount-buildkite-agent: true
+        propagate-environment: true
+        ipc: host
+        gpus: all # see CUDA_VISIBLE_DEVICES for actual GPUs used
+        volumes:
+          - /data/benchmark-hf-cache:/root/.cache/huggingface
+        environment:
+        - VLLM_USAGE_SOURCE
+        - HF_TOKEN
diff --git a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
index d640563252a0c..9d3646e2f6a15 100644
--- a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
+++ b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
@@ -157,10 +157,17 @@ def results_to_json(latency, throughput, serving):
                                              throughput_results,
                                              serving_results)
 
-    # Sort all dataframes by their respective "Test name" columns
     for df in [latency_results, serving_results, throughput_results]:
-        if not df.empty:
-            df.sort_values(by="Test name", inplace=True)
+        if df.empty:
+            continue
+
+        # Sort all dataframes by their respective "Test name" columns
+        df.sort_values(by="Test name", inplace=True)
+
+        # The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...",
+        # we want to turn it into "8xGPUTYPE"
+        df["GPU"] = df["GPU"].apply(
+            lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}")
 
     # get markdown tables
     latency_md_table = tabulate(latency_results,