Leaderboard tweaks

ml-energy · Sep 20, 2024 · 76bf85e · 76bf85e
1 parent 4e4fca8
commit 76bf85e
Show file tree

Hide file tree

Showing 2 changed files with 50 additions and 21 deletions.
diff --git a/app.py b/app.py
@@ -229,6 +229,13 @@ def get_intro_text(self) -> str:
     def get_detail_text(self, detail_mode: bool) -> str:
         if detail_mode:
             text = """
+                **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
+                An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
+                You can tweak the TPOT slider to adjust the target average TPOT for the models.
+
+                Each row corresponds to one model, given a constraint on the maximum average TPOT.
+                If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
+
                 Columns
                 - **Model**: The name of the model.
                 - **Params (B)**: Number of parameters in the model.
@@ -242,10 +249,6 @@ def get_detail_text(self, detail_mode: bool) -> str:
                 - **Avg BS**: Average batch size of the serving engine over time.
                 - **Max BS**: Maximum batch size configuration of the serving engine.
 
-                **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
-                An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
-                You can tweak the TPOT slider to adjust the target average TPOT for the models.
-
                 For more detailed information, please take a look at the **About** tab.
                 """
         else:
@@ -290,6 +293,13 @@ def get_intro_text(self) -> str:
     def get_detail_text(self, detail_mode: bool) -> str:
         if detail_mode:
             text = """
+                **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
+                An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
+                You can tweak the TPOT slider to adjust the target average TPOT for the models.
+
+                Each row corresponds to one model, given a constraint on the maximum average TPOT.
+                If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
+
                 Columns
                 - **Model**: The name of the model.
                 - **Params (B)**: Number of parameters in the model.
@@ -303,10 +313,6 @@ def get_detail_text(self, detail_mode: bool) -> str:
                 - **Avg BS**: Average batch size of the serving engine over time.
                 - **Max BS**: Maximum batch size configuration of the serving engine.
 
-                **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
-                An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
-                You can tweak the TPOT slider to adjust the target average TPOT for the models.
-
                 For more detailed information, please take a look at the **About** tab.
                 """
         else:
@@ -350,6 +356,13 @@ def get_intro_text(self) -> str:
     def get_detail_text(self, detail_mode: bool) -> str:
         if detail_mode:
             text = """
+                **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
+                An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
+                You can tweak the TPOT slider to adjust the target average TPOT for the models.
+
+                Each row corresponds to one model, given a constraint on the maximum average TPOT.
+                If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per request.
+
                 Columns
                 - **Model**: The name of the model.
                 - **Params (B)**: Number of parameters in the model.
@@ -363,10 +376,6 @@ def get_detail_text(self, detail_mode: bool) -> str:
                 - **Avg BS**: Average batch size of the serving engine over time.
                 - **Max BS**: Maximum batch size configuration of the serving engine.
 
-                **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
-                An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
-                You can tweak the TPOT slider to adjust the target average TPOT for the models.
-
                 For more detailed information, please take a look at the **About** tab.
                 """
         else:
@@ -499,7 +508,7 @@ def set_filter_get_df(self, detail_mode: bool, *filters) -> pd.DataFrame:
         )
 
         if not detail_mode:
-            core_columns = ["Model", "Denoising params", "GPU", "Denoising steps", "Resolution", "Frames", self.energy_col]
+            core_columns = ["Model", "Denoising params", "GPU", "Resolution", "Frames", self.energy_col]
             readable_name_mapping = {
                 "Denoising params": "Denoising parameters (Billions)",
                 "GPU": "GPU model",
@@ -521,7 +530,9 @@ def get_tab_name(self) -> str:
 
     def get_intro_text(self) -> str:
         text = """
-            <h2>Diffusion text-to-image generation</h2></br>
+            <h2>How much energy do GenAI models consume?</h2>
+
+            <h3>Diffusion text-to-image generation</h3>
 
             <p style="font-size: 16px">
             Diffusion models generate images that align with input text prompts.
@@ -537,6 +548,9 @@ def get_intro_text(self) -> str:
     def get_detail_text(self, detail_mode: bool) -> str:
         if detail_mode:
             text = """
+                Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
+                If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per image.
+
                 Columns
                 - **Model**: The name of the model.
                 - **Denoising params**: Number of parameters in the denosing module (e.g., UNet, Transformer).
@@ -557,6 +571,7 @@ def get_detail_text(self, detail_mode: bool) -> str:
                 - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to generate gradually refine the image.
                 - **GPU model**: Name of the GPU model used for benchmarking.
                 - **Energy per image (Joules)**: Energy consumed for each generated image in Joules.
+                - **Resolution**: Resolution of the generated image.
 
                 Checking "Show more technical details" above the table will reveal more detailed columns.
                 Also, for more detailed information, please take a look at the **About** tab.
@@ -575,7 +590,9 @@ def get_tab_name(self) -> str:
 
     def get_intro_text(self) -> str:
         text = """
-            <h2>Diffusion text-to-video generation</h2></br>
+            <h2>How much energy do GenAI models consume?</h2>
+
+            <h3>Diffusion text-to-video generation</h3>
 
             <p style="font-size: 16px">
             Diffusion models generate videos that align with input text prompts.
@@ -591,6 +608,9 @@ def get_intro_text(self) -> str:
     def get_detail_text(self, detail_mode: bool) -> str:
         if detail_mode:
             text = """
+                Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
+                If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per video.
+
                 Columns
                 - **Model**: The name of the model.
                 - **Denoising params**: Number of parameters in the denosing module (e.g., UNet, Transformer).
@@ -612,6 +632,8 @@ def get_detail_text(self, detail_mode: bool) -> str:
                 - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to generate gradually refine the video.
                 - **GPU model**: Name of the GPU model used for benchmarking.
                 - **Energy per video (Joules)**: Energy consumed for each generated image in Joules.
+                - **Frames**: Number of frames in the generated video.
+                - **Resolution**: Resolution of the generated video.
 
                 Checking "Show more technical details" above the table will reveal more detailed columns.
                 Also, for more detailed information, please take a look at the **About** tab.
@@ -630,7 +652,9 @@ def get_tab_name(self) -> str:
 
     def get_intro_text(self) -> str:
         text = """
-            <h2>Diffusion image-to-video generation</h2></br>
+            <h2>How much energy do GenAI models consume?</h2>
+
+            <h3>Diffusion image-to-video generation</h3>
 
             <p style="font-size: 16px">
             Diffusion models generate videos given an input image (and sometimes alongside with text).
@@ -646,6 +670,9 @@ def get_intro_text(self) -> str:
     def get_detail_text(self, detail_mode: bool) -> str:
         if detail_mode:
             text = """
+                Each row corresponds to one model, given a constraint on the maximum computation time for the whole batch.
+                If more than one GPU type was chosen, the row shows results from the GPU with the lowest energy consumption per video.
+
                 Columns
                 - **Model**: The name of the model.
                 - **Denoising params**: Number of parameters in the denosing module (e.g., UNet, Transformer).
@@ -667,14 +694,16 @@ def get_detail_text(self, detail_mode: bool) -> str:
                 - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repetitively to generate gradually refine the video.
                 - **GPU model**: Name of the GPU model used for benchmarking.
                 - **Energy per video (Joules)**: Energy consumed for each generated image in Joules.
+                - **Frames**: Number of frames in the generated video.
+                - **Resolution**: Resolution of the generated video.
 
                 Checking "Show more technical details" above the table will reveal more detailed columns.
                 Also, for more detailed information, please take a look at the **About** tab.
                 """
         return text
 
     def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
-        return {"Batch latency (s)": (0.0, 120.0, 1.0, 45.0)}
+        return {"Batch latency (s)": (0.0, 120.0, 1.0, 60.0)}
 
 
 class LegacyTableManager:
@@ -718,7 +747,7 @@ def format_model_link(model_name: str) -> str:
         self.full_df = df
 
         # Default view of the table is to only show the first options.
-        self.set_filter_get_df(detail_mode=False)
+        self.set_filter_get_df()
 
     def _read_tables(self, data_dir: str) -> pd.DataFrame:
         """Read tables."""
@@ -777,7 +806,7 @@ def update_dropdown(self):
             gr.Dropdown.update(choices=["None", *columns]),
         ]
 
-    def set_filter_get_df(self, detail_mode: bool, *filters) -> pd.DataFrame:
+    def set_filter_get_df(self, *filters) -> pd.DataFrame:
         """Set the current set of filters and return the filtered DataFrame."""
         # If the filter is empty, we default to the first choice for each key.
         if not filters:
@@ -1027,7 +1056,7 @@ def consumed_more_energy_message(energy_a, energy_b):
 # Colosseum event handlers
 def on_load():
     """Intialize the dataframe, shuffle the model preference dropdown choices."""
-    dataframe = global_ltbm.set_filter_get_df(detail_mode=False)
+    dataframe = global_ltbm.set_filter_get_df()
     dataframes = [global_tbm.set_filter_get_df(detail_mode=False) for global_tbm in global_tbms]
     return dataframe, *dataframes
 

diff --git a/data/diffusion/image-to-video/models.json b/data/diffusion/image-to-video/models.json
@@ -15,7 +15,7 @@
   },
   "stabilityai/stable-video-diffusion-img2vid-xt": {
     "url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt",
-    "nickname": "Stable Video Diffusion xt",
+    "nickname": "Stable Video Diffusion XT",
     "total_params": 2.3,
     "denoising_params": 1.5,
     "resolution": "1024x576"