Updated diffusion benchmark and data

ml-energy · Sep 19, 2024 · c97bae1 · c97bae1
1 parent abd945c
commit c97bae1
Show file tree

Hide file tree

Showing 134 changed files with 525 additions and 348 deletions.
diff --git a/.gitignore b/.gitignore
@@ -18,4 +18,4 @@ build/
 
 # Data files
 *.log
-pegasus/consumed.yaml
+figures/
diff --git a/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml b/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml
@@ -1,6 +1,6 @@
 - command:
-    - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 8 4 2 1 --power-limits 400 --num-inference-steps 25"
+    - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 4 3 2 1 --power-limits 400 --num-inference-steps 1 2 4 8 16 25 30 40 50"
   model:
-    - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt'
-    - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14'
-    - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25'
+    - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt --width 1280 --height 720'
+    - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14 --width 1024 --height 576'
+    - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25 --width 1024 --height 576'
diff --git a/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml b/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml
@@ -1,6 +1,6 @@
 - command:
-    - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_700.json --gpu-ids {{ gpu }} --batch-sizes 64 32 16 8 4 2 1 --power-limits 700 --num-inference-steps 25"
+    - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 4 3 2 1 --power-limits 700 --num-inference-steps 1 2 4 8 16 25 30 40 50"
   model:
-    - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt'
-    - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14'
-    - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25'
+    - "--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt --width 1280 --height 720"
+    - "--model stabilityai/stable-video-diffusion-img2vid --num-frames 14 --width 1024 --height 576"
+    - "--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25 --width 1024 --height 576"
diff --git a/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py b/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py
@@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None:
     for model_dir in sorted(glob(f"{results_dir}/*/*")):
         model_name = "/".join(model_dir.split("/")[-2:])
         print(f"  {model_name}")
-        result_file_cand = glob(f"{model_dir}/bs1+*+results.json")
+        result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json")
         assert len(result_file_cand) == 1, model_name
         results_data = json.load(open(result_file_cand[0]))
         denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer"

diff --git a/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py b/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py
@@ -27,10 +27,10 @@ class Results:
     model: str
     num_parameters: dict[str, int]
     gpu_model: str
-    num_infernece_steps: int
-    num_frames: int
     power_limit: int
     batch_size: int
+    num_inference_steps: int
+    num_frames: int
     num_prompts: int
     total_runtime: float = 0.0
     total_energy: float = 0.0
@@ -80,6 +80,7 @@ def load_text_image_prompts(
     path: str,
     batch_size: int,
     num_batches: int | None = None,
+    image_resize: tuple[int, int] | None = None,
 ) -> tuple[int, list[tuple[list[str], list[Image.Image]]]]:
     """Load the dataset to feed the model and return it as a list of batches of prompts.
 
@@ -93,6 +94,9 @@ def load_text_image_prompts(
     dataset = json.load(open(path))
     assert len(dataset["caption"]) == len(dataset["video_id"])
 
+    dataset["caption"] *= 10
+    dataset["video_id"] *= 10
+
     if num_batches is not None:
         if len(dataset["caption"]) < num_batches * batch_size:
             raise ValueError("Not enough data for the requested number of batches.")
@@ -103,6 +107,8 @@ def load_text_image_prompts(
     dataset["first_frame"] = [
         load_image(str(image_path / f"{video_id}.jpg")) for video_id in dataset["video_id"]
     ]
+    if image_resize is not None:
+        dataset["first_frame"] = [image.resize(image_resize) for image in dataset["first_frame"]]
 
     batched = [
         (dataset["caption"][i : i + batch_size], dataset["first_frame"][i : i + batch_size])
@@ -135,8 +141,8 @@ def benchmark(args: argparse.Namespace) -> None:
 
     results_dir = Path(args.result_root) / args.model
     results_dir.mkdir(parents=True, exist_ok=True)
-    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}")
-    video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated"
+    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}")
+    video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated"
     video_dir.mkdir(exist_ok=True)
 
     arg_out_filename = f"{benchmark_name}+args.json"
@@ -150,11 +156,16 @@ def benchmark(args: argparse.Namespace) -> None:
     pynvml.nvmlInit()
     handle = pynvml.nvmlDeviceGetHandleByIndex(0)
     gpu_model = pynvml.nvmlDeviceGetName(handle)
-    pynvml.nvmlDeviceSetPersistenceMode(handle, pynvml.NVML_FEATURE_ENABLED)
-    pynvml.nvmlDeviceSetPowerManagementLimit(handle, args.power_limit * 1000)
+    # pynvml.nvmlDeviceSetPersistenceMode(handle, pynvml.NVML_FEATURE_ENABLED)
+    # pynvml.nvmlDeviceSetPowerManagementLimit(handle, args.power_limit * 1000)
     pynvml.nvmlShutdown()
 
-    num_prompts, batched_prompts = load_text_image_prompts(args.dataset_path, args.batch_size, args.num_batches)
+    num_prompts, batched_prompts = load_text_image_prompts(
+        args.dataset_path,
+        args.batch_size,
+        args.num_batches,
+        (args.width, args.height),
+    )
 
     pipeline = get_pipeline(args.model)
 
@@ -189,7 +200,7 @@ def benchmark(args: argparse.Namespace) -> None:
     fps_param_name = fps_param_name_candidates[0]
 
     torch.cuda.reset_peak_memory_stats(device="cuda:0")
-    zeus_monitor.begin_window("benchmark", sync_cuda=False)
+    zeus_monitor.begin_window("benchmark", sync_execution=False)
 
     # Build common parameter dict for all batches
     params: dict[str, Any] = dict(
@@ -210,15 +221,15 @@ def benchmark(args: argparse.Namespace) -> None:
         if args.add_text_prompt:
             params["prompt"] = intermediate.prompts
 
-        zeus_monitor.begin_window("batch", sync_cuda=False)
+        zeus_monitor.begin_window("batch", sync_execution=False)
         frames = pipeline(**params).frames
-        batch_measurements = zeus_monitor.end_window("batch", sync_cuda=False)
+        batch_measurements = zeus_monitor.end_window("batch", sync_execution=False)
 
         intermediate.frames = frames
         intermediate.batch_latency = batch_measurements.time
         intermediate.batch_energy = batch_measurements.total_energy
 
-    measurements = zeus_monitor.end_window("benchmark", sync_cuda=False)
+    measurements = zeus_monitor.end_window("benchmark", sync_execution=False)
     peak_memory = torch.cuda.max_memory_allocated(device="cuda:0")
 
     results: list[Result] = []
@@ -255,10 +266,10 @@ def benchmark(args: argparse.Namespace) -> None:
         model=args.model,
         num_parameters=count_parameters(pipeline),
         gpu_model=gpu_model,
-        num_infernece_steps=args.num_inference_steps,
-        num_frames=args.num_frames,
         power_limit=args.power_limit,
         batch_size=args.batch_size,
+        num_inference_steps=args.num_inference_steps,
+        num_frames=args.num_frames,
         num_prompts=num_prompts,
         total_runtime=measurements.time,
         total_energy=measurements.total_energy,
@@ -289,8 +300,8 @@ def benchmark(args: argparse.Namespace) -> None:
     parser.add_argument("--num-inference-steps", type=int, default=50, help="The number of denoising steps.")
     parser.add_argument("--num-frames", type=int, default=1, help="The number of frames to generate.")
     parser.add_argument("--fps", type=int, default=16, help="Frames per second for micro-conditioning.")
-    parser.add_argument("--height", type=int, help="Height of the generated video.")
-    parser.add_argument("--width", type=int, help="Width of the generated video.")
+    parser.add_argument("--height", type=int, required=True, help="Height of the generated video.")
+    parser.add_argument("--width", type=int, required=True, help="Width of the generated video.")
     parser.add_argument("--num-batches", type=int, default=None, help="The number of batches to use from the dataset.")
     parser.add_argument("--save-every", type=int, default=10, help="Save generations to file every N prompts.")
     parser.add_argument("--seed", type=int, default=0, help="The seed to use for the RNG.")

diff --git a/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py b/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py
@@ -28,44 +28,48 @@ def main(args: argparse.Namespace) -> None:
     print_and_write(outfile, f"Benchmarking {args.model}\n")
     print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n")
     print_and_write(outfile, f"Power limits: {args.power_limits}\n")
+    print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n")
 
     for batch_size in args.batch_sizes:
         for power_limit in args.power_limits:
-            print_and_write(outfile, f"{batch_size=}, {power_limit=}\n", flush=True)
-            with subprocess.Popen(
-                args=[
-                    "docker", "run",
-                    "--gpus", '"device=' + ','.join(args.gpu_ids) + '"',
-                    "--cap-add", "SYS_ADMIN",
-                    "--name", f"leaderboard-i2v-{''.join(args.gpu_ids)}",
-                    "--rm",
-                    "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface",
-                    "-v", f"{os.getcwd()}:/workspace/image-to-video",
-                    "mlenergy/leaderboard:diffusion-i2v",
-                    "--dataset-path", args.dataset_path,
-                    "--result-root", args.result_root,
-                    "--batch-size", batch_size,
-                    "--num-batches", "10",
-                    "--power-limit", power_limit,
-                    "--model", args.model,
-                    "--huggingface-token", hf_token,
-                    "--num-frames", args.num_frames,
-                    "--num-inference-steps", args.num_inference_steps,
-                ] + (["--add-text-prompt"] if args.add_text_prompt else []),
-                stdout=subprocess.PIPE,
-                stderr=subprocess.STDOUT,
-                text=True,
-            ) as proc:
-                if proc.stdout:
-                    i = 0
-                    for line in proc.stdout:
-                        print_and_write(outfile, line, flush=i % 50 == 0)
-                        i += 1
+            for num_inference_steps in args.num_inference_steps:
+                print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True)
+                with subprocess.Popen(
+                    args=[
+                        "docker", "run",
+                        "--gpus", '"device=' + ','.join(args.gpu_ids) + '"',
+                        "--cap-add", "SYS_ADMIN",
+                        "--name", f"leaderboard-i2v-{''.join(args.gpu_ids)}",
+                        "--rm",
+                        "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface",
+                        "-v", f"{os.getcwd()}:/workspace/image-to-video",
+                        "mlenergy/leaderboard:diffusion-i2v",
+                        "--dataset-path", args.dataset_path,
+                        "--result-root", args.result_root,
+                        "--batch-size", batch_size,
+                        "--num-batches", "8",
+                        "--power-limit", power_limit,
+                        "--model", args.model,
+                        "--huggingface-token", hf_token,
+                        "--num-frames", args.num_frames,
+                        "--num-inference-steps", num_inference_steps,
+                        "--width", str(args.width),
+                        "--height", str(args.height),
+                    ] + (["--add-text-prompt"] if args.add_text_prompt else []),
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                    text=True,
+                ) as proc:
+                    if proc.stdout:
+                        i = 0
+                        for line in proc.stdout:
+                            print_and_write(outfile, line, flush=i % 50 == 0)
+                            i += 1
 
-            # If proc exited with non-zero status, it's probably an OOM.
-            # Move on to the next batch size.
-            if proc.returncode != 0:
-                break
+                # If proc exited with non-zero status, it's probably an OOM.
+                # Skip the remaining step counts for this batch size and power limit.
+                if proc.returncode != 0:
+                    break
 
 
 
@@ -77,8 +81,10 @@ def main(args: argparse.Namespace) -> None:
     parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark")
     parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark")
     parser.add_argument("--num-frames", type=str, help="Number of frames to generate")
-    parser.add_argument("--num-inference-steps", type=str, help="Number of denoising steps")
+    parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "30", "40", "50"], help="Number of inference steps to run")
     parser.add_argument("--add-text-prompt", action="store_true", help="Input text prompt alongside image.")
+    parser.add_argument("--height", type=int, required=True, help="Height of the generated video.")
+    parser.add_argument("--width", type=int, required=True, help="Width of the generated video.")
     parser.add_argument("--dataset-path", type=str, help="Path to the dataset JSON file.")
     args = parser.parse_args()
     main(args)
diff --git a/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py b/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py
@@ -3,7 +3,7 @@
 
 import cv2
 
-DATASET_PATH = "sharegpt4video_700.json"
+DATASET_PATH = "sharegpt4video_100.json"
 
 
 def main() -> None:

diff --git a/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml b/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml
@@ -1,5 +1,5 @@
 - command:
-    - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --gpu-ids {{ gpu }} --batch-sizes 16 8 4 2 1 --power-limits 400"
+    - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --gpu-ids {{ gpu }} --batch-sizes 64 32 16 8 4 2 1 --num-inference-steps 1 2 4 8 16 25 30 40 50 --power-limits 400"
   model:
     - stabilityai/stable-diffusion-2-1
     - stabilityai/stable-diffusion-xl-base-1.0

diff --git a/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py b/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py
@@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None:
     for model_dir in sorted(glob(f"{results_dir}/*/*")):
         model_name = "/".join(model_dir.split("/")[-2:])
         print(f"  {model_name}")
-        result_file_cand = glob(f"{model_dir}/bs1+*+results.json")
+        result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json")
         assert len(result_file_cand) == 1, model_name
         results_data = json.load(open(result_file_cand[0]))
         denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer"
@@ -24,6 +24,7 @@ def main(results_dir: Path, output_file: Path) -> None:
             nickname=model_name.split("/")[-1].replace("-", " ").title(),
             total_params=raw_params_to_readable(sum(results_data["num_parameters"].values())),
             denoising_params=raw_params_to_readable(results_data["num_parameters"][denosing_module_name]),
+            resolution="NA",
         )
         assert model_name not in models
         models[model_name] = model_info