Skip to content

Commit

Permalink
Merge pull request #30965 from def-/pr-pipeline-parallelism
Browse files Browse the repository at this point in the history
ci: Automatically append %N in parallel tests
  • Loading branch information
def- authored Jan 7, 2025
2 parents 9236f38 + 25f1b76 commit ebab660
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 51 deletions.
15 changes: 15 additions & 0 deletions ci/mkpipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,21 @@ def set_default_agents_queue(pipeline: Any) -> None:
step["agents"] = {"queue": DEFAULT_AGENT}


def set_parallelism_name(pipeline: Any) -> None:
    """Append the ``%N`` placeholder to the label of every parallel step.

    Walks the top-level entries of ``pipeline["steps"]``. Entries containing
    a ``trigger``, ``wait`` or ``block`` key are skipped. For an entry with a
    ``group`` key, each of its nested ``steps`` is visited instead of the
    group itself. Every visited step whose ``parallelism`` is greater than 1
    gets ``" %N"`` appended to its ``label`` (presumably so that Buildkite
    renders the per-job index in the label -- see the Buildkite docs).

    The operation is idempotent: a label that already contains ``%N``
    (anywhere, not only at the end) is left untouched, so templates that
    still spell the placeholder out by hand do not end up with it twice.
    Parallel steps without a ``label`` are left unchanged.

    Args:
        pipeline: Parsed pipeline mapping with a ``steps`` list; it is
            modified in place.
    """

    def visit(step: Any) -> None:
        if step.get("parallelism", 1) <= 1:
            return
        label = step.get("label")
        # Without a label there is nothing to annotate, and a label that
        # already carries the placeholder must not receive a second one.
        if label is None or "%N" in label:
            return
        step["label"] = f"{label} %N"

    for config in pipeline["steps"]:
        if "trigger" in config or "wait" in config or "block" in config:
            continue
        if "group" in config:
            for inner_config in config.get("steps", []):
                visit(inner_config)
            continue
        visit(config)


def check_depends_on(pipeline: Any, pipeline_name: str) -> None:
if pipeline_name not in ("test", "nightly", "release-qualification"):
return
Expand Down
52 changes: 26 additions & 26 deletions ci/nightly/pipeline.template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ steps:
key: benchmark
steps:
- id: feature-benchmark
label: "Feature benchmark against merge base or 'latest' %N"
label: "Feature benchmark against merge base or 'latest'"
depends_on: build-x86_64
timeout_in_minutes: 720
parallelism: 8
Expand Down Expand Up @@ -180,7 +180,7 @@ steps:
key: kafka
steps:
- id: kafka-matrix
label: Kafka smoke test against previous Kafka versions %N
label: Kafka smoke test against previous Kafka versions
depends_on: build-aarch64
timeout_in_minutes: 120
parallelism: 2
Expand Down Expand Up @@ -310,7 +310,7 @@ steps:
]

- id: testdrive-in-cloudtest
label: "Full Testdrive in Cloudtest (K8s) %N"
label: "Full Testdrive in Cloudtest (K8s)"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand Down Expand Up @@ -342,7 +342,7 @@ steps:
key: limits-group
steps:
- id: limits
label: "Product limits %N"
label: "Product limits"
depends_on: build-aarch64
agents:
queue: hetzner-aarch64-16cpu-32gb
Expand Down Expand Up @@ -377,7 +377,7 @@ steps:
limit: 1

- id: bounded-memory
label: "Bounded Memory %N"
label: "Bounded Memory"
depends_on: build-aarch64
timeout_in_minutes: 90
parallelism: 2
Expand All @@ -387,7 +387,7 @@ steps:
- ./ci/plugins/mzcompose:
composition: bounded-memory
- id: bounded-memory-search
label: "Bounded Memory Search (materialized only) %N"
label: "Bounded Memory Search (materialized only)"
depends_on: build-aarch64
timeout_in_minutes: 150
parallelism: 8
Expand Down Expand Up @@ -623,7 +623,7 @@ steps:
queue: hetzner-aarch64-4cpu-8gb

- id: testdrive-old-kafka-src-syntax
label: "Testdrive %N (before Kafka source versioning)"
label: "Testdrive (before Kafka source versioning)"
depends_on: build-aarch64
timeout_in_minutes: 180
plugins:
Expand Down Expand Up @@ -676,7 +676,7 @@ steps:
key: platform-checks
steps:
- id: checks-restart-entire-mz
label: "Checks + restart of the entire Mz %N"
label: "Checks + restart of the entire Mz"
depends_on: build-aarch64
timeout_in_minutes: 180
# Sometimes runs into query timeouts or entire test timeouts with parallelism 1, too much state, same in all other platform-checks
Expand All @@ -691,7 +691,7 @@ steps:

# TODO(def-) Remove this when old upsert implementation is removed
- id: checks-restart-entire-mz-old-upsert
label: "Checks + restart of the entire Mz with old Upsert %N"
label: "Checks + restart of the entire Mz with old Upsert"
depends_on: build-aarch64
timeout_in_minutes: 180
# Sometimes runs into query timeouts or entire test timeouts with parallelism 1, too much state, same in all other platform-checks
Expand All @@ -705,7 +705,7 @@ steps:
args: [--scenario=RestartEntireMz, --system-param=storage_use_continual_feedback_upsert=false, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-backup-rollback
label: "Checks + backup + rollback to previous %N"
label: "Checks + backup + rollback to previous"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -717,7 +717,7 @@ steps:
args: [--scenario=BackupAndRestoreToPreviousState, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-parallel-drop-create-default-replica
label: "Checks parallel + DROP/CREATE replica %N"
label: "Checks parallel + DROP/CREATE replica"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -729,7 +729,7 @@ steps:
args: [--scenario=DropCreateDefaultReplica, --execution-mode=parallel, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-parallel-restart-clusterd-compute
label: "Checks parallel + restart compute clusterd %N"
label: "Checks parallel + restart compute clusterd"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -741,7 +741,7 @@ steps:
args: [--scenario=RestartClusterdCompute, --execution-mode=parallel, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-parallel-restart-entire-mz
label: "Checks parallel + restart of the entire Mz %N"
label: "Checks parallel + restart of the entire Mz"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -753,7 +753,7 @@ steps:
args: [--scenario=RestartEntireMz, --execution-mode=parallel, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-parallel-restart-environmentd-clusterd-storage
label: "Checks parallel + restart of environmentd & storage clusterd %N"
label: "Checks parallel + restart of environmentd & storage clusterd"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -765,7 +765,7 @@ steps:
args: [--scenario=RestartEnvironmentdClusterdStorage, --execution-mode=parallel, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-parallel-kill-clusterd-storage
label: "Checks parallel + kill storage clusterd %N"
label: "Checks parallel + kill storage clusterd"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -777,7 +777,7 @@ steps:
args: [--scenario=KillClusterdStorage, --execution-mode=parallel, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-upgrade-entire-mz
label: "Checks upgrade, whole-Mz restart %N"
label: "Checks upgrade, whole-Mz restart"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -789,7 +789,7 @@ steps:
args: [--scenario=UpgradeEntireMz, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-preflight-check-rollback
label: "Checks preflight-check and roll back upgrade %N"
label: "Checks preflight-check and roll back upgrade"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -801,7 +801,7 @@ steps:
args: [--scenario=PreflightCheckRollback, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-upgrade-entire-mz-two-versions
label: "Checks upgrade across two versions %N"
label: "Checks upgrade across two versions"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -813,7 +813,7 @@ steps:
args: [--scenario=UpgradeEntireMzTwoVersions, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-upgrade-entire-mz-four-versions
label: "Checks upgrade across four versions %N"
label: "Checks upgrade across four versions"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -825,7 +825,7 @@ steps:
args: [--scenario=UpgradeEntireMzFourVersions, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-0dt-restart-entire-mz-forced-migrations
label: "Checks 0dt restart of the entire Mz with forced migrations %N"
label: "Checks 0dt restart of the entire Mz with forced migrations"
depends_on: build-aarch64
timeout_in_minutes: 60
parallelism: 2
Expand All @@ -837,7 +837,7 @@ steps:
args: [--scenario=ZeroDowntimeRestartEntireMzForcedMigrations, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-0dt-upgrade-entire-mz
label: "Checks 0dt upgrade, whole-Mz restart %N"
label: "Checks 0dt upgrade, whole-Mz restart"
depends_on: build-aarch64
timeout_in_minutes: 60
parallelism: 2
Expand All @@ -849,7 +849,7 @@ steps:
args: [--scenario=ZeroDowntimeUpgradeEntireMz, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-0dt-upgrade-entire-mz-two-versions
label: "Checks 0dt upgrade across two versions %N"
label: "Checks 0dt upgrade across two versions"
depends_on: build-aarch64
timeout_in_minutes: 60
parallelism: 2
Expand All @@ -861,7 +861,7 @@ steps:
args: [--scenario=ZeroDowntimeUpgradeEntireMzTwoVersions, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-0dt-upgrade-entire-mz-four-versions
label: "Checks 0dt upgrade across four versions %N"
label: "Checks 0dt upgrade across four versions"
depends_on: build-aarch64
timeout_in_minutes: 60
parallelism: 2
Expand Down Expand Up @@ -1405,7 +1405,7 @@ steps:
composition: retain-history

- id: data-ingest
label: "Data Ingest %N"
label: "Data Ingest"
depends_on: build-aarch64
timeout_in_minutes: 90
parallelism: 2
Expand Down Expand Up @@ -1621,7 +1621,7 @@ steps:
sanitizer: skip

- id: txn-wal-fencing
label: Txn-wal fencing %N
label: Txn-wal fencing
depends_on: build-aarch64
timeout_in_minutes: 120
parallelism: 2
Expand Down Expand Up @@ -1699,7 +1699,7 @@ steps:
queue: hetzner-aarch64-8cpu-16gb

- id: sqllogictest
label: ":bulb: SQL logic tests %N"
label: ":bulb: SQL logic tests"
depends_on: build-aarch64
timeout_in_minutes: 240
parallelism: 10
Expand Down
26 changes: 13 additions & 13 deletions ci/release-qualification/pipeline.template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ steps:
key: benchmark
steps:
- id: feature-benchmark-scale-plus-one
label: "Feature benchmark against 'common-ancestor' with --scale=+1 %N"
label: "Feature benchmark against 'common-ancestor' with --scale=+1"
depends_on: build-x86_64
timeout_in_minutes: 2880
parallelism: 8
Expand Down Expand Up @@ -320,7 +320,7 @@ steps:
key: platform-checks
steps:
- id: checks-restart-cockroach
label: "Checks + restart Cockroach %N"
label: "Checks + restart Cockroach"
depends_on: build-aarch64
timeout_in_minutes: 180
# Sometimes runs into query timeouts or entire test timeouts with parallelism 1, too much state, same in all other platform-checks
Expand All @@ -334,7 +334,7 @@ steps:
args: [--scenario=RestartCockroach, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-backup-restore-before-manipulate
label: "Checks backup + restore between the two manipulate() %N"
label: "Checks backup + restore between the two manipulate()"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -346,7 +346,7 @@ steps:
args: [--scenario=BackupAndRestoreBeforeManipulate, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-backup-restore-after-manipulate
label: "Checks backup + restore after manipulate() %N"
label: "Checks backup + restore after manipulate()"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -358,7 +358,7 @@ steps:
args: [--scenario=BackupAndRestoreAfterManipulate, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-backup-multi
label: "Checks + multiple backups/restores %N"
label: "Checks + multiple backups/restores"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -370,7 +370,7 @@ steps:
args: [--scenario=BackupAndRestoreMulti, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-preflight-check-continue
label: "Checks preflight-check and continue upgrade %N"
label: "Checks preflight-check and continue upgrade"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -382,7 +382,7 @@ steps:
args: [--scenario=PreflightCheckContinue, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-upgrade-clusterd-compute-first
label: "Platform checks upgrade, restarting compute clusterd first %N"
label: "Platform checks upgrade, restarting compute clusterd first"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -394,7 +394,7 @@ steps:
args: [--scenario=UpgradeClusterdComputeFirst, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-upgrade-clusterd-compute-last
label: "Platform checks upgrade, restarting compute clusterd last %N"
label: "Platform checks upgrade, restarting compute clusterd last"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -406,7 +406,7 @@ steps:
args: [--scenario=UpgradeClusterdComputeLast, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-kill-clusterd-storage
label: "Checks + kill storage clusterd %N"
label: "Checks + kill storage clusterd"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -429,7 +429,7 @@ steps:
args: [--scenario=RestartSourcePostgres, --check=PgCdc, --check=PgCdcNoWait, --check=PgCdcMzNow, --check=SshPg]

- id: checks-restart-clusterd-compute
label: "Checks + restart clusterd compute %N"
label: "Checks + restart clusterd compute"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -441,7 +441,7 @@ steps:
args: [--scenario=RestartClusterdCompute, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-drop-create-default-replica
label: "Checks + DROP/CREATE replica %N"
label: "Checks + DROP/CREATE replica"
depends_on: build-aarch64
timeout_in_minutes: 180
parallelism: 2
Expand All @@ -454,7 +454,7 @@ steps:
args: [--scenario=DropCreateDefaultReplica, "--seed=$BUILDKITE_JOB_ID"]

- id: checks-0dt-restart-entire-mz
label: "Checks 0dt restart of the entire Mz %N"
label: "Checks 0dt restart of the entire Mz"
depends_on: build-aarch64
timeout_in_minutes: 60
parallelism: 2
Expand All @@ -466,7 +466,7 @@ steps:
args: [--scenario=ZeroDowntimeRestartEntireMz, "--seed=$BUILDKITE_JOB_ID"]

- id: limits
label: "Product limits (finding new limits) %N"
label: "Product limits (finding new limits)"
depends_on: build-aarch64
agents:
queue: hetzner-aarch64-16cpu-32gb
Expand Down
Loading

0 comments on commit ebab660

Please sign in to comment.