From 6d0293276bb77d1f741fc60a25f4153368f5e250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Fern=C3=A1ndez?= Date: Tue, 23 Jan 2024 10:59:01 +0100 Subject: [PATCH] Fix benchmark reporting when benchmark script fails, and provide more reliable and informative results (#4623) --- .github/workflows/wasm-benchmarks.yml | 59 ++++++++++++++++--- .../driver-adapters/executor/src/bench.ts | 7 ++- .../driver-adapters/executor/src/recording.ts | 8 ++- 3 files changed, 62 insertions(+), 12 deletions(-) diff --git a/.github/workflows/wasm-benchmarks.yml b/.github/workflows/wasm-benchmarks.yml index e99f67d805b4..94241517f30f 100644 --- a/.github/workflows/wasm-benchmarks.yml +++ b/.github/workflows/wasm-benchmarks.yml @@ -24,14 +24,15 @@ jobs: - name: "Setup Node.js" uses: actions/setup-node@v4 - with: - node-version: ${{ matrix.node_version }} - name: "Setup pnpm" uses: pnpm/action-setup@v2 with: version: 8 + - name: Install bc + run: sudo apt update && sudo apt-get install -y bc + - name: "Login to Docker Hub" uses: docker/login-action@v3 continue-on-error: true @@ -51,25 +52,67 @@ jobs: - name: Run benchmarks id: bench run: | - make run-bench | tee results.txt + make run-bench | tee results.txt + + # Extract the values from the benchmark output + regressed_values=$(grep "slower than Web Assembly: Latest" results.txt | cut -f1 -d'x') + improved_values=$(grep "faster than Web Assembly: Latest" results.txt | cut -f1 -d'x') + + # Initialize sum variable and count + total_sum=0 + total_count=0 + + # Add the inverted regressed values to the sum + for value in $regressed_values; do + inverted=$(echo "scale=4; 1/$value" | bc) + echo "Regressed value: $inverted" + total_sum=$(echo "$total_sum + $inverted" | bc) + total_count=$((total_count + 1)) + done + + # Add the improved values to the sum + for value in $improved_values; do + echo "Improved value: $value" + total_sum=$(echo "$total_sum + $value" | bc) + total_count=$((total_count + 1)) + done + + if [ $total_count -eq 0 ]; then + echo "💥 something was wrong running the benchmarks" + exit 1 + fi + + mean=$(echo "scale=4; $total_sum / $total_count" | bc) - regressed=$(grep "slower than Web Assembly: Latest" results.txt | cut -f1 -d'x' | awk '$1 > 1.02' | wc -l ) - if [ "$regressed" -gt 0 ]; then - summary="🚨 WASM query-engine: $regressed benchmark(s) have regressed at least 2%" + echo "Extracted $total_count values from the benchmark output" + echo "Total sum: $total_sum" + echo "Total count: $total_count" + echo "Mean: $mean" + + # Report improvement or worsening. Fails if >= 1.5% worsening. + if (( $(echo "$mean < 0.985" | bc -l) )); then + percent=$(echo "scale=4; ((1 / $mean) - 1) * 100" | bc) + summary="❌ WASM query-engine performance will worsen by $(printf %.2f "$percent")%" status=failed + elif (( $(echo "$mean > 1.015" | bc -l) )); then + percent=$(echo "scale=4; ($mean - 1) * 100" | bc) + summary="🚀 WASM query-engine performance will improve by $(printf %.2f "$percent")%" + status=passed else - summary="✅ WASM query-engine: no benchmarks have regressed" + delta=$(echo "scale=3; (1 / $mean)" | bc) + summary="✅ WASM query-engine performance won't change substantially ($(printf %.3f "$delta")x)" status=passed fi echo "summary=$summary" >> "$GITHUB_OUTPUT" echo "status=$status" >> "$GITHUB_OUTPUT" + + # Save the output to a file so we can use it in the comment { echo 'bench_output<> "$GITHUB_OUTPUT" - - name: Find past report comment uses: peter-evans/find-comment@v2 id: findReportComment diff --git a/query-engine/driver-adapters/executor/src/bench.ts b/query-engine/driver-adapters/executor/src/bench.ts index 41ec1f63c9b9..e01c333e5c68 100644 --- a/query-engine/driver-adapters/executor/src/bench.ts +++ b/query-engine/driver-adapters/executor/src/bench.ts @@ -213,6 +213,7 @@ function initQeWasmBaseLine( return new WasmBaseline(qe.queryEngineOptions(datamodel), debug, adapter); } -const err = (...args: any[]) => console.error("[nodejs] ERROR:", ...args); - -main().catch(err); +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/query-engine/driver-adapters/executor/src/recording.ts b/query-engine/driver-adapters/executor/src/recording.ts index f72997212ea4..c7cd67ffe200 100644 --- a/query-engine/driver-adapters/executor/src/recording.ts +++ b/query-engine/driver-adapters/executor/src/recording.ts @@ -61,7 +61,13 @@ function createInMemoryRecordings() { const queryResults = {}; const commandResults = {}; - const queryToKey = (query) => JSON.stringify(query); + // Recording is currently only used in benchmarks. Before we used to serialize the whole query + // (sql + args) but since bigints are not serialized by JSON.stringify, and we didn't really need + // args for benchmarks, we just serialize the sql part. + // + // If this ever changes (we reuse query recording in tests) we need to make sure to serialize the + // args as well. + const queryToKey = (query) => JSON.stringify(query.sql); return { addQueryResults: (params, result) => {