Fix benchmark reporting when benchmark script fails, and provide more reliable and informative results #223
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs the WASM query-engine benchmarks on pull requests, posts the outcome as
# a pull-request comment, and fails the job when the benchmark step reports
# `status=failed`.
name: "QE: WASM benchmarks"

on:
  pull_request:
    paths-ignore:
      - ".github/**"
      - "!.github/workflows/wasm-benchmarks.yml"
      - ".buildkite/**"
      - "*.md"
      - "LICENSE"
      - "CODEOWNERS"

# One active run per workflow and ref: a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  benchmarks:
    runs-on: ubuntu-latest
    env: # Set environment variables for the whole job
      PROFILE: release
    steps:
      - name: Checkout PR branch
        uses: actions/checkout@v4

      - name: "Setup Node.js"
        uses: actions/setup-node@v4
        with:
          # This job defines no build matrix, so a `matrix.*` expression here
          # would expand to an empty string and leave the Node.js version up
          # to the runner image. Pin it so benchmark runs are comparable.
          # NOTE(review): confirm "20" is the version the benchmarks target.
          node-version: "20"

      - name: Install bc
        # Refresh the package index first so the install does not depend on
        # the state of the index baked into the runner image.
        run: |
          sudo apt-get update
          sudo apt-get install -y bc

      - name: "Setup pnpm"
        uses: pnpm/action-setup@v2
        with:
          version: 8

      # Best effort: skipped when the credentials are not available, and a
      # failed login does not fail the job.
      - name: "Login to Docker Hub"
        uses: docker/login-action@v3
        continue-on-error: true
        env:
          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        if: "${{ env.DOCKERHUB_USERNAME != '' && env.DOCKERHUB_TOKEN != '' }}"
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Install Nix
        uses: cachix/install-nix-action@v24

      - name: Setup benchmark
        run: make setup-pg-bench

      # Outputs of this step:
      #   bench_output - raw benchmark output (contents of results.txt)
      #   summary      - one-line headline for the PR comment
      #   status       - "passed" or "failed"; "failed" fails the job later
      - name: Run benchmarks
        id: bench
        run: |
          # Without an explicit `shell:` the script runs under `bash -e`
          # with pipefail off, so a pipeline reports only its last command's
          # status. Turn pipefail on so a failing `make` is not hidden by tee.
          set -o pipefail

          # `|| ...` keeps errexit from aborting here: the failure is reported
          # through the step outputs so it still reaches the PR comment.
          bench_exit=0
          make run-bench | tee results.txt || bench_exit=$?

          # Save the output to a file so we can use it in the comment.
          # A random delimiter cannot collide with a line of benchmark output.
          delimiter="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
          {
            echo "bench_output<<$delimiter"
            cat results.txt
            echo "$delimiter"
          } >> "$GITHUB_OUTPUT"

          if [ "$bench_exit" -ne 0 ]; then
            echo "summary=❌ WASM query-engine: benchmark run failed (exit code $bench_exit)" >> "$GITHUB_OUTPUT"
            echo "status=failed" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Take the text before the first 'x' on each matching line.
          # grep exits 1 when nothing matches; `|| true` keeps that from
          # aborting the script now that pipefail is on.
          regressed_values=$(grep "slower than Web Assembly: Latest" results.txt | cut -f1 -d'x' || true)
          improved_values=$(grep "faster than Web Assembly: Latest" results.txt | cut -f1 -d'x' || true)

          # Initialize sum variable and count
          total_sum=0
          total_count=0

          # Add the inverted regressed values to the sum.
          # The counter uses $((...)) rather than ((total_count++)): the
          # latter returns a non-zero status when the counter is 0, which
          # would abort the script under `bash -e`.
          for value in $regressed_values; do
            inverted=$(echo "scale=4; 1/$value" | bc)
            echo "Regressed value: $inverted"
            total_sum=$(echo "$total_sum + $inverted" | bc)
            total_count=$((total_count + 1))
          done

          # Add the improved values to the sum
          for value in $improved_values; do
            echo "Improved value: $value"
            total_sum=$(echo "$total_sum + $value" | bc)
            total_count=$((total_count + 1))
          done

          if [ "$total_count" -eq 0 ]; then
            echo "summary=✅ WASM query-engine: no benchmarks have changed substantially" >> "$GITHUB_OUTPUT"
            echo "status=passed" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          mean=$(echo "scale=4; $total_sum / $total_count" | bc)

          echo "Extracted $total_count values from the benchmark output"
          echo "Total sum: $total_sum"
          echo "Total count: $total_count"
          echo "Mean: $mean"

          # Calculate the percentage of improvement or worsening.
          # A mean within 1% of 1.0 is treated as "no substantial change".
          if (( $(echo "$mean > 1.01" | bc -l) )); then
            change_percentage=$(echo "scale=4; $mean - 1" | bc)
            summary="🚀 WASM query-engine performance will improve by $(echo "$change_percentage * 100" | bc) percent"
            status=passed
          elif (( $(echo "$mean < 0.99" | bc -l) )); then
            change_percentage=$(echo "scale=4; (1 / $mean) - 1" | bc)
            summary="❌ WASM query-engine performance will worsen by $(echo "$change_percentage * 100" | bc) percent"
            status=failed
          else
            change_percentage=$(echo "scale=4; (1 / $mean)" | bc)
            summary="✅ WASM query-engine performance won't change substantially. AVG(latency) = ${change_percentage}x"
            status=passed
          fi

          echo "summary=$summary" >> "$GITHUB_OUTPUT"
          echo "status=$status" >> "$GITHUB_OUTPUT"

      # The hidden HTML marker identifies the report comment so that it is
      # updated in place instead of a new comment being added on every run.
      - name: Find past report comment
        uses: peter-evans/find-comment@v2
        id: findReportComment
        with:
          issue-number: ${{ github.event.pull_request.number }}
          body-includes: "<!-- wasm-engine-perf -->"

      - name: Create or update report
        uses: peter-evans/create-or-update-comment@v3
        with:
          comment-id: ${{ steps.findReportComment.outputs.comment-id }}
          issue-number: ${{ github.event.pull_request.number }}
          body: |
            <!-- wasm-engine-perf -->
            #### ${{ steps.bench.outputs.summary }}

            <details>
            <summary>Full benchmark report</summary>

            ```
            ${{ steps.bench.outputs.bench_output }}
            ```

            </details>

            After changes in ${{ github.event.pull_request.head.sha }}
          edit-mode: replace

      # Runs after the report is posted so the comment is available even when
      # the job ends up failing.
      - name: Fail workflow if regression detected
        if: steps.bench.outputs.status == 'failed'
        run: |
          echo "Workflow failed due to benchmark regression or a failed benchmark run."
          exit 1