# .github/workflows/benchmark-call.yml

# Runs one named benchmark on a runner chosen by `instance_type`, uploads the
# metric JSON, result markdown and flamegraphs to S3, and commits the result
# markdown to the gh-pages branch. Can be started manually (workflow_dispatch)
# or from another workflow (workflow_call).
name: "Benchmark: Coordinate Runner & Reporting"

on:
  # Manual runs from the Actions UI. Mirrors the `workflow_call` inputs, except
  # that `benchmark_name` is limited to a fixed list of choices.
  workflow_dispatch:
    inputs:
      benchmark_name:
        description: The name of the benchmark to run
        type: choice
        required: true
        options:
          - verify_fibair
          - fibonacci
          - revm_transfer
          - regex
          - base64_json
          - fib_e2e
      # Runner selection and allocator.
      instance_type:
        description: The type of runner to start ({1,2,4,8,16,32,48,64}cpu-linux-{arm64,x64})
        type: string
        required: false
        default: 64cpu-linux-arm64
      memory_allocator:
        description: Memory allocator to use (mimalloc or jemalloc)
        type: string
        required: false
        default: mimalloc
      # Log blowup factors, one per proving level.
      app_log_blowup:
        description: Application level log blowup
        type: number
        required: false
        default: 2
      agg_log_blowup:
        description: Aggregation (leaf) level log blowup
        type: number
        required: false
        default: 2
      root_log_blowup:
        description: Root level log blowup (only for e2e)
        type: number
        required: false
        default: 2
      internal_log_blowup:
        description: Internal level log blowup (only for e2e)
        type: number
        required: false
        default: 2
      max_segment_length:
        description: Max segment length for continuations, must be larger than 524288
        type: number
        required: false
        default: 1048476
  # Reusable-workflow entry point. Takes the same parameters as the manual
  # trigger; `benchmark_name` is a free-form string here.
  workflow_call:
    inputs:
      benchmark_name:
        description: The name of the benchmark to run
        type: string
        required: true
      # Runner selection and allocator.
      instance_type:
        description: The type of runner to start ({1,2,4,8,16,32,48,64}cpu-linux-{arm64,x64})
        type: string
        required: false
        default: 64cpu-linux-arm64
      memory_allocator:
        description: Memory allocator to use (mimalloc or jemalloc)
        type: string
        required: false
        default: mimalloc
      # Log blowup factors, one per proving level.
      app_log_blowup:
        description: Application level log blowup
        type: number
        required: false
        default: 2
      agg_log_blowup:
        description: Aggregation (leaf) level log blowup
        type: number
        required: false
        default: 2
      root_log_blowup:
        description: Root level log blowup (only for e2e)
        type: number
        required: false
        default: 2
      internal_log_blowup:
        description: Internal level log blowup (only for e2e)
        type: number
        required: false
        default: 2
      max_segment_length:
        description: Max segment length for continuations, must be larger than 524288
        type: number
        required: false
        default: 1048476

# Workflow-wide environment shared by every step.
env:
  # S3 prefixes for result markdown, metric JSON and flamegraph SVGs.
  S3_PATH: 's3://axiom-workflow-data-sandbox-us-east-1/benchmark/github/results'
  S3_METRICS_PATH: 's3://axiom-workflow-data-sandbox-us-east-1/benchmark/github/metrics'
  PUBLIC_S3_PATH: 's3://axiom-public-data-sandbox-us-east-1/benchmark/github/flamegraphs'
  # Cargo features passed through `--features`; the "Feature flags" step may
  # append `aggregation`.
  FEATURE_FLAGS: 'bench-metrics,parallel,nightly-features'
  # Start empty; later steps fill these in through $GITHUB_ENV.
  CMD_ARGS: ''
  INPUT_ARGS: ''
  # Cargo setting `net.git-fetch-with-cli`: fetch git dependencies with the
  # `git` executable.
  CARGO_NET_GIT_FETCH_WITH_CLI: 'true'

jobs:
  # The only job in this workflow: sets up the runner, runs the benchmark and
  # publishes the results.
  bench-new:
    name: Run benchmark on workflow ref/branch
    # Runner labels. `runner=` carries the requested instance type and `tag=`
    # is built from the benchmark name plus run id/number/attempt.
    # NOTE(review): the label syntax looks like the RunsOn (runs-on.com)
    # convention - confirm against the runner provider's documentation.
    runs-on:
      - runs-on
      - runner=${{ inputs.instance_type }}
      - run-id=${{ github.run_id }}
      - family=m7
      - tag=bench-${{ inputs.benchmark_name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
    steps:
      ##########################################################################
      # Environment setup                                                      #
      ##########################################################################
      # Check out the PR head branch when there is one, otherwise the ref that
      # triggered the workflow.
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.head_ref || github.ref }}
      # Load the deploy key into an ssh-agent.
      # NOTE(review): presumably this lets cargo fetch axiom-crypto/revm as a
      # git dependency over SSH - confirm.
      - name: Give GitHub Actions access to axiom-crypto/revm
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: |
            ${{ secrets.GH_ACTIONS_DEPLOY_PRIVATE_KEY }}

      # Nightly Rust toolchain plus a build cache that is saved even when the
      # job fails.
      - uses: dtolnay/rust-toolchain@nightly
      - uses: Swatinem/rust-cache@v2
        with:
          cache-on-failure: true

      # Adds the rust-src component for the host architecture and makes sure
      # the s5cmd client is available, installing the 2.2.2 .deb when missing.
      - name: Install architecture specific tools
        run: |
          # Map the machine architecture to a toolchain triple and the matching
          # s5cmd release package.
          host_arch=$(uname -m)
          case $host_arch in
            arm64|aarch64)
              toolchain_triple="aarch64-unknown-linux-gnu"
              s5cmd_deb="s5cmd_2.2.2_linux_arm64.deb"
              ;;
            x86_64|amd64)
              toolchain_triple="x86_64-unknown-linux-gnu"
              s5cmd_deb="s5cmd_2.2.2_linux_amd64.deb"
              ;;
            *)
              echo "Unsupported architecture: $host_arch"
              exit 1
              ;;
          esac
          rustup component add rust-src --toolchain "nightly-2024-10-30-${toolchain_triple}"

          echo "Checking s5cmd"
          if ! type s5cmd &>/dev/null; then
              # Download into a fresh scratch directory and install the package.
              download_dir=/tmp/s5cmd
              rm -rf $download_dir
              mkdir $download_dir
              echo "s5cmd was not installed. Installing.."
              wget "https://github.com/peak/s5cmd/releases/download/v2.2.2/${s5cmd_deb}" -P $download_dir
              sudo dpkg -i "${download_dir}/${s5cmd_deb}"
          else
              echo "s5cmd was installed."
          fi

      # For PRs labelled `run-benchmark` and for pushes to main, append the
      # `aggregation` feature to FEATURE_FLAGS for the following steps.
      - name: Feature flags
        if: contains(github.event.pull_request.labels.*.name, 'run-benchmark') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
        run: |
          echo "Adding aggregation feature flag"
          printf 'FEATURE_FLAGS=%s,aggregation\n' "${FEATURE_FLAGS}" >> $GITHUB_ENV

      # Reads the benchmark's `e2e_bench` flag from ci/benchmark-config.json,
      # exports it as E2E_BENCH and, for e2e benchmarks, prepends the root and
      # internal log blowup arguments to INPUT_ARGS.
      - name: Setup e2e (halo2 and arguments)
        working-directory: extensions/native/recursion # We only ever run halo2 for recursion
        run: |
          # A benchmark is selected when, for every parameter, at least one
          # run_params entry carries the requested value. `any(...)` is used
          # rather than `select(.run_params[] | ...)`: a generator inside
          # `select` emits the benchmark once per matching entry, which turns
          # E2E_BENCH into a multi-line value and corrupts $GITHUB_ENV as soon
          # as a benchmark has more than one matching run_params entry.
          E2E_BENCH=$(jq -r --arg name "${{ inputs.benchmark_name }}" \
          --arg instance_type "${{ inputs.instance_type }}" \
          --arg memory_allocator "${{ inputs.memory_allocator }}" \
          --argjson app_log_blowup "${{ inputs.app_log_blowup }}" \
          --argjson agg_log_blowup "${{ inputs.agg_log_blowup }}" \
          --argjson root_log_blowup "${{ inputs.root_log_blowup }}" \
          --argjson internal_log_blowup "${{ inputs.internal_log_blowup }}" \
          --argjson max_segment_length "${{ inputs.max_segment_length }}" \
          '.benchmarks[] |
            select(.name == $name) |
            select(any(.run_params[]; .instance_type == $instance_type)) |
            select(any(.run_params[]; .memory_allocator == $memory_allocator)) |
            select(any(.run_params[]; .app_log_blowup == $app_log_blowup)) |
            select(any(.run_params[]; .agg_log_blowup == $agg_log_blowup)) |
            select(any(.run_params[]; (.root_log_blowup // 0) == $root_log_blowup)) |
            select(any(.run_params[]; (.internal_log_blowup // 0) == $internal_log_blowup)) |
            select(any(.run_params[]; (.max_segment_length // 1048476) == $max_segment_length)) |
            .e2e_bench
          ' ../../../ci/benchmark-config.json)
          echo "E2E_BENCH=${E2E_BENCH}" >> $GITHUB_ENV

          # Only e2e benchmarks take the root/internal blowup arguments.
          if [[ "${E2E_BENCH}" == "true" ]]; then
            ROOT_ARG="--root_log_blowup ${{ inputs.root_log_blowup }}"
            INTERNAL_ARG="--internal_log_blowup ${{ inputs.internal_log_blowup }}"
            echo "INPUT_ARGS=${ROOT_ARG} ${INTERNAL_ARG} ${INPUT_ARGS}" >> $GITHUB_ENV
          fi

      # CMD_ARGS carries the cargo feature list; BIN_NAME is the benchmark name.
      - name: Set BIN_NAME and CMD_ARGS
        run: |
          {
            echo "CMD_ARGS=--features ${FEATURE_FLAGS}"
            echo "BIN_NAME=${{ inputs.benchmark_name }}"
          } >> $GITHUB_ENV

      # Prepends the runner, allocator, app/agg blowup and segment length
      # arguments to whatever INPUT_ARGS already holds.
      - name: Set application and aggregation level log blowup
        run: |
          bench_args="--instance_type ${{ inputs.instance_type }}"
          bench_args+=" --memory_allocator ${{ inputs.memory_allocator }}"
          bench_args+=" --app_log_blowup ${{ inputs.app_log_blowup }}"
          bench_args+=" --agg_log_blowup ${{ inputs.agg_log_blowup }}"
          bench_args+=" --max_segment_length ${{ inputs.max_segment_length }}"
          echo "INPUT_ARGS=${bench_args} ${INPUT_ARGS}" >> $GITHUB_ENV

      # Resolves the benchmark's working directory from ci/benchmark-config.json
      # and publishes two step outputs: `working_dir` and `relative_path` (the
      # path from that directory back to the checkout root). For e2e benchmarks
      # it also runs extensions/native/recursion/trusted_setup_s3.sh.
      - name: Set working directory
        id: set-working-dir
        run: |
          WORKING_DIR=$(jq -r --arg name "${{ inputs.benchmark_name }}" '
            .benchmarks[] |
            select(.name == $name) |
            .working_directory
          ' ./ci/benchmark-config.json)
          # Both lines below must read WORKING_DIR, the variable assigned above.
          # They previously read the undefined WORKSPACE_DIR, which left
          # `working_dir` empty and made the benchmark run from the repo root.
          RELATIVE_PATH=$(python3 -c "import os.path; print(os.path.relpath('.', '$WORKING_DIR'))")
          echo "working_dir=$WORKING_DIR" >> $GITHUB_OUTPUT
          echo "relative_path=$RELATIVE_PATH" >> $GITHUB_OUTPUT
          if [[ "${E2E_BENCH}" == "true" ]]; then
            bash ./extensions/native/recursion/trusted_setup_s3.sh
          fi

      ##########################################################################
      # Find working directory based on benchmark_name and run the benchmark   #
      ##########################################################################
      # Runs ci/scripts/bench.py; the working directory and the path back to
      # the script both come from the `set-working-dir` step outputs.
      # CMD_ARGS and INPUT_ARGS are unquoted, so the shell splits each of them
      # into individual command-line arguments.
      - name: Run benchmark
        working-directory: ${{ steps.set-working-dir.outputs.working_dir }}
        run: |
          python3 ${{ steps.set-working-dir.outputs.relative_path }}/ci/scripts/bench.py $BIN_NAME $CMD_ARGS $INPUT_ARGS

      ##########################################################################
      # Generate result .md files and flamegraphs, store them in S3            #
      ##########################################################################
      # Exports METRIC_NAME, METRIC_PATH and current_sha, uploads the metric
      # JSON under the commit SHA, and renders results.md (diffed against the
      # latest main metrics when those exist in S3).
      - name: Store metric json and compute diff with previous
        run: |
          # <bin>-<app>-<agg>[-<root>-<internal>]-<max segment>-<instance>-<allocator>
          METRIC_NAME="${BIN_NAME}-${{ inputs.app_log_blowup }}-${{ inputs.agg_log_blowup }}"
          if [[ "${E2E_BENCH}" == "true" ]]; then
            METRIC_NAME+="-${{ inputs.root_log_blowup }}-${{ inputs.internal_log_blowup }}"
          fi
          METRIC_NAME+="-${{ inputs.max_segment_length }}-${{ inputs.instance_type }}-${{ inputs.memory_allocator }}"
          echo "METRIC_NAME=${METRIC_NAME}" >> $GITHUB_ENV

          METRIC_PATH=".bench_metrics/${METRIC_NAME}.json"
          echo "METRIC_PATH=${METRIC_PATH}" >> $GITHUB_ENV

          current_sha=$(git rev-parse HEAD)
          echo "Current SHA: $current_sha"
          echo "current_sha=${current_sha}" >> $GITHUB_ENV

          # Nothing to upload or render when the benchmark wrote no metrics.
          if [[ ! -f $METRIC_PATH ]]; then
            echo "No benchmark metrics found at ${METRIC_PATH}"
            exit 0
          fi

          s5cmd cp $METRIC_PATH ${{ env.S3_METRICS_PATH }}/${current_sha}-${METRIC_NAME}.json

          baseline_uri="${{ env.S3_METRICS_PATH }}/main-${METRIC_NAME}.json"
          baseline_hits=$(s5cmd ls $baseline_uri | wc -l)

          # Add `--prev` only when a baseline from main is available.
          unify_args=($METRIC_PATH)
          if [[ $baseline_hits -gt 0 ]]; then
            s5cmd cp $baseline_uri prev.json
            unify_args+=(--prev prev.json)
          else
            echo "No previous benchmark on main branch found"
          fi
          python3 ci/scripts/metric_unify/main.py "${unify_args[@]}" --aggregation-json ci/scripts/metric_unify/aggregation.json > results.md

      # NOTE(review): inferno is presumably what flamegraph.py shells out to -
      # confirm.
      - name: Install inferno-flamegraph
        run: cargo install inferno

      # Renders flamegraph SVGs from the metric JSON, uploads them under the
      # commit SHA and sets UPLOAD_FLAMEGRAPHS=1 for the metadata step.
      - name: Generate flamegraphs
        run: |
          # Skip silently when the benchmark produced no metrics file.
          if [[ ! -f $METRIC_PATH ]]; then
            exit 0
          fi
          python3 ci/scripts/metric_unify/flamegraph.py $METRIC_PATH
          s5cmd cp '.bench_metrics/flamegraphs/*.svg' "${{ env.PUBLIC_S3_PATH }}/${current_sha}/"
          echo "UPLOAD_FLAMEGRAPHS=1" >> $GITHUB_ENV

      # Appends a collapsible flamegraph section (when flamegraphs were
      # uploaded) and run metadata to results.md, then uploads it to S3 under
      # the commit SHA.
      - name: Add benchmark metadata
        run: |
          report=results.md
          sha_link="https://github.com/${{ github.repository }}/commit/${current_sha}"
          svg_base=https://axiom-public-data-sandbox-us-east-1.s3.us-east-1.amazonaws.com/benchmark/github/flamegraphs/${current_sha}
          # Everything inside the braces is appended to the report.
          {
            echo ""
            if [[ "$UPLOAD_FLAMEGRAPHS" == '1' ]]; then
              echo "<details>"
              echo "<summary>Flamegraphs</summary>"
              echo ""
              # One clickable image per uploaded SVG.
              for svg in .bench_metrics/flamegraphs/*.svg; do
                svg_url="${svg_base}/$(basename "$svg")"
                echo "[![]($svg_url)]($svg_url)"
              done
              echo ""
              echo "</details>"
              echo ""
            fi
            echo "Commit: ${sha_link}"
            echo ""
            echo "Max Segment Length: ${{ inputs.max_segment_length }}"
            echo ""
            echo "Instance Type: ${{ inputs.instance_type }}"
            echo ""
            echo "Memory Allocator: ${{ inputs.memory_allocator }}"
            echo ""
            echo "[Benchmark Workflow](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})"
          } >> $report
          s5cmd cp $report "${{ env.S3_PATH }}/${current_sha}-${METRIC_NAME}.md"

      ##########################################################################
      # Update S3 with individual results upon a push event                    #
      ##########################################################################
      # On pushes to main, copy this commit's result markdown (and metric JSON,
      # when present) to the `main-` keys used as the comparison baseline.
      - name: Update latest main result in s3
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        run: |
          commit_result="${{ env.S3_PATH }}/${{ env.current_sha }}-${METRIC_NAME}.md"
          main_result="${{ env.S3_PATH }}/main-${METRIC_NAME}.md"
          s5cmd cp "$commit_result" "$main_result"
          if [[ -f $METRIC_PATH ]]; then
            main_metrics="${{ env.S3_METRICS_PATH }}/main-${METRIC_NAME}.json"
            s5cmd cp $METRIC_PATH "$main_metrics"
          fi

      ##########################################################################
      # Update gh-pages with individual results                                #
      ##########################################################################
      # Switch the checkout to the gh-pages branch. The publishing step below
      # downloads the result markdown from S3 rather than reading it from the
      # workspace.
      - uses: actions/checkout@v4
        with:
          ref: gh-pages

      # Commit identity used for the gh-pages commit made later in this job.
      - name: Set up git
        run: |
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"

      # GH_PAGES_PATH defaults to the dispatch location; the two conditional
      # steps below overwrite it for pull requests and for pushes to main.
      - name: Set github pages path for dispatch
        run: |
          echo "GH_PAGES_PATH=benchmarks-dispatch/${{ github.head_ref || github.ref }}" >> $GITHUB_ENV

      - name: Set github pages path for PR
        if: github.event_name == 'pull_request'
        run: |
          echo "GH_PAGES_PATH=benchmarks-pr/${{ github.event.pull_request.number }}/individual" >> $GITHUB_ENV

      - name: Set github pages path for push
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        run: |
          echo "GH_PAGES_PATH=benchmarks/individual" >> $GITHUB_ENV

      # Downloads the result markdown from S3 into GH_PAGES_PATH, commits it
      # and pushes to gh-pages, re-merging origin/gh-pages before each of up to
      # ten push attempts.
      - name: Update PR github pages with new bench results
        run: |
          mkdir -p ${GH_PAGES_PATH}
          result_file=${GH_PAGES_PATH}/${METRIC_NAME}.md
          s5cmd cp "${{ env.S3_PATH }}/${{ env.current_sha }}-${METRIC_NAME}.md" $result_file
          git add $result_file
          git commit --allow-empty -m "Update benchmark result at ${result_file}"

          MAX_RETRIES=10
          RETRY_DELAY=5
          pushed=false

          # Fetch and merge before every push so a concurrent update to
          # gh-pages does not reject this one.
          for ((attempt = 1; attempt <= MAX_RETRIES; attempt++)); do
              echo "Attempt $attempt to push of $MAX_RETRIES..."
              git fetch origin gh-pages
              git merge origin/gh-pages --no-edit
              if git push origin gh-pages; then
                  pushed=true
                  break
              fi
              echo "Push failed. Retrying in $RETRY_DELAY seconds..."
              sleep $RETRY_DELAY
          done

          if [ "$pushed" = false ]; then
              echo "PUSH_FAILED"
              exit 1
          fi