Skip to content

Commit

Permalink
Merge branch 'rocm-main' into ci-upstream-sync-87_1
Browse files Browse the repository at this point in the history
  • Loading branch information
charleshofer committed Jan 14, 2025
2 parents c72ed26 + ea6903b commit 23fcd11
Show file tree
Hide file tree
Showing 13 changed files with 462 additions and 205 deletions.
36 changes: 10 additions & 26 deletions .github/workflows/ci-build.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: CI
name: ROCm CPU CI

# We test all supported Python versions as follows:
# - 3.10 : Documentation build
Expand All @@ -11,10 +11,10 @@ on:
# but only for the main branch
push:
branches:
- main
- rocm-main
pull_request:
branches:
- main
- rocm-main

permissions:
contents: read # to fetch code
Expand Down Expand Up @@ -42,12 +42,8 @@ jobs:
- run: pre-commit run --show-diff-on-failure --color=always --all-files

build:
# Don't execute in fork due to runner type
if: github.repository == 'jax-ml/jax'
name: "build ${{ matrix.name-prefix }} (py ${{ matrix.python-version }} on ubuntu-20.04, x64=${{ matrix.enable-x64}})"
runs-on: linux-x86-n2-32
container:
image: index.docker.io/library/ubuntu@sha256:6d8d9799fe6ab3221965efac00b4c34a2bcc102c086a58dff9e19a08b913c7ef # ratchet:ubuntu:20.04
runs-on: ROCM-Ubuntu
timeout-minutes: 60
strategy:
matrix:
Expand All @@ -65,10 +61,6 @@ jobs:
num_generated_cases: 1
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Image Setup
run: |
apt update
apt install -y libssl-dev
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
Expand Down Expand Up @@ -109,7 +101,7 @@ jobs:
documentation:
name: Documentation - test code snippets
runs-on: ubuntu-latest
runs-on: ROCM-Ubuntu
timeout-minutes: 10
strategy:
matrix:
Expand Down Expand Up @@ -146,19 +138,13 @@ jobs:
documentation_render:
name: Documentation - render documentation
runs-on: linux-x86-n2-16
container:
image: index.docker.io/library/ubuntu@sha256:6d8d9799fe6ab3221965efac00b4c34a2bcc102c086a58dff9e19a08b913c7ef # ratchet:ubuntu:20.04
timeout-minutes: 10
runs-on: ubuntu-latest
timeout-minutes: 20
strategy:
matrix:
python-version: ['3.10']
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Image Setup
run: |
apt update
apt install -y libssl-dev libsqlite3-dev
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
Expand Down Expand Up @@ -229,9 +215,7 @@ jobs:
ffi:
name: FFI example
runs-on: linux-x86-g2-16-l4-1gpu
container:
image: index.docker.io/tensorflow/build:latest-python3.12@sha256:48e99608fe9434ada5b14e19fdfd8e64f4cfc83aacd328b9c2101b210e984295 # ratchet:index.docker.io/tensorflow/build:latest-python3.12
runs-on: ROCM-Ubuntu
timeout-minutes: 30
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand All @@ -250,7 +234,7 @@ jobs:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-ffi-examples-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt', 'examples/**/pyproject.toml') }}
- name: Install JAX
run: pip install .[cuda12]
run: pip install .
- name: Build and install example project
run: python -m pip install -v ./examples/ffi[test]
env:
Expand All @@ -259,7 +243,7 @@ jobs:
# a different toolchain. GCC is the default compiler on the
# 'ubuntu-latest' runner, but we still set this explicitly just to be
# clear.
CMAKE_ARGS: -DCMAKE_CXX_COMPILER=g++ -DJAX_FFI_EXAMPLE_ENABLE_CUDA=ON
CMAKE_ARGS: -DCMAKE_CXX_COMPILER=g++ #-DJAX_FFI_EXAMPLE_ENABLE_CUDA=ON
- name: Run CPU tests
run: python -m pytest examples/ffi/tests
env:
Expand Down
63 changes: 63 additions & 0 deletions .github/workflows/rocm-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Builds JAX for ROCm through build/rocm/ci_build, archives the resulting
# wheels as a workflow artifact, and runs a small pytest target against the
# image that the build step tagged.
name: ROCm GPU CI

on:
  # Trigger the workflow on push or pull request,
  # but only for the rocm-main branch
  push:
    branches:
      - rocm-main
  pull_request:
    branches:
      - rocm-main

# Least privilege: the job only needs to fetch the code.
permissions:
  contents: read

# A newer run for the same PR/branch cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true

jobs:
  build-jax-in-docker: # strategy and matrix come here
    # NOTE(review): presumably a persistent self-hosted GPU runner (the steps
    # call docker and rocm-smi and clean up earlier work dirs) - confirm.
    runs-on: mi-250
    env:
      BASE_IMAGE: "ubuntu:22.04"
      # Image tag and work directory are unique per run and per attempt so
      # that runs sharing a machine do not collide.
      TEST_IMAGE: ubuntu-jax-${{ github.run_id }}_${{ github.run_number }}_${{ github.run_attempt }}
      PYTHON_VERSION: "3.10"
      ROCM_VERSION: "6.2.4"
      WORKSPACE_DIR: workdir_${{ github.run_id }}_${{ github.run_number }}_${{ github.run_attempt }}
    steps:
      - name: Clean up old runs
        run: |
          ls
          # Make sure that we own all of the files so that we have permissions to delete them.
          # --rm keeps stopped helper containers from accumulating on the runner, and an
          # absolute bind-mount path works on Docker versions that reject relative ones.
          docker run --rm -v "$(pwd):/jax" ubuntu /bin/bash -c "chown -R $UID /jax/workdir_* || true"
          # Remove any old work directories from this machine
          rm -rf workdir_*
          ls
      - name: Print system info
        run: |
          whoami
          printenv
          df -h
          rocm-smi
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # Check out into the per-run directory instead of the runner's default workspace root
          path: ${{ env.WORKSPACE_DIR }}
      - name: Build JAX
        run: |
          pushd "$WORKSPACE_DIR"
          python3 build/rocm/ci_build \
            --rocm-version $ROCM_VERSION \
            --base-docker $BASE_IMAGE \
            --python-versions $PYTHON_VERSION \
            --compiler=clang \
            dist_docker \
            --image-tag $TEST_IMAGE
      - name: Archive jax wheels
        uses: actions/upload-artifact@v4
        with:
          name: rocm_jax_r${{ env.ROCM_VERSION }}_py${{ env.PYTHON_VERSION }}_id${{ github.run_id }}
          # The build runs from inside WORKSPACE_DIR, so dist/ is resolved
          # there rather than at the runner's workspace root.
          # NOTE(review): assumes ci_build writes wheels to <checkout>/dist - confirm.
          path: ${{ env.WORKSPACE_DIR }}/dist/*.whl
      - name: Run tests
        run: |
          cd "$WORKSPACE_DIR"
          python3 build/rocm/ci_build test $TEST_IMAGE --test-cmd "pytest tests/core_test.py"
52 changes: 52 additions & 0 deletions .github/workflows/rocm-nightly-upstream-sync.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Pulls the latest changes from upstream into main and opens a PR to merge
# them into rocm-main branch.
#
# Jobs run in sequence: sync-main -> create-sync-branch -> open-sync-pr.

name: ROCm Nightly Upstream Sync
on:
  workflow_dispatch:
  schedule:
    # 06:00 UTC, Monday through Friday
    - cron: '0 6 * * 1-5'
permissions:
  contents: write
  pull-requests: write
env:
  # Unique per run and per attempt so that a re-run pushes a fresh branch
  SYNC_BRANCH_NAME: ci-upstream-sync-${{ github.run_number }}_${{ github.run_attempt }}
jobs:
  sync-main:
    runs-on: ubuntu-latest
    steps:
      - run: |
          gh auth status
          gh repo sync rocm/jax -b main
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  create-sync-branch:
    needs: sync-main
    runs-on: ubuntu-latest
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # Full history is required: with the default shallow clone the
          # checked-out tip has no parents, so git cannot find a merge base
          # between main and rocm-main and the merge below would be refused
          # (and that failure is hidden by `|| true`).
          fetch-depth: 0
      - name: Create branch
        run: |
          git fetch
          git checkout origin/main
          git checkout -b "$SYNC_BRANCH_NAME"
          # Try and merge rocm-main into this new branch so that we don't run upstream's CI code
          git config --global user.email "[email protected]"
          git config --global user.name "GitHub Actions"
          git merge origin/rocm-main || true
          # If the merge creates conflicts, we want to abort and push to origin anyways so that a dev can resolve the conflicts
          # (after a clean merge there is nothing to abort; that error is deliberately ignored)
          git merge --abort || true
          git push origin HEAD
  open-sync-pr:
    needs: create-sync-branch
    runs-on: ubuntu-latest
    steps:
      - run: |
          gh pr create --repo "$GITHUB_REPOSITORY" --head "$SYNC_BRANCH_NAME" --base rocm-main --title "CI: $(date +%x) upstream sync" --body "Daily sync with upstream"
          # --auto merges the PR once its merge requirements are met
          gh pr merge --repo "$GITHUB_REPOSITORY" --merge --auto "$SYNC_BRANCH_NAME"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
41 changes: 41 additions & 0 deletions .github/workflows/rocm-open-upstream-pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# When a PR targeting rocm-main is labeled 'open-upstream', rebase its commits
# onto main in a new "<head>-upstream" branch and comment on the PR with a
# pre-filled link for opening the matching PR against jax-ml/jax.
name: ROCm Open Upstream PR
on:
  pull_request:
    types: [labeled]
    branches: [rocm-main]
jobs:
  open-upstream:
    if: ${{ github.event.label.name == 'open-upstream' }}
    permissions:
      contents: write
      pull-requests: write
    runs-on: ubuntu-latest
    env:
      NEW_BRANCH_NAME: "${{ github.head_ref }}-upstream"
      # Untrusted, user-controlled values are handed to the scripts through
      # environment variables and never expanded inside the script text, so
      # a crafted branch name, PR title or PR body cannot inject shell commands.
      HEAD_REF: ${{ github.head_ref }}
    steps:
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: Rebase code to main
        run: |
          git config --global user.email "[email protected]"
          git config --global user.name "Github Actions"
          git fetch
          git checkout -b "$NEW_BRANCH_NAME" "origin/$HEAD_REF"
          # Replay only the commits that are not already on rocm-main on top of main
          git rebase --onto origin/main origin/rocm-main
          # Force push here so that we don't run into conflicts with the origin branch
          git push origin HEAD --force
      - name: Leave link to create PR
        env:
          GH_TOKEN: ${{ github.token }}
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_BODY: ${{ github.event.pull_request.body }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # Bash is not friendly with newline characters, so make our own
          NL=$'\n'
          # Encode the PR title and body for passing as URL get parameters
          TITLE_ENC=$(jq -rn --arg x "[ROCm] ${PR_TITLE}" '$x|@uri')
          BODY_ENC=$(jq -rn --arg x "${PR_BODY}${NL}${NL}Created from: rocm/jax#${PR_NUMBER}" '$x|@uri')
          # Create a link that will open up a new PR form to upstream and autofill the fields
          CREATE_PR_LINK="https://github.com/jax-ml/jax/compare/main...ROCm:jax:$NEW_BRANCH_NAME?expand=1&title=$TITLE_ENC&body=$BODY_ENC"
          # Add a comment with the link to the PR
          COMMENT_BODY="Feature branch from main is ready. [Create a new PR][1] destined for upstream?${NL}${NL}[1]: $CREATE_PR_LINK"
          gh pr comment "$PR_NUMBER" --repo rocm/jax --body "$COMMENT_BODY"
2 changes: 1 addition & 1 deletion .github/workflows/upstream-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ on:

jobs:
upstream-dev:
runs-on: ubuntu-20.04-16core
runs-on: ROCM-Ubuntu
permissions:
contents: read
issues: write # for failed-build-issue
Expand Down
1 change: 1 addition & 0 deletions build/rocm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,4 @@ This will generate three wheels in the `dist/` directory:
### Simplified Build Script

For a streamlined process, consider using the `jax/build/rocm/dev_build_rocm.py` script.

Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ ARG ROCM_BUILD_JOB
ARG ROCM_BUILD_NUM

# Install system GCC and C++ libraries.
RUN yum install -y gcc-c++.x86_64
# (charleshofer) This is not ideal, as we should already have GCC and C++ libraries in the
# manylinux base image. However, adding this does fix an issue where Bazel isn't able
# to find them.
RUN --mount=type=cache,target=/var/cache/dnf \
dnf install -y gcc-c++-8.5.0-22.el8_10.x86_64

RUN --mount=type=cache,target=/var/cache/dnf \
--mount=type=bind,source=build/rocm/tools/get_rocm.py,target=get_rocm.py \
Expand All @@ -20,3 +24,6 @@ RUN --mount=type=cache,target=/var/cache/dnf \
RUN mkdir /tmp/llvm-project && wget -qO - https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-18.1.8.tar.gz | tar -xz -C /tmp/llvm-project --strip-components 1 && \
mkdir /tmp/llvm-project/build && cd /tmp/llvm-project/build && cmake -DLLVM_ENABLE_PROJECTS='clang;lld' -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/lib/llvm-18/ ../llvm && \
make -j$(nproc) && make -j$(nproc) install && rm -rf /tmp/llvm-project

# Stop git from erroring out when we don't own the repo
RUN git config --global --add safe.directory '*'
Loading

0 comments on commit 23fcd11

Please sign in to comment.