diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index 5dd010ef66d..86ef67a46d6 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -14,6 +14,9 @@ on: - ".github/**" - "templates/**" - "utils/**" + pull_request: + types: [opened, reopened] + branches: ["main"] jobs: run_amd_ci: diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index a55378c4caa..67ad9c9a4c8 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -13,7 +13,10 @@ on: - "tests/**" - ".github/**" - "templates/**" - - "utils/**" + - "utils/**" + pull_request: + types: [opened, reopened] + branches: ["main"] jobs: run_amd_ci: diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 87516982776..313f3b85a63 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -17,25 +17,12 @@ env: RUN_PT_TF_CROSS_TESTS: 1 jobs: - check_runner_status: - name: Check Runner Status - runs-on: ubuntu-latest - steps: - - name: Checkout transformers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: Check Runner Status - run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - check_runners: name: Check Runners - needs: check_runner_status strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: rocm container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -54,7 +41,7 @@ jobs: strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: rocm container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -62,6 +49,13 @@ jobs: matrix: ${{ steps.set-matrix.outputs.matrix }} test_map: ${{ steps.set-matrix.outputs.test_map }} steps: + - name: Remove transformers repository (installed during docker image build) + working-directory: / + shell: bash + run: | + rm -r transformers + git clone https://github.com/ROCmSoftwarePlatform/transformers.git + # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) - name: Prepare custom environment variables @@ -152,11 +146,23 @@ jobs: matrix: folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: rocm container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: + + - name: Remove transformers repository (installed during docker image build) + working-directory: / + shell: bash + run: | + rm -r transformers + git clone https://github.com/ROCmSoftwarePlatform/transformers.git + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . + # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) - name: Prepare custom environment variables @@ -189,10 +195,6 @@ jobs: git checkout ${{ env.CI_SHA }} echo "log = $(git log -n 1)" - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - name: Echo folder ${{ matrix.folders }} shell: bash # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to @@ -244,19 +246,15 @@ jobs: runs-on: ubuntu-latest if: always() needs: [ - check_runner_status, check_runners, setup_gpu, - run_tests_amdgpu, -# run_tests_torch_cuda_extensions_single_gpu, -# run_tests_torch_cuda_extensions_multi_gpu + run_tests_amdgpu ] steps: - name: Preliminary job status shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - echo "Runner availability: ${{ needs.check_runner_status.result }}" echo "Setup status: ${{ needs.setup_gpu.result }}" echo "Runner status: ${{ needs.check_runners.result }}" @@ -297,28 +295,3 @@ jobs: echo "updated branch = $(git branch --show-current)" git checkout ${{ env.CI_SHA }} echo "log = $(git log -n 1)" - - - uses: actions/download-artifact@v3 - - name: Send message to Slack - env: - CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} - CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} - CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} - CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} - CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} - CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} - ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }} - CI_TITLE_PUSH: ${{ github.event.head_commit.message }} - CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }} - CI_SHA: ${{ env.CI_SHA }} - RUNNER_STATUS: ${{ needs.check_runner_status.result }} - RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} - SETUP_STATUS: ${{ needs.setup_gpu.result }} - - # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change - # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. - run: | - pip install slack_sdk - pip show slack_sdk - python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"