From 5bbbec0da6100aa7bc493fe0a8c2bde7acc3de8d Mon Sep 17 00:00:00 2001
From: Cemberk
Date: Fri, 8 Nov 2024 17:56:46 -0800
Subject: [PATCH] Create fork-maintenance.yml (#61)

Add the fork maintenance system and add a skip for the multi-GPU issue
---
 .github/workflows/fork-maintenance.yml | 30 ++++++++++++++++++++++++++
 tests/test_modeling_common.py          |  1 +
 2 files changed, 31 insertions(+)
 create mode 100644 .github/workflows/fork-maintenance.yml

diff --git a/.github/workflows/fork-maintenance.yml b/.github/workflows/fork-maintenance.yml
new file mode 100644
index 00000000000..8ec97ffd0f4
--- /dev/null
+++ b/.github/workflows/fork-maintenance.yml
@@ -0,0 +1,30 @@
+name: Run Scheduled Events Docker
+
+permissions:
+  actions: write
+  contents: write
+  issues: write
+  pull-requests: write
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 0 10 * *'
+
+jobs:
+  run-scheduled-events:
+    uses: Cemberk/fork-maintenance-system/.github/workflows/fork-maintenance-action.yml@artifacts
+    with:
+      platform: 'gfx90a'
+      upstream_repo: 'https://github.com/huggingface/transformers'
+      pr_branch_prefix: 'scheduled-merge'
+      requirements_command: 'sudo sed -i \"s/torchaudio//g\" examples/pytorch/_tests_requirements.txt && pip install -r examples/pytorch/_tests_requirements.txt && git restore examples/pytorch/_tests_requirements.txt && pip install --no-cache-dir GPUtil azureml azureml-core tokenizers ninja cerberus sympy sacremoses sacrebleu==1.5.1 sentencepiece scipy scikit-learn urllib3 && pip install huggingface_hub datasets && pip install parameterized && pip install -e .'
+      unit_test_command: cd tests; folders=\$(python3 -c 'import os; tests = os.getcwd(); models = \"models\"; model_tests = os.listdir(os.path.join(tests, models)); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [os.path.join(models, x) for x in model_tests]))); d1.remove(models); d = d2 + d1; print(\" \".join(d[:5]))' ); cd ..; for folder in \${folders[@]}; do pytest tests/\${folder} -v --make-reports=huggingface_unit_tests_\${machine_type}_run_models_gpu_\${folder} -rfEs --continue-on-collection-errors -m \"not not_device_test\" -p no:cacheprovider; done; allstats=\$(find reports -name stats.txt); for stat in \${allstats[@]}; do echo \$stat; cat \$stat; done
+      performance_test_command: 'echo \"python examples/pytorch/language-modeling/run_mlm.py --model_name_or_path bert-base-uncased --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --do_train --do_eval --output_dir /tmp/test-mlm --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --max_steps 500\"'
+      docker_image: 'rocm/pytorch:latest'
+      docker_options: '--device=/dev/kfd --device=/dev/dri --group-add video --shm-size 16G --network=host'
+    secrets:
+      GIT_TOKEN: ${{ secrets.CRED_TOKEN }}
+      schedule_json: ${{ secrets.SCHEDULE_CONFIG }}
+
+

diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index dd84c92d103..d9a0e37dba3 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -2885,6 +2885,7 @@ def test_inputs_embeds_matches_input_ids_with_generate(self):
         )
         self.assertTrue(torch.allclose(out_embeds, out_ids))
 
+    @skipIfRocm
     @require_torch_multi_gpu
     def test_multi_gpu_data_parallel_forward(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
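
For reference, the folder-selection one-liner embedded in unit_test_command above expands to the following Python (the same snippet with the shell escaping removed; it assumes the current working directory is the repository's tests/ directory, as arranged by the surrounding "cd tests"):

import os

# Collect test folders from tests/ itself and from tests/models/.
tests = os.getcwd()  # expected to be the repo's tests/ directory
models = "models"
model_tests = os.listdir(os.path.join(tests, models))

# Top-level test directories (names relative to the cwd, so isdir works).
d1 = sorted(list(filter(os.path.isdir, os.listdir(tests))))
# Per-model test directories, as "models/<name>" relative paths.
d2 = sorted(list(filter(os.path.isdir, [os.path.join(models, x) for x in model_tests])))

d1.remove(models)  # the umbrella "models" dir is already covered by d2
d = d2 + d1        # model folders first, then the remaining top-level folders
print(" ".join(d[:5]))  # as written, the command only emits the first five folders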
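
The @skipIfRocm decorator applied in the second hunk is not defined or imported by this patch, so it is assumed to already exist in the fork's test utilities. A minimal sketch of such a decorator, assuming ROCm builds of PyTorch are detected via torch.version.hip, could look like:

import unittest

import torch

def skipIfRocm(test_fn):
    # Hypothetical sketch; the fork's actual helper may differ.
    # torch.version.hip is a version string on ROCm builds and None otherwise.
    return unittest.skipIf(torch.version.hip is not None, "test skipped on ROCm")(test_fn)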