diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 304a36e3cde..f305dc85948 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -230,17 +230,199 @@ def parse_int_from_env(key, default=None): _run_third_party_device_tests = parse_flag_from_env("RUN_THIRD_PARTY_DEVICE_TESTS", default=False) _test_with_rocm = parse_flag_from_env("TEST_WITH_ROCM", default=False) -def skipIfRocm(func=None, *, msg="test doesn't currently work on the ROCm stack"): + +import platform + +class RocmUtil: + def __init__(self): + pass + + def get_gpu_vendor(self): + """Returns the GPU vendor by checking for NVIDIA or ROCm utilities.""" + cmd = ( + "bash -c 'if [[ -f /usr/bin/nvidia-smi ]] && " + "$(/usr/bin/nvidia-smi > /dev/null 2>&1); then echo \"NVIDIA\"; " + "elif [[ -f /opt/rocm/bin/rocm-smi ]]; then echo \"AMD\"; " + "else echo \"Unable to detect GPU vendor\"; fi || true'" + ) + return subprocess.check_output(cmd, shell=True).decode("utf-8").strip() + + def get_system_gpu_architecture(self): + """ + Returns the GPU architecture string if the vendor is AMD. + For AMD, extracts a line starting with 'gfx' via `/opt/rocm/bin/rocminfo`. + For NVIDIA, returns the GPU name using `nvidia-smi` (informational only). + """ + vendor = self.get_gpu_vendor() + if vendor == "AMD": + cmd = "/opt/rocm/bin/rocminfo | grep -o -m 1 'gfx.*'" + return subprocess.check_output(cmd, shell=True).decode("utf-8").strip() + elif vendor == "NVIDIA": + cmd = ( + "nvidia-smi -L | head -n1 | sed 's/(UUID: .*)//g' | sed 's/GPU 0: //g'" + ) + return subprocess.check_output(cmd, shell=True).decode("utf-8").strip() + else: + raise RuntimeError("Unable to determine GPU architecture due to unknown GPU vendor.") + + def get_rocm_version(self): + """ + Returns the ROCm version as a string by reading the file /opt/rocm/.info/version. + Expected format (example): "6.4.0-15396" + """ + cmd = "cat /opt/rocm/.info/version" + return subprocess.check_output(cmd, shell=True).decode("utf-8").strip() + + def get_current_os(self): + """ + Attempts to determine the current operating system. + On Linux, parses /etc/os-release for the OS ID (e.g., "rhel", "sles", "ubuntu"). + Otherwise, falls back to platform.system(). + """ + if os.name == "posix" and os.path.exists("/etc/os-release"): + try: + with open("/etc/os-release") as f: + for line in f: + if line.startswith("ID="): + # ID value may be quoted. + return line.split("=")[1].strip().strip('"').lower() + except Exception: + # Fallback to platform information + pass + # For non-Linux systems or if /etc/os-release is not available. + return platform.system().lower() + + def is_rocm_skippable(self, arch=None, rocm_version=None, os_name=None): + """ + Determines whether the current system should be considered "skippable" based on ROCm criteria. + + This function returns True **only** if: + 1. The GPU vendor is AMD (i.e. a ROCm system), and + 2. EITHER no specific conditions are provided, + OR at least one of the provided conditions is met. + + Parameters: + arch (str or iterable of str, optional): GPU architecture(s) that should cause skipping. + rocm_version (str or iterable of str, optional): ROCm version(s) (or version prefixes) that should cause skipping. + os_name (str or iterable of str, optional): OS name(s) (e.g., "rhel", "sles", "ubuntu", "windows", "darwin") + for which the test should be skipped. + + Returns: + True if the system is AMD (ROCm) and meets any (or no) specified criteria (i.e. it is "skippable"), + otherwise False. 
+ """ + vendor = self.get_gpu_vendor() + if vendor != "AMD": + # If the GPU vendor is not AMD, it is not a ROCm system and shouldn't be skipped. + return False + + # If no conditions are provided, skip unconditionally on any AMD system. + if arch is None and rocm_version is None and os_name is None: + return True + + # Check each condition; if any match, we mark the system as "skippable". + # Use OR logic. + # Check GPU architecture. + if arch is not None: + arch_list = (arch,) if isinstance(arch, str) else arch + current_gpu_arch = self.get_system_gpu_architecture() + if current_gpu_arch in arch_list: + return True + + # Check ROCm version. + if rocm_version is not None: + ver_list = (rocm_version,) if isinstance(rocm_version, str) else rocm_version + current_ver = self.get_rocm_version() + if any(current_ver.startswith(v) for v in ver_list): + return True + + # Check operating system. + if os_name is not None: + os_list = (os_name,) if isinstance(os_name, str) else os_name + current_os = self.get_current_os() + if current_os in os_list: + return True + + return False + +rocmUtils = RocmUtil() + +def skipIfRocm(func=None, *, msg="test doesn't currently work on the ROCm stack", arch=None, rocm_version=None, os_name=None): + """ + Pytest decorator to skip a test on AMD systems running ROCm, with additional conditions based on + GPU architecture, ROCm version, and/or operating system. + + The decorator uses shell commands to: + - Detect the GPU vendor. + - Extract the GPU architecture for AMD via `/opt/rocm/bin/rocminfo`. + - Read the ROCm version from `/opt/rocm/.info/version`. + + In addition, it can detect the current operating system: + - On Linux, it attempts to parse `/etc/os-release` for the OS "ID" (e.g. "rhel", "sles", "ubuntu"). + - If `/etc/os-release` is not available, it falls back to `platform.system()`. + + Behavior on an AMD (ROCm) system: + - If no additional conditions are provided (i.e. arch, rocm_version, and os_name are all None), + the test is skipped unconditionally. + - If `arch` is provided (as a string or list), the test is skipped if the detected GPU architecture + matches one of the provided values. + - If `rocm_version` is provided (as a string or list), the test is skipped if the ROCm version (from + `/opt/rocm/.info/version`) matches (or begins with) one of the provided strings. + - If `os_name` is provided (as a string or list), the test is skipped if the current OS is among the provided names. + - If more than one condition is provided, the test will be skipped if **any** of those conditions are met. + + On non-AMD systems (e.g. if the GPU vendor is detected as NVIDIA), the test will run normally. + + Parameters: + msg (str): The skip message. + arch (str or iterable of str, optional): GPU architecture(s) for which to skip the test. + rocm_version (str or iterable of str, optional): ROCm version(s) for which to skip the test. + os_name (str or iterable of str, optional): Operating system ID(s) (e.g. "rhel", "sles", "ubuntu") + for which to skip the test. + """ + def dec_fn(fn): reason = f"skipIfRocm: {msg}" @wraps(fn) def wrapper(*args, **kwargs): - if _test_with_rocm: - pytest.skip(reason) - else: - return fn(*args, **kwargs) + vendor = rocmUtils.get_gpu_vendor() + # Only consider the ROCm skip logic for AMD systems. + if vendor == "AMD": + should_skip = False + + # If no specific conditions are provided, skip unconditionally. + if arch is None and rocm_version is None and os_name is None: + should_skip = True + + # Check GPU architecture if provided. 
+ if arch is not None: + arch_list = (arch,) if isinstance(arch, str) else arch + current_gpu_arch = rocmUtils.get_system_gpu_architecture() + if current_gpu_arch in arch_list: + should_skip = True + + # Check the ROCm version if provided. + if rocm_version is not None: + ver_list = (rocm_version,) if isinstance(rocm_version, str) else rocm_version + current_version = rocmUtils.get_rocm_version() + # Using startswith allows matching "6.4.0" even if the full version is "6.4.0-15396" + if any(current_version.startswith(v) for v in ver_list): + should_skip = True + + # Check the operating system if provided. + if os_name is not None: + os_list = (os_name,) if isinstance(os_name, str) else os_name + current_os = rocmUtils.get_current_os() + if current_os in os_list: + should_skip = True + + if should_skip: + pytest.skip(reason) + # For non-AMD systems the test runs normally. + return fn(*args, **kwargs) return wrapper + if func: return dec_fn(func) return dec_fn diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index fcf54547b2d..263bd4c43b1 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -2531,6 +2531,21 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi "return_tensors": "pt", } + @skipIfRocm(arch='gfx1201') + def test_custom_logits_processor(self): + super().test_custom_logits_processor() + pass + + @skipIfRocm(arch='gfx1201') + def test_max_new_tokens_encoder_decoder(self): + super().test_max_new_tokens_encoder_decoder() + pass + + @skipIfRocm(arch='gfx1201') + def test_eos_token_id_int_and_list_beam_search(self): + super().test_eos_token_id_int_and_list_beam_search() + pass + @slow def test_diverse_beam_search(self): # PT-only test: TF doesn't have a diverse beam search implementation @@ -2580,6 +2595,7 @@ def test_max_length_if_input_embeds(self): out_gen_embeds = model.generate(inputs_embeds=inputs_embeds, max_length=max_length) self.assertEqual(out_gen.shape[-1], input_len + out_gen_embeds.shape[-1]) + @skipIfRocm(arch='gfx1201') def test_min_length_if_input_embeds(self): # PT-only test: TF doesn't have StoppingCriteria article = "Today a dragon flew over Paris." 
@@ -2632,6 +2648,7 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa ) # TODO (joao): replace `stop_sequence` in the pipeline by the more recent `generate` functionality + @skipIfRocm(arch='gfx1201') def test_stop_sequence_stopping_criteria(self): # PT-only test: TF doesn't have StoppingCriteria prompt = """Hello I believe in""" @@ -3214,6 +3231,7 @@ def test_logits_processor_not_inplace(self): self.assertListEqual(out.logits[-1].tolist(), out.scores[-1].tolist()) self.assertNotEqual(out_with_temp.logits[-1].tolist(), out_with_temp.scores[-1].tolist()) + @skipIfRocm(arch='gfx1201') def test_eos_token_id_int_and_list_top_k_top_sampling(self): # Has TF equivalent: this test relies on random sampling generation_kwargs = { @@ -3242,6 +3260,7 @@ def test_eos_token_id_int_and_list_top_k_top_sampling(self): generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs) self.assertTrue(expectation == len(generated_tokens[0])) + @skipIfRocm(arch='gfx1201') def test_model_kwarg_encoder_signature_filtering(self): # Has TF equivalent: ample use of framework-specific code bart_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart") @@ -3279,6 +3298,7 @@ def forward(self, input_ids, **kwargs): # FakeEncoder.forward() accepts **kwargs -> no filtering -> type error due to unexpected input "foo" bart_model.generate(input_ids, foo="bar") + @skipIfRocm(arch='gfx1201') def test_default_max_length_warning(self): model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") @@ -3336,6 +3356,7 @@ def test_default_assisted_generation(self): self.assertEqual(config.assistant_confidence_threshold, 0.4) self.assertEqual(config.is_assistant, False) + @skipIfRocm(arch='gfx1201') def test_generated_length_assisted_generation(self): # PT-only test: TF doesn't support assisted decoding yet. model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) @@ -3364,6 +3385,7 @@ def test_generated_length_assisted_generation(self): ) self.assertTrue((input_length + 10) <= out.shape[-1] <= 20) + @skipIfRocm(arch='gfx1201') def test_model_kwarg_assisted_decoding_decoder_only(self): # PT-only test: TF doesn't support assisted decoding yet. model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device) @@ -3398,6 +3420,7 @@ def test_model_kwarg_assisted_decoding_decoder_only(self): ) self.assertListEqual(outputs_assisted.tolist(), outputs_tti.tolist()) + @skipIfRocm(arch='gfx1201') def test_model_kwarg_assisted_decoding_encoder_decoder(self): """ Tests that the following scenario is compatible with assisted generation: @@ -3464,6 +3487,7 @@ def prepare_inputs_for_generation(self, *args, foo=False, encoder_outputs=None, ) self.assertListEqual(outputs_assisted.tolist(), outputs_foo.tolist()) + @skipIfRocm(arch='gfx1201') def test_assisted_decoding_encoder_decoder_shared_encoder(self): """ Tests that the following scenario is compatible with assisted generation: @@ -3542,6 +3566,7 @@ def prepare_inputs_for_generation(self, *args, foo=False, encoder_outputs=None, ) self.assertListEqual(outputs_assisted.tolist(), outputs_foo.tolist()) + @skipIfRocm(arch='gfx1201') def test_assisted_decoding_num_assistant_tokens_heuristic_schedule(self): # This test ensures that the assisted generation num_assistant_tokens 'heuristic' schedule works properly. 
@@ -3788,6 +3813,7 @@ def test_special_tokens_fall_back_to_model_default(self): self.assertTrue(test_bos_id == gen_output[0, 0]) self.assertTrue(generation_config.bos_token_id is None) + @skipIfRocm(arch='gfx1201') def test_speculative_decoding_equals_regular_decoding(self): draft_name = "double7/vicuna-68m" target_name = "Qwen/Qwen2-0.5B-Instruct" @@ -3818,6 +3844,7 @@ def test_speculative_decoding_equals_regular_decoding(self): @pytest.mark.generate @require_torch_multi_gpu + @skipIfRocm(arch='gfx1201') def test_generate_with_static_cache_multi_gpu(self): """ Tests if the static cache has been set correctly and if generate works correctly when we are using multi-gpus. @@ -3853,6 +3880,7 @@ def test_generate_with_static_cache_multi_gpu(self): @pytest.mark.generate @require_torch_multi_gpu + @skipIfRocm(arch='gfx1201') def test_init_static_cache_multi_gpu(self): """ Tests if the static cache has been set correctly when we initialize it manually in a multi-gpu setup. @@ -4034,6 +4062,7 @@ def test_padding_input_contrastive_search_t5(self): self.assertEqual(generated_text_no_padding, generated_text_with_padding) self.assertEqual(generated_text_no_padding, "Ich muss diese Aufgabe vor Ende des Tages beenden.") + @skipIfRocm(arch='gfx1201') def test_prepare_inputs_for_generation_decoder_llm(self): """Tests GenerationMixin.prepare_inputs_for_generation against expected usage with decoder-only llms.""" @@ -4150,6 +4179,7 @@ def test_prepare_inputs_for_generation_encoder_decoder_llm(self): self.assertTrue(model_inputs["encoder_outputs"] == "foo") # See the decoder-only test for more corner cases. The code is the same, so we don't repeat it here. + @skipIfRocm(arch='gfx1201') def test_generate_compile_fullgraph_tiny(self): """ Tests that we can call end-to-end generation with a tiny model (i.e. doesn't crash) @@ -4173,6 +4203,7 @@ def test_generate_compile_fullgraph_tiny(self): gen_out = compiled_generate(**model_inputs, generation_config=generation_config) self.assertTrue(gen_out.shape[1] > model_inputs["input_ids"].shape[1]) # some text was generated + @skipIfRocm(arch='gfx1201') def test_assisted_generation_early_exit(self): """ Tests that assisted generation with early exit works as expected. 
Under the hood, this has complex cache
@@ -4209,6 +4240,7 @@ class TokenHealingTestCase(unittest.TestCase):
             ("empty_prompt", "", ""),
         ]
     )
+    @skipIfRocm(arch='gfx1201')
     def test_prompts(self, name, input, expected):
         model_name_or_path = "distilbert/distilgpt2"
         tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
diff --git a/tests/models/dbrx/test_modeling_dbrx.py b/tests/models/dbrx/test_modeling_dbrx.py
index d38a479ab36..1512c108b22 100644
--- a/tests/models/dbrx/test_modeling_dbrx.py
+++ b/tests/models/dbrx/test_modeling_dbrx.py
@@ -17,7 +17,7 @@ import unittest
 from transformers import DbrxConfig, is_torch_available
-from transformers.testing_utils import require_torch, slow, torch_device
+from transformers.testing_utils import require_torch, slow, torch_device, skipIfRocm
 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -327,6 +327,16 @@ class DbrxModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
     test_headmasking = False
     test_pruning = False
+    @skipIfRocm(arch='gfx1201')
+    def test_generate_with_static_cache(self):
+        super().test_generate_with_static_cache()
+        pass
+
+    @skipIfRocm(arch='gfx1201')
+    def test_generate_from_inputs_embeds_with_static_cache(self):
+        super().test_generate_from_inputs_embeds_with_static_cache()
+        pass
+
     def setUp(self):
         self.model_tester = DbrxModelTester(self)
         self.config_tester = ConfigTester(self, config_class=DbrxConfig, d_model=37)
diff --git a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
index 893132f4337..17c3c224c21 100644
--- a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
+++ b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
@@ -27,6 +27,7 @@
     require_torch_multi_gpu,
     slow,
     torch_device,
+    skipIfRocm,
 )
 from ...generation.test_utils import GenerationTesterMixin
@@ -299,6 +300,7 @@ def test_config(self):
         self.config_tester.run_common_tests()
     @require_torch_multi_gpu
+    @skipIfRocm(arch='gfx1201')
     def test_multi_gpu_data_parallel_forward(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
diff --git a/tests/models/gpt_neox/test_modeling_gpt_neox.py b/tests/models/gpt_neox/test_modeling_gpt_neox.py
index 2c3319f0247..09a5e0b42ce 100644
--- a/tests/models/gpt_neox/test_modeling_gpt_neox.py
+++ b/tests/models/gpt_neox/test_modeling_gpt_neox.py
@@ -19,7 +19,7 @@ from parameterized import parameterized
 from transformers import AutoTokenizer, GPTNeoXConfig, is_torch_available, set_seed
-from transformers.testing_utils import require_torch, slow, torch_device
+from transformers.testing_utils import require_torch, slow, torch_device, skipIfRocm
 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -287,6 +287,11 @@ class GPTNeoXModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
     test_model_parallel = False
     test_head_masking = False
+    @skipIfRocm(arch='gfx1201')
+    def test_generate_with_static_cache(self):
+        super().test_generate_with_static_cache()
+        pass
+
     def setUp(self):
         self.model_tester = GPTNeoXModelTester(self)
         self.config_tester = ConfigTester(self, config_class=GPTNeoXConfig, hidden_size=64, num_attention_heads=8)
diff --git a/tests/models/granite/test_modeling_granite.py b/tests/models/granite/test_modeling_granite.py
index 60eb9649272..6d59a68c289 100644
--- a/tests/models/granite/test_modeling_granite.py
+++
b/tests/models/granite/test_modeling_granite.py @@ -27,6 +27,7 @@ require_torch_gpu, slow, torch_device, + skipIfRocm ) from ...generation.test_utils import GenerationTesterMixin @@ -300,6 +301,16 @@ class GraniteModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi # This is because we are hitting edge cases with the causal_mask buffer model_split_percents = [0.5, 0.7, 0.8] + @skipIfRocm(arch='gfx1201') + def test_generate_from_inputs_embeds_with_static_cache(self): + super().test_generate_from_inputs_embeds_with_static_cache() + pass + + @skipIfRocm(arch='gfx1201') + def test_generate_with_static_cache(self): + super().test_generate_with_static_cache() + pass + def setUp(self): self.model_tester = GraniteModelTester(self) self.config_tester = ConfigTester(self, config_class=GraniteConfig, hidden_size=37) diff --git a/tests/models/granitemoe/test_modeling_granitemoe.py b/tests/models/granitemoe/test_modeling_granitemoe.py index 97af65667ed..2960575a78e 100644 --- a/tests/models/granitemoe/test_modeling_granitemoe.py +++ b/tests/models/granitemoe/test_modeling_granitemoe.py @@ -27,6 +27,7 @@ require_torch_gpu, slow, torch_device, + skipIfRocm ) from ...generation.test_utils import GenerationTesterMixin @@ -299,6 +300,16 @@ class GraniteMoeModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test # This is because we are hitting edge cases with the causal_mask buffer model_split_percents = [0.5, 0.7, 0.8] + @skipIfRocm(arch='gfx1201') + def test_generate_with_static_cache(self): + super().test_generate_with_static_cache() + pass + + @skipIfRocm(arch='gfx1201') + def test_generate_from_inputs_embeds_with_static_cache(self): + super().test_generate_from_inputs_embeds_with_static_cache() + pass + def setUp(self): self.model_tester = GraniteMoeModelTester(self) self.config_tester = ConfigTester(self, config_class=GraniteMoeConfig, hidden_size=37) diff --git a/tests/models/idefics/test_modeling_idefics.py b/tests/models/idefics/test_modeling_idefics.py index 12004cc3c8a..415fee502ed 100644 --- a/tests/models/idefics/test_modeling_idefics.py +++ b/tests/models/idefics/test_modeling_idefics.py @@ -30,6 +30,7 @@ require_vision, slow, torch_device, + skipIfRocm, ) from transformers.utils import cached_property @@ -599,6 +600,17 @@ def test_sdpa_can_dispatch_non_composite_models(self): class IdeficsForVisionText2TextTest(IdeficsModelTest, GenerationTesterMixin, unittest.TestCase): all_model_classes = (IdeficsForVisionText2Text,) if is_torch_available() else () all_generative_model_classes = (IdeficsForVisionText2Text,) if is_torch_available() else () + + + @skipIfRocm(arch='gfx1201') + def test_generate_from_inputs_embeds_with_static_cache(self): + super().test_generate_from_inputs_embeds_with_static_cache() + pass + + @skipIfRocm(arch='gfx1201') + def test_generate_with_static_cache(self): + super().test_generate_with_static_cache() + pass def setUp(self): self.model_tester = IdeficsModelTester( diff --git a/tests/models/moshi/test_modeling_moshi.py b/tests/models/moshi/test_modeling_moshi.py index 7d4b855c10d..5c9ce34be74 100644 --- a/tests/models/moshi/test_modeling_moshi.py +++ b/tests/models/moshi/test_modeling_moshi.py @@ -39,6 +39,8 @@ require_torch_sdpa, slow, torch_device, + skipIfRocm, + rocmUtils ) from transformers.utils import cached_property @@ -530,6 +532,117 @@ class MoshiTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): test_resize_embeddings = False test_torchscript = False + def test_generate_without_input_ids(self): + if 
rocmUtils.is_rocm_skippable(arch='gfx1201'): + torch._dynamo.config.capture_dynamic_output_shape_ops = True + super().test_generate_without_input_ids() + pass + + @skipIfRocm(arch='gfx1201') + def test_constrained_beam_search_generate_dict_output(self): + super().test_constrained_beam_search_generate_dict_output() + pass + + @skipIfRocm(arch='gfx1201') + def test_constrained_beam_search_generate(self): + super().test_constrained_beam_search_generate() + pass + + @skipIfRocm(arch='gfx1201') + def test_beam_search_low_memory(self): + super().test_beam_search_low_memory() + pass + + @skipIfRocm(arch='gfx1201') + def test_beam_search_generate_dict_outputs_use_cache(self): + super().test_beam_search_generate_dict_outputs_use_cache() + pass + + @skipIfRocm(arch='gfx1201') + def test_beam_search_generate_dict_output(self): + super().test_beam_search_generate_dict_output() + pass + + @skipIfRocm(arch='gfx1201') + def test_beam_search_generate(self): + super().test_beam_search_generate() + pass + + @skipIfRocm(arch='gfx1201') + def test_beam_sample_generate_dict_output(self): + super().test_beam_sample_generate_dict_output() + pass + + @skipIfRocm(arch='gfx1201') + def test_beam_sample_generate(self): + super().test_beam_sample_generate() + pass + + @skipIfRocm(arch='gfx1201') + def test_dola_decoding_sample(self): + super().test_dola_decoding_sample() + pass + + @skipIfRocm(arch='gfx1201') + def test_generate_from_inputs_embeds_0_greedy(self): + super().test_generate_from_inputs_embeds_0_greedy() + pass + + @skipIfRocm(arch='gfx1201') + def test_generate_from_inputs_embeds_1_beam_search(self): + super().test_generate_from_inputs_embeds_1_beam_search() + pass + + @skipIfRocm(arch='gfx1201') + def test_greedy_generate(self): + super().test_greedy_generate() + pass + + @skipIfRocm(arch='gfx1201') + def test_greedy_generate_dict_outputs(self): + super().test_greedy_generate_dict_outputs() + pass + + @skipIfRocm(arch='gfx1201') + def test_greedy_generate_dict_outputs_use_cache(self): + super().test_greedy_generate_dict_outputs_use_cache() + pass + + @skipIfRocm(arch='gfx1201') + def test_group_beam_search_generate(self): + super().test_group_beam_search_generate() + pass + + @skipIfRocm(arch='gfx1201') + def test_group_beam_search_generate_dict_output(self): + super().test_group_beam_search_generate_dict_output() + pass + + @skipIfRocm(arch='gfx1201') + def test_new_cache_format_0(self): + super().test_new_cache_format_0() + pass + + @skipIfRocm(arch='gfx1201') + def test_new_cache_format_1(self): + super().test_new_cache_format_1() + pass + + @skipIfRocm(arch='gfx1201') + def test_new_cache_format_2(self): + super().test_new_cache_format_2() + pass + + @skipIfRocm(arch='gfx1201') + def test_sample_generate(self): + super().test_sample_generate() + pass + + @skipIfRocm(arch='gfx1201') + def test_sample_generate_dict_output(self): + super().test_sample_generate_dict_output() + pass + def setUp(self): self.model_tester = MoshiTester(self) @@ -814,6 +927,7 @@ def test_eager_matches_sdpa_generate(self): self.assertTrue(torch.allclose(res_eager.audio_sequences, res_sdpa.audio_sequences)) @pytest.mark.generate + @skipIfRocm(arch='gfx1201') def test_generate_without_input_ids(self): config, _, _, _ = self._get_input_ids_and_config() @@ -838,6 +952,7 @@ def test_training_gradient_checkpointing_use_reentrant(self): def test_training_gradient_checkpointing_use_reentrant_false(self): pass + @skipIfRocm(arch='gfx1201') def test_generate_from_input_values(self): for model_class in self.all_generative_model_classes: 
config, input_ids, _, _ = self._get_input_ids_and_config() @@ -867,6 +982,7 @@ def test_generate_from_input_values(self): torch.allclose(outputs_from_audio_codes.audio_sequences, outputs_from_audio_values.audio_sequences) ) + @skipIfRocm(arch='gfx1201') def test_generate_depth_decoder_kwargs(self): # test sampling and beam search for model_class in self.all_generative_model_classes: @@ -880,6 +996,7 @@ def test_generate_depth_decoder_kwargs(self): input_ids, max_new_tokens=5, **input_dict, depth_decoder_do_sample=True, depth_decoder_num_beams=5 ) + @skipIfRocm(arch='gfx1201') def test_generate_from_unconditional(self): # test sampling and beam search for model_class in self.all_generative_model_classes: diff --git a/tests/models/olmoe/test_modeling_olmoe.py b/tests/models/olmoe/test_modeling_olmoe.py index 9efadb06eb4..ccf12c22fa5 100644 --- a/tests/models/olmoe/test_modeling_olmoe.py +++ b/tests/models/olmoe/test_modeling_olmoe.py @@ -26,6 +26,8 @@ require_torch, slow, torch_device, + skipIfRocm, + rocmUtils, ) from ...generation.test_utils import GenerationTesterMixin @@ -303,6 +305,18 @@ class OlmoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi # This is because we are hitting edge cases with the causal_mask buffer model_split_percents = [0.5, 0.7, 0.8] + def test_generate_with_static_cache(self): + if rocmUtils.is_rocm_skippable(arch='gfx1201'): + torch._dynamo.config.capture_dynamic_output_shape_ops = True + super().test_generate_with_static_cache() + pass + + def test_generate_from_inputs_embeds_with_static_cache(self): + if rocmUtils.is_rocm_skippable(arch='gfx1201'): + torch._dynamo.config.capture_dynamic_output_shape_ops = True + super().test_generate_from_inputs_embeds_with_static_cache() + pass + def setUp(self): self.model_tester = OlmoeModelTester(self) self.config_tester = ConfigTester(self, config_class=OlmoeConfig, hidden_size=37) diff --git a/tests/models/paligemma/test_modeling_paligemma.py b/tests/models/paligemma/test_modeling_paligemma.py index ce44436a20a..6de8660635d 100644 --- a/tests/models/paligemma/test_modeling_paligemma.py +++ b/tests/models/paligemma/test_modeling_paligemma.py @@ -31,6 +31,7 @@ require_torch, slow, torch_device, + skipIfRocm ) from ...generation.test_utils import GenerationTesterMixin @@ -188,6 +189,11 @@ class PaliGemmaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTes test_head_masking = False _is_composite = True + @skipIfRocm(arch='gfx1201') + def test_generate_with_static_cache(self): + super().test_generate_with_static_cache() + pass + def setUp(self): self.model_tester = PaliGemmaVisionText2TextModelTester(self) self.config_tester = ConfigTester(self, config_class=PaliGemmaConfig, has_text_modality=False) diff --git a/tests/models/roberta/test_modeling_roberta.py b/tests/models/roberta/test_modeling_roberta.py index 1c128513b17..73c4009dc97 100644 --- a/tests/models/roberta/test_modeling_roberta.py +++ b/tests/models/roberta/test_modeling_roberta.py @@ -17,7 +17,7 @@ import unittest from transformers import AutoTokenizer, RobertaConfig, is_torch_available -from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device +from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device, skipIfRocm from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -397,6 +397,21 @@ class RobertaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi fx_compatible = True model_split_percents = [0.5, 
0.8, 0.9] + @skipIfRocm(arch='gfx1201') + def test_cpu_offload(self): + super().test_cpu_offload() + pass + + @skipIfRocm(arch='gfx1201') + def test_disk_offload_bin(self): + super().test_disk_offload_bin() + pass + + @skipIfRocm(arch='gfx1201') + def test_disk_offload_safetensors(self): + super().test_disk_offload_safetensors() + pass + def setUp(self): self.model_tester = RobertaModelTester(self) self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37) diff --git a/tests/models/stablelm/test_modeling_stablelm.py b/tests/models/stablelm/test_modeling_stablelm.py index 91044a4eb75..52aeb434c79 100644 --- a/tests/models/stablelm/test_modeling_stablelm.py +++ b/tests/models/stablelm/test_modeling_stablelm.py @@ -26,6 +26,7 @@ require_torch, slow, torch_device, + skipIfRocm ) from ...generation.test_utils import GenerationTesterMixin @@ -307,6 +308,11 @@ class StableLmModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM test_headmasking = False test_pruning = False + @skipIfRocm(arch='gfx1201') + def test_generate_with_static_cache(self): + super().test_generate_with_static_cache() + pass + def setUp(self): self.model_tester = StableLmModelTester(self) self.config_tester = ConfigTester(self, config_class=StableLmConfig, hidden_size=37) diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index e8cd8febca0..c819fca01bc 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -47,6 +47,7 @@ require_torchaudio, slow, torch_device, + skipIfRocm, ) from .test_pipelines_common import ANY @@ -237,6 +238,7 @@ def test_whisper_fp16(self): speech_recognizer(waveform) @require_torch + @skipIfRocm(arch='gfx1201') def test_small_model_pt_seq2seq(self): speech_recognizer = pipeline( model="hf-internal-testing/tiny-random-speech-encoder-decoder", diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 1fec4be3d95..2b658756cf4 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -56,6 +56,7 @@ require_torch_or_tf, slow, torch_device, + skipIfRocm, ) from transformers.utils import direct_transformers_import, is_tf_available, is_torch_available from transformers.utils import logging as transformers_logging @@ -864,6 +865,7 @@ def test_dynamic_pipeline(self): ) @require_torch_or_tf + @skipIfRocm def test_cached_pipeline_has_minimum_calls_to_head(self): # Make sure we have cached the pipeline. _ = pipeline("text-classification", model="hf-internal-testing/tiny-random-bert") @@ -897,6 +899,7 @@ def new_forward(*args, **kwargs): self.assertEqual(self.COUNT, 1) @require_torch + @skipIfRocm(arch='gfx1201') def test_custom_code_with_string_tokenizer(self): # This test checks for an edge case - tokenizer loading used to fail when using a custom code model # with a separate tokenizer that was passed as a repo name rather than a tokenizer object. 
diff --git a/tests/pipelines/test_pipelines_image_to_text.py b/tests/pipelines/test_pipelines_image_to_text.py index 0996b399b28..d4dc7824bda 100644 --- a/tests/pipelines/test_pipelines_image_to_text.py +++ b/tests/pipelines/test_pipelines_image_to_text.py @@ -26,6 +26,7 @@ require_torch, require_vision, slow, + skipIfRocm, ) from .test_pipelines_common import ANY @@ -122,6 +123,7 @@ def test_small_model_tf(self): compare_pipeline_output_to_hub_spec(single_output, ImageToTextOutput) @require_torch + @skipIfRocm(arch='gfx1201') def test_small_model_pt(self): pipe = pipeline("image-to-text", model="hf-internal-testing/tiny-random-vit-gpt2") image = "./tests/fixtures/tests_samples/COCO/000000039769.png" diff --git a/tests/pipelines/test_pipelines_summarization.py b/tests/pipelines/test_pipelines_summarization.py index 613b9dca8e1..42f26e4d9d1 100644 --- a/tests/pipelines/test_pipelines_summarization.py +++ b/tests/pipelines/test_pipelines_summarization.py @@ -21,7 +21,7 @@ TFPreTrainedModel, pipeline, ) -from transformers.testing_utils import is_pipeline_test, require_tf, require_torch, slow, torch_device +from transformers.testing_utils import is_pipeline_test, require_tf, require_torch, slow, torch_device, skipIfRocm from transformers.tokenization_utils import TruncationStrategy from .test_pipelines_common import ANY @@ -91,6 +91,7 @@ def run_pipeline_test(self, summarizer, _): outputs = summarizer("This " * 1000, truncation=TruncationStrategy.ONLY_FIRST) @require_torch + @skipIfRocm(arch='gfx1201') def test_small_model_pt(self): summarizer = pipeline(task="summarization", model="sshleifer/tiny-mbart", framework="pt") outputs = summarizer("This is a small test") diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py index 51f3cae5e31..92c5229f585 100644 --- a/tests/pipelines/test_pipelines_text_generation.py +++ b/tests/pipelines/test_pipelines_text_generation.py @@ -31,6 +31,7 @@ require_torch_gpu, require_torch_or_tf, torch_device, + skipIfRocm, ) from .test_pipelines_common import ANY @@ -43,6 +44,7 @@ class TextGenerationPipelineTests(unittest.TestCase): tf_model_mapping = TF_MODEL_FOR_CAUSAL_LM_MAPPING @require_torch + @skipIfRocm(arch='gfx1201') def test_small_model_pt(self): text_generator = pipeline(task="text-generation", model="sshleifer/tiny-ctrl", framework="pt") # Using `do_sample=False` to force deterministic output @@ -396,6 +398,7 @@ def get_test_pipeline( ) return text_generator, ["This is a test", "Another test"] + @skipIfRocm(arch='gfx1201') def test_stop_sequence_stopping_criteria(self): prompt = """Hello I believe in""" text_generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-gpt2") @@ -510,6 +513,7 @@ def run_pipeline_test(self, text_generator, _): @require_torch @require_accelerate @require_torch_gpu + @skipIfRocm(arch='gfx1201') def test_small_model_pt_bloom_accelerate(self): import torch diff --git a/tests/pipelines/test_pipelines_video_classification.py b/tests/pipelines/test_pipelines_video_classification.py index f1ed97ac13d..e5c6596c8cd 100644 --- a/tests/pipelines/test_pipelines_video_classification.py +++ b/tests/pipelines/test_pipelines_video_classification.py @@ -27,6 +27,7 @@ require_torch, require_torch_or_tf, require_vision, + skipIfRocm, ) from .test_pipelines_common import ANY @@ -81,6 +82,7 @@ def run_pipeline_test(self, video_classifier, examples): compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement) @require_torch + 
@skipIfRocm(arch='gfx1201') def test_small_model_pt(self): small_model = "hf-internal-testing/tiny-random-VideoMAEForVideoClassification" small_feature_extractor = VideoMAEFeatureExtractor( diff --git a/tests/pipelines/test_pipelines_zero_shot_image_classification.py b/tests/pipelines/test_pipelines_zero_shot_image_classification.py index bbeaeff3c17..e1788fb7e85 100644 --- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py +++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py @@ -26,6 +26,7 @@ require_torch, require_vision, slow, + rocmUtils ) from .test_pipelines_common import ANY @@ -131,7 +132,11 @@ def test_small_model_pt(self, torch_dtype="float32"): ) for single_output in output: - compare_pipeline_output_to_hub_spec(single_output, ZeroShotImageClassificationOutputElement) + if rocmUtils.is_rocm_skippable(arch='gfx1201'): + for sub_output in single_output: + compare_pipeline_output_to_hub_spec(sub_output, ZeroShotImageClassificationOutputElement) + else: + compare_pipeline_output_to_hub_spec(single_output, ZeroShotImageClassificationOutputElement) @require_torch def test_small_model_pt_fp16(self): diff --git a/tests/tp/test_tp.py b/tests/tp/test_tp.py index 2139a648867..0c6d6976109 100644 --- a/tests/tp/test_tp.py +++ b/tests/tp/test_tp.py @@ -14,6 +14,7 @@ import os +from transformers.testing_utils import skipIfRocm from transformers import is_torch_available from transformers.models.llama.configuration_llama import LlamaConfig from transformers.models.llama.modeling_llama import LlamaModel @@ -31,6 +32,7 @@ class TestTensorParallel(TestCasePlus): @require_torch_multi_gpu + @skipIfRocm(arch='gfx1201') def test_tp(self): distributed_args = f"""--nproc_per_node={torch.cuda.device_count()} --master_port={get_torch_dist_unique_port()} diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index cbe692d64e8..f6c13c51d40 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -2589,6 +2589,7 @@ def test_run_seq2seq_double_train_wrap_once(self): self.assertIs(model_wrapped_before, model_wrapped_after, "should be not wrapped twice") @require_torch_up_to_2_accelerators + @skipIfRocm(arch='gfx1201') def test_can_resume_training(self): # This test will fail for more than 2 GPUs since the batch size will get bigger and with the number of # save_steps, the checkpoint will resume training at epoch 2 or more (so the data seen by the model @@ -2855,6 +2856,7 @@ def test_training_with_resume_from_checkpoint_false(self): trainer.train(resume_from_checkpoint=False) @require_torch_up_to_2_accelerators + @skipIfRocm(arch='gfx1201') def test_resume_training_with_shard_checkpoint(self): # This test will fail for more than 2 GPUs since the batch size will get bigger and with the number of # save_steps, the checkpoint will resume training at epoch 2 or more (so the data seen by the model @@ -2881,6 +2883,7 @@ def test_resume_training_with_shard_checkpoint(self): @require_safetensors @require_torch_up_to_2_accelerators + @skipIfRocm(arch='gfx1201') def test_resume_training_with_safe_checkpoint(self): # This test will fail for more than 2 GPUs since the batch size will get bigger and with the number of # save_steps, the checkpoint will resume training at epoch 2 or more (so the data seen by the model @@ -2916,6 +2919,7 @@ def test_resume_training_with_safe_checkpoint(self): self.check_trainer_state_are_the_same(state, state1) @require_torch_up_to_2_accelerators + @skipIfRocm(arch='gfx1201') def 
test_resume_training_with_gradient_accumulation(self): # This test will fail for more than 2 GPUs since the batch size will get bigger and with the number of # save_steps, the checkpoint will resume training at epoch 2 or more (so the data seen by the model @@ -2954,6 +2958,7 @@ def test_resume_training_with_gradient_accumulation(self): self.check_trainer_state_are_the_same(state, state1) @require_torch_up_to_2_accelerators + @skipIfRocm(arch='gfx1201') def test_resume_training_with_frozen_params(self): # This test will fail for more than 2 GPUs since the batch size will get bigger and with the number of # save_steps, the checkpoint will resume training at epoch 2 or more (so the data seen by the model diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py index 0d1e6645f9a..6bfc867d39a 100644 --- a/tests/trainer/test_trainer_callback.py +++ b/tests/trainer/test_trainer_callback.py @@ -31,7 +31,7 @@ TrainingArguments, is_torch_available, ) -from transformers.testing_utils import require_torch +from transformers.testing_utils import require_torch, skipIfRocm from transformers.trainer_callback import ExportableState @@ -217,6 +217,7 @@ def test_add_remove_callback(self): expected_callbacks.insert(0, DefaultFlowCallback) self.check_callbacks_equality(trainer.callback_handler.callbacks, expected_callbacks) + @skipIfRocm(arch='gfx1201') def test_event_flow(self): import warnings
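Usage note (illustrative sketch, not part of the patch): the snippet below shows how the new `skipIfRocm` decorator and the `rocmUtils.is_rocm_skippable` helper added to `src/transformers/testing_utils.py` are intended to be used from a test module. The test class, test names, and bodies are hypothetical placeholders; the skip conditions follow the OR semantics documented in the decorator's docstring.

# Hypothetical example of the new ROCm helpers; the class and test names below
# are placeholders and do not exist in the repository.
import unittest

import torch

from transformers.testing_utils import rocmUtils, skipIfRocm


class ExampleRocmSkipUsage(unittest.TestCase):
    @skipIfRocm  # bare form: skip on any AMD/ROCm system
    def test_skipped_on_all_rocm(self):
        ...

    @skipIfRocm(arch='gfx1201', msg="known failure on gfx1201")
    def test_skipped_on_gfx1201_only(self):
        # Runs on NVIDIA and on other AMD architectures; skipped only when
        # rocminfo reports a gfx1201 GPU.
        ...

    @skipIfRocm(rocm_version='6.4.0', os_name=['rhel', 'sles'])
    def test_skipped_on_rocm_640_or_enterprise_linux(self):
        # OR logic: skipped if the version in /opt/rocm/.info/version starts
        # with "6.4.0", or if the OS ID in /etc/os-release is rhel or sles.
        ...

    def test_with_conditional_workaround(self):
        # Alternative to skipping: apply a workaround only on the affected
        # hardware, mirroring the Moshi and OLMoE changes in this patch.
        if rocmUtils.is_rocm_skippable(arch='gfx1201'):
            torch._dynamo.config.capture_dynamic_output_shape_ops = True
        ...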