diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h index d1acba0f26c78f..bd22c536c56fca 100644 --- a/bolt/include/bolt/Core/DIEBuilder.h +++ b/bolt/include/bolt/Core/DIEBuilder.h @@ -162,7 +162,7 @@ class DIEBuilder { /// Clone an attribute in reference format. void cloneDieOffsetReferenceAttribute( - DIE &Die, const DWARFUnit &U, const DWARFDie &InputDIE, + DIE &Die, DWARFUnit &U, const DWARFDie &InputDIE, const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec, uint64_t Ref); /// Clone an attribute in block format. diff --git a/bolt/include/bolt/Core/DebugNames.h b/bolt/include/bolt/Core/DebugNames.h index 0e61a0e4f9d9f0..cc4e13a481b2d6 100644 --- a/bolt/include/bolt/Core/DebugNames.h +++ b/bolt/include/bolt/Core/DebugNames.h @@ -72,8 +72,8 @@ class DWARF5AcceleratorTable { return std::move(FullTableBuffer); } /// Adds a DIE that is referenced across CUs. - void addCrossCUDie(const DIE *Die) { - CrossCUDies.insert({Die->getOffset(), Die}); + void addCrossCUDie(DWARFUnit *Unit, const DIE *Die) { + CrossCUDies.insert({Die->getOffset(), {Unit, Die}}); } /// Returns true if the DIE can generate an entry for a cross cu reference. /// This only checks TAGs of a DIE because when this is invoked DIE might not @@ -145,7 +145,7 @@ class DWARF5AcceleratorTable { llvm::DenseMap CUOffsetsToPatch; // Contains a map of Entry ID to Entry relative offset. llvm::DenseMap EntryRelativeOffsets; - llvm::DenseMap CrossCUDies; + llvm::DenseMap> CrossCUDies; /// Adds Unit to either CUList, LocalTUList or ForeignTUList. /// Input Unit being processed, and DWO ID if Unit is being processed comes /// from a DWO section. @@ -191,6 +191,29 @@ class DWARF5AcceleratorTable { void emitData(); /// Emit augmentation string. void emitAugmentationString() const; + /// Creates a new entry for a given DIE. 
+ std::optional + addEntry(DWARFUnit &DU, const DIE &CurrDie, + const std::optional &DWOID, + const std::optional &Parent, + const std::optional &Name, + const uint32_t NumberParentsInChain); + /// Returns UnitID for a given DWARFUnit. + uint32_t getUnitID(const DWARFUnit &Unit, + const std::optional &DWOID, bool &IsTU); + std::optional getName(DWARFUnit &DU, + const std::optional &DWOID, + const std::string &NameToUse, + DIEValue ValName); + /// Processes a DIE with references to other DIEs for DW_AT_name and + /// DW_AT_linkage_name resolution. + /// If DW_AT_name exists method creates a new entry for this DIE and returns + /// it. + std::optional processReferencedDie( + DWARFUnit &Unit, const DIE &Die, const std::optional &DWOID, + const std::optional &Parent, + const std::string &NameToUse, const uint32_t NumberParentsInChain, + const dwarf::Attribute &Attr); }; } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp index 414912ea1c2076..80ad583e079d49 100644 --- a/bolt/lib/Core/DIEBuilder.cpp +++ b/bolt/lib/Core/DIEBuilder.cpp @@ -622,7 +622,7 @@ DWARFDie DIEBuilder::resolveDIEReference( } void DIEBuilder::cloneDieOffsetReferenceAttribute( - DIE &Die, const DWARFUnit &U, const DWARFDie &InputDIE, + DIE &Die, DWARFUnit &U, const DWARFDie &InputDIE, const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec, uint64_t Ref) { DIE *NewRefDie = nullptr; DWARFUnit *RefUnit = nullptr; @@ -654,7 +654,7 @@ void DIEBuilder::cloneDieOffsetReferenceAttribute( // Adding referenced DIE to DebugNames to be used when entries are created // that contain cross cu references. if (DebugNamesTable.canGenerateEntryWithCrossCUReference(U, Die, AttrSpec)) - DebugNamesTable.addCrossCUDie(DieInfo.Die); + DebugNamesTable.addCrossCUDie(&U, DieInfo.Die); // no matter forward reference or backward reference, we are supposed // to calculate them in `finish` due to the possible modification of // the DIE. 
diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp index 280c7c505eeda1..f33ca83023a0b0 100644 --- a/bolt/lib/Core/DebugNames.cpp +++ b/bolt/lib/Core/DebugNames.cpp @@ -222,134 +222,113 @@ static uint64_t getEntryID(const BOLTDWARF5AccelTableData &Entry) { return reinterpret_cast(&Entry); } -std::optional -DWARF5AcceleratorTable::addAccelTableEntry( - DWARFUnit &Unit, const DIE &Die, const std::optional &DWOID, - const uint32_t NumberParentsInChain, - std::optional &Parent) { - if (Unit.getVersion() < 5 || !NeedToCreate) - return std::nullopt; - std::string NameToUse = ""; - - auto getUnitID = [&](const DWARFUnit &Unit, bool &IsTU, - uint32_t &DieTag) -> uint32_t { - IsTU = Unit.isTypeUnit(); - DieTag = Die.getTag(); - if (IsTU) { - if (DWOID) { - const uint64_t TUHash = cast(&Unit)->getTypeHash(); - auto Iter = TUHashToIndexMap.find(TUHash); - assert(Iter != TUHashToIndexMap.end() && - "Could not find TU hash in map"); - return Iter->second; - } - return LocalTUList.size() - 1; +uint32_t DWARF5AcceleratorTable::getUnitID(const DWARFUnit &Unit, + const std::optional &DWOID, + bool &IsTU) { + IsTU = Unit.isTypeUnit(); + if (IsTU) { + if (DWOID) { + const uint64_t TUHash = cast(&Unit)->getTypeHash(); + auto Iter = TUHashToIndexMap.find(TUHash); + assert(Iter != TUHashToIndexMap.end() && "Could not find TU hash in map"); + return Iter->second; } - return CUList.size() - 1; - }; + return LocalTUList.size() - 1; + } + return CUList.size() - 1; +} - if (!canProcess(Unit, Die, NameToUse, false)) +std::optional DWARF5AcceleratorTable::getName( + DWARFUnit &Unit, const std::optional &DWOID, + const std::string &NameToUse, DIEValue ValName) { + if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) && + NameToUse.empty()) return std::nullopt; - - // Addes a Unit to either CU, LocalTU or ForeignTU list the first time we - // encounter it. - // Invoking it here so that we don't add Units that don't have any entries. 
- if (&Unit != CurrentUnit) { - CurrentUnit = &Unit; - addUnit(Unit, DWOID); + std::string Name = ""; + uint64_t NameIndexOffset = 0; + if (NameToUse.empty()) { + NameIndexOffset = ValName.getDIEInteger().getValue(); + if (ValName.getForm() != dwarf::DW_FORM_strp) + NameIndexOffset = getNameOffset(BC, Unit, NameIndexOffset); + // Counts on strings end with '\0'. + Name = std::string(&StrSection.data()[NameIndexOffset]); + } else { + Name = NameToUse; } - - auto getName = [&](DIEValue ValName) -> std::optional { - if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) && - NameToUse.empty()) - return std::nullopt; - std::string Name = ""; - uint64_t NameIndexOffset = 0; - if (NameToUse.empty()) { - NameIndexOffset = ValName.getDIEInteger().getValue(); - if (ValName.getForm() != dwarf::DW_FORM_strp) - NameIndexOffset = getNameOffset(BC, Unit, NameIndexOffset); - // Counts on strings end with '\0'. - Name = std::string(&StrSection.data()[NameIndexOffset]); - } else { - Name = NameToUse; - } - auto &It = Entries[Name]; - if (It.Values.empty()) { - if (DWOID && NameToUse.empty()) { - // For DWO Unit the offset is in the .debug_str.dwo section. - // Need to find offset for the name in the .debug_str section. - llvm::hash_code Hash = llvm::hash_value(llvm::StringRef(Name)); - auto ItCache = StrCacheToOffsetMap.find(Hash); - if (ItCache == StrCacheToOffsetMap.end()) - NameIndexOffset = MainBinaryStrWriter.addString(Name); - else - NameIndexOffset = ItCache->second; - } - if (!NameToUse.empty()) + auto &It = Entries[Name]; + if (It.Values.empty()) { + if (DWOID && NameToUse.empty()) { + // For DWO Unit the offset is in the .debug_str.dwo section. + // Need to find offset for the name in the .debug_str section. 
+ llvm::hash_code Hash = llvm::hash_value(llvm::StringRef(Name)); + auto ItCache = StrCacheToOffsetMap.find(Hash); + if (ItCache == StrCacheToOffsetMap.end()) NameIndexOffset = MainBinaryStrWriter.addString(Name); - It.StrOffset = NameIndexOffset; - // This the same hash function used in DWARF5AccelTableData. - It.HashValue = caseFoldingDjbHash(Name); + else + NameIndexOffset = ItCache->second; } - return Name; - }; + if (!NameToUse.empty()) + NameIndexOffset = MainBinaryStrWriter.addString(Name); + It.StrOffset = NameIndexOffset; + // This is the same hash function used in DWARF5AccelTableData. + It.HashValue = caseFoldingDjbHash(Name); + } + return Name; +} - auto addEntry = - [&](DIEValue ValName) -> std::optional { - std::optional Name = getName(ValName); - if (!Name) - return std::nullopt; +std::optional DWARF5AcceleratorTable::addEntry( + DWARFUnit &DU, const DIE &CurrDie, const std::optional &DWOID, + const std::optional &Parent, + const std::optional &Name, + const uint32_t NumberParentsInChain) { + if (!Name) + return std::nullopt; - auto &It = Entries[*Name]; - bool IsTU = false; - uint32_t DieTag = 0; - uint32_t UnitID = getUnitID(Unit, IsTU, DieTag); - std::optional SecondIndex = std::nullopt; - if (IsTU && DWOID) { - auto Iter = CUOffsetsToPatch.find(*DWOID); - if (Iter == CUOffsetsToPatch.end()) - BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find " - "DWO ID in CU offsets for second Unit Index " - << *Name << ". For DIE at offset: " - << Twine::utohexstr(CurrentUnitOffset + Die.getOffset()) - << ".\n"; - SecondIndex = Iter->second; - } - std::optional ParentOffset = - (Parent ? std::optional(getEntryID(**Parent)) : std::nullopt); - // This will be populated later in writeEntry. - // This way only parent entries get tracked. - // Keeping memory footprint down. 
- if (ParentOffset) - EntryRelativeOffsets.insert({*ParentOffset, 0}); - bool IsParentRoot = false; - // If there is no parent and no valid Entries in parent chain this is a root - // to be marked with a flag. - if (!Parent && !NumberParentsInChain) - IsParentRoot = true; - It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData( - Die.getOffset(), ParentOffset, DieTag, UnitID, IsParentRoot, IsTU, - SecondIndex)); - return It.Values.back(); - }; + auto &It = Entries[*Name]; + bool IsTU = false; + uint32_t DieTag = CurrDie.getTag(); + uint32_t UnitID = getUnitID(DU, DWOID, IsTU); + std::optional SecondIndex = std::nullopt; + if (IsTU && DWOID) { + auto Iter = CUOffsetsToPatch.find(*DWOID); + if (Iter == CUOffsetsToPatch.end()) + BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find " + "DWO ID in CU offsets for second Unit Index " + << *Name << ". For DIE at offset: " + << Twine::utohexstr(CurrentUnitOffset + CurrDie.getOffset()) + << ".\n"; + SecondIndex = Iter->second; + } + std::optional ParentOffset = + (Parent ? std::optional(getEntryID(**Parent)) : std::nullopt); + // This will be only populated in writeEntry, in order to keep only the parent + // entries, and keep the footprint down. + if (ParentOffset) + EntryRelativeOffsets.insert({*ParentOffset, 0}); + bool IsParentRoot = false; + // If there is no parent and no valid Entries in parent chain this is a root + // to be marked with a flag. + if (!Parent && !NumberParentsInChain) + IsParentRoot = true; + It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData( + CurrDie.getOffset(), ParentOffset, DieTag, UnitID, IsParentRoot, IsTU, + SecondIndex)); + return It.Values.back(); +} - // Minor optimization not to add entry twice for DW_TAG_namespace if it has no - // DW_AT_name. 
- if (!(Die.getTag() == dwarf::DW_TAG_namespace && - !Die.findAttribute(dwarf::Attribute::DW_AT_name))) - addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name)); - // For the purposes of determining whether a debugging information entry has a - // particular attribute (such as DW_AT_name), if debugging information entry A - // has a DW_AT_specification or DW_AT_abstract_origin attribute pointing to - // another debugging information entry B, any attributes of B are considered - // to be part of A. - auto processReferencedDie = [&](const dwarf::Attribute &Attr) - -> std::optional { - const DIEValue Value = Die.findAttribute(Attr); +std::optional +DWARF5AcceleratorTable::processReferencedDie( + DWARFUnit &Unit, const DIE &Die, const std::optional &DWOID, + const std::optional &Parent, + const std::string &NameToUse, const uint32_t NumberParentsInChain, + const dwarf::Attribute &Attr) { + DIEValue Value = Die.findAttribute(Attr); + if (!Value) + return std::nullopt; + auto getReferenceDie = [&](const DIEValue &Value, const DIE *RefDieUsed) + -> std::optional> { if (!Value) return std::nullopt; - const DIE *EntryDie = nullptr; if (Value.getForm() == dwarf::DW_FORM_ref_addr) { auto Iter = CrossCUDies.find(Value.getDIEInteger().getValue()); if (Iter == CrossCUDies.end()) { @@ -359,24 +338,97 @@ DWARF5AcceleratorTable::addAccelTableEntry( << ".\n"; return std::nullopt; } - EntryDie = Iter->second; - } else { - const DIEEntry &DIEENtry = Value.getDIEEntry(); - EntryDie = &DIEENtry.getEntry(); + return Iter->second; } - - addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_linkage_name)); - return addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_name)); + const DIEEntry &DIEENtry = Value.getDIEEntry(); + return {{&Unit, &DIEENtry.getEntry()}}; }; - if (std::optional Entry = - processReferencedDie(dwarf::Attribute::DW_AT_abstract_origin)) + DIEValue AttrValLinkageName; + DIEValue AttrValName = Die.findAttribute(dwarf::Attribute::DW_AT_name); + 
DWARFUnit *RefUnit = &Unit; + const DIE *RefDieUsed = &Die; + // It is possible to have DW_TAG_subprogram only with DW_AT_linkage_name that + // DW_AT_abstract_origin/DW_AT_specification point to. + while (!AttrValName) { + std::optional> RefDUDie = + getReferenceDie(Value, RefDieUsed); + if (!RefDUDie) + break; + RefUnit = RefDUDie->first; + const DIE &RefDie = *RefDUDie->second; + RefDieUsed = &RefDie; + if (!AttrValLinkageName) + AttrValLinkageName = + RefDie.findAttribute(dwarf::Attribute::DW_AT_linkage_name); + AttrValName = RefDie.findAttribute(dwarf::Attribute::DW_AT_name); + Value = RefDie.findAttribute(dwarf::Attribute::DW_AT_abstract_origin); + if (!Value) + Value = RefDie.findAttribute(dwarf::Attribute::DW_AT_specification); + } + addEntry(Unit, Die, DWOID, Parent, + getName(*RefUnit, DWOID, NameToUse, AttrValLinkageName), + NumberParentsInChain); + return addEntry(Unit, Die, DWOID, Parent, + getName(*RefUnit, DWOID, NameToUse, AttrValName), + NumberParentsInChain); +} + +std::optional +DWARF5AcceleratorTable::addAccelTableEntry( + DWARFUnit &Unit, const DIE &Die, const std::optional &DWOID, + const uint32_t NumberParentsInChain, + std::optional &Parent) { + if (Unit.getVersion() < 5 || !NeedToCreate) + return std::nullopt; + std::string NameToUse = ""; + + if (!canProcess(Unit, Die, NameToUse, false)) + return std::nullopt; + + // Adds a Unit to either CU, LocalTU or ForeignTU list the first time we + // encounter it. + // Invoking it here so that we don't add Units that don't have any entries. + if (&Unit != CurrentUnit) { + CurrentUnit = &Unit; + addUnit(Unit, DWOID); + } + + // Minor optimization not to add entry twice for DW_TAG_namespace if it has no + // DW_AT_name. 
+  std::optional LinkageEntry = std::nullopt;
+  DIEValue NameVal = Die.findAttribute(dwarf::Attribute::DW_AT_name);
+  DIEValue LinkageNameVal =
+      Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name);
+  if (!(Die.getTag() == dwarf::DW_TAG_namespace && !NameVal))
+    LinkageEntry = addEntry(Unit, Die, DWOID, Parent,
+                            getName(Unit, DWOID, NameToUse, LinkageNameVal),
+                            NumberParentsInChain);
+
+  std::optional NameEntry =
+      addEntry(Unit, Die, DWOID, Parent,
+               getName(Unit, DWOID, NameToUse, NameVal), NumberParentsInChain);
+  if (NameEntry)
+    return NameEntry;
+
+  // The DIE doesn't have DW_AT_name or DW_AT_linkage_name, so we need to see if
+  // we can follow other attributes to find them. For the purposes of
+  // determining whether a debug information entry has a particular
+  // attribute (such as DW_AT_name), if debug information entry A has a
+  // DW_AT_specification or DW_AT_abstract_origin attribute pointing to another
+  // debug information entry B, any attributes of B are considered to be
+  // part of A.
+  if (std::optional Entry = processReferencedDie(
+          Unit, Die, DWOID, Parent, NameToUse, NumberParentsInChain,
+          dwarf::Attribute::DW_AT_abstract_origin))
     return *Entry;
-  if (std::optional Entry =
-          processReferencedDie(dwarf::Attribute::DW_AT_specification))
+  if (std::optional Entry = processReferencedDie(
+          Unit, Die, DWOID, Parent, NameToUse, NumberParentsInChain,
+          dwarf::Attribute::DW_AT_specification))
     return *Entry;
+  // This point can be hit by DW_TAG_variable that has no DW_AT_name.
+  return std::nullopt;
 }
 
 /// Algorithm from llvm implementation.
diff --git a/bolt/test/X86/dwarf5-debug-names-abstract-origin-linkage-name-only.s b/bolt/test/X86/dwarf5-debug-names-abstract-origin-linkage-name-only.s new file mode 100644 index 00000000000000..8c9817ce91edb3 --- /dev/null +++ b/bolt/test/X86/dwarf5-debug-names-abstract-origin-linkage-name-only.s @@ -0,0 +1,568 @@ +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %tmain.o +# RUN: %clang %cflags -gdwarf-5 %tmain.o -o %tmain.exe +# RUN: llvm-bolt %tmain.exe -o %tmain.exe.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-names %tmain.exe.bolt > %tlog.txt +# RUN: cat %tlog.txt | FileCheck -check-prefix=BOLT %s + +## Tests that bolt can correctly generate debug_names when there is an DW_TAG_inlined_subroutine +## with DW_AT_abstract_origin that points to DW_TAG_subprogram that only has DW_AT_linkage_name. + +# BOLT: Name Index @ 0x0 { +# BOLT-NEXT: Header { +# BOLT-NEXT: Length: 0xA2 +# BOLT-NEXT: Format: DWARF32 +# BOLT-NEXT: Version: 5 +# BOLT-NEXT: CU count: 1 +# BOLT-NEXT: Local TU count: 0 +# BOLT-NEXT: Foreign TU count: 0 +# BOLT-NEXT: Bucket count: 4 +# BOLT-NEXT: Name count: 4 +# BOLT-NEXT: Abbreviations table size: 0x19 +# BOLT-NEXT: Augmentation: 'BOLT' +# BOLT-NEXT: } +# BOLT-NEXT: Compilation Unit offsets [ +# BOLT-NEXT: CU[0]: 0x00000000 +# BOLT-NEXT: ] +# BOLT-NEXT: Abbreviations [ +# BOLT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_ref4 +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 0 
[ +# BOLT-NEXT: Name 1 { +# BOLT-NEXT: Hash: 0xB888030 +# BOLT-NEXT: String: {{.+}} "int" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: 0x1 +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x0000004a +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 1 [ +# BOLT-NEXT: EMPTY +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 2 [ +# BOLT-NEXT: Name 2 { +# BOLT-NEXT: Hash: 0x7C9A7F6A +# BOLT-NEXT: String: {{.+}} "main" +# BOLT-NEXT: Entry @ [[REF1:0x[0-9a-f]*]] { +# BOLT-NEXT: Abbrev: [[ABBREV2]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x0000004e +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 3 { +# BOLT-NEXT: Hash: 0xB5063CFE +# BOLT-NEXT: String: {{.+}} "_Z3fooi" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV2]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000024 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ 0x96 { +# BOLT-NEXT: Abbrev: [[ABBREV3]] +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: 0x0000007e +# BOLT-NEXT: DW_IDX_parent: Entry @ [[REF1]] +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 3 [ +# BOLT-NEXT: Name 4 { +# BOLT-NEXT: Hash: 0x7C952063 +# BOLT-NEXT: String: {{.+}} "char" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x0000009f +# BOLT-NEXT: DW_IDX_parent: + +## int foo(int i) { +## return i ++; +## } +## int main(int argc, char* argv[]) { +## int i = 0; +## [[clang::always_inline]] i = foo(argc); +## return i; +## } +## Test was manually modified so that DW_TAG_subprogram only had DW_AT_linkage_name. 
+ + .text + .file "main.cpp" + .globl _Z3fooi + .p2align 4, 0x90 + .type _Z3fooi,@function +_Z3fooi: +.Lfunc_begin0: + .file 0 "/abstractChain" "main.cpp" md5 0x2e29d55fc1320801a8057a4c50643ea1 + .loc 0 1 0 + .loc 0 2 12 prologue_end + .loc 0 2 3 epilogue_begin is_stmt 0 + retq +.Lfunc_end0: + .size _Z3fooi, .Lfunc_end0-_Z3fooi + + .globl main + .p2align 4, 0x90 + .type main,@function +main: +.Lfunc_begin1: + .loc 0 4 0 is_stmt 1 +.Ltmp2: + .loc 0 5 7 prologue_end + .loc 0 6 36 + movl -12(%rbp), %eax +.Ltmp3: + .loc 0 2 12 +.Ltmp4: + .loc 0 6 30 + .loc 0 7 10 + .loc 0 7 3 epilogue_begin is_stmt 0 + retq +.Ltmp5: +.Lfunc_end1: + .size main, .Lfunc_end1-main + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + 
.byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + #.byte 3 # DW_AT_name + #.byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 32 # DW_AT_inline + .byte 33 # DW_FORM_implicit_const + .byte 1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # 
DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 1 # DW_CHILDREN_yes + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x98 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0x15 DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 56 # DW_AT_abstract_origin + .byte 3 # Abbrev [3] 0x2f:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 64 # DW_AT_abstract_origin Manually Modified + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x38:0x12 DW_TAG_subprogram + .byte 3 # DW_AT_linkage_name + #.byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 74 # DW_AT_type + # DW_AT_external + # DW_AT_inline + .byte 5 # Abbrev [5] 0x41:0x8 DW_TAG_formal_parameter + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 74 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x4a:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 7 # Abbrev [7] 0x4e:0x47 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 73 # DW_AT_type Manually Modified + # DW_AT_external + .byte 8 # Abbrev [8] 0x5d:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 73 # DW_AT_type Manually Modified + .byte 8 # Abbrev [8] 0x68:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 104 + .byte 9 # DW_AT_name + .byte 0 # 
DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 148 # DW_AT_type Manually Modified + .byte 9 # Abbrev [9] 0x73:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 100 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 73 # DW_AT_type Manually Modified + .byte 10 # Abbrev [10] 0x7e:0x16 DW_TAG_inlined_subroutine + .long 56 # DW_AT_abstract_origin + .byte 2 # DW_AT_low_pc + .long .Ltmp4-.Ltmp3 # DW_AT_high_pc + .byte 0 # DW_AT_call_file + .byte 6 # DW_AT_call_line + .byte 32 # DW_AT_call_column + .byte 3 # Abbrev [3] 0x8b:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 64 # DW_AT_abstract_origin Manually Modified + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 11 # Abbrev [11] 0x95:0x5 DW_TAG_pointer_type + .long 153 # DW_AT_type Manually Modified + .byte 11 # Abbrev [11] 0x9a:0x5 DW_TAG_pointer_type + .long 158 # DW_AT_type Manually Modified + .byte 6 # Abbrev [6] 0x9f:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 48 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.0.0git" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=24 +.Linfo_string2: + .asciz "/abstractChain" # string offset=33 +.Linfo_string3: + .asciz "foo" # string offset=85 +.Linfo_string4: + .asciz "_Z3fooi" # string offset=89 +.Linfo_string5: + .asciz "int" # string offset=97 +.Linfo_string6: + .asciz "i" # string offset=101 +.Linfo_string7: + .asciz "main" # string offset=103 +.Linfo_string8: + .asciz "argc" # string offset=108 +.Linfo_string9: + .asciz "argv" # string offset=113 +.Linfo_string10: + .asciz "char" # string offset=118 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + 
.long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string4 + .long .Linfo_string3 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Ltmp3 +.Ldebug_addr_end0: + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: unit length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 5 # Header: bucket count + .long 5 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 8 # Header: augmentation string size + .ascii "LLVM0700" # Header: augmentation string + .long .Lcu_begin0 # Compilation unit 0 + .long 0 # Bucket 0 + .long 1 # Bucket 1 + .long 0 # Bucket 2 + .long 3 # Bucket 3 + .long 4 # Bucket 4 + .long 2090499946 # Hash in Bucket 1 + .long -1257882370 # Hash in Bucket 1 + .long 193495088 # Hash in Bucket 3 + .long 193491849 # Hash in Bucket 4 + .long 2090147939 # Hash in Bucket 4 + .long .Linfo_string7 # String in Bucket 1: main + .long .Linfo_string4 # String in Bucket 1: _Z3fooi + .long .Linfo_string5 # String in Bucket 3: int + .long .Linfo_string3 # String in Bucket 4: foo + .long .Linfo_string10 # String in Bucket 4: char + .long .Lnames3-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames1-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames2-.Lnames_entries0 # Offset in Bucket 3 + .long .Lnames0-.Lnames_entries0 # Offset in Bucket 4 + .long .Lnames4-.Lnames_entries0 # Offset in Bucket 4 +.Lnames_abbrev_start0: + .byte 1 # Abbrev 
code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 2 # Abbrev code + .byte 29 # DW_TAG_inlined_subroutine + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 19 # DW_FORM_ref4 + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 3 # Abbrev code + .byte 36 # DW_TAG_base_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames3: +.L2: + .byte 1 # Abbreviation code + .long 78 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: main +.Lnames1: +.L0: + .byte 1 # Abbreviation code + .long 35 # DW_IDX_die_offset +.L3: # DW_IDX_parent + .byte 2 # Abbreviation code + .long 126 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent + .byte 0 # End of list: _Z3fooi +.Lnames2: +.L1: + .byte 3 # Abbreviation code + .long 74 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: int +.Lnames0: + .byte 1 # Abbreviation code + .long 35 # DW_IDX_die_offset + .byte 2 # DW_IDX_parent + # Abbreviation code + .long 126 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent + .byte 0 # End of list: foo +.Lnames4: +.L4: + .byte 3 # Abbreviation code + .long 159 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: char + .p2align 2, 0x0 +.Lnames_end0: + .ident "clang version 20.0.0git" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/dwarf5-debug-names-abstract-origin-specification.s b/bolt/test/X86/dwarf5-debug-names-abstract-origin-specification.s new file mode 100644 index 00000000000000..2075640d6761cd --- /dev/null +++ 
b/bolt/test/X86/dwarf5-debug-names-abstract-origin-specification.s @@ -0,0 +1,829 @@ +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %tmain.o +# RUN: %clang %cflags -gdwarf-5 %tmain.o -o %tmain.exe +# RUN: llvm-bolt %tmain.exe -o %tmain.exe.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-names %tmain.exe.bolt > %tlog.txt +# RUN: cat %tlog.txt | FileCheck -check-prefix=BOLT %s + +## This test checks that BOLT correctly generates .debug_names section when there is transitive +## DW_AT_name/DW_AT_linkage_name resolution. + +# BOLT: Abbreviations [ +# BOLT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_class_type +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_ref4 +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 0 [ +# BOLT-NEXT: Name 1 { +# BOLT-NEXT: Hash: 0xD72418AA +# BOLT-NEXT: String: {{.+}} "_ZL3fooi" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x000000ba +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 1 [ +# BOLT-NEXT: Name 2 { +# BOLT-NEXT: Hash: 0x10614A06 +# BOLT-NEXT: String: {{.+}} "State" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV2]] +# BOLT-NEXT: Tag: DW_TAG_class_type +# BOLT-NEXT: 
DW_IDX_die_offset: 0x0000002b +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ [[REF1:0x[0-9a-f]*]] { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000089 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV3]] +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: 0x000000a3 +# BOLT-NEXT: DW_IDX_parent: Entry @ [[REF1]] +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 2 [ +# BOLT-NEXT: EMPTY +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 3 [ +# BOLT-NEXT: Name 3 { +# BOLT-NEXT: Hash: 0xB888030 +# BOLT-NEXT: String: {{.+}} "int" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV4]] +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x00000085 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 4 { +# BOLT-NEXT: Hash: 0x7C9A7F6A +# BOLT-NEXT: String: {{.+}} "main" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000042 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 4 [ +# BOLT-NEXT: EMPTY +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 5 [ +# BOLT-NEXT: Name 5 { +# BOLT-NEXT: Hash: 0xB887389 +# BOLT-NEXT: String: {{.+}} "foo" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x000000ba +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 6 { +# BOLT-NEXT: Hash: 0x7C952063 +# BOLT-NEXT: String: {{.+}} "char" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV4]] +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x000000d9 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 7 { +# BOLT-NEXT: Hash: 0xFBBDC812 +# BOLT-NEXT: String: {{.+}} "_ZN5StateC2Ev" +# 
BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000089 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV3]] +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: 0x000000a3 +# BOLT-NEXT: DW_IDX_parent: Entry @ [[REF1]] + +## static int foo(int i) { +## return i ++; +## } +## class State { +## public: +## State() {[[clang::always_inline]] foo(3);} +## }; +## +## int main(int argc, char* argv[]) { +## State S; +## return 0; +## } + +## Test manually modified to redirect DW_TAG_inlined_subroutine to DW_TAG_subprogram with DW_AT_specification. + + .text + .file "main.cpp" + .file 0 "abstractChainTwo" "main.cpp" md5 0x17ad726b6a1fd49ee59559a1302da539 + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .loc 0 9 0 # main.cpp:9:0 +.Ltmp0: + .loc 0 10 9 prologue_end # main.cpp:10:9 + callq _ZN5StateC2Ev + .loc 0 11 3 # main.cpp:11:3 + .loc 0 11 3 epilogue_begin is_stmt 0 # main.cpp:11:3 + retq +.Ltmp1: +.Lfunc_end0: + .size main, .Lfunc_end0-main + # -- End function + .section .text._ZN5StateC2Ev,"axG",@progbits,_ZN5StateC2Ev,comdat + .weak _ZN5StateC2Ev # -- Begin function _ZN5StateC2Ev + .p2align 4, 0x90 + .type _ZN5StateC2Ev,@function +_ZN5StateC2Ev: # @_ZN5StateC2Ev +.Lfunc_begin1: + .loc 0 6 0 is_stmt 1 # main.cpp:6:0 + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -16(%rbp) + movl $3, -4(%rbp) +.Ltmp2: + .loc 0 2 12 prologue_end # main.cpp:2:12 + movl -4(%rbp), %eax + addl $1, %eax + movl %eax, -4(%rbp) +.Ltmp3: + .loc 0 6 44 epilogue_begin # main.cpp:6:44 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp4: +.Lfunc_end1: + .size _ZN5StateC2Ev, .Lfunc_end1-_ZN5StateC2Ev + .cfi_endproc + # -- End function + .text + .p2align 4, 0x90 # -- Begin 
function _ZL3fooi + .type _ZL3fooi,@function +_ZL3fooi: # @_ZL3fooi +.Lfunc_begin2: + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp5: + .loc 0 2 12 prologue_end # main.cpp:2:12 + movl -4(%rbp), %eax + movl %eax, %ecx + addl $1, %ecx + movl %ecx, -4(%rbp) + .loc 0 2 3 epilogue_begin is_stmt 0 # main.cpp:2:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp6: +.Lfunc_end2: + .size _ZL3fooi, .Lfunc_end2-_ZL3fooi + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 2 # DW_TAG_class_type + .byte 1 # DW_CHILDREN_yes + .byte 54 # DW_AT_calling_convention + .byte 11 # DW_FORM_data1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + 
.byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 50 # DW_AT_accessibility + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 52 # DW_AT_artificial + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # 
EOM(2) + .byte 9 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 32 # DW_AT_inline + .byte 33 # DW_FORM_implicit_const + .byte 1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 12 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 100 # DW_AT_object_pointer + .byte 19 # DW_FORM_ref4 + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 71 # DW_AT_specification + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 13 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 52 # DW_AT_artificial + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 14 # Abbreviation Code + .byte 29 # 
DW_TAG_inlined_subroutine + .byte 1 # DW_CHILDREN_yes + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 15 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 16 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0xd7 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .byte 0 # DW_AT_ranges + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] 0x2b:0x12 DW_TAG_class_type + .byte 5 # DW_AT_calling_convention + .byte 3 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .byte 3 # Abbrev [3] 0x31:0xb DW_TAG_subprogram + .byte 3 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 6 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 1 # DW_AT_accessibility + # DW_ACCESS_public + .byte 4 # Abbrev [4] 0x36:0x5 DW_TAG_formal_parameter + .long 61 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x3d:0x5 DW_TAG_pointer_type + .long 43 # DW_AT_type + .byte 6 # Abbrev [6] 0x42:0x31 DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 133 # DW_AT_type + # DW_AT_external + .byte 7 # Abbrev [7] 0x51:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 10 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 133 # DW_AT_type + .byte 7 # Abbrev [7] 0x5c:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 11 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 207 # DW_AT_type + .byte 8 # Abbrev [8] 0x67:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 111 + .byte 13 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 10 # DW_AT_decl_line + .long 43 # DW_AT_type + .byte 
0 # End Of Children Mark + .byte 9 # Abbrev [9] 0x73:0x12 DW_TAG_subprogram + .byte 4 # DW_AT_linkage_name + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 133 # DW_AT_type + # DW_AT_inline + .byte 10 # Abbrev [10] 0x7c:0x8 DW_TAG_formal_parameter + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 133 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 11 # Abbrev [11] 0x85:0x4 DW_TAG_base_type + .byte 6 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 12 # Abbrev [12] 0x89:0x31 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 154 # DW_AT_object_pointer + .byte 9 # DW_AT_linkage_name + .long 49 # DW_AT_specification + .byte 13 # Abbrev [13] 0x9a:0x9 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 14 # DW_AT_name + .long 221 # DW_AT_type + # DW_AT_artificial + .byte 14 # Abbrev [14] 0xa3:0x16 DW_TAG_inlined_subroutine + .long 137 # DW_AT_abstract_origin Manually Modified + .byte 2 # DW_AT_low_pc + .long .Ltmp3-.Ltmp2 # DW_AT_high_pc + .byte 0 # DW_AT_call_file + .byte 6 # DW_AT_call_line + .byte 37 # DW_AT_call_column + .byte 15 # Abbrev [15] 0xb0:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 124 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 16 # Abbrev [16] 0xba:0x15 DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end2-.Lfunc_begin2 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 115 # DW_AT_abstract_origin + .byte 15 # Abbrev [15] 0xc6:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 124 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0xcf:0x5 DW_TAG_pointer_type + .long 212 # DW_AT_type + .byte 5 # Abbrev [5] 0xd4:0x5 DW_TAG_pointer_type + .long 217 # 
DW_AT_type + .byte 11 # Abbrev [11] 0xd9:0x4 DW_TAG_base_type + .byte 12 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 5 # Abbrev [5] 0xdd:0x5 DW_TAG_pointer_type + .long 43 # DW_AT_type + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 1 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 1 # DW_RLE_base_addressx + .byte 0 # base address index + .byte 4 # DW_RLE_offset_pair + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end0-.Lfunc_begin0 # ending offset + .byte 4 # DW_RLE_offset_pair + .uleb128 .Lfunc_begin2-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end2-.Lfunc_begin0 # ending offset + .byte 3 # DW_RLE_startx_length + .byte 1 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_str_offsets,"",@progbits + .long 64 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.0.0git" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=24 +.Linfo_string2: + .asciz "abstractChainTwo" # string offset=33 +.Linfo_string3: + .asciz "State" # string offset=88 +.Linfo_string4: + .asciz "main" # string offset=94 +.Linfo_string5: + .asciz "_ZL3fooi" # string offset=99 +.Linfo_string6: + .asciz "foo" # string offset=108 +.Linfo_string7: + .asciz "int" # string offset=112 +.Linfo_string8: + .asciz "i" # string offset=116 +.Linfo_string9: + .asciz "_ZN5StateC2Ev" # string offset=118 +.Linfo_string10: + .asciz "argc" # string offset=132 +.Linfo_string11: + .asciz "argv" # string offset=137 +.Linfo_string12: + .asciz "char" # 
string offset=142 +.Linfo_string13: + .asciz "S" # string offset=147 +.Linfo_string14: + .asciz "this" # string offset=149 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string4 + .long .Linfo_string9 + .long .Linfo_string10 + .long .Linfo_string11 + .long .Linfo_string12 + .long .Linfo_string13 + .long .Linfo_string14 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Ltmp2 + .quad .Lfunc_begin2 +.Ldebug_addr_end0: + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: unit length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 7 # Header: bucket count + .long 7 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 8 # Header: augmentation string size + .ascii "LLVM0700" # Header: augmentation string + .long .Lcu_begin0 # Compilation unit 0 + .long 1 # Bucket 0 + .long 2 # Bucket 1 + .long 0 # Bucket 2 + .long 3 # Bucket 3 + .long 0 # Bucket 4 + .long 5 # Bucket 5 + .long 0 # Bucket 6 + .long -685500246 # Hash in Bucket 0 + .long 274811398 # Hash in Bucket 1 + .long 193495088 # Hash in Bucket 3 + .long 2090499946 # Hash in Bucket 3 + .long 193491849 # Hash in Bucket 5 + .long 2090147939 # Hash in Bucket 5 + .long -71448558 # Hash in Bucket 5 + .long .Linfo_string5 # String in Bucket 0: _ZL3fooi + .long .Linfo_string3 # String in Bucket 1: State + .long .Linfo_string7 # String in Bucket 
3: int + .long .Linfo_string4 # String in Bucket 3: main + .long .Linfo_string6 # String in Bucket 5: foo + .long .Linfo_string12 # String in Bucket 5: char + .long .Linfo_string9 # String in Bucket 5: _ZN5StateC2Ev + .long .Lnames5-.Lnames_entries0 # Offset in Bucket 0 + .long .Lnames0-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames2-.Lnames_entries0 # Offset in Bucket 3 + .long .Lnames1-.Lnames_entries0 # Offset in Bucket 3 + .long .Lnames4-.Lnames_entries0 # Offset in Bucket 5 + .long .Lnames6-.Lnames_entries0 # Offset in Bucket 5 + .long .Lnames3-.Lnames_entries0 # Offset in Bucket 5 +.Lnames_abbrev_start0: + .byte 1 # Abbrev code + .byte 29 # DW_TAG_inlined_subroutine + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 19 # DW_FORM_ref4 + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 2 # Abbrev code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 3 # Abbrev code + .byte 2 # DW_TAG_class_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 4 # Abbrev code + .byte 36 # DW_TAG_base_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames5: +.L1: + .byte 1 # Abbreviation code + .long 163 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent +.L0: + .byte 2 # Abbreviation code + .long 186 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: _ZL3fooi +.Lnames0: +.L5: + .byte 3 # Abbreviation code + .long 43 # DW_IDX_die_offset +.L2: # DW_IDX_parent + .byte 2 # Abbreviation code + .long 137 # DW_IDX_die_offset + 
.byte 0 # DW_IDX_parent + # End of list: State +.Lnames2: +.L4: + .byte 4 # Abbreviation code + .long 133 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: int +.Lnames1: +.L6: + .byte 2 # Abbreviation code + .long 66 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: main +.Lnames4: + .byte 1 # Abbreviation code + .long 163 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent + .byte 2 # Abbreviation code + .long 186 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: foo +.Lnames6: +.L3: + .byte 4 # Abbreviation code + .long 217 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: char +.Lnames3: + .byte 2 # Abbreviation code + .long 137 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: _ZN5StateC2Ev + .p2align 2, 0x0 +.Lnames_end0: + .ident "clang version 20.0.0git" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp index f2ff27d85fb004..55ca4809f058ab 100644 --- a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp @@ -12,20 +12,43 @@ #include "../utils/OptionsUtils.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include using namespace clang::ast_matchers; +using clang::ast_matchers::internal::Matcher; namespace clang::tidy::bugprone { namespace { -AST_MATCHER_P(QualType, hasCleanType, ast_matchers::internal::Matcher, - InnerMatcher) { +AST_MATCHER_P(QualType, hasCleanType, Matcher, InnerMatcher) { return InnerMatcher.matches( Node.getNonReferenceType().getUnqualifiedType().getCanonicalType(), Finder, Builder); } +constexpr std::array NameList{ + "::std::make_unique", + "::std::make_shared", +}; + +Matcher constructFrom(Matcher TypeMatcher, + 
Matcher ArgumentMatcher) { + return expr( + anyOf( + // construct optional + cxxConstructExpr(argumentCountIs(1U), hasType(TypeMatcher), + hasArgument(0U, ArgumentMatcher)), + // known template methods in std + callExpr(argumentCountIs(1), + callee(functionDecl( + matchers::matchesAnyListedName(NameList), + hasTemplateArgument(0, refersToType(TypeMatcher)))), + hasArgument(0, ArgumentMatcher))), + unless(anyOf(hasAncestor(typeLoc()), + hasAncestor(expr(matchers::hasUnevaluatedContext()))))); +} + } // namespace OptionalValueConversionCheck::OptionalValueConversionCheck( @@ -67,12 +90,9 @@ void OptionalValueConversionCheck::registerMatchers(MatchFinder *Finder) { callExpr(argumentCountIs(1), callee(functionDecl(hasName("::std::move"))), hasArgument(0, ignoringImpCasts(OptionalDereferenceMatcher))); Finder->addMatcher( - cxxConstructExpr( - argumentCountIs(1U), hasType(BindOptionalType), - hasArgument(0U, ignoringImpCasts(anyOf(OptionalDereferenceMatcher, - StdMoveCallMatcher))), - unless(anyOf(hasAncestor(typeLoc()), - hasAncestor(expr(matchers::hasUnevaluatedContext()))))) + expr(constructFrom(BindOptionalType, + ignoringImpCasts(anyOf(OptionalDereferenceMatcher, + StdMoveCallMatcher)))) .bind("expr"), this); } diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt index c919d49b42873a..bab1167fb15ff2 100644 --- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt @@ -36,6 +36,7 @@ add_clang_library(clangTidyModernizeModule STATIC UseEmplaceCheck.cpp UseEqualsDefaultCheck.cpp UseEqualsDeleteCheck.cpp + UseIntegerSignComparisonCheck.cpp UseNodiscardCheck.cpp UseNoexceptCheck.cpp UseNullptrCheck.cpp diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp index 18607593320635..fc46c72982fdce 100644 --- 
a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp @@ -37,6 +37,7 @@ #include "UseEmplaceCheck.h" #include "UseEqualsDefaultCheck.h" #include "UseEqualsDeleteCheck.h" +#include "UseIntegerSignComparisonCheck.h" #include "UseNodiscardCheck.h" #include "UseNoexceptCheck.h" #include "UseNullptrCheck.h" @@ -76,6 +77,8 @@ class ModernizeModule : public ClangTidyModule { CheckFactories.registerCheck("modernize-pass-by-value"); CheckFactories.registerCheck( "modernize-use-designated-initializers"); + CheckFactories.registerCheck( + "modernize-use-integer-sign-comparison"); CheckFactories.registerCheck("modernize-use-ranges"); CheckFactories.registerCheck( "modernize-use-starts-ends-with"); diff --git a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp new file mode 100644 index 00000000000000..8f807bc0a96d56 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp @@ -0,0 +1,171 @@ +//===--- UseIntegerSignComparisonCheck.cpp - clang-tidy -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UseIntegerSignComparisonCheck.h" +#include "clang/AST/Expr.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Lex/Lexer.h" + +using namespace clang::ast_matchers; +using namespace clang::ast_matchers::internal; + +namespace clang::tidy::modernize { + +/// Find if the passed type is the actual "char" type, +/// not applicable to explicit "signed char" or "unsigned char" types. 
+static bool isActualCharType(const clang::QualType &Ty) { + using namespace clang; + const Type *DesugaredType = Ty->getUnqualifiedDesugaredType(); + if (const auto *BT = llvm::dyn_cast(DesugaredType)) + return (BT->getKind() == BuiltinType::Char_U || + BT->getKind() == BuiltinType::Char_S); + return false; +} + +namespace { +AST_MATCHER(clang::QualType, isActualChar) { + return clang::tidy::modernize::isActualCharType(Node); +} +} // namespace + +static BindableMatcher +intCastExpression(bool IsSigned, + const std::string &CastBindName = std::string()) { + // std::cmp_{} functions trigger a compile-time error if either LHS or RHS + // is a non-integer type, char, enum or bool + // (unsigned char/ signed char are Ok and can be used). + auto IntTypeExpr = expr(hasType(hasCanonicalType(qualType( + isInteger(), IsSigned ? isSignedInteger() : isUnsignedInteger(), + unless(isActualChar()), unless(booleanType()), unless(enumType()))))); + + const auto ImplicitCastExpr = + CastBindName.empty() ? 
implicitCastExpr(hasSourceExpression(IntTypeExpr)) + : implicitCastExpr(hasSourceExpression(IntTypeExpr)) + .bind(CastBindName); + + const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr)); + const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr)); + const auto FunctionalCastExpr = cxxFunctionalCastExpr(has(ImplicitCastExpr)); + + return expr(anyOf(ImplicitCastExpr, CStyleCastExpr, StaticCastExpr, + FunctionalCastExpr)); +} + +static StringRef parseOpCode(BinaryOperator::Opcode Code) { + switch (Code) { + case BO_LT: + return "cmp_less"; + case BO_GT: + return "cmp_greater"; + case BO_LE: + return "cmp_less_equal"; + case BO_GE: + return "cmp_greater_equal"; + case BO_EQ: + return "cmp_equal"; + case BO_NE: + return "cmp_not_equal"; + default: + return ""; + } +} + +UseIntegerSignComparisonCheck::UseIntegerSignComparisonCheck( + StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + IncludeInserter(Options.getLocalOrGlobal("IncludeStyle", + utils::IncludeSorter::IS_LLVM), + areDiagsSelfContained()) {} + +void UseIntegerSignComparisonCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "IncludeStyle", IncludeInserter.getStyle()); +} + +void UseIntegerSignComparisonCheck::registerMatchers(MatchFinder *Finder) { + const auto SignedIntCastExpr = intCastExpression(true, "sIntCastExpression"); + const auto UnSignedIntCastExpr = intCastExpression(false); + + // Flag all operators "==", "<=", ">=", "<", ">", "!=" + // that are used between signed/unsigned + const auto CompareOperator = + binaryOperator(hasAnyOperatorName("==", "<=", ">=", "<", ">", "!="), + hasOperands(SignedIntCastExpr, UnSignedIntCastExpr), + unless(isInTemplateInstantiation())) + .bind("intComparison"); + + Finder->addMatcher(CompareOperator, this); +} + +void UseIntegerSignComparisonCheck::registerPPCallbacks( + const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) { + 
IncludeInserter.registerPreprocessor(PP); +} + +void UseIntegerSignComparisonCheck::check( + const MatchFinder::MatchResult &Result) { + const auto *SignedCastExpression = + Result.Nodes.getNodeAs("sIntCastExpression"); + assert(SignedCastExpression); + + // Ignore the match if we know that the signed int value is not negative. + Expr::EvalResult EVResult; + if (!SignedCastExpression->isValueDependent() && + SignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult, + *Result.Context)) { + const llvm::APSInt SValue = EVResult.Val.getInt(); + if (SValue.isNonNegative()) + return; + } + + const auto *BinaryOp = + Result.Nodes.getNodeAs("intComparison"); + if (BinaryOp == nullptr) + return; + + const BinaryOperator::Opcode OpCode = BinaryOp->getOpcode(); + + const Expr *LHS = BinaryOp->getLHS()->IgnoreImpCasts(); + const Expr *RHS = BinaryOp->getRHS()->IgnoreImpCasts(); + if (LHS == nullptr || RHS == nullptr) + return; + const Expr *SubExprLHS = nullptr; + const Expr *SubExprRHS = nullptr; + SourceRange R1 = SourceRange(LHS->getBeginLoc()); + SourceRange R2 = SourceRange(BinaryOp->getOperatorLoc()); + SourceRange R3 = SourceRange(Lexer::getLocForEndOfToken( + RHS->getEndLoc(), 0, *Result.SourceManager, getLangOpts())); + if (const auto *LHSCast = llvm::dyn_cast(LHS)) { + SubExprLHS = LHSCast->getSubExpr(); + R1 = SourceRange(LHS->getBeginLoc(), + SubExprLHS->getBeginLoc().getLocWithOffset(-1)); + R2.setBegin(Lexer::getLocForEndOfToken( + SubExprLHS->getEndLoc(), 0, *Result.SourceManager, getLangOpts())); + } + if (const auto *RHSCast = llvm::dyn_cast(RHS)) { + SubExprRHS = RHSCast->getSubExpr(); + R2.setEnd(SubExprRHS->getBeginLoc().getLocWithOffset(-1)); + } + DiagnosticBuilder Diag = + diag(BinaryOp->getBeginLoc(), + "comparison between 'signed' and 'unsigned' integers"); + const std::string CmpNamespace = ("std::" + parseOpCode(OpCode)).str(); + const std::string CmpHeader = ""; + // Prefer modernize-use-integer-sign-comparison when C++20 is available! 
+ Diag << FixItHint::CreateReplacement( + CharSourceRange(R1, SubExprLHS != nullptr), + llvm::Twine(CmpNamespace + "(").str()); + Diag << FixItHint::CreateReplacement(R2, ","); + Diag << FixItHint::CreateReplacement(CharSourceRange::getCharRange(R3), ")"); + + // If there is no include for cmp_{*} functions, we'll add it. + Diag << IncludeInserter.createIncludeInsertion( + Result.SourceManager->getFileID(BinaryOp->getBeginLoc()), CmpHeader); +} + +} // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h new file mode 100644 index 00000000000000..a1074829d6eca5 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h @@ -0,0 +1,42 @@ +//===--- UseIntegerSignComparisonCheck.h - clang-tidy -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEINTEGERSIGNCOMPARISONCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEINTEGERSIGNCOMPARISONCHECK_H + +#include "../ClangTidyCheck.h" +#include "../utils/IncludeInserter.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +namespace clang::tidy::modernize { + +/// Replace comparisons between signed and unsigned integers with their safe +/// C++20 ``std::cmp_*`` alternative, if available. 
+/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/modernize/use-integer-sign-comparison.html +class UseIntegerSignComparisonCheck : public ClangTidyCheck { +public: + UseIntegerSignComparisonCheck(StringRef Name, ClangTidyContext *Context); + + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, + Preprocessor *ModuleExpanderPP) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus20; + } + +private: + utils::IncludeInserter IncludeInserter; +}; + +} // namespace clang::tidy::modernize + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEINTEGERSIGNCOMPARISONCHECK_H diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index fddfffe7523d95..207e4c3e6722c8 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -458,20 +458,6 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { PrevAlias[Self] = T; NextAlias[T] = Self; }; - // Also grab prefixes for each option, these are not fully exposed. 
- llvm::ArrayRef Prefixes[DriverID::LastOption]; - -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); -#define OPTION(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ - METAVAR, VALUES) \ - Prefixes[DriverID::OPT_##ID] = PREFIX; -#include "clang/Driver/Options.inc" -#undef OPTION -#undef PREFIX struct { DriverID ID; @@ -498,7 +484,9 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { llvm::SmallVector Rules; // Iterate over each alias, to add rules for parsing it. for (unsigned A = ID; A != DriverID::OPT_INVALID; A = NextAlias[A]) { - if (!Prefixes[A].size()) // option groups. + llvm::SmallVector Prefixes; + DriverTable.appendOptionPrefixes(A, Prefixes); + if (Prefixes.empty()) // option groups. continue; auto Opt = DriverTable.getOption(A); // Exclude - and -foo pseudo-options. @@ -507,7 +495,7 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { auto Modes = getModes(Opt); std::pair ArgCount = getArgCount(Opt); // Iterate over each spelling of the alias, e.g. -foo vs --foo. - for (StringRef Prefix : Prefixes[A]) { + for (StringRef Prefix : Prefixes) { llvm::SmallString<64> Buf(Prefix); Buf.append(Opt.getName()); llvm::StringRef Spelling = Result->try_emplace(Buf).first->getKey(); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index b2b66dca6ccf85..6803842106791b 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -136,10 +136,16 @@ New checks Gives warnings for tagged unions, where the number of tags is different from the number of data members inside the union. +- New :doc:`modernize-use-integer-sign-comparison + ` check. + + Replace comparisons between signed and unsigned integers with their safe + C++20 ``std::cmp_*`` alternative, if available. 
+ - New :doc:`portability-template-virtual-member-function ` check. - Finds cases when an uninstantiated virtual member function in a template class + Finds cases when an uninstantiated virtual member function in a template class causes cross-compiler incompatibility. New check aliases @@ -176,6 +182,10 @@ Changes in existing checks ` check by fixing a crash when determining if an ``enable_if[_t]`` was found. +- Improved :doc:`bugprone-optional-value-conversion + ` to support detecting + conversion directly by ``std::make_unique`` and ``std::make_shared``. + - Improved :doc:`bugprone-posix-return ` check to support integer literals as LHS and posix call as RHS of comparison. diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index d731b13fc0df44..41f8f958e9e181 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -301,6 +301,7 @@ Clang-Tidy Checks :doc:`modernize-use-emplace `, "Yes" :doc:`modernize-use-equals-default `, "Yes" :doc:`modernize-use-equals-delete `, "Yes" + :doc:`modernize-use-integer-sign-comparison `, "Yes" :doc:`modernize-use-nodiscard `, "Yes" :doc:`modernize-use-noexcept `, "Yes" :doc:`modernize-use-nullptr `, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-integer-sign-comparison.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-integer-sign-comparison.rst new file mode 100644 index 00000000000000..7e2c13b782694f --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-integer-sign-comparison.rst @@ -0,0 +1,36 @@ +.. title:: clang-tidy - modernize-use-integer-sign-comparison + +modernize-use-integer-sign-comparison +===================================== + +Replace comparisons between signed and unsigned integers with their safe +C++20 ``std::cmp_*`` alternative, if available. 
+ +The check provides a replacement only for C++20 or later, otherwise +it highlights the problem and expects the user to fix it manually. + +Examples of fixes created by the check: + +.. code-block:: c++ + + unsigned int func(int a, unsigned int b) { + return a == b; + } + +becomes + +.. code-block:: c++ + + #include + + unsigned int func(int a, unsigned int b) { + return std::cmp_equal(a, b); + } + +Options +------- + +.. option:: IncludeStyle + + A string specifying which include-style is used, `llvm` or `google`. + Default is `llvm`. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp new file mode 100644 index 00000000000000..768ab1ce014cec --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp @@ -0,0 +1,53 @@ +// RUN: %check_clang_tidy -std=c++17-or-later %s bugprone-optional-value-conversion %t + +namespace std { +template struct optional { + constexpr optional() noexcept; + constexpr optional(T &&) noexcept; + constexpr optional(const T &) noexcept; + template constexpr optional(U &&) noexcept; + const T &operator*() const; + T *operator->(); + const T *operator->() const; + T &operator*(); + const T &value() const; + T &value(); + const T &get() const; + T &get(); + T value_or(T) const; +}; + +template T &&move(T &x) { return static_cast(x); } + +template class default_delete {}; + +template > +class unique_ptr {}; + +template +class shared_ptr {}; + +template unique_ptr make_unique(Args &&...args); +template shared_ptr make_shared(Args &&...args); + +} // namespace std + +struct A { + explicit A (int); +}; +std::optional opt; + +void invalid() { + std::make_unique>(opt.value()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: conversion from 'std::optional' into 'int' and back into 'std::optional', remove potentially error-prone 
optional dereference [bugprone-optional-value-conversion] + using A = std::optional; + std::make_unique(opt.value()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: conversion from 'std::optional' into 'int' and back into 'std::optional', remove potentially error-prone optional dereference [bugprone-optional-value-conversion] + std::make_shared>(opt.value()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: conversion from 'std::optional' into 'int' and back into 'std::optional', remove potentially error-prone optional dereference [bugprone-optional-value-conversion] +} + +void valid() { + std::make_unique(opt.value()); + std::make_shared(opt.value()); +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp new file mode 100644 index 00000000000000..99f00444c2d3f3 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp @@ -0,0 +1,122 @@ +// CHECK-FIXES: #include +// RUN: %check_clang_tidy -std=c++20 %s modernize-use-integer-sign-comparison %t + +// The code that triggers the check +#define MAX_MACRO(a, b) (a < b) ? 
b : a + +unsigned int FuncParameters(int bla) { + unsigned int result = 0; + if (result == bla) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_equal(result , bla)) + + return 1; +} + +template +void TemplateFuncParameter(T val) { + unsigned long uL = 0; + if (val >= uL) + return; +// CHECK-MESSAGES-NOT: warning: +} + +template +int TemplateFuncParameters(T1 val1, T2 val2) { + if (val1 >= val2) + return 0; +// CHECK-MESSAGES-NOT: warning: + return 1; +} + +int AllComparisons() { + unsigned int uVar = 42; + unsigned short uArray[7] = {0, 1, 2, 3, 9, 7, 9}; + + int sVar = -42; + short sArray[7] = {-1, -2, -8, -94, -5, -4, -6}; + + enum INT_TEST { + VAL1 = 0, + VAL2 = -1 + }; + + char ch = 'a'; + unsigned char uCh = 'a'; + signed char sCh = 'a'; + bool bln = false; + + if (bln == sVar) + return 0; +// CHECK-MESSAGES-NOT: warning: + + if (ch > uCh) + return 0; +// CHECK-MESSAGES-NOT: warning: + + if (sVar <= INT_TEST::VAL2) + return 0; +// CHECK-MESSAGES-NOT: warning: + + if (uCh < sCh) + return -1; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less(uCh , sCh)) + + if ((int)uVar < sVar) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less(uVar, sVar)) + + (uVar != sVar) ? uVar = sVar + : sVar = uVar; +// CHECK-MESSAGES: :[[@LINE-2]]:6: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: (std::cmp_not_equal(uVar , sVar)) ? 
uVar = sVar + + while (uArray[0] <= sArray[0]) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:12: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: while (std::cmp_less_equal(uArray[0] , sArray[0])) + + if (uArray[1] > sArray[1]) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_greater(uArray[1] , sArray[1])) + + MAX_MACRO(uVar, sArray[0]); +// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] + + if (static_cast(uArray[2]) < static_cast(sArray[2])) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less(uArray[2],sArray[2])) + + if ((unsigned int)uArray[3] < (int)sArray[3]) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less(uArray[3],sArray[3])) + + if ((unsigned int)(uArray[4]) < (int)(sArray[4])) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less((uArray[4]),(sArray[4]))) + + if (uArray[5] > sArray[5]) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_greater(uArray[5] , sArray[5])) + + #define VALUE sArray[6] + if (uArray[6] > VALUE) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_greater(uArray[6] , VALUE)) + + + FuncParameters(uVar); + 
TemplateFuncParameter(sVar); + TemplateFuncParameters(uVar, sVar); + + return 0; +} diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 8c39ef3d5a9fa6..88d93a79d00f8f 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -747,9 +747,9 @@ class DeclaratorDecl : public ValueDecl { /// ignoring outer template declarations. SourceLocation InnerLocStart; - bool hasExtInfo() const { return DeclInfo.is(); } - ExtInfo *getExtInfo() { return DeclInfo.get(); } - const ExtInfo *getExtInfo() const { return DeclInfo.get(); } + bool hasExtInfo() const { return isa(DeclInfo); } + ExtInfo *getExtInfo() { return cast(DeclInfo); } + const ExtInfo *getExtInfo() const { return cast(DeclInfo); } protected: DeclaratorDecl(Kind DK, DeclContext *DC, SourceLocation L, @@ -762,9 +762,8 @@ class DeclaratorDecl : public ValueDecl { friend class ASTDeclWriter; TypeSourceInfo *getTypeSourceInfo() const { - return hasExtInfo() - ? getExtInfo()->TInfo - : DeclInfo.get(); + return hasExtInfo() ? getExtInfo()->TInfo + : cast(DeclInfo); } void setTypeSourceInfo(TypeSourceInfo *TI) { @@ -3587,10 +3586,10 @@ class TagDecl : public TypeDecl, /// otherwise, it is a null (TypedefNameDecl) pointer. llvm::PointerUnion TypedefNameDeclOrQualifier; - bool hasExtInfo() const { return TypedefNameDeclOrQualifier.is(); } - ExtInfo *getExtInfo() { return TypedefNameDeclOrQualifier.get(); } + bool hasExtInfo() const { return isa(TypedefNameDeclOrQualifier); } + ExtInfo *getExtInfo() { return cast(TypedefNameDeclOrQualifier); } const ExtInfo *getExtInfo() const { - return TypedefNameDeclOrQualifier.get(); + return cast(TypedefNameDeclOrQualifier); } protected: @@ -3793,7 +3792,7 @@ class TagDecl : public TypeDecl, TypedefNameDecl *getTypedefNameForAnonDecl() const { return hasExtInfo() ? 
nullptr - : TypedefNameDeclOrQualifier.get(); + : cast(TypedefNameDeclOrQualifier); } void setTypedefNameForAnonDecl(TypedefNameDecl *TDD); @@ -4011,7 +4010,7 @@ class EnumDecl : public TagDecl { return QualType(); if (const Type *T = IntegerType.dyn_cast()) return QualType(T, 0); - return IntegerType.get()->getType().getUnqualifiedType(); + return cast(IntegerType)->getType().getUnqualifiedType(); } /// Set the underlying integer type. diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index a3447d19909752..82932e098c86f0 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -271,16 +271,12 @@ class alignas(8) Decl { /// // LexicalDC == global namespace llvm::PointerUnion DeclCtx; - bool isInSemaDC() const { return DeclCtx.is(); } - bool isOutOfSemaDC() const { return DeclCtx.is(); } + bool isInSemaDC() const { return isa(DeclCtx); } + bool isOutOfSemaDC() const { return isa(DeclCtx); } - MultipleDC *getMultipleDC() const { - return DeclCtx.get(); - } + MultipleDC *getMultipleDC() const { return cast(DeclCtx); } - DeclContext *getSemanticDC() const { - return DeclCtx.get(); - } + DeclContext *getSemanticDC() const { return cast(DeclCtx); } /// Loc - The location of this decl. SourceLocation Loc; @@ -1340,7 +1336,7 @@ class DeclListNode { assert(Ptr && "dereferencing end() iterator"); if (DeclListNode *CurNode = Ptr.dyn_cast()) return CurNode->D; - return Ptr.get(); + return cast(Ptr); } void operator->() const { } // Unsupported. bool operator==(const iterator &X) const { return Ptr == X.Ptr; } diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index e389b5cd6df5b9..c232556edeff70 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -2388,19 +2388,19 @@ class CXXCtorInitializer final { /// Determine whether this initializer is initializing a base class. 
bool isBaseInitializer() const { - return Initializee.is() && !IsDelegating; + return isa(Initializee) && !IsDelegating; } /// Determine whether this initializer is initializing a non-static /// data member. - bool isMemberInitializer() const { return Initializee.is(); } + bool isMemberInitializer() const { return isa(Initializee); } bool isAnyMemberInitializer() const { return isMemberInitializer() || isIndirectMemberInitializer(); } bool isIndirectMemberInitializer() const { - return Initializee.is(); + return isa(Initializee); } /// Determine whether this initializer is an implicit initializer @@ -2416,7 +2416,7 @@ class CXXCtorInitializer final { /// Determine whether this initializer is creating a delegating /// constructor. bool isDelegatingInitializer() const { - return Initializee.is() && IsDelegating; + return isa(Initializee) && IsDelegating; } /// Determine whether this initializer is a pack expansion. @@ -2457,21 +2457,21 @@ class CXXCtorInitializer final { /// non-static data member being initialized. Otherwise, returns null. 
FieldDecl *getMember() const { if (isMemberInitializer()) - return Initializee.get(); + return cast(Initializee); return nullptr; } FieldDecl *getAnyMember() const { if (isMemberInitializer()) - return Initializee.get(); + return cast(Initializee); if (isIndirectMemberInitializer()) - return Initializee.get()->getAnonField(); + return cast(Initializee)->getAnonField(); return nullptr; } IndirectFieldDecl *getIndirectMember() const { if (isIndirectMemberInitializer()) - return Initializee.get(); + return cast(Initializee); return nullptr; } diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index dd92d40b804232..44ccf8932a1830 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1965,7 +1965,7 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization; - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } /// Retrieve the set of template arguments that should be used @@ -2013,7 +2013,7 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const { if (auto *Info = ExplicitInfo.dyn_cast()) return Info->TemplateArgsAsWritten; - return ExplicitInfo.get(); + return cast(ExplicitInfo); } /// Set the template argument list as written in the sources. 
@@ -2734,7 +2734,7 @@ class VarTemplateSpecializationDecl : public VarDecl, SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization; - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } /// Retrieve the set of template arguments that should be used @@ -2782,7 +2782,7 @@ class VarTemplateSpecializationDecl : public VarDecl, const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const { if (auto *Info = ExplicitInfo.dyn_cast()) return Info->TemplateArgsAsWritten; - return ExplicitInfo.get(); + return cast(ExplicitInfo); } /// Set the template argument list as written in the sources. @@ -3309,7 +3309,7 @@ inline NamedDecl *getAsNamedDecl(TemplateParameter P) { return PD; if (auto *PD = P.dyn_cast()) return PD; - return P.get(); + return cast(P); } inline TemplateDecl *getAsTypeTemplateDecl(Decl *D) { diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 1a24b8857674ca..4cec89c979f775 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -878,7 +878,7 @@ class CXXTypeidExpr : public Expr { /// object. This is not a strong guarantee. bool isMostDerived(const ASTContext &Context) const; - bool isTypeOperand() const { return Operand.is(); } + bool isTypeOperand() const { return isa(Operand); } /// Retrieves the type operand of this typeid() expression after /// various required adjustments (removing reference types, cv-qualifiers). @@ -887,11 +887,11 @@ class CXXTypeidExpr : public Expr { /// Retrieve source information for the type operand. 
TypeSourceInfo *getTypeOperandSourceInfo() const { assert(isTypeOperand() && "Cannot call getTypeOperand for typeid(expr)"); - return Operand.get(); + return cast(Operand); } Expr *getExprOperand() const { assert(!isTypeOperand() && "Cannot call getExprOperand for typeid(type)"); - return static_cast(Operand.get()); + return static_cast(cast(Operand)); } SourceLocation getBeginLoc() const LLVM_READONLY { return Range.getBegin(); } @@ -1093,7 +1093,7 @@ class CXXUuidofExpr : public Expr { Operand = (TypeSourceInfo*)nullptr; } - bool isTypeOperand() const { return Operand.is(); } + bool isTypeOperand() const { return isa(Operand); } /// Retrieves the type operand of this __uuidof() expression after /// various required adjustments (removing reference types, cv-qualifiers). @@ -1102,11 +1102,11 @@ class CXXUuidofExpr : public Expr { /// Retrieve source information for the type operand. TypeSourceInfo *getTypeOperandSourceInfo() const { assert(isTypeOperand() && "Cannot call getTypeOperand for __uuidof(expr)"); - return Operand.get(); + return cast(Operand); } Expr *getExprOperand() const { assert(!isTypeOperand() && "Cannot call getExprOperand for __uuidof(type)"); - return static_cast(Operand.get()); + return static_cast(cast(Operand)); } MSGuidDecl *getGuidDecl() const { return Guid; } @@ -4750,24 +4750,24 @@ class MaterializeTemporaryExpr : public Expr { /// be materialized into a glvalue. Expr *getSubExpr() const { return cast( - State.is() - ? State.get() - : State.get()->getTemporaryExpr()); + isa(State) + ? cast(State) + : cast(State)->getTemporaryExpr()); } /// Retrieve the storage duration for the materialized temporary. StorageDuration getStorageDuration() const { - return State.is() ? SD_FullExpression - : State.get() + return isa(State) ? SD_FullExpression + : cast(State) ->getStorageDuration(); } /// Get the storage for the constant value of a materialized temporary /// of static storage duration. 
APValue *getOrCreateValue(bool MayCreate) const { - assert(State.is() && + assert(isa(State) && "the temporary has not been lifetime extended"); - return State.get()->getOrCreateValue( + return cast(State)->getOrCreateValue( MayCreate); } @@ -4782,8 +4782,8 @@ class MaterializeTemporaryExpr : public Expr { /// Get the declaration which triggered the lifetime-extension of this /// temporary, if any. ValueDecl *getExtendingDecl() { - return State.is() ? nullptr - : State.get() + return isa(State) ? nullptr + : cast(State) ->getExtendingDecl(); } const ValueDecl *getExtendingDecl() const { @@ -4793,8 +4793,8 @@ class MaterializeTemporaryExpr : public Expr { void setExtendingDecl(ValueDecl *ExtendedBy, unsigned ManglingNumber); unsigned getManglingNumber() const { - return State.is() ? 0 - : State.get() + return isa(State) ? 0 + : cast(State) ->getManglingNumber(); } @@ -4820,17 +4820,17 @@ class MaterializeTemporaryExpr : public Expr { // Iterators child_range children() { - return State.is() + return isa(State) ? child_range(State.getAddrOfPtr1(), State.getAddrOfPtr1() + 1) - : State.get()->childrenExpr(); + : cast(State)->childrenExpr(); } const_child_range children() const { - return State.is() + return isa(State) ? 
const_child_range(State.getAddrOfPtr1(), State.getAddrOfPtr1() + 1) : const_cast( - State.get()) + cast(State)) ->childrenExpr(); } }; diff --git a/clang/include/clang/AST/ExprConcepts.h b/clang/include/clang/AST/ExprConcepts.h index f3e32ce3961981..86c4155b6a853e 100644 --- a/clang/include/clang/AST/ExprConcepts.h +++ b/clang/include/clang/AST/ExprConcepts.h @@ -261,13 +261,13 @@ class TypeRequirement : public Requirement { assert(Status == SS_SubstitutionFailure && "Attempted to get substitution diagnostic when there has been no " "substitution failure."); - return Value.get(); + return cast(Value); } TypeSourceInfo *getType() const { assert(!isSubstitutionFailure() && "Attempted to get type when there has been a substitution failure."); - return Value.get(); + return cast(Value); } static bool classof(const Requirement *R) { @@ -409,14 +409,14 @@ class ExprRequirement : public Requirement { assert(isExprSubstitutionFailure() && "Attempted to get expression substitution diagnostic when there has " "been no expression substitution failure"); - return Value.get(); + return cast(Value); } Expr *getExpr() const { assert(!isExprSubstitutionFailure() && "ExprRequirement has no expression because there has been a " "substitution failure."); - return Value.get(); + return cast(Value); } static bool classof(const Requirement *R) { diff --git a/clang/include/clang/AST/ExprObjC.h b/clang/include/clang/AST/ExprObjC.h index f833916c91aa54..1fccc260695820 100644 --- a/clang/include/clang/AST/ExprObjC.h +++ b/clang/include/clang/AST/ExprObjC.h @@ -752,28 +752,24 @@ class ObjCPropertyRefExpr : public Expr { setMethodRefFlag(MethodRef_Setter, val); } - const Expr *getBase() const { - return cast(Receiver.get()); - } - Expr *getBase() { - return cast(Receiver.get()); - } + const Expr *getBase() const { return cast(cast(Receiver)); } + Expr *getBase() { return cast(cast(Receiver)); } SourceLocation getLocation() const { return IdLoc; } SourceLocation getReceiverLocation() const { 
return ReceiverLoc; } QualType getSuperReceiverType() const { - return QualType(Receiver.get(), 0); + return QualType(cast(Receiver), 0); } ObjCInterfaceDecl *getClassReceiver() const { - return Receiver.get(); + return cast(Receiver); } - bool isObjectReceiver() const { return Receiver.is(); } - bool isSuperReceiver() const { return Receiver.is(); } - bool isClassReceiver() const { return Receiver.is(); } + bool isObjectReceiver() const { return isa(Receiver); } + bool isSuperReceiver() const { return isa(Receiver); } + bool isClassReceiver() const { return isa(Receiver); } /// Determine the type of the base, regardless of the kind of receiver. QualType getReceiverType(const ASTContext &ctx) const; @@ -787,7 +783,7 @@ class ObjCPropertyRefExpr : public Expr { // Iterators child_range children() { - if (Receiver.is()) { + if (isa(Receiver)) { Stmt **begin = reinterpret_cast(&Receiver); // hack! return child_range(begin, begin+1); } diff --git a/clang/include/clang/AST/Redeclarable.h b/clang/include/clang/AST/Redeclarable.h index 8d320a9ced2792..bba789375cb6ed 100644 --- a/clang/include/clang/AST/Redeclarable.h +++ b/clang/include/clang/AST/Redeclarable.h @@ -113,25 +113,24 @@ class Redeclarable { DeclLink(PreviousTag, decl_type *D) : Link(NotKnownLatest(Previous(D))) {} bool isFirst() const { - return Link.is() || + return isa(Link) || // FIXME: 'template' is required on the next line due to an // apparent clang bug. - Link.get().template is(); + cast(Link).template is(); } decl_type *getPrevious(const decl_type *D) const { - if (Link.is()) { - NotKnownLatest NKL = Link.get(); - if (NKL.is()) - return static_cast(NKL.get()); + if (NotKnownLatest NKL = dyn_cast(Link)) { + if (auto *Prev = dyn_cast(NKL)) + return static_cast(Prev); // Allocate the generational 'most recent' cache now, if needed. 
Link = KnownLatest(*reinterpret_cast( - NKL.get()), + cast(NKL)), const_cast(D)); } - return static_cast(Link.get().get(D)); + return static_cast(cast(Link).get(D)); } void setPrevious(decl_type *D) { @@ -141,25 +140,24 @@ class Redeclarable { void setLatest(decl_type *D) { assert(isFirst() && "decl became canonical unexpectedly"); - if (Link.is()) { - NotKnownLatest NKL = Link.get(); + if (NotKnownLatest NKL = dyn_cast(Link)) { Link = KnownLatest(*reinterpret_cast( - NKL.get()), + cast(NKL)), D); } else { - auto Latest = Link.get(); + auto Latest = cast(Link); Latest.set(D); Link = Latest; } } - void markIncomplete() { Link.get().markIncomplete(); } + void markIncomplete() { cast(Link).markIncomplete(); } Decl *getLatestNotUpdated() const { assert(isFirst() && "expected a canonical decl"); - if (Link.is()) + if (isa(Link)) return nullptr; - return Link.get().getNotUpdated(); + return cast(Link).getNotUpdated(); } }; diff --git a/clang/include/clang/AST/TemplateBase.h b/clang/include/clang/AST/TemplateBase.h index a8f0263d5505ac..9d0ee24a4f5e35 100644 --- a/clang/include/clang/AST/TemplateBase.h +++ b/clang/include/clang/AST/TemplateBase.h @@ -484,7 +484,7 @@ struct TemplateArgumentLocInfo { Pointer; TemplateTemplateArgLocInfo *getTemplate() const { - return Pointer.get(); + return cast(Pointer); } public: @@ -499,10 +499,10 @@ struct TemplateArgumentLocInfo { SourceLocation EllipsisLoc); TypeSourceInfo *getAsTypeSourceInfo() const { - return Pointer.get(); + return cast(Pointer); } - Expr *getAsExpr() const { return Pointer.get(); } + Expr *getAsExpr() const { return cast(Pointer); } NestedNameSpecifierLoc getTemplateQualifierLoc() const { const auto *Template = getTemplate(); diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 30dc5c55d411ab..b9f40faf0b18e6 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2468,4 +2468,11 @@ let SVETargetGuard = "sve2,fp8", SMETargetGuard = 
"sme2,fp8" in { // 8-bit floating-point convert to BFloat16/Float16 (top) def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>; def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>; + + // BFloat16/Float16 convert, narrow and interleave to 8-bit floating-point + def SVFCVTN : SInst<"svcvtn_mf8[_{d}_x2]_fpm", "~2>", "bh", MergeNone, "aarch64_sve_fp8_cvtn", [VerifyRuntimeMode, SetsFPMR]>; + + // Single-precision convert, narrow and interleave to 8-bit floating-point (top and bottom) + def SVFCVTNB : SInst<"svcvtnb_mf8[_f32_x2]_fpm", "~2>", "f", MergeNone, "aarch64_sve_fp8_cvtnb", [VerifyRuntimeMode, SetsFPMR]>; + def SVFCVTNT : SInst<"svcvtnt_mf8[_f32_x2]_fpm", "~~2>", "f", MergeNone, "aarch64_sve_fp8_cvtnt", [VerifyRuntimeMode, SetsFPMR]>; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 4fe17ec01906e9..57175da32b31cd 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1876,7 +1876,7 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, } // Check for overlapping memory regions. 
- if (!Move && SrcPtr.block() == DestPtr.block()) { + if (!Move && Pointer::pointToSameBlock(SrcPtr, DestPtr)) { unsigned SrcIndex = SrcPtr.getIndex() * SrcPtr.elemSize(); unsigned DstIndex = DestPtr.getIndex() * DestPtr.elemSize(); unsigned N = Size.getZExtValue(); diff --git a/clang/lib/Driver/DriverOptions.cpp b/clang/lib/Driver/DriverOptions.cpp index 053e7f1c6404fe..cde1f8989935b0 100644 --- a/clang/lib/Driver/DriverOptions.cpp +++ b/clang/lib/Driver/DriverOptions.cpp @@ -14,24 +14,21 @@ using namespace clang::driver; using namespace clang::driver::options; using namespace llvm::opt; +#define OPTTABLE_STR_TABLE_CODE +#include "clang/Driver/Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + #define OPTTABLE_VALUES_CODE #include "clang/Driver/Options.inc" #undef OPTTABLE_VALUES_CODE -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "clang/Driver/Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE -static constexpr const llvm::StringLiteral PrefixTable_init[] = -#define PREFIX_UNION(VALUES) VALUES +#define OPTTABLE_PREFIXES_UNION_CODE #include "clang/Driver/Options.inc" -#undef PREFIX_UNION - ; -static constexpr const llvm::ArrayRef - PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1); +#undef OPTTABLE_PREFIXES_UNION_CODE static constexpr OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -43,7 +40,9 @@ namespace { class DriverOptTable : public PrecomputedOptTable { public: - DriverOptTable() : PrecomputedOptTable(InfoTable, PrefixTable) {} + DriverOptTable() + : PrecomputedOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + OptionPrefixesUnion) {} }; } diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 1abfe8fd92807e..e77857930996b2 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -418,8 +418,14 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, Add & NotAllowedWithExecuteOnly & ~DiagnosedKinds) { if (DiagnoseErrors) { std::string Desc = describeSanitizeArg(Arg, KindsToDiagnose); - D.Diag(diag::err_drv_argument_not_allowed_with) - << Desc << Triple.str(); + llvm::opt::Arg *A = Args.getLastArgNoClaim( + options::OPT_mexecute_only, options::OPT_mno_execute_only); + if (A && A->getOption().matches(options::OPT_mexecute_only)) + D.Diag(diag::err_drv_argument_not_allowed_with) + << Desc << A->getAsString(Args); + else + D.Diag(diag::err_drv_unsupported_opt_for_target) + << Desc << Triple.str(); } DiagnosedKinds |= KindsToDiagnose; } diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 03dbdc27975b42..3c78b12b0741e0 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -494,6 +494,39 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, else A.renderAsInput(Args, CmdArgs); } + if (const Arg *A = Args.getLastArg(options::OPT_fveclib)) { + const llvm::Triple &Triple = TC.getTriple(); + StringRef V = A->getValue(); + if (V == "ArmPL" && (Triple.isOSLinux() || Triple.isOSDarwin())) { + // To support -fveclib=ArmPL we need to link against libamath. Some of the + // libamath functions depend on libm, at the same time, libamath exports + // its own implementation of some of the libm functions. 
These are faster + // and potentially less accurate implementations, hence we need to be + // careful what is being linked in. Since here we are interested only in + // the subset of libamath functions that is covered by the veclib + // mappings, we need to prioritize libm functions by putting -lm before + // -lamath (and then -lm again, to fulfill libamath requirements). + // + // Therefore we need to do the following: + // + // 1. On Linux, link only when actually needed. + // + // 2. Prefer libm functions over libamath. + // + // 3. Link against libm to resolve libamath dependencies. + // + if (Triple.isOSLinux()) { + CmdArgs.push_back(Args.MakeArgString("--push-state")); + CmdArgs.push_back(Args.MakeArgString("--as-needed")); + } + CmdArgs.push_back(Args.MakeArgString("-lm")); + CmdArgs.push_back(Args.MakeArgString("-lamath")); + CmdArgs.push_back(Args.MakeArgString("-lm")); + if (Triple.isOSLinux()) + CmdArgs.push_back(Args.MakeArgString("--pop-state")); + addArchSpecificRPath(TC, Args, CmdArgs); + } + } } void tools::addLinkerCompressDebugSectionsOption( diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 80799d1e715f07..752c2e2751ab67 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -84,6 +84,12 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, else if (TC.getTriple().isWindowsArm64EC()) CmdArgs.push_back("-machine:arm64ec"); + if (const Arg *A = Args.getLastArg(options::OPT_fveclib)) { + StringRef V = A->getValue(); + if (V == "ArmPL") + CmdArgs.push_back(Args.MakeArgString("--dependent-lib=amath")); + } + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) && !C.getDriver().IsCLMode() && !C.getDriver().IsFlangMode()) { CmdArgs.push_back("-defaultlib:libcmt"); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 98136b7a455d9c..23906d5c06d380 100644 --- 
a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -277,6 +277,14 @@ CowCompilerInvocation::getMutPreprocessorOutputOpts() { using ArgumentConsumer = CompilerInvocation::ArgumentConsumer; +#define OPTTABLE_STR_TABLE_CODE +#include "clang/Driver/Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + +static llvm::StringRef lookupStrInTable(unsigned Offset) { + return &OptionStrTable[Offset]; +} + #define SIMPLE_ENUM_VALUE_TABLE #include "clang/Driver/Options.inc" #undef SIMPLE_ENUM_VALUE_TABLE @@ -303,6 +311,11 @@ static std::optional normalizeSimpleNegativeFlag(OptSpecifier Opt, /// denormalizeSimpleFlags never looks at it. Avoid bloating compile-time with /// unnecessary template instantiations and just ignore it with a variadic /// argument. +static void denormalizeSimpleFlag(ArgumentConsumer Consumer, + unsigned SpellingOffset, Option::OptionClass, + unsigned, /*T*/...) { + Consumer(lookupStrInTable(SpellingOffset)); +} static void denormalizeSimpleFlag(ArgumentConsumer Consumer, const Twine &Spelling, Option::OptionClass, unsigned, /*T*/...) 
{ @@ -343,10 +356,10 @@ static auto makeBooleanOptionNormalizer(bool Value, bool OtherValue, } static auto makeBooleanOptionDenormalizer(bool Value) { - return [Value](ArgumentConsumer Consumer, const Twine &Spelling, + return [Value](ArgumentConsumer Consumer, unsigned SpellingOffset, Option::OptionClass, unsigned, bool KeyPath) { if (KeyPath == Value) - Consumer(Spelling); + Consumer(lookupStrInTable(SpellingOffset)); }; } @@ -371,6 +384,14 @@ static void denormalizeStringImpl(ArgumentConsumer Consumer, } } +template +static void +denormalizeString(ArgumentConsumer Consumer, unsigned SpellingOffset, + Option::OptionClass OptClass, unsigned TableIndex, T Value) { + denormalizeStringImpl(Consumer, lookupStrInTable(SpellingOffset), OptClass, + TableIndex, Twine(Value)); +} + template static void denormalizeString(ArgumentConsumer Consumer, const Twine &Spelling, Option::OptionClass OptClass, unsigned TableIndex, @@ -417,14 +438,14 @@ static std::optional normalizeSimpleEnum(OptSpecifier Opt, } static void denormalizeSimpleEnumImpl(ArgumentConsumer Consumer, - const Twine &Spelling, + unsigned SpellingOffset, Option::OptionClass OptClass, unsigned TableIndex, unsigned Value) { assert(TableIndex < SimpleEnumValueTablesSize); const SimpleEnumValueTable &Table = SimpleEnumValueTables[TableIndex]; if (auto MaybeEnumVal = findValueTableByValue(Table, Value)) { - denormalizeString(Consumer, Spelling, OptClass, TableIndex, - MaybeEnumVal->Name); + denormalizeString(Consumer, lookupStrInTable(SpellingOffset), OptClass, + TableIndex, MaybeEnumVal->Name); } else { llvm_unreachable("The simple enum value was not correctly defined in " "the tablegen option description"); @@ -433,11 +454,11 @@ static void denormalizeSimpleEnumImpl(ArgumentConsumer Consumer, template static void denormalizeSimpleEnum(ArgumentConsumer Consumer, - const Twine &Spelling, + unsigned SpellingOffset, Option::OptionClass OptClass, unsigned TableIndex, T Value) { - return 
denormalizeSimpleEnumImpl(Consumer, Spelling, OptClass, TableIndex, - static_cast(Value)); + return denormalizeSimpleEnumImpl(Consumer, SpellingOffset, OptClass, + TableIndex, static_cast(Value)); } static std::optional normalizeString(OptSpecifier Opt, @@ -473,7 +494,7 @@ normalizeStringVector(OptSpecifier Opt, int, const ArgList &Args, } static void denormalizeStringVector(ArgumentConsumer Consumer, - const Twine &Spelling, + unsigned SpellingOffset, Option::OptionClass OptClass, unsigned TableIndex, const std::vector &Values) { @@ -487,15 +508,16 @@ static void denormalizeStringVector(ArgumentConsumer Consumer, CommaJoinedValue.append(Value); } } - denormalizeString(Consumer, Spelling, Option::OptionClass::JoinedClass, - TableIndex, CommaJoinedValue); + denormalizeString(Consumer, SpellingOffset, + Option::OptionClass::JoinedClass, TableIndex, + CommaJoinedValue); break; } case Option::JoinedClass: case Option::SeparateClass: case Option::JoinedOrSeparateClass: for (const std::string &Value : Values) - denormalizeString(Consumer, Spelling, OptClass, TableIndex, Value); + denormalizeString(Consumer, SpellingOffset, OptClass, TableIndex, Value); break; default: llvm_unreachable("Cannot denormalize an option with option class " @@ -532,10 +554,11 @@ static T extractMaskValue(T KeyPath) { } #define PARSE_OPTION_WITH_MARSHALLING( \ - ARGS, DIAGS, PREFIX_TYPE, SPELLING, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ - SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ - IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ + ARGS, DIAGS, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ + IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \ + TABLE_INDEX) \ if ((VISIBILITY) & 
options::CC1Option) { \ KEYPATH = MERGER(KEYPATH, DEFAULT_VALUE); \ if (IMPLIED_CHECK) \ @@ -549,8 +572,8 @@ static T extractMaskValue(T KeyPath) { // Capture the extracted value as a lambda argument to avoid potential issues // with lifetime extension of the reference. #define GENERATE_OPTION_WITH_MARSHALLING( \ - CONSUMER, PREFIX_TYPE, SPELLING, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ + CONSUMER, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ if ((VISIBILITY) & options::CC1Option) { \ @@ -559,8 +582,8 @@ static T extractMaskValue(T KeyPath) { (Extracted != \ static_cast((IMPLIED_CHECK) ? (IMPLIED_VALUE) \ : (DEFAULT_VALUE)))) \ - DENORMALIZER(CONSUMER, SPELLING, Option::KIND##Class, TABLE_INDEX, \ - Extracted); \ + DENORMALIZER(CONSUMER, SPELLING_OFFSET, Option::KIND##Class, \ + TABLE_INDEX, Extracted); \ }(EXTRACTOR(KEYPATH)); \ } diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index b64c72904b19a4..bc59de3c1a0ada 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -86,10 +86,14 @@ OpenACCClauseKind getOpenACCClauseKind(Token Tok) { if (Tok.is(tok::kw_if)) return OpenACCClauseKind::If; - // 'private' is also a keyword, make sure we pare it correctly. + // 'private' is also a keyword, make sure we parse it correctly. if (Tok.is(tok::kw_private)) return OpenACCClauseKind::Private; + // 'delete' is a keyword, make sure we parse it correctly. 
+ if (Tok.is(tok::kw_delete)) + return OpenACCClauseKind::Delete; + if (!Tok.is(tok::identifier)) return OpenACCClauseKind::Invalid; diff --git a/clang/test/CodeGen/AArch64/cpu-supports.c b/clang/test/CodeGen/AArch64/cpu-supports.c index 76fcea0be31581..406201781d4803 100644 --- a/clang/test/CodeGen/AArch64/cpu-supports.c +++ b/clang/test/CodeGen/AArch64/cpu-supports.c @@ -18,8 +18,8 @@ // CHECK-NEXT: br label [[RETURN:%.*]] // CHECK: if.end: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17867063951360 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17867063951360 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17936857268992 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17936857268992 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[IF_THEN1:%.*]], label [[IF_END2:%.*]] // CHECK: if.then1: @@ -27,8 +27,8 @@ // CHECK-NEXT: br label [[RETURN]] // CHECK: if.end2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 171136785840078848 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 171136785840078848 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 171141184020873984 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 171141184020873984 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[IF_THEN3:%.*]], label [[IF_END4:%.*]] // CHECK: if.then3: diff --git a/clang/test/CodeGen/AArch64/fmv-dependencies.c b/clang/test/CodeGen/AArch64/fmv-dependencies.c index f74b7aa32c7dca..3a524b89496e04 100644 --- a/clang/test/CodeGen/AArch64/fmv-dependencies.c +++ b/clang/test/CodeGen/AArch64/fmv-dependencies.c @@ -183,10 +183,10 @@ int caller() { // CHECK: attributes #[[sha2]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sha2,+v8a" // CHECK: attributes #[[sha3]] = { {{.*}} 
"target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sha2,+sha3,+v8a" // CHECK: attributes #[[sm4]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sm4,+v8a" -// CHECK: attributes #[[sme]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+v8a" -// CHECK: attributes #[[sme_f64f64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme-f64f64,+v8a" -// CHECK: attributes #[[sme_i16i64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme-i16i64,+v8a" -// CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme2,+v8a" +// CHECK: attributes #[[sme]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+v8a" +// CHECK: attributes #[[sme_f64f64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme-f64f64,+v8a" +// CHECK: attributes #[[sme_i16i64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme-i16i64,+v8a" +// CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme2,+v8a" // CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a" // CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a" // CHECK: attributes #[[sve2]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+v8a" diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c new file mode 100644 index 00000000000000..ed5b0ce02af4bd --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c @@ -0,0 +1,101 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#ifdef __ARM_FEATURE_SME +#include +#else +#include +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local @test_svcvtn_f8_bf16( +// CHECK-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail 
call @llvm.aarch64.sve.fp8.cvtn.nxv8bf16( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_svcvtn_f8_bf1614svbfloat16x2_tm( +// CHECK-CXX-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtn.nxv8bf16( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svcvtn_f8_bf16(svbfloat16x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtn_mf8,_bf16_x2,_fpm)(zn_zm, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtn_f8_f16( +// CHECK-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtn.nxv8f16( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z18test_svcvtn_f8_f1613svfloat16x2_tm( +// CHECK-CXX-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtn.nxv8f16( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svcvtn_f8_f16(svfloat16x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtn_mf8,_f16_x2,_fpm)(zn_zm, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtnb_f8_f32( +// CHECK-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.fp8.cvtnb.nxv4f32( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_svcvtnb_f8_f3213svfloat32x2_tm( +// CHECK-CXX-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtnb.nxv4f32( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svcvtnb_f8_f32(svfloat32x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtnb_mf8,_f32_x2,_fpm)(zn_zm, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtnt_f8_f32( +// CHECK-SAME: [[ZD:%.*]], [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtnt.nxv4f32( [[ZD]], [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_svcvtnt_f8_f32u13__SVMfloat8_t13svfloat32x2_tm( +// CHECK-CXX-SAME: [[ZD:%.*]], [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtnt.nxv4f32( [[ZD]], [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svcvtnt_f8_f32(svmfloat8_t zd, svfloat32x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtnt_mf8,_f32_x2,_fpm)(zd, zn_zm, fpm); +} diff --git a/clang/test/CodeGen/AArch64/mixed-target-attributes.c b/clang/test/CodeGen/AArch64/mixed-target-attributes.c index bb6fb7eb8862a2..1ccb0c6177c8c5 100644 --- a/clang/test/CodeGen/AArch64/mixed-target-attributes.c +++ 
b/clang/test/CodeGen/AArch64/mixed-target-attributes.c @@ -66,24 +66,24 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @explicit_default._Mjscvt // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: // CHECK-NEXT: ret ptr @explicit_default._Mrdm // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 784 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 784 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: @@ -140,24 +140,24 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void 
@__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @implicit_default._Mjscvt // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: // CHECK-NEXT: ret ptr @implicit_default._Mrdm // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 784 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 784 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: @@ -207,16 +207,16 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576 +// 
CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @default_def_with_version_decls._Mjscvt // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 784 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 784 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c index 961279424754d5..6b7acbbd4fc597 100644 --- a/clang/test/CodeGen/attr-target-clones-aarch64.c +++ b/clang/test/CodeGen/attr-target-clones-aarch64.c @@ -64,16 +64,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 32896 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 32896 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @ftc._MaesMlse // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 68719476736 -// CHECK-NEXT: [[TMP6:%.*]] = 
icmp eq i64 [[TMP5]], 68719476736 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: @@ -100,16 +100,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186049280 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186049280 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @ftc_def._MmemtagMsha2 // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4096 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4864 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4864 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: @@ -129,8 +129,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 
[[TMP0]], 4864 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4864 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -157,8 +157,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1040 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1040 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1808 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1808 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -310,16 +310,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 549757911040 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 549757911040 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 619551195904 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 619551195904 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @ftc_inline2._MfcmaMsve2-bitperm // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65536 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65536 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65792 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65792 // CHECK-NEXT: 
[[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: @@ -360,8 +360,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014673387388928 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014673387388928 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014743180706560 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014743180706560 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -376,8 +376,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: ret ptr @ftc_inline1._MpredresMrcpc // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 513 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 513 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 769 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 769 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: @@ -411,8 +411,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817919488 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817919488 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817985280 +// CHECK-NEXT: 
[[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817985280 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -521,16 +521,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 32896 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 32896 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc._MaesMlse // CHECK-MTE-BTI: resolver_else: // CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 68719476736 -// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 68719476736 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352 // CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK-MTE-BTI: resolver_return1: @@ -557,16 +557,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512 -// CHECK-MTE-BTI-NEXT: 
[[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186049280 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186049280 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_def._MmemtagMsha2 // CHECK-MTE-BTI: resolver_else: // CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096 -// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4096 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4864 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4864 // CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK-MTE-BTI: resolver_return1: @@ -586,8 +586,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4864 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4864 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: @@ -614,8 +614,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = 
load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1040 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1040 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1808 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1808 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: @@ -767,16 +767,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 549757911040 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 549757911040 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 619551195904 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 619551195904 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline2._MfcmaMsve2-bitperm // CHECK-MTE-BTI: resolver_else: // CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65536 -// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65536 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65792 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65792 // CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK-MTE-BTI: resolver_return1: @@ -817,8 +817,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", 
"default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014673387388928 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014673387388928 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014743180706560 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014743180706560 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: @@ -833,8 +833,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline1._MpredresMrcpc // CHECK-MTE-BTI: resolver_else2: // CHECK-MTE-BTI-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 513 -// CHECK-MTE-BTI-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 513 +// CHECK-MTE-BTI-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 769 +// CHECK-MTE-BTI-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 769 // CHECK-MTE-BTI-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK-MTE-BTI: resolver_return3: @@ -868,8 +868,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817919488 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817919488 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817985280 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 
[[TMP1]], 70369817985280 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index 4194ce26870504..428e7937d8d39e 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -460,24 +460,24 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 11 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 11 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 66315 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 66315 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @fmv._MflagmMfp16fmlMrng // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927940 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 72057594037927940 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72061992218723078 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 72061992218723078 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: // CHECK-NEXT: ret ptr @fmv._Mflagm2Msme-i16i64 // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 9007199254741008 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 9007199254741008 
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 9007199254741776 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 9007199254741776 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: @@ -492,32 +492,32 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: ret ptr @fmv._McrcMls64 // CHECK: resolver_else6: // CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17592186044424 -// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17592186044424 +// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17592186110728 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17592186110728 // CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]] // CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]] // CHECK: resolver_return7: // CHECK-NEXT: ret ptr @fmv._Mfp16fmlMmemtag // CHECK: resolver_else8: // CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 33024 -// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 33024 +// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 33536 +// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 33536 // CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]] // CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]] // CHECK: resolver_return9: // CHECK-NEXT: ret ptr @fmv._MaesMfp // CHECK: resolver_else10: // CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 4224 -// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 4224 +// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 4992 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 4992 // CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]] // 
CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]] // CHECK: resolver_return11: // CHECK-NEXT: ret ptr @fmv._MlseMsha2 // CHECK: resolver_else12: // CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 144115188075855872 -// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 144115188075855872 +// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 144119586256651008 +// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 144119586256651008 // CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]] // CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]] // CHECK: resolver_return13: @@ -538,8 +538,8 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254741504 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254741504 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254741760 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254741760 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -560,16 +560,16 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 66048 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 66048 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 66304 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 66304 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // 
CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @fmv_two._Mfp16Msimd // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 512 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 512 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 768 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 768 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: @@ -765,128 +765,128 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4398048673856 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4398048673856 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4398182892352 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4398182892352 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @fmv_inline._MfcmaMfp16MrdmMsme // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864708720641179648 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864708720641179648 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864708720653762560 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864708720653762560 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: // CHECK-NEXT: ret ptr 
@fmv_inline._MmemtagMmopsMrcpc3 // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 893353197568 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 893353197568 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 894427038464 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 894427038464 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: // CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-aesMsve2-bitperm // CHECK: resolver_else4: // CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 34359775232 -// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 34359775232 +// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 35433583360 +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 35433583360 // CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]] // CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]] // CHECK: resolver_return5: // CHECK-NEXT: ret ptr @fmv_inline._MaesMf64mmMsha2 // CHECK: resolver_else6: // CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17246986240 -// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17246986240 +// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 18320798464 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 18320798464 // CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]] // CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]] // CHECK: resolver_return7: // CHECK-NEXT: ret ptr @fmv_inline._Mf32mmMi8mmMsha3 // CHECK: resolver_else8: // CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19791209299968 -// 
CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 19791209299968 +// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19861002584864 +// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 19861002584864 // CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]] // CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]] // CHECK: resolver_return9: // CHECK-NEXT: ret ptr @fmv_inline._MmemtagMsve2-sm4 // CHECK: resolver_else10: // CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 1374389534720 -// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 1374389534720 +// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 1444182864640 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 1444182864640 // CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]] // CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]] // CHECK: resolver_return11: // CHECK-NEXT: ret ptr @fmv_inline._Msve2-aesMsve2-sha3 // CHECK: resolver_else12: // CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 1207959552 -// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 1207959552 +// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 1208025856 +// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 1208025856 // CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]] // CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]] // CHECK: resolver_return13: // CHECK-NEXT: ret ptr @fmv_inline._Mbf16Msve // CHECK: resolver_else14: // CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 134348800 -// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 134348800 +// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 134349568 +// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 134349568 // CHECK-NEXT: 
[[TMP35:%.*]] = and i1 true, [[TMP34]] // CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]] // CHECK: resolver_return15: // CHECK-NEXT: ret ptr @fmv_inline._Mbf16Mdit // CHECK: resolver_else16: // CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 20971520 -// CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[TMP37]], 20971520 +// CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 20971776 +// CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[TMP37]], 20971776 // CHECK-NEXT: [[TMP39:%.*]] = and i1 true, [[TMP38]] // CHECK-NEXT: br i1 [[TMP39]], label [[RESOLVER_RETURN17:%.*]], label [[RESOLVER_ELSE18:%.*]] // CHECK: resolver_return17: // CHECK-NEXT: ret ptr @fmv_inline._MfrinttsMrcpc // CHECK: resolver_else18: // CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP41:%.*]] = and i64 [[TMP40]], 8650752 -// CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP41]], 8650752 +// CHECK-NEXT: [[TMP41:%.*]] = and i64 [[TMP40]], 12845056 +// CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP41]], 12845056 // CHECK-NEXT: [[TMP43:%.*]] = and i1 true, [[TMP42]] // CHECK-NEXT: br i1 [[TMP43]], label [[RESOLVER_RETURN19:%.*]], label [[RESOLVER_ELSE20:%.*]] // CHECK: resolver_return19: // CHECK-NEXT: ret ptr @fmv_inline._MdpbMrcpc2 // CHECK: resolver_else20: // CHECK-NEXT: [[TMP44:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP45:%.*]] = and i64 [[TMP44]], 1572864 -// CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[TMP45]], 1572864 +// CHECK-NEXT: [[TMP45:%.*]] = and i64 [[TMP44]], 1835264 +// CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[TMP45]], 1835264 // CHECK-NEXT: [[TMP47:%.*]] = and i1 true, [[TMP46]] // CHECK-NEXT: br i1 [[TMP47]], label [[RESOLVER_RETURN21:%.*]], label [[RESOLVER_ELSE22:%.*]] // CHECK: resolver_return21: // CHECK-NEXT: ret ptr @fmv_inline._Mdpb2Mjscvt // CHECK: resolver_else22: // CHECK-NEXT: [[TMP48:%.*]] = 
load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP49:%.*]] = and i64 [[TMP48]], 520 -// CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[TMP49]], 520 +// CHECK-NEXT: [[TMP49:%.*]] = and i64 [[TMP48]], 66312 +// CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[TMP49]], 66312 // CHECK-NEXT: [[TMP51:%.*]] = and i1 true, [[TMP50]] // CHECK-NEXT: br i1 [[TMP51]], label [[RESOLVER_RETURN23:%.*]], label [[RESOLVER_ELSE24:%.*]] // CHECK: resolver_return23: // CHECK-NEXT: ret ptr @fmv_inline._Mfp16fmlMsimd // CHECK: resolver_else24: // CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP53:%.*]] = and i64 [[TMP52]], 32784 -// CHECK-NEXT: [[TMP54:%.*]] = icmp eq i64 [[TMP53]], 32784 +// CHECK-NEXT: [[TMP53:%.*]] = and i64 [[TMP52]], 33552 +// CHECK-NEXT: [[TMP54:%.*]] = icmp eq i64 [[TMP53]], 33552 // CHECK-NEXT: [[TMP55:%.*]] = and i1 true, [[TMP54]] // CHECK-NEXT: br i1 [[TMP55]], label [[RESOLVER_RETURN25:%.*]], label [[RESOLVER_ELSE26:%.*]] // CHECK: resolver_return25: // CHECK-NEXT: ret ptr @fmv_inline._MaesMdotprod // CHECK: resolver_else26: // CHECK-NEXT: [[TMP56:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP57:%.*]] = and i64 [[TMP56]], 192 -// CHECK-NEXT: [[TMP58:%.*]] = icmp eq i64 [[TMP57]], 192 +// CHECK-NEXT: [[TMP57:%.*]] = and i64 [[TMP56]], 960 +// CHECK-NEXT: [[TMP58:%.*]] = icmp eq i64 [[TMP57]], 960 // CHECK-NEXT: [[TMP59:%.*]] = and i1 true, [[TMP58]] // CHECK-NEXT: br i1 [[TMP59]], label [[RESOLVER_RETURN27:%.*]], label [[RESOLVER_ELSE28:%.*]] // CHECK: resolver_return27: // CHECK-NEXT: ret ptr @fmv_inline._MlseMrdm // CHECK: resolver_else28: // CHECK-NEXT: [[TMP60:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP61:%.*]] = and i64 [[TMP60]], 288 -// CHECK-NEXT: [[TMP62:%.*]] = icmp eq i64 [[TMP61]], 288 +// CHECK-NEXT: [[TMP61:%.*]] = and i64 [[TMP60]], 800 +// CHECK-NEXT: [[TMP62:%.*]] = icmp eq i64 [[TMP61]], 800 // CHECK-NEXT: [[TMP63:%.*]] = and 
i1 true, [[TMP62]] // CHECK-NEXT: br i1 [[TMP63]], label [[RESOLVER_RETURN29:%.*]], label [[RESOLVER_ELSE30:%.*]] // CHECK: resolver_return29: @@ -899,8 +899,8 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073741824 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073741824 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073807616 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -913,8 +913,8 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 65536 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 65536 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 65792 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 65792 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -941,16 +941,16 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832 // 
CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @default_def_with_version_decls._Mjscvt // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: diff --git a/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp index 0cdc3b32521ff3..a2cc9f30f026af 100644 --- a/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp +++ b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp @@ -57,8 +57,8 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254806528 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254806528 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254806784 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254806784 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -77,8 +77,8 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254806528 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 
9007199254806528 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254806784 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254806784 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -173,16 +173,16 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36591746972385280 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36591746972385280 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36596145153180416 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36596145153180416 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: // CHECK-NEXT: ret ptr @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs // CHECK: [[RESOLVER_ELSE]]: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777216 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777216 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777472 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777472 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] // CHECK: [[RESOLVER_RETURN1]]: @@ -222,16 +222,16 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36591746972385280 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36591746972385280 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36596145153180416 
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36596145153180416 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: // CHECK-NEXT: ret ptr @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs // CHECK: [[RESOLVER_ELSE]]: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777216 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777216 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777472 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777472 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] // CHECK: [[RESOLVER_RETURN1]]: diff --git a/clang/test/CodeGenCXX/attr-target-version.cpp b/clang/test/CodeGenCXX/attr-target-version.cpp index 0fd9bc33df8094..b6ba07ed29504e 100644 --- a/clang/test/CodeGenCXX/attr-target-version.cpp +++ b/clang/test/CodeGenCXX/attr-target-version.cpp @@ -235,8 +235,8 @@ int bar() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36028797153181696 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36028797153181696 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36033195199759104 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36033195199759104 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -249,8 +249,8 @@ int bar() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 
134217760 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 134217760 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 134218528 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 134218528 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -271,8 +271,8 @@ int bar() { // CHECK-NEXT: ret ptr @_ZN7MyClass3gooEi._Mcrc // CHECK: [[RESOLVER_ELSE]]: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 784 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 784 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] // CHECK: [[RESOLVER_RETURN1]]: @@ -285,8 +285,8 @@ int bar() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073741824 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073741824 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073807616 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -299,8 +299,8 @@ int bar() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 65536 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 65536 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 65792 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 
65792 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: diff --git a/clang/test/CodeGenCXX/fmv-namespace.cpp b/clang/test/CodeGenCXX/fmv-namespace.cpp index d61f6dc9a7071b..1a76ee03565245 100644 --- a/clang/test/CodeGenCXX/fmv-namespace.cpp +++ b/clang/test/CodeGenCXX/fmv-namespace.cpp @@ -76,8 +76,8 @@ __attribute((target_version("mops"))) int bar() { return 1; } // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073741824 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073741824 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073807616 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 15f190165a7d73..bb692b2aeea1d3 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -989,19 +989,25 @@ // RUN: not %clang --target=x86_64-linux-gnu -fsanitize=undefined,function -mcmodel=large %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-CODE-MODEL // CHECK-UBSAN-FUNCTION-CODE-MODEL: error: invalid argument '-fsanitize=function' only allowed with '-mcmodel=small' -// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION -// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=undefined -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION -// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI -// RUN: not %clang 
--target=x86_64-sie-ps5 -fsanitize=function -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI --check-prefix=CHECK-UBSAN-FUNCTION +// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-TARGET +// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=undefined -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-TARGET +// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI-TARGET +// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=function -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI-TARGET --check-prefix=CHECK-UBSAN-FUNCTION-TARGET // RUN: %clang --target=x86_64-sie-ps5 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-UNDEFINED // CHECK-UBSAN-UNDEFINED: "-fsanitize={{((alignment|array-bounds|bool|builtin|enum|float-cast-overflow|integer-divide-by-zero|nonnull-attribute|null|pointer-overflow|return|returns-nonnull-attribute|shift-base|shift-exponent|signed-integer-overflow|unreachable|vla-bound),?){17}"}} -// RUN: not %clang --target=armv6t2-eabi -mexecute-only -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION -// RUN: not %clang --target=armv6t2-eabi -mexecute-only -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI +// RUN: not %clang --target=armv6t2-eabi -mexecute-only -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-MEXECUTE-ONLY +// RUN: not %clang --target=armv6t2-eabi -mpure-code -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-MPURE-CODE +// RUN: not %clang --target=armv6t2-eabi -mexecute-only -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI-MEXECUTE-ONLY +// RUN: not %clang --target=armv6t2-eabi -mpure-code -fsanitize=kcfi %s -### 2>&1 | FileCheck %s 
--check-prefix=CHECK-UBSAN-KCFI-MPURE-CODE // RUN: %clang --target=armv6t2-eabi -mexecute-only -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-UNDEFINED-VPTR -// CHECK-UBSAN-KCFI-DAG: error: invalid argument '-fsanitize=kcfi' not allowed with {{('x86_64-sie-ps5'|'armv6t2-unknown-unknown-eabi')}} -// CHECK-UBSAN-FUNCTION-DAG: error: invalid argument '-fsanitize=function' not allowed with {{('x86_64-sie-ps5'|'armv6t2-unknown-unknown-eabi')}} +// CHECK-UBSAN-KCFI-TARGET-DAG: error: unsupported option '-fsanitize=kcfi' for target 'x86_64-sie-ps5' +// CHECK-UBSAN-KCFI-MEXECUTE-ONLY-DAG: error: invalid argument '-fsanitize=kcfi' not allowed with '-mexecute-only' +// CHECK-UBSAN-KCFI-MPURE-CODE-DAG: error: invalid argument '-fsanitize=kcfi' not allowed with '-mpure-code' +// CHECK-UBSAN-FUNCTION-TARGET-DAG: error: unsupported option '-fsanitize=function' for target 'x86_64-sie-ps5' +// CHECK-UBSAN-FUNCTION-MEXECUTE-ONLY-DAG: error: invalid argument '-fsanitize=function' not allowed with '-mexecute-only' +// CHECK-UBSAN-FUNCTION-MPURE-CODE-DAG: error: invalid argument '-fsanitize=function' not allowed with '-mpure-code' // CHECK-UBSAN-UNDEFINED-VPTR: "-fsanitize={{((alignment|array-bounds|bool|builtin|enum|float-cast-overflow|integer-divide-by-zero|nonnull-attribute|null|pointer-overflow|return|returns-nonnull-attribute|shift-base|shift-exponent|signed-integer-overflow|unreachable|vla-bound|vptr),?){18}"}} // * Test BareMetal toolchain sanitizer support * diff --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c index 09a12c2327137c..7d0985c4dd4f48 100644 --- a/clang/test/Driver/fveclib.c +++ b/clang/test/Driver/fveclib.c @@ -112,3 +112,20 @@ /* Verify no warning when math-errno is re-enabled for a different veclib (that does not imply -fno-math-errno). 
*/ // RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL -fmath-errno -fveclib=LIBMVEC %s 2>&1 | FileCheck --check-prefix=CHECK-REPEAT-VECLIB %s // CHECK-REPEAT-VECLIB-NOT: math errno enabled + +/// Verify that vectorized routines library is being linked in. +// RUN: %clang -### --target=aarch64-pc-windows-msvc -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-MSVC %s +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-LINUX %s +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL %s -lamath 2>&1 | FileCheck --check-prefix=CHECK-LINKING-AMATH-BEFORE-ARMPL-LINUX %s +// RUN: %clang -### --target=arm64-apple-darwin -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-DARWIN %s +// RUN: %clang -### --target=arm64-apple-darwin -fveclib=ArmPL %s -lamath 2>&1 | FileCheck --check-prefix=CHECK-LINKING-AMATH-BEFORE-ARMPL-DARWIN %s +// CHECK-LINKING-ARMPL-LINUX: "--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +// CHECK-LINKING-ARMPL-DARWIN: "-lm" "-lamath" "-lm" +// CHECK-LINKING-ARMPL-MSVC: "--dependent-lib=amath" +// CHECK-LINKING-AMATH-BEFORE-ARMPL-LINUX: "-lamath" {{.*}}"--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +// CHECK-LINKING-AMATH-BEFORE-ARMPL-DARWIN: "-lamath" {{.*}}"-lm" "-lamath" "-lm" + +/// Verify that the RPATH is being set when needed. 
+// RUN: %clang -### --target=aarch64-linux-gnu -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_arch_subdir -frtlib-add-rpath -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-RPATH-ARMPL %s +// CHECK-RPATH-ARMPL: "--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +// CHECK-RPATH-ARMPL-SAME: "-rpath" diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c index 656b31444a9eed..3741ed099cf5c2 100644 --- a/clang/test/ParserOpenACC/parse-clauses.c +++ b/clang/test/ParserOpenACC/parse-clauses.c @@ -528,22 +528,30 @@ void VarListClauses() { #pragma acc serial firstprivate(s.array[s.value : 5], s.value), self for(int i = 0; i < 5;++i) {} - // expected-error@+2{{expected ','}} - // expected-warning@+1{{OpenACC clause 'delete' not yet implemented, clause ignored}} -#pragma acc serial delete(s.array[s.value] s.array[s.value :5] ), self + // expected-warning@+4{{OpenACC construct 'exit data' not yet implemented}} + // expected-error@+3{{expected ','}} + // expected-warning@+2{{OpenACC clause 'delete' not yet implemented, clause ignored}} + // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} +#pragma acc exit data delete(s.array[s.value] s.array[s.value :5] ) async for(int i = 0; i < 5;++i) {} - // expected-warning@+1{{OpenACC clause 'delete' not yet implemented, clause ignored}} -#pragma acc serial delete(s.array[s.value : 5], s.value), self + // expected-warning@+3{{OpenACC construct 'exit data' not yet implemented}} + // expected-warning@+2{{OpenACC clause 'delete' not yet implemented, clause ignored}} + // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} +#pragma acc exit data delete(s.array[s.value : 5], s.value),async for(int i = 0; i < 5;++i) {} - // expected-error@+2{{expected ','}} - // expected-warning@+1{{OpenACC clause 'use_device' not yet implemented, clause ignored}} -#pragma acc serial use_device(s.array[s.value] 
s.array[s.value :5] ), self + // expected-warning@+4{{OpenACC construct 'exit data' not yet implemented}} + // expected-error@+3{{expected ','}} + // expected-warning@+2{{OpenACC clause 'use_device' not yet implemented, clause ignored}} + // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} +#pragma acc exit data use_device(s.array[s.value] s.array[s.value :5] ),async for(int i = 0; i < 5;++i) {} - // expected-warning@+1{{OpenACC clause 'use_device' not yet implemented, clause ignored}} -#pragma acc serial use_device(s.array[s.value : 5], s.value), self + // expected-warning@+3{{OpenACC construct 'exit data' not yet implemented}} + // expected-warning@+2{{OpenACC clause 'use_device' not yet implemented, clause ignored}} + // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} +#pragma acc exit data use_device(s.array[s.value : 5], s.value), async for(int i = 0; i < 5;++i) {} // expected-error@+2{{expected ','}} diff --git a/clang/test/ParserOpenACC/parse-clauses.cpp b/clang/test/ParserOpenACC/parse-clauses.cpp index dc985826a4efe9..4dc966ea9879f9 100644 --- a/clang/test/ParserOpenACC/parse-clauses.cpp +++ b/clang/test/ParserOpenACC/parse-clauses.cpp @@ -34,6 +34,11 @@ void templ() { #pragma acc parallel async for(;;){} + + // expected-warning@+2{{OpenACC construct 'exit data' not yet implemented}} + // expected-warning@+1{{OpenACC clause 'delete' not yet implemented, clause ignored}} +#pragma acc exit data delete(I) + ; } struct S { diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c index 2e94c2314f1824..e47efccf480433 100644 --- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c +++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c @@ -1,6 +1,6 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -emit-llvm -o - %s +// RUN: %clang_cc1 -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -verify -emit-llvm -o - %s #include @@ -21,4 +21,13 @@ void test_features(svmfloat8_t zn, fpm_t fpm) { // expected-error@-1 {{'svcvtlt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} svcvtlt2_f16_mf8_fpm(zn, fpm); // expected-error@-1 {{'svcvtlt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + + svcvtn_mf8_bf16_x2_fpm(svcreate2(svundef_bf16(), svundef_bf16()), fpm); + // expected-error@-1 {{'svcvtn_mf8_bf16_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtn_mf8_f16_x2_fpm(svcreate2(svundef_f16(), svundef_f16()), fpm); + // expected-error@-1 {{'svcvtn_mf8_f16_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtnb_mf8_f32_x2_fpm(svcreate2(svundef_f32(), svundef_f32()), fpm); + // expected-error@-1 {{'svcvtnb_mf8_f32_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtnt_mf8_f32_x2_fpm(zn, svcreate2(svundef_f32(), svundef_f32()), fpm); + // expected-error@-1 {{'svcvtnt_mf8_f32_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} } diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 3fa79636de5d75..8a2c3463189fa9 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -31,41 +31,21 @@ namespace drv = clang::driver::options; namespace clang { namespace installapi { -/// Create prefix string literals used in InstallAPIOpts.td. 
-#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "InstallAPIOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE -static constexpr const llvm::StringLiteral PrefixTable_init[] = -#define PREFIX_UNION(VALUES) VALUES +#define OPTTABLE_PREFIXES_TABLE_CODE #include "InstallAPIOpts.inc" -#undef PREFIX_UNION - ; -static constexpr const ArrayRef - PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1); +#undef OPTTABLE_PREFIXES_TABLE_CODE + +#define OPTTABLE_PREFIXES_UNION_CODE +#include "InstallAPIOpts.inc" +#undef OPTTABLE_PREFIXES_UNION_CODE /// Create table mapping all options defined in InstallAPIOpts.td. static constexpr OptTable::Info InfoTable[] = { -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ - VALUES) \ - {PREFIX, \ - NAME, \ - HELPTEXT, \ - HELPTEXTSFORVARIANTS, \ - METAVAR, \ - OPT_##ID, \ - Option::KIND##Class, \ - PARAM, \ - FLAGS, \ - VISIBILITY, \ - OPT_##GROUP, \ - OPT_##ALIAS, \ - ALIASARGS, \ - VALUES}, +#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), #include "InstallAPIOpts.inc" #undef OPTION }; @@ -75,7 +55,9 @@ namespace { /// \brief Create OptTable class for parsing actual command line arguments. class DriverOptTable : public opt::PrecomputedOptTable { public: - DriverOptTable() : PrecomputedOptTable(InfoTable, PrefixTable) {} + DriverOptTable() + : PrecomputedOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + OptionPrefixesUnion) {} }; } // end anonymous namespace. 
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index ebafd7eb7774ec..fae32a3503c185 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -174,12 +174,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "LinkerWrapperOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "LinkerWrapperOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -189,7 +190,8 @@ static constexpr OptTable::Info InfoTable[] = { class WrapperOptTable : public opt::GenericOptTable { public: - WrapperOptTable() : opt::GenericOptTable(InfoTable) {} + WrapperOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; const OptTable &getOptTable() { diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp index bc191afdca739d..faf73a7c2f1938 100644 --- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp +++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp @@ -109,12 +109,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "NVLinkOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "NVLinkOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -124,7 +125,8 @@ static constexpr OptTable::Info InfoTable[] = { class WrapperOptTable : public opt::GenericOptTable { public: - WrapperOptTable() : opt::GenericOptTable(InfoTable) {} + WrapperOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; const OptTable &getOptTable() { diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 58b56dcfd3bece..bd36181fca3f31 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -50,12 +50,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE const llvm::opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -65,7 +66,8 @@ const llvm::opt::OptTable::Info InfoTable[] = { class ScanDepsOptTable : public llvm::opt::GenericOptTable { public: - ScanDepsOptTable() : GenericOptTable(InfoTable) { + ScanDepsOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 076458a275d986..2bcb3757d49d08 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -88,12 +88,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "SYCLLinkOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "SYCLLinkOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -103,7 +104,8 @@ static constexpr OptTable::Info InfoTable[] = { class LinkerOptTable : public opt::GenericOptTable { public: - LinkerOptTable() : opt::GenericOptTable(InfoTable) {} + LinkerOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; const OptTable &getOptTable() { diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 70dc7d860d8f6a..b32b42423f6a90 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -573,8 +573,8 @@ set(aarch64_SOURCES ) if (COMPILER_RT_HAS_AARCH64_SME) - if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD)) - list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-init.c aarch64/sme-libc-routines.c) + if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG) + list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-assert.c aarch64/sme-libc-routines.c) message(STATUS "AArch64 SME ABI routines enabled") set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin") else() @@ -842,6 +842,8 @@ else () if(COMPILER_RT_DISABLE_AARCH64_FMV) list(APPEND BUILTIN_DEFS DISABLE_AARCH64_FMV) + elseif(COMPILER_RT_BAREMETAL_BUILD) + list(APPEND BUILTIN_DEFS ENABLE_BAREMETAL_AARCH64_FMV) endif() append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS) diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c new file mode 100644 index 00000000000000..4333353f8d2d1b --- /dev/null +++ b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c @@ -0,0 +1,10 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// We rely on the FMV __aarch64_cpu_features mechanism to determine +// which features are set at runtime. + +#include "../cpu_model/AArch64CPUFeatures.inc" +_Static_assert(FEAT_SVE == 30, "sme-abi.S assumes FEAT_SVE = 30"); +_Static_assert(FEAT_SME == 42, "sme-abi.S assumes FEAT_SME = 42"); diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c b/compiler-rt/lib/builtins/aarch64/sme-abi-init.c deleted file mode 100644 index d3cd8278a5d214..00000000000000 --- a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c +++ /dev/null @@ -1,50 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -__attribute__((visibility("hidden"), nocommon)) -_Bool __aarch64_has_sme_and_tpidr2_el0; - -// We have multiple ways to check that the function has SME, depending on our -// target. -// * For Linux/Glibc we can use getauxval(). -// * For Android we can use getauxval(). -// * For newlib we can use __aarch64_sme_accessible(). 
- -#if defined(__linux__) - -#if defined(__ANDROID__) -#include -#elif __has_include() -#include -#else -#define getauxval(x) 0 -#endif -#include "../cpu_model/aarch64/hwcap.inc" - -static _Bool has_sme(void) { return getauxval(AT_HWCAP2) & HWCAP2_SME; } - -#else // defined(__linux__) - -#if defined(COMPILER_RT_SHARED_LIB) -__attribute__((weak)) -#endif -extern _Bool __aarch64_sme_accessible(void); - -static _Bool has_sme(void) { -#if defined(COMPILER_RT_SHARED_LIB) - if (!__aarch64_sme_accessible) - return 0; -#endif - return __aarch64_sme_accessible(); -} - -#endif // defined(__linux__) - -#if __GNUC__ >= 9 -#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" -#endif -__attribute__((constructor(90))) -static void init_aarch64_has_sme(void) { - __aarch64_has_sme_and_tpidr2_el0 = has_sme(); -} diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S index 623a95dd4dae5f..a6bb921bd9e6b9 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -9,18 +9,15 @@ #include "../assembly.h" .set FEAT_SVE_BIT, 30 +.set FEAT_SME_BIT, 42 .set SVCR_PSTATE_SM_BIT, 0 #if !defined(__APPLE__) -#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) -#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features) #define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features) #else // MachO requires @page/@pageoff directives because the global is defined // in a different file. Otherwise this file may fail to build. 
-#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page -#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page #define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff #endif @@ -63,9 +60,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) mov x0, xzr mov x1, xzr - adrp x16, TPIDR2_SYMBOL - ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET] - cbz w16, 1f + adrp x16, CPU_FEATS_SYMBOL + ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET] + tbz x16, #FEAT_SME_BIT, 1f 0: orr x0, x0, #0xC000000000000000 mrs x16, SVCR @@ -116,9 +113,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) BTI_C // If the current thread does not have access to TPIDR2_EL0, the subroutine // does nothing. - adrp x14, TPIDR2_SYMBOL - ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET] - cbz w14, 1f + adrp x14, CPU_FEATS_SYMBOL + ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET] + tbz x14, #FEAT_SME_BIT, 1f // If TPIDR2_EL0 is null, the subroutine does nothing. mrs x16, TPIDR2_EL0 @@ -157,9 +154,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) BTI_C // If the current thread does not have access to SME, the subroutine does // nothing. - adrp x14, TPIDR2_SYMBOL - ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET] - cbz w14, 0f + adrp x14, CPU_FEATS_SYMBOL + ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET] + tbz x14, #FEAT_SME_BIT, 0f // Otherwise, the subroutine behaves as if it did the following: // * Call __arm_tpidr2_save. 
@@ -191,11 +188,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg) BTI_C adrp x17, CPU_FEATS_SYMBOL - ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET] + ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET] tbnz w17, #FEAT_SVE_BIT, 1f - adrp x17, TPIDR2_SYMBOL - ldrb w17, [x17, TPIDR2_SYMBOL_OFFSET] - cbz x17, 2f + tbz x17, #FEAT_SME_BIT, 2f 0: mrs x17, SVCR tbz x17, #SVCR_PSTATE_SM_BIT, 2f diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c index 74e5e01b66c547..4082fd62ea11a2 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64.c +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c @@ -80,6 +80,8 @@ struct { #include "aarch64/fmv/getauxval.inc" #elif defined(_WIN32) #include "aarch64/fmv/windows.inc" +#elif defined(ENABLE_BAREMETAL_AARCH64_FMV) +#include "aarch64/fmv/baremetal.inc" #else #include "aarch64/fmv/unimplemented.inc" #endif diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc new file mode 100644 index 00000000000000..f188e84808e019 --- /dev/null +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc @@ -0,0 +1,31 @@ +// For baremetal platforms, we don't really initialise '__aarch64_cpu_features', +// with exception of FEAT_SME that we can get from '__aarch64_sme_accessible'. + +#if defined(COMPILER_RT_SHARED_LIB) +__attribute__((weak)) +#endif +extern _Bool +__aarch64_sme_accessible(void); + +static _Bool has_sme(void) { +#if defined(COMPILER_RT_SHARED_LIB) + if (!__aarch64_sme_accessible) + return 0; +#endif + return __aarch64_sme_accessible(); +} + +void __init_cpu_features_resolver(unsigned long hwcap, + const __ifunc_arg_t *arg) {} + +void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { + // CPU features already initialized. 
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) + return; + + unsigned long long feat = 0; + if (has_sme()) + feat |= 1ULL << FEAT_SME; + + __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED); +} diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index b0fb263b62f156..4afc74933a33bc 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -634,6 +634,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0xD284: // 84 D2 : test dl,dl return 2; + case 0xEC83: // 83 EC XX : sub esp, XX case 0xC1F6: // F6 C1 XX : test cl, XX return 3; @@ -644,8 +645,6 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { switch (0x00FFFFFF & *(u32*)address) { case 0xF8E483: // 83 E4 F8 : and esp, 0xFFFFFFF8 - case 0x64EC83: // 83 EC 64 : sub esp, 64h - return 3; case 0x24A48D: // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX] return 7; } @@ -873,7 +872,6 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0x5D8B: // 8B 5D XX : mov ebx, dword ptr [ebp + XX] case 0x7D8B: // 8B 7D XX : mov edi, dword ptr [ebp + XX] case 0x758B: // 8B 75 XX : mov esi, dword ptr [ebp + XX] - case 0xEC83: // 83 EC XX : sub esp, XX case 0x75FF: // FF 75 XX : push dword ptr [ebp + XX] return 3; case 0xC1F7: // F7 C1 XX YY ZZ WW : test ecx, WWZZYYXX diff --git a/compiler-rt/lib/interception/tests/interception_win_test.cpp b/compiler-rt/lib/interception/tests/interception_win_test.cpp index 761c07d4288f75..6e01209ac3a7e4 100644 --- a/compiler-rt/lib/interception/tests/interception_win_test.cpp +++ b/compiler-rt/lib/interception/tests/interception_win_test.cpp @@ -852,6 +852,7 @@ const struct InstructionSizeData { { 2, {0x8B, 0xC1}, 0, "8B C1 : mov eax, ecx"}, { 2, {0x8B, 0xEC}, 0, "8B EC : mov ebp, esp"}, { 2, {0x8B, 0xFF}, 0, "8B FF : 
mov edi, edi"}, + { 3, {0x83, 0xEC, 0x72}, 0, "83 EC XX : sub esp, XX"}, { 3, {0xc2, 0x71, 0x72}, 0, "C2 XX XX : ret XX (needed for registering weak functions)"}, { 5, {0x68, 0x71, 0x72, 0x73, 0x74}, 0, "68 XX XX XX XX : push imm32"}, { 5, {0xb8, 0x71, 0x72, 0x73, 0x74}, 0, "b8 XX XX XX XX : mov eax, XX XX XX XX"}, diff --git a/flang/include/flang/Common/Fortran-consts.h b/flang/include/flang/Common/Fortran-consts.h index eedcdae335c400..cf7884e7454c0c 100644 --- a/flang/include/flang/Common/Fortran-consts.h +++ b/flang/include/flang/Common/Fortran-consts.h @@ -9,7 +9,7 @@ #ifndef FORTRAN_COMMON_FORTRAN_CONSTS_H_ #define FORTRAN_COMMON_FORTRAN_CONSTS_H_ -#include "flang/Common/enum-class.h" +#include "enum-set.h" #include namespace Fortran::common { diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index c6f35a07d81ea5..13825eb7ba41e3 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -546,6 +546,7 @@ class ParseTreeDumper { NODE(parser, OmpEndCriticalDirective) NODE(parser, OmpEndLoopDirective) NODE(parser, OmpEndSectionsDirective) + NODE(parser, OmpFailClause) NODE(parser, OmpFromClause) NODE(OmpFromClause, Modifier) NODE(parser, OmpExpectation) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 8160b095f06dd9..2b4cb21017fa0d 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -269,6 +269,7 @@ struct OpenACCRoutineConstruct; struct OpenMPConstruct; struct OpenMPDeclarativeConstruct; struct OmpEndLoopDirective; +struct OmpMemoryOrderClause; struct CUFKernelDoConstruct; // Cooked character stream locations @@ -3914,6 +3915,14 @@ struct OmpDeviceTypeClause { WRAPPER_CLASS_BOILERPLATE(OmpDeviceTypeClause, DeviceTypeDescription); }; +// OMP 5.2 15.8.3 extended-atomic, fail-clause -> +// FAIL(memory-order) +struct OmpFailClause { + 
WRAPPER_CLASS_BOILERPLATE( + OmpFailClause, common::Indirection); + CharBlock source; +}; + // Ref: [4.5:107-109], [5.0:176-180], [5.1:205-210], [5.2:167-168] // // from-clause -> @@ -4317,11 +4326,12 @@ struct OmpMemoryOrderClause { }; // 2.17.7 Atomic construct -// atomic-clause -> memory-order-clause | HINT(hint-expression) +// atomic-clause -> memory-order-clause | HINT(hint-expression) | +// FAIL(memory-order) struct OmpAtomicClause { UNION_CLASS_BOILERPLATE(OmpAtomicClause); CharBlock source; - std::variant u; + std::variant u; }; // atomic-clause-list -> [atomic-clause, [atomic-clause], ...] diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index a1c0e8f417bcd1..99835c515463b9 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -38,7 +38,7 @@ DataSharingProcessor::DataSharingProcessor( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, lower::pft::Evaluation &eval, bool shouldCollectPreDeterminedSymbols, bool useDelayedPrivatization, - lower::SymMap *symTable) + lower::SymMap &symTable) : converter(converter), semaCtx(semaCtx), firOpBuilder(converter.getFirOpBuilder()), clauses(clauses), eval(eval), shouldCollectPreDeterminedSymbols(shouldCollectPreDeterminedSymbols), @@ -93,7 +93,7 @@ void DataSharingProcessor::insertDeallocs() { fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym); mlir::omp::PrivateClauseOp privatizer = symToPrivatizer.at(sym); - lower::SymMapScope scope(*symTable); + lower::SymMapScope scope(symTable); mlir::OpBuilder::InsertionGuard guard(firOpBuilder); mlir::Region &deallocRegion = privatizer.getDeallocRegion(); @@ -102,8 +102,8 @@ void DataSharingProcessor::insertDeallocs() { &deallocRegion, /*insertPt=*/{}, symType, symLoc); firOpBuilder.setInsertionPointToEnd(deallocEntryBlock); - symTable->addSymbol(*sym, - fir::substBase(symExV, 
deallocRegion.getArgument(0))); + symTable.addSymbol(*sym, + fir::substBase(symExV, deallocRegion.getArgument(0))); converter.createHostAssociateVarCloneDealloc(*sym); firOpBuilder.create(hsb.getAddr().getLoc()); @@ -474,7 +474,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate : mlir::omp::DataSharingClauseType::Private); fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym); - lower::SymMapScope outerScope(*symTable); + lower::SymMapScope outerScope(symTable); // Populate the `alloc` region. { @@ -491,10 +491,10 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext())) .first; - symTable->addSymbol(*sym, localExV); - lower::SymMapScope innerScope(*symTable); + symTable.addSymbol(*sym, localExV); + lower::SymMapScope innerScope(symTable); cloneSymbol(sym); - mlir::Value cloneAddr = symTable->shallowLookupSymbol(*sym).getAddr(); + mlir::Value cloneAddr = symTable.shallowLookupSymbol(*sym).getAddr(); mlir::Type cloneType = cloneAddr.getType(); // A `convert` op is required for variables that are storage associated @@ -522,25 +522,24 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, auto addSymbol = [&](unsigned argIdx, bool force = false) { symExV.match( [&](const fir::MutableBoxValue &box) { - symTable->addSymbol( + symTable.addSymbol( *sym, fir::substBase(box, copyRegion.getArgument(argIdx)), force); }, [&](const auto &box) { - symTable->addSymbol(*sym, copyRegion.getArgument(argIdx), force); + symTable.addSymbol(*sym, copyRegion.getArgument(argIdx), force); }); }; addSymbol(0, true); - lower::SymMapScope innerScope(*symTable); + lower::SymMapScope innerScope(symTable); addSymbol(1); auto ip = firOpBuilder.saveInsertionPoint(); copyFirstPrivateSymbol(sym, &ip); firOpBuilder.create( - hsb.getAddr().getLoc(), - symTable->shallowLookupSymbol(*sym).getAddr()); + 
hsb.getAddr().getLoc(), symTable.shallowLookupSymbol(*sym).getAddr()); } return result; diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index ff186483e04a83..2f5c69cc264cea 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -86,7 +86,7 @@ class DataSharingProcessor { lower::pft::Evaluation &eval; bool shouldCollectPreDeterminedSymbols; bool useDelayedPrivatization; - lower::SymMap *symTable; + lower::SymMap &symTable; OMPConstructSymbolVisitor visitor; bool needBarrier(); @@ -122,8 +122,7 @@ class DataSharingProcessor { const List &clauses, lower::pft::Evaluation &eval, bool shouldCollectPreDeterminedSymbols, - bool useDelayedPrivatization = false, - lower::SymMap *symTable = nullptr); + bool useDelayedPrivatization, lower::SymMap &symTable); // Privatisation is split into two steps. // Step1 performs cloning of all privatisation clauses and copying for diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2ef4d184a6321f..c167d347b43159 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -718,7 +718,8 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, std::optional tempDsp; if (privatize && !info.dsp) { tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval, - Fortran::lower::omp::isLastItemInQueue(item, queue)); + Fortran::lower::omp::isLastItemInQueue(item, queue), + /*useDelayedPrivatization=*/false, info.symTable); tempDsp->processStep1(); } @@ -1423,7 +1424,7 @@ static void genLoopOp(lower::AbstractConverter &converter, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(&loopClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; @@ -1544,7 +1545,8 @@ 
genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, // Insert privatizations before SECTIONS lower::SymMapScope scope(symTable); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, - lower::omp::isLastItemInQueue(item, queue)); + lower::omp::isLastItemInQueue(item, queue), + /*useDelayedPrivatization=*/false, symTable); dsp.processStep1(); List nonDsaClauses; @@ -1695,7 +1697,7 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/ lower::omp::isLastItemInQueue(item, queue), - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(&clauseOps); // 5.8.1 Implicit Data-Mapping Attribute Rules @@ -1896,7 +1898,7 @@ genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, lower::omp::isLastItemInQueue(item, queue), - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(&clauseOps); EntryBlockArgs taskArgs; @@ -2011,7 +2013,7 @@ static void genStandaloneDistribute(lower::AbstractConverter &converter, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - enableDelayedPrivatizationStaging, &symTable); + enableDelayedPrivatizationStaging, symTable); dsp.processStep1(&distributeClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2045,7 +2047,7 @@ static void genStandaloneDo(lower::AbstractConverter &converter, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - enableDelayedPrivatizationStaging, &symTable); + enableDelayedPrivatizationStaging, symTable); dsp.processStep1(&wsloopClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2084,7 +2086,7 @@ static void 
genStandaloneParallel(lower::AbstractConverter &converter, if (enableDelayedPrivatization) { dsp.emplace(converter, semaCtx, item->clauses, eval, lower::omp::isLastItemInQueue(item, queue), - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp->processStep1(¶llelClauseOps); } @@ -2113,7 +2115,7 @@ static void genStandaloneSimd(lower::AbstractConverter &converter, // TODO: Support delayed privatization. DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); + /*useDelayedPrivatization=*/false, symTable); dsp.processStep1(); mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2167,7 +2169,7 @@ static void genCompositeDistributeParallelDo( DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(¶llelClauseOps); EntryBlockArgs parallelArgs; @@ -2235,7 +2237,7 @@ static void genCompositeDistributeParallelDoSimd( DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(¶llelClauseOps); EntryBlockArgs parallelArgs; @@ -2323,7 +2325,7 @@ static void genCompositeDistributeSimd(lower::AbstractConverter &converter, // TODO: Support delayed privatization. DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); + /*useDelayedPrivatization=*/false, symTable); dsp.processStep1(); // Pass the innermost leaf construct's clauses because that's where COLLAPSE @@ -2380,7 +2382,7 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, // TODO: Support delayed privatization. 
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); + /*useDelayedPrivatization=*/false, symTable); dsp.processStep1(); // Pass the innermost leaf construct's clauses because that's where COLLAPSE diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp index b61f9767ccc2b8..f446e59b0a7230 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp @@ -13,6 +13,7 @@ #include "flang/Optimizer/Builder/Complex.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Builder/IntrinsicCall.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" @@ -331,6 +332,108 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { } }; +class CShiftAsElementalConversion + : public mlir::OpRewritePattern { +public: + using mlir::OpRewritePattern::OpRewritePattern; + + explicit CShiftAsElementalConversion(mlir::MLIRContext *ctx) + : OpRewritePattern(ctx) { + setHasBoundedRewriteRecursion(); + } + + llvm::LogicalResult + matchAndRewrite(hlfir::CShiftOp cshift, + mlir::PatternRewriter &rewriter) const override { + using Fortran::common::maxRank; + + mlir::Location loc = cshift.getLoc(); + fir::FirOpBuilder builder{rewriter, cshift.getOperation()}; + hlfir::ExprType expr = mlir::dyn_cast(cshift.getType()); + assert(expr && + "expected an expression type for the result of hlfir.cshift"); + mlir::Type elementType = expr.getElementType(); + hlfir::Entity array = hlfir::Entity{cshift.getArray()}; + mlir::Value arrayShape = hlfir::genShape(loc, builder, array); + llvm::SmallVector arrayExtents = + hlfir::getExplicitExtentsFromShape(arrayShape, 
builder); + unsigned arrayRank = expr.getRank(); + llvm::SmallVector typeParams; + hlfir::genLengthParameters(loc, builder, array, typeParams); + hlfir::Entity shift = hlfir::Entity{cshift.getShift()}; + // The new index computation involves MODULO, which is not implemented + // for IndexType, so use I64 instead. + mlir::Type calcType = builder.getI64Type(); + + mlir::Value one = builder.createIntegerConstant(loc, calcType, 1); + mlir::Value shiftVal; + if (shift.isScalar()) { + shiftVal = hlfir::loadTrivialScalar(loc, builder, shift); + shiftVal = builder.createConvert(loc, calcType, shiftVal); + } + + int64_t dimVal = 1; + if (arrayRank == 1) { + // When it is a 1D CSHIFT, we may assume that the DIM argument + // (whether it is present or absent) is equal to 1, otherwise, + // the program is illegal. + assert(shiftVal && "SHIFT must be scalar"); + } else { + if (mlir::Value dim = cshift.getDim()) + dimVal = fir::getIntIfConstant(dim).value_or(0); + assert(dimVal > 0 && dimVal <= arrayRank && + "DIM must be present and a positive constant not exceeding " + "the array's rank"); + } + + auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder, + mlir::ValueRange inputIndices) -> hlfir::Entity { + llvm::SmallVector indices{inputIndices}; + if (!shift.isScalar()) { + // When the array is not a vector, section + // (s(1), s(2), ..., s(dim-1), :, s(dim+1), ..., s(n) + // of the result has a value equal to: + // CSHIFT(ARRAY(s(1), s(2), ..., s(dim-1), :, s(dim+1), ..., s(n)), + // SH, 1), + // where SH is either SHIFT (if scalar) or + // SHIFT(s(1), s(2), ..., s(dim-1), s(dim+1), ..., s(n)). 
+ llvm::SmallVector shiftIndices{indices}; + shiftIndices.erase(shiftIndices.begin() + dimVal - 1); + hlfir::Entity shiftElement = + hlfir::getElementAt(loc, builder, shift, shiftIndices); + shiftVal = hlfir::loadTrivialScalar(loc, builder, shiftElement); + shiftVal = builder.createConvert(loc, calcType, shiftVal); + } + + // Element i of the result (1-based) is element + // 'MODULO(i + SH - 1, SIZE(ARRAY)) + 1' (1-based) of the original + // ARRAY (or its section, when ARRAY is not a vector). + mlir::Value index = + builder.createConvert(loc, calcType, inputIndices[dimVal - 1]); + mlir::Value extent = arrayExtents[dimVal - 1]; + mlir::Value newIndex = + builder.create(loc, index, shiftVal); + newIndex = builder.create(loc, newIndex, one); + newIndex = fir::IntrinsicLibrary{builder, loc}.genModulo( + calcType, {newIndex, builder.createConvert(loc, calcType, extent)}); + newIndex = builder.create(loc, newIndex, one); + newIndex = builder.createConvert(loc, builder.getIndexType(), newIndex); + + indices[dimVal - 1] = newIndex; + hlfir::Entity element = hlfir::getElementAt(loc, builder, array, indices); + return hlfir::loadTrivialScalar(loc, builder, element); + }; + + hlfir::ElementalOp elementalOp = hlfir::genElementalOp( + loc, builder, elementType, arrayShape, typeParams, genKernel, + /*isUnordered=*/true, + array.isPolymorphic() ? 
static_cast(array) : nullptr, + cshift.getResult().getType()); + rewriter.replaceOp(cshift, elementalOp); + return mlir::success(); + } +}; + class SimplifyHLFIRIntrinsics : public hlfir::impl::SimplifyHLFIRIntrinsicsBase { public: @@ -339,6 +442,7 @@ class SimplifyHLFIRIntrinsics mlir::RewritePatternSet patterns(context); patterns.insert(context); patterns.insert(context); + patterns.insert(context); mlir::ConversionTarget target(*context); // don't transform transpose of polymorphic arrays (not currently supported // by hlfir.elemental) @@ -375,6 +479,24 @@ class SimplifyHLFIRIntrinsics } return true; }); + target.addDynamicallyLegalOp([](hlfir::CShiftOp cshift) { + unsigned resultRank = hlfir::Entity{cshift}.getRank(); + if (resultRank == 1) + return false; + + mlir::Value dim = cshift.getDim(); + if (!dim) + return false; + + // If DIM is present, then it must be constant to please + // the conversion. In addition, ignore cases with + // illegal DIM values. + if (auto dimVal = fir::getIntIfConstant(dim)) + if (*dimVal > 0 && *dimVal <= resultRank) + return false; + + return true; + }); target.markUnknownOpDynamicallyLegal( [](mlir::Operation *) { return true; }); if (mlir::failed(mlir::applyFullConversion(getOperation(), target, diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp index 72b894d1cec757..20d47fda32380d 100644 --- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp +++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp @@ -11,6 +11,7 @@ #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/allocatable.h" @@ -27,6 +28,8 @@ namespace fir { namespace { +static constexpr llvm::StringRef builtinPrefix = "_QM__fortran_builtins"; + static void 
processAddrOfOp(fir::AddrOfOp addrOfOp, mlir::SymbolTable &symbolTable, llvm::DenseSet &candidates) { @@ -35,22 +38,46 @@ static void processAddrOfOp(fir::AddrOfOp addrOfOp, // TO DO: limit candidates to non-scalars. Scalars appear to have been // folded in already. if (globalOp.getConstant()) { + // Limit recursion to builtin global for now. + if (globalOp.getSymName().starts_with(builtinPrefix)) { + globalOp.walk([&](fir::AddrOfOp op) { + processAddrOfOp(op, symbolTable, candidates); + }); + } candidates.insert(globalOp); } } } +static void processEmboxOp(fir::EmboxOp emboxOp, mlir::SymbolTable &symbolTable, + llvm::DenseSet &candidates) { + if (auto recTy = mlir::dyn_cast( + fir::unwrapRefType(emboxOp.getMemref().getType()))) + // Only look at builtin record type. + if (recTy.getName().starts_with(builtinPrefix)) + if (auto globalOp = symbolTable.lookup( + fir::NameUniquer::getTypeDescriptorName(recTy.getName()))) { + if (!candidates.contains(globalOp)) { + globalOp.walk([&](fir::AddrOfOp op) { + processAddrOfOp(op, symbolTable, candidates); + }); + candidates.insert(globalOp); + } + } +} + static void prepareImplicitDeviceGlobals(mlir::func::FuncOp funcOp, mlir::SymbolTable &symbolTable, llvm::DenseSet &candidates) { - auto cudaProcAttr{ funcOp->getAttrOfType(cuf::getProcAttrName())}; if (cudaProcAttr && cudaProcAttr.getValue() != cuf::ProcAttribute::Host) { - funcOp.walk([&](fir::AddrOfOp addrOfOp) { - processAddrOfOp(addrOfOp, symbolTable, candidates); + funcOp.walk([&](fir::AddrOfOp op) { + processAddrOfOp(op, symbolTable, candidates); }); + funcOp.walk( + [&](fir::EmboxOp op) { processEmboxOp(op, symbolTable, candidates); }); } } diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 86d475c1a15422..f8fda92d5ac2bb 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -739,6 +739,9 @@ TYPE_PARSER(sourced(construct( TYPE_PARSER(sourced(construct(verbatim("CANCEL"_tok), Parser{}, 
maybe("IF" >> parenthesized(scalarLogicalExpr))))) +TYPE_PARSER(sourced(construct( + parenthesized(indirect(Parser{}))))) + // 2.17.7 Atomic construct/2.17.8 Flush construct [OpenMP 5.0] // memory-order-clause -> // seq_cst @@ -767,6 +770,7 @@ TYPE_PARSER(construct( // atomic-clause -> memory-order-clause | HINT(hint-expression) TYPE_PARSER(sourced(construct( construct(Parser{}) || + construct("FAIL" >> Parser{}) || construct("HINT" >> sourced(construct( construct(parenthesized(constantExpr)))))))) diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4782cc1f2d7d7d..a10be3f1c797de 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2702,10 +2702,16 @@ class UnparseVisitor { Put("\n"); EndOpenMP(); } + void Unparse(const OmpFailClause &x) { + Word("FAIL("); + Walk(x.v); + Put(")"); + } void Unparse(const OmpMemoryOrderClause &x) { Walk(x.v); } void Unparse(const OmpAtomicClause &x) { common::visit(common::visitors{ [&](const OmpMemoryOrderClause &y) { Walk(y); }, + [&](const OmpFailClause &y) { Walk(y); }, [&](const OmpClause &z) { Walk(z); }, }, x.u); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 3c9c5a02a338a6..1e78cf359a213e 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2530,18 +2530,27 @@ void OmpStructureChecker::CheckAtomicCaptureConstruct( void OmpStructureChecker::CheckAtomicMemoryOrderClause( const parser::OmpAtomicClauseList *leftHandClauseList, const parser::OmpAtomicClauseList *rightHandClauseList) { - int numMemoryOrderClause = 0; + int numMemoryOrderClause{0}; + int numFailClause{0}; auto checkForValidMemoryOrderClause = [&](const parser::OmpAtomicClauseList *clauseList) { for (const auto &clause : clauseList->v) { - if (std::get_if(&clause.u)) { - numMemoryOrderClause++; - if (numMemoryOrderClause > 1) { + if (std::get_if(&clause.u)) { + numFailClause++; + if 
(numFailClause > 1) { context_.Say(clause.source, - "More than one memory order clause not allowed on " - "OpenMP Atomic construct"_err_en_US); + "More than one FAIL clause not allowed on OpenMP ATOMIC construct"_err_en_US); return; } + } else { + if (std::get_if(&clause.u)) { + numMemoryOrderClause++; + if (numMemoryOrderClause > 1) { + context_.Say(clause.source, + "More than one memory order clause not allowed on OpenMP ATOMIC construct"_err_en_US); + return; + } + } } } }; @@ -2816,8 +2825,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause &x) { // Following clauses do not have a separate node in parse-tree.h. CHECK_SIMPLE_CLAUSE(Absent, OMPC_absent) -CHECK_SIMPLE_CLAUSE(AcqRel, OMPC_acq_rel) -CHECK_SIMPLE_CLAUSE(Acquire, OMPC_acquire) CHECK_SIMPLE_CLAUSE(Affinity, OMPC_affinity) CHECK_SIMPLE_CLAUSE(Capture, OMPC_capture) CHECK_SIMPLE_CLAUSE(Contains, OMPC_contains) @@ -2853,9 +2860,6 @@ CHECK_SIMPLE_CLAUSE(Nogroup, OMPC_nogroup) CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch) CHECK_SIMPLE_CLAUSE(Partial, OMPC_partial) CHECK_SIMPLE_CLAUSE(ProcBind, OMPC_proc_bind) -CHECK_SIMPLE_CLAUSE(Release, OMPC_release) -CHECK_SIMPLE_CLAUSE(Relaxed, OMPC_relaxed) -CHECK_SIMPLE_CLAUSE(SeqCst, OMPC_seq_cst) CHECK_SIMPLE_CLAUSE(Simd, OMPC_simd) CHECK_SIMPLE_CLAUSE(Sizes, OMPC_sizes) CHECK_SIMPLE_CLAUSE(Permutation, OMPC_permutation) @@ -2883,7 +2887,6 @@ CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare) CHECK_SIMPLE_CLAUSE(CancellationConstructType, OMPC_cancellation_construct_type) CHECK_SIMPLE_CLAUSE(OmpxAttribute, OMPC_ompx_attribute) CHECK_SIMPLE_CLAUSE(OmpxBare, OMPC_ompx_bare) -CHECK_SIMPLE_CLAUSE(Fail, OMPC_fail) CHECK_SIMPLE_CLAUSE(Weak, OMPC_weak) CHECK_REQ_SCALAR_INT_CLAUSE(NumTeams, OMPC_num_teams) @@ -2896,6 +2899,53 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen) +void OmpStructureChecker::Enter(const 
parser::OmpClause::AcqRel &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_acq_rel); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::Acquire &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_acquire); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::Release &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_release); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::Relaxed &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_relaxed); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::SeqCst &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_seq_cst); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::Fail &) { + assert(!isFailClause && "Unexpected FAIL clause inside a FAIL clause?"); + isFailClause = true; + CheckAllowedClause(llvm::omp::Clause::OMPC_fail); +} + +void OmpStructureChecker::Leave(const parser::OmpClause::Fail &) { + assert(isFailClause && "Expected to be inside a FAIL clause here"); + isFailClause = false; +} + +void OmpStructureChecker::Enter(const parser::OmpFailClause &) { + assert(!isFailClause && "Unexpected FAIL clause inside a FAIL clause?"); + isFailClause = true; + CheckAllowedClause(llvm::omp::Clause::OMPC_fail); +} + +void OmpStructureChecker::Leave(const parser::OmpFailClause &) { + assert(isFailClause && "Expected to be inside a FAIL clause here"); + isFailClause = false; +} + // Restrictions specific to each clause are implemented apart from the // generalized restrictions. 
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 89af46d9171ad3..e28e5f6d7b0d5a 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -142,6 +142,10 @@ class OmpStructureChecker #define GEN_FLANG_CLAUSE_CHECK_ENTER #include "llvm/Frontend/OpenMP/OMP.inc" + void Leave(const parser::OmpClause::Fail &); + void Enter(const parser::OmpFailClause &); + void Leave(const parser::OmpFailClause &); + private: bool CheckAllowedClause(llvmOmpClause clause); bool IsVariableListItem(const Symbol &sym); @@ -276,6 +280,7 @@ class OmpStructureChecker using LoopConstruct = std::variant; std::vector loopStack_; + bool isFailClause{false}; }; /// Find a duplicate entry in the range, and return an iterator to it. diff --git a/flang/test/Driver/fveclib.f90 b/flang/test/Driver/fveclib.f90 index 14c59b0616f828..490ce974724a6d 100644 --- a/flang/test/Driver/fveclib.f90 +++ b/flang/test/Driver/fveclib.f90 @@ -30,3 +30,20 @@ ! TODO: if we add support for -nostdlib or -nodefaultlibs we need to test that ! these prevent "-framework Accelerate" being added on Darwin + +! RUN: %flang -### --target=aarch64-pc-windows-msvc -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-MSVC %s +! RUN: %flang -### --target=aarch64-linux-gnu -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-LINUX %s +! RUN: %flang -### --target=aarch64-linux-gnu -fveclib=ArmPL %s -lamath 2>&1 | FileCheck --check-prefix=CHECK-LINKING-AMATH-BEFORE-ARMPL-LINUX %s +! RUN: %flang -### --target=arm64-apple-darwin -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-DARWIN %s +! RUN: %flang -### --target=arm64-apple-darwin -fveclib=ArmPL %s -lamath 2>&1 | FileCheck --check-prefix=CHECK-LINKING-AMATH-BEFORE-ARMPL-DARWIN %s +! CHECK-LINKING-ARMPL-LINUX: "--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +! CHECK-LINKING-ARMPL-DARWIN: "-lm" "-lamath" "-lm" +! 
CHECK-LINKING-ARMPL-MSVC: "--dependent-lib=amath" +! CHECK-LINKING-AMATH-BEFORE-ARMPL-LINUX: "-lamath" {{.*}}"--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +! CHECK-LINKING-AMATH-BEFORE-ARMPL-DARWIN: "-lamath" {{.*}}"-lm" "-lamath" "-lm" + +! RUN: %flang -### --target=aarch64-linux-gnu -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_arch_subdir -frtlib-add-rpath -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-RPATH-ARMPL %s +! CHECK-RPATH-ARMPL: "--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +! We need to see "-rpath" at least twice, one for veclib, one for the Fortran runtime +! CHECK-RPATH-ARMPL-SAME: "-rpath" +! CHECK-RPATH-ARMPL-SAME: "-rpath" diff --git a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 index ec5ed06824e227..ef32722488fe2a 100644 --- a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 +++ b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 @@ -191,3 +191,120 @@ // Test that global used in device function are flagged with the correct // CHECK: fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50> // CHECK: gpu.module @cuda_device_mod // CHECK: fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50> + +// ----- + +func.func @_QMmtestsPtestany() attributes {cuf.proc_attr = #cuf.cuda_proc} { + %1135 = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> {bindc_name = "x", uniq_name = "_QMmtestsFtestanyEx"} + %1136 = fir.declare %1135 {uniq_name = "_QMmtestsFtestanyEx"} : (!fir.ref}>>) -> !fir.ref}>> + %1138 = fir.embox %1136 : (!fir.ref}>>) -> !fir.box}>> + return +} +fir.type_info @_QM__fortran_builtinsT__builtin_c_devptr noinit nodestroy nofinal : !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> +fir.global 
linkonce_odr @_QM__fortran_builtinsE.dt.__builtin_c_devptr constant : !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> { + %c1_i8 = arith.constant 1 : i8 + %c0_i8 = arith.constant 0 : i8 + %c0_i32 = arith.constant 0 : i32 + %c1 = arith.constant 1 : index + %c8_i64 = arith.constant 8 : i64 + %c18 = arith.constant 18 : index + %c0 = arith.constant 0 : index + %0 = fir.undefined 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %1 = fir.zero_bits !fir.ptr,name:!fir.box>>}>>> + %2 = fir.shape %c0 : (index) -> !fir.shape<1> + %3 = fir.embox %1(%2) : (!fir.ptr,name:!fir.box>>}>>>, !fir.shape<1>) -> !fir.box,name:!fir.box>>}>>>> + %4 = fir.insert_value %0, %3, ["binding", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box,name:!fir.box>>}>>>>) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %5 = fir.address_of(@_QM__fortran_builtinsE.n.__builtin_c_devptr) : !fir.ref> + %6 = fir.declare %5 typeparams %c18 {fortran_attrs = #fir.var_attrs, uniq_name = "_QM__fortran_builtinsE.n.__builtin_c_devptr"} : (!fir.ref>, index) -> !fir.ref> + %7 = fir.embox %6 : (!fir.ref>) -> !fir.box> + %8 = fir.rebox %7 : (!fir.box>) -> !fir.box>> + %9 = fir.insert_value %4, %8, ["name", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box>>) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %10 = fir.insert_value %9, %c8_i64, ["sizeinbytes", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i64) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %11 = fir.zero_bits 
!fir.ptr,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>> + %12 = fir.embox %11 : (!fir.ptr,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>) -> 
!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>> + %13 = fir.insert_value %10, %12, ["uninstantiated", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %14 = fir.zero_bits !fir.ptr> + %15 = fir.embox %14(%2) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> + %16 = fir.insert_value %13, %15, ["kindparameter", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box>>) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %17 = fir.zero_bits !fir.ptr> + %18 = fir.embox %17(%2) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> + %19 = fir.insert_value %16, %18, ["lenparameterkind", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box>>) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %20 = fir.address_of(@_QM__fortran_builtinsE.c.__builtin_c_devptr) : !fir.ref>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>> + %21 = fir.shape_shift %c0, %c1 : (index, index) -> !fir.shapeshift<1> + %22 = fir.declare %20(%21) {fortran_attrs = #fir.var_attrs, uniq_name = 
"_QM__fortran_builtinsE.c.__builtin_c_devptr"} : (!fir.ref>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.shapeshift<1>) -> !fir.ref>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>> + %23 = fir.embox %22(%21) : 
(!fir.ref>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.shapeshift<1>) -> !fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>> + %24 = fir.shift %c0 : (index) -> !fir.shift<1> + %25 = fir.rebox %23(%24) : 
(!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.shift<1>) -> !fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>> + %26 = fir.insert_value %19, %25, ["component", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, 
!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %27 = fir.zero_bits !fir.ptr>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>> + %28 = fir.embox %27(%2) : 
(!fir.ptr>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>, !fir.shape<1>) -> !fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>> + %29 = fir.insert_value %26, %28, ["procptr", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %30 = fir.zero_bits 
!fir.ptr,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>> + %31 = fir.embox %30(%2) : (!fir.ptr,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>, !fir.shape<1>) -> !fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>> + %32 = fir.insert_value %29, %31, ["special", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %33 = fir.insert_value %32, %c0_i32, ["specialbitset", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i32) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %34 = fir.insert_value %33, %c0_i8, ["hasparent", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i8) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %35 = fir.insert_value %34, %c1_i8, ["noinitializationneeded", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i8) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %36 = fir.insert_value %35, %c1_i8, ["nodestructionneeded", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i8) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %37 = fir.insert_value %36, %c1_i8, ["nofinalizationneeded", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i8) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + fir.has_value %37 : !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> +} +fir.global linkonce_odr @_QM__fortran_builtinsE.n.__builtin_c_devptr constant : !fir.char<1,18> { + %0 = fir.string_lit "__builtin_c_devptr"(18) : !fir.char<1,18> + 
fir.has_value %0 : !fir.char<1,18> +} +fir.global linkonce_odr @_QM__fortran_builtinsE.c.__builtin_c_devptr constant : !fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>> { + %c0 = arith.constant 0 : index + %c0_i64 = arith.constant 0 : i64 + %c0_i8 = arith.constant 0 : i8 + %c5_i8 = arith.constant 5 : i8 + %c1_i8 = arith.constant 1 : i8 + %c4 = arith.constant 4 : index + %0 = fir.undefined 
!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>> + %1 = fir.undefined !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %2 = fir.address_of(@_QM__fortran_builtinsE.n.cptr) : !fir.ref> + %3 = fir.declare %2 typeparams %c4 {fortran_attrs = #fir.var_attrs, uniq_name = 
"_QM__fortran_builtinsE.n.cptr"} : (!fir.ref>, index) -> !fir.ref> + %4 = fir.embox %3 : (!fir.ref>) -> !fir.box> + %5 = fir.rebox %4 : (!fir.box>) -> !fir.box>> + %6 = fir.insert_value %1, %5, ["name", !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : 
(!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.box>>) -> !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %7 = fir.insert_value %6, %c1_i8, ["genre", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i8) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %8 = fir.insert_value %7, %c5_i8, ["category", !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : 
(!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i8) -> !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %9 = fir.insert_value %8, %c0_i8, ["kind", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i8) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %10 = fir.insert_value %9, %c0_i8, ["rank", !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : 
(!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i8) -> !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %11 = fir.insert_value %10, %c0_i64, ["offset", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i64) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %12 = fir.undefined !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}> + %13 = fir.insert_value %12, %c1_i8, ["genre", !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>] : (!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>, i8) -> !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}> + %14 = fir.insert_value %13, %c0_i64, ["value", !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>] : (!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>, i64) -> !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}> + %15 = fir.insert_value %11, %14, ["characterlen", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %16 = fir.address_of(@_QM__fortran_builtinsE.dt.__builtin_c_ptr) : !fir.ref,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>> + %17 = fir.declare %16 {fortran_attrs = #fir.var_attrs, uniq_name = "_QM__fortran_builtinsE.dt.__builtin_c_ptr"} : 
(!fir.ref,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>) -> !fir.ref,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>> + %18 = fir.embox %17 : 
(!fir.ref,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>) -> !fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>> + %19 = fir.rebox %18 : 
(!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>) -> !fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>> + %20 = fir.insert_value %15, %19, ["derived", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, 
!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>) -> !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %21 = fir.zero_bits !fir.ptr,value:i64}>>> + %22 = fir.shape %c0 : (index) -> !fir.shape<1> + %23 = fir.embox %21(%22) : (!fir.ptr,value:i64}>>>, !fir.shape<1>) -> !fir.box,value:i64}>>>> + %24 = fir.insert_value %20, %23, ["lenvalue", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.box,value:i64}>>>>) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %25 = fir.zero_bits !fir.ptr,value:i64}>>> + %26 = fir.shape %c0, %c0 : (index, index) -> !fir.shape<2> + %27 = fir.embox %25(%26) : (!fir.ptr,value:i64}>>>, !fir.shape<2>) -> !fir.box,value:i64}>>>> + %28 = fir.insert_value %24, %27, ["bounds", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.box,value:i64}>>>>) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %29 = fir.zero_bits !fir.ref + %30 = fir.convert %29 : (!fir.ref) -> i64 + %31 = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + %32 = fir.insert_value %31, %30, ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + %33 = fir.insert_value %28, %32, ["initialization", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %34 = fir.insert_value %0, %33, [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>, 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>) -> !fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>> + fir.has_value %34 : 
!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>> +} +fir.global linkonce_odr @_QM__fortran_builtinsE.n.cptr constant : !fir.char<1,4> { + %0 = fir.string_lit "cptr"(4) : !fir.char<1,4> + fir.has_value %0 : !fir.char<1,4> +} + +// CHECK-LABEL: func.func @_QMmtestsPtestany() +// CHECK: gpu.module @cuda_device_mod +// CHECK-DAG: fir.global linkonce_odr @_QM__fortran_builtinsE.n.cptr +// CHECK-DAG: fir.global linkonce_odr @_QM__fortran_builtinsE.c.__builtin_c_devptr +// CHECK-DAG: fir.global linkonce_odr @_QM__fortran_builtinsE.dt.__builtin_c_devptr +// CHECK-DAG: fir.global linkonce_odr @_QM__fortran_builtinsE.n.__builtin_c_devptr diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir new file mode 100644 index 00000000000000..acb89c0719aa08 --- /dev/null +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir @@ -0,0 +1,304 @@ +// Test hlfir.cshift simplification to hlfir.elemental: +// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s + +func.func @cshift_vector(%arg0: !fir.box>, %arg1: !fir.ref) { + %res = hlfir.cshift %arg0 %arg1 : 
(!fir.box>, !fir.ref) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_vector( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref) { +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i32) -> i64 +// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_9:.*]]: index): +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (index) -> i64 +// CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_10]], %[[VAL_7]] : i64 +// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_5]] : i64 +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = arith.remsi %[[VAL_12]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_15:.*]] = arith.xori %[[VAL_12]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_16:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_17:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_18:.*]] = arith.cmpi ne, %[[VAL_14]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_19:.*]] = arith.andi %[[VAL_18]], %[[VAL_17]] : i1 +// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_14]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_19]], %[[VAL_20]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_21]], %[[VAL_5]] : i64 +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i64) -> index +// CHECK: %[[VAL_24:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_24]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_26:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_25]]#0, %[[VAL_26]] : index 
+// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_23]], %[[VAL_27]] : index +// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_28]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref +// CHECK: hlfir.yield_element %[[VAL_30]] : i32 +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @cshift_2d_by_scalar(%arg0: !fir.box>, %arg1: !fir.ref) { + %dim = arith.constant 2 : i32 + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, !fir.ref, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_2d_by_scalar( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref) { +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_4]]#1, %[[VAL_6]]#1 : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64 +// CHECK: %[[VAL_11:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<2>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index): +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (index) -> i64 +// CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[VAL_10]] : i64 +// CHECK: %[[VAL_16:.*]] = arith.subi %[[VAL_15]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64 +// CHECK: %[[VAL_18:.*]] = arith.remsi %[[VAL_16]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_19:.*]] = arith.xori %[[VAL_16]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_20:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_21:.*]] = arith.cmpi slt, 
%[[VAL_19]], %[[VAL_20]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_20]] : i64 +// CHECK: %[[VAL_23:.*]] = arith.andi %[[VAL_22]], %[[VAL_21]] : i1 +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_18]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_23]], %[[VAL_24]], %[[VAL_18]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.addi %[[VAL_25]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i64) -> index +// CHECK: %[[VAL_28:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_29:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_28]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_30:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_30]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_32:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_33:.*]] = arith.subi %[[VAL_29]]#0, %[[VAL_32]] : index +// CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_12]], %[[VAL_33]] : index +// CHECK: %[[VAL_35:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_32]] : index +// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_27]], %[[VAL_35]] : index +// CHECK: %[[VAL_37:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]], %[[VAL_36]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_37]] : !fir.ref +// CHECK: hlfir.yield_element %[[VAL_38]] : i32 +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @cshift_2d_by_vector(%arg0: !fir.box>, %arg1: !fir.box>) { + %dim = arith.constant 2 : i32 + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, !fir.box>, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_2d_by_vector( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.box>) { +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]] = 
arith.constant 1 : index +// CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_4]]#1, %[[VAL_6]]#1 : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_9:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<2>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_10:.*]]: index, %[[VAL_11:.*]]: index): +// CHECK: %[[VAL_12:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_12]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_14]] : index +// CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_10]], %[[VAL_15]] : index +// CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_16]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64 +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_11]] : (index) -> i64 +// CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64 +// CHECK: %[[VAL_24:.*]] = arith.remsi %[[VAL_22]], %[[VAL_23]] : i64 +// CHECK: %[[VAL_25:.*]] = arith.xori %[[VAL_22]], %[[VAL_23]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_27:.*]] = arith.cmpi slt, %[[VAL_25]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_24]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_29:.*]] = arith.andi %[[VAL_28]], %[[VAL_27]] : i1 +// CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_24]], %[[VAL_23]] : i64 +// CHECK: %[[VAL_31:.*]] = arith.select %[[VAL_29]], %[[VAL_30]], %[[VAL_24]] : i64 +// CHECK: %[[VAL_32:.*]] = arith.addi %[[VAL_31]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_33:.*]] = fir.convert 
%[[VAL_32]] : (i64) -> index +// CHECK: %[[VAL_34:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_35:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_34]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_36:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_37:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_36]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_38:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_39:.*]] = arith.subi %[[VAL_35]]#0, %[[VAL_38]] : index +// CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_10]], %[[VAL_39]] : index +// CHECK: %[[VAL_41:.*]] = arith.subi %[[VAL_37]]#0, %[[VAL_38]] : index +// CHECK: %[[VAL_42:.*]] = arith.addi %[[VAL_33]], %[[VAL_41]] : index +// CHECK: %[[VAL_43:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_40]], %[[VAL_42]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_43]] : !fir.ref +// CHECK: hlfir.yield_element %[[VAL_44]] : i32 +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @cshift_vector_char(%arg0: !fir.box>>, %arg1: !fir.ref) { + %res = hlfir.cshift %arg0 %arg1 : (!fir.box>>, !fir.ref) -> !hlfir.expr> + return +} +// CHECK-LABEL: func.func @cshift_vector_char( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref) { +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = fir.box_elesize %[[VAL_0]] : (!fir.box>>) -> index +// CHECK: %[[VAL_6:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_7:.*]] = arith.divsi %[[VAL_5]], %[[VAL_6]] : index +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64 +// CHECK: %[[VAL_11:.*]] = hlfir.elemental %[[VAL_4]] typeparams %[[VAL_7]] unordered : (!fir.shape<1>, index) 
-> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_12:.*]]: index): +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[VAL_10]] : i64 +// CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64 +// CHECK: %[[VAL_17:.*]] = arith.remsi %[[VAL_15]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_18:.*]] = arith.xori %[[VAL_15]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_19:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_21:.*]] = arith.cmpi ne, %[[VAL_17]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.andi %[[VAL_21]], %[[VAL_20]] : i1 +// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_17]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_24:.*]] = arith.select %[[VAL_22]], %[[VAL_23]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i64) -> index +// CHECK: %[[VAL_27:.*]] = fir.box_elesize %[[VAL_0]] : (!fir.box>>) -> index +// CHECK: %[[VAL_28:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_29:.*]] = arith.divsi %[[VAL_27]], %[[VAL_28]] : index +// CHECK: %[[VAL_30:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_30]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_32:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_33:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_32]] : index +// CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_26]], %[[VAL_33]] : index +// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]]) typeparams %[[VAL_29]] : (!fir.box>>, index, index) -> !fir.boxchar<2> +// CHECK: hlfir.yield_element %[[VAL_35]] : !fir.boxchar<2> +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @cshift_vector_poly(%arg0: !fir.class>>, %arg1: i32) { + %res = hlfir.cshift %arg0 %arg1 : (!fir.class>>, i32) -> !hlfir.expr?> + 
return +} +// CHECK-LABEL: func.func @cshift_vector_poly( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.class>>, +// CHECK-SAME: %[[VAL_1:.*]]: i32) { +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.class>>, index) -> (index, index, index) +// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_4]] mold %[[VAL_0]] unordered : (!fir.shape<1>, !fir.class>>) -> !hlfir.expr?> { +// CHECK: ^bb0(%[[VAL_8:.*]]: index): +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (index) -> i64 +// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_10]], %[[VAL_5]] : i64 +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64 +// CHECK: %[[VAL_13:.*]] = arith.remsi %[[VAL_11]], %[[VAL_12]] : i64 +// CHECK: %[[VAL_14:.*]] = arith.xori %[[VAL_11]], %[[VAL_12]] : i64 +// CHECK: %[[VAL_15:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_16:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_13]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_18:.*]] = arith.andi %[[VAL_17]], %[[VAL_16]] : i1 +// CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_13]], %[[VAL_12]] : i64 +// CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_18]], %[[VAL_19]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_5]] : i64 +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_21]] : (i64) -> index +// CHECK: %[[VAL_23:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_24:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_23]] : (!fir.class>>, index) -> (index, index, index) +// CHECK: %[[VAL_25:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_26:.*]] = arith.subi %[[VAL_24]]#0, %[[VAL_25]] : index +// CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_22]], 
%[[VAL_26]] : index +// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_27]]) : (!fir.class>>, index) -> !fir.class> +// CHECK: hlfir.yield_element %[[VAL_28]] : !fir.class> +// CHECK: } +// CHECK: return +// CHECK: } + +// negative: non-constant dim argument +func.func @cshift_nonconst_dim(%arg0: !fir.box>, %arg1: i32, %dim : i32) { + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, i32, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_nonconst_dim( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: i32, +// CHECK-SAME: %[[VAL_2:.*]]: i32) { +// CHECK: %[[VAL_3:.*]] = hlfir.cshift %[[VAL_0]] %[[VAL_1]] dim %[[VAL_2]] : (!fir.box>, i32, i32) -> !hlfir.expr +// CHECK: return +// CHECK: } + +// negative: invalid constant dim argument +func.func @cshift_invalid_dim(%arg0: !fir.box>, %arg1: i32) { + %dim = arith.constant 3 : i32 + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, i32, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_invalid_dim( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: i32) { +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : i32 +// CHECK: %[[VAL_3:.*]] = hlfir.cshift %[[VAL_0]] %[[VAL_1]] dim %[[VAL_2]] : (!fir.box>, i32, i32) -> !hlfir.expr +// CHECK: return +// CHECK: } + +// When the input array is 1D, we may assume that DIM==1, +// otherwise the program is illegal, and we can do anything +// about it. 
+func.func @cshift_vector_assumed_dim_1(%arg0: !fir.box>, %arg1: i32) { + %dim = arith.constant 3 : i32 + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, i32, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_vector_assumed_dim_1( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: i32) { +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_6:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_9:.*]]: index): +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (index) -> i64 +// CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_10]], %[[VAL_7]] : i64 +// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_4]]#1 : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = arith.remsi %[[VAL_12]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_15:.*]] = arith.xori %[[VAL_12]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_16:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_17:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_18:.*]] = arith.cmpi ne, %[[VAL_14]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_19:.*]] = arith.andi %[[VAL_18]], %[[VAL_17]] : i1 +// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_14]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_19]], %[[VAL_20]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_21]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i64) -> index +// CHECK: %[[VAL_24:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_24]] : (!fir.box>, index) 
-> (index, index, index) +// CHECK: %[[VAL_26:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_25]]#0, %[[VAL_26]] : index +// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_23]], %[[VAL_27]] : index +// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_28]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref +// CHECK: hlfir.yield_element %[[VAL_30]] : i32 +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/flang/test/Lower/OpenMP/Todo/atomic-compare-fail.f90 b/flang/test/Lower/OpenMP/Todo/atomic-compare-fail.f90 new file mode 100644 index 00000000000000..b82bd13622764b --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/atomic-compare-fail.f90 @@ -0,0 +1,11 @@ +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s + +! CHECK: not yet implemented: OpenMP atomic compare +program p + integer :: x + logical :: r + !$omp atomic compare fail(relaxed) + if (x .eq. 0) then + x = 2 + end if +end program p diff --git a/flang/test/Parser/OpenMP/atomic-unparse.f90 b/flang/test/Parser/OpenMP/atomic-unparse.f90 index 64fa79fb1d1a2f..16dc7a1a92bf9e 100644 --- a/flang/test/Parser/OpenMP/atomic-unparse.f90 +++ b/flang/test/Parser/OpenMP/atomic-unparse.f90 @@ -165,6 +165,20 @@ program main i = j end if + +!$omp atomic compare fail(relaxed) + if (i .eq. k) then + i = j + end if +!$omp atomic fail(relaxed) compare + if (i .eq. k) then + i = j + end if +!$omp atomic fail(relaxed) compare acquire + if (i .eq. 
k) then + i = j + end if + !ATOMIC !$omp atomic i = j @@ -262,6 +276,9 @@ end program main !CHECK: !$OMP ATOMIC COMPARE ACQUIRE !CHECK: !$OMP ATOMIC RELAXED COMPARE !CHECK: !$OMP ATOMIC COMPARE RELAXED +!CHECK: !$OMP ATOMIC COMPARE FAIL(RELAXED) +!CHECK: !$OMP ATOMIC FAIL(RELAXED) COMPARE +!CHECK: !$OMP ATOMIC FAIL(RELAXED) COMPARE ACQUIRE !ATOMIC !CHECK: !$OMP ATOMIC @@ -270,3 +287,5 @@ end program main !CHECK: !$OMP ATOMIC ACQ_REL !CHECK: !$OMP ATOMIC ACQUIRE !CHECK: !$OMP ATOMIC RELAXED + + diff --git a/flang/test/Semantics/OpenMP/atomic-compare.f90 b/flang/test/Semantics/OpenMP/atomic-compare.f90 index 85644ad909107e..54492bf6a22a60 100644 --- a/flang/test/Semantics/OpenMP/atomic-compare.f90 +++ b/flang/test/Semantics/OpenMP/atomic-compare.f90 @@ -35,45 +35,58 @@ if (b .eq. a) b = c !$omp end atomic + !$omp atomic hint(1) acq_rel compare fail(release) + if (c .eq. a) a = b + !$omp end atomic + + !$omp atomic compare fail(release) + if (c .eq. a) a = b + !$omp end atomic + ! Check for error conditions: - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the COMPARE directive !$omp atomic seq_cst seq_cst compare if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the COMPARE directive !$omp atomic compare seq_cst seq_cst if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the COMPARE directive !$omp atomic seq_cst compare seq_cst if (b .eq. 
c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the COMPARE directive !$omp atomic acquire acquire compare if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the COMPARE directive !$omp atomic compare acquire acquire if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the COMPARE directive !$omp atomic acquire compare acquire if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the COMPARE directive !$omp atomic relaxed relaxed compare if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the COMPARE directive !$omp atomic compare relaxed relaxed if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the COMPARE directive !$omp atomic relaxed compare relaxed if (b .eq. c) b = a + !ERROR: More than one FAIL clause not allowed on OpenMP ATOMIC construct + !$omp atomic fail(release) compare fail(release) + if (c .eq. 
a) a = b + !$omp end atomic + !$omp end parallel end diff --git a/flang/test/Semantics/OpenMP/atomic01.f90 b/flang/test/Semantics/OpenMP/atomic01.f90 index 538db316f6e7f5..173effe86b69c0 100644 --- a/flang/test/Semantics/OpenMP/atomic01.f90 +++ b/flang/test/Semantics/OpenMP/atomic01.f90 @@ -14,193 +14,193 @@ ! At most one memory-order-clause may appear on the construct. !READ - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the READ directive !$omp atomic seq_cst seq_cst read i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the READ directive !$omp atomic read seq_cst seq_cst i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the READ directive !$omp atomic seq_cst read seq_cst i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the READ directive !$omp atomic acquire acquire read i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the READ directive !$omp atomic read acquire acquire i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the READ directive !$omp atomic 
acquire read acquire i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the READ directive !$omp atomic relaxed relaxed read i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the READ directive !$omp atomic read relaxed relaxed i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the READ directive !$omp atomic relaxed read relaxed i = j !UPDATE - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive !$omp atomic seq_cst seq_cst update !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive !$omp atomic update seq_cst seq_cst !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive !$omp atomic seq_cst update seq_cst !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order 
clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the UPDATE directive !$omp atomic release release update !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the UPDATE directive !$omp atomic update release release !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the UPDATE directive !$omp atomic release update release !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the UPDATE directive !$omp atomic relaxed relaxed update !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the UPDATE directive !$omp atomic update relaxed relaxed !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the UPDATE directive !$omp atomic relaxed update relaxed !ERROR: Invalid or missing operator in atomic update statement i = j !CAPTURE - !ERROR: More than one memory order clause not allowed on 
OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive !$omp atomic seq_cst seq_cst capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive !$omp atomic capture seq_cst seq_cst i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive !$omp atomic seq_cst capture seq_cst i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the CAPTURE directive !$omp atomic release release capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the CAPTURE directive !$omp atomic capture release release i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the CAPTURE directive !$omp atomic release capture release i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the CAPTURE directive 
!$omp atomic relaxed relaxed capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the CAPTURE directive !$omp atomic capture relaxed relaxed i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the CAPTURE directive !$omp atomic relaxed capture relaxed i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive !$omp atomic acq_rel acq_rel capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive !$omp atomic capture acq_rel acq_rel i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive !$omp atomic acq_rel capture acq_rel i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive !$omp atomic acquire acquire capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory 
order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive !$omp atomic capture acquire acquire i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive !$omp atomic acquire capture acquire i = j @@ -208,57 +208,57 @@ !$omp end atomic !WRITE - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the WRITE directive !$omp atomic seq_cst seq_cst write i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the WRITE directive !$omp atomic write seq_cst seq_cst i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the WRITE directive !$omp atomic seq_cst write seq_cst i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the WRITE directive !$omp atomic release release write i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the WRITE directive !$omp atomic write release release i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than 
one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the WRITE directive !$omp atomic release write release i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the WRITE directive !$omp atomic relaxed relaxed write i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the WRITE directive !$omp atomic write relaxed relaxed i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the WRITE directive !$omp atomic relaxed write relaxed i = j !No atomic-clause - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the ATOMIC directive !$omp atomic relaxed relaxed !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the ATOMIC directive !$omp atomic seq_cst seq_cst !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the ATOMIC directive !$omp atomic release release !ERROR: Invalid or missing operator in atomic 
update statement diff --git a/flang/test/Semantics/OpenMP/atomic05.f90 b/flang/test/Semantics/OpenMP/atomic05.f90 index f37aabcfce06ec..266268a2124409 100644 --- a/flang/test/Semantics/OpenMP/atomic05.f90 +++ b/flang/test/Semantics/OpenMP/atomic05.f90 @@ -8,20 +8,20 @@ program OmpAtomic use omp_lib integer :: g, x - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic relaxed, seq_cst x = x + 1 - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic read seq_cst, relaxed x = g - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic write relaxed, release x = 2 * 4 - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic update release, seq_cst !ERROR: Invalid or missing operator in atomic update statement x = 10 - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic capture release, seq_cst x = g g = x * 10 diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt index f98af5744685fc..05f15a0e4e5cbb 100644 --- a/libc/config/linux/aarch64/headers.txt +++ b/libc/config/linux/aarch64/headers.txt @@ -1,5 +1,6 @@ set(TARGET_PUBLIC_HEADERS libc.include.assert + libc.include.complex libc.include.ctype libc.include.dlfcn libc.include.elf diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt index 6576db1f852691..9aabac5dea33cb 100644 --- a/libc/config/linux/arm/headers.txt +++ 
b/libc/config/linux/arm/headers.txt @@ -1,4 +1,5 @@ set(TARGET_PUBLIC_HEADERS + libc.include.complex libc.include.ctype libc.include.errno libc.include.fenv diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt index 41c343f71998b9..b38659e0b8daf7 100644 --- a/libc/config/linux/riscv/headers.txt +++ b/libc/config/linux/riscv/headers.txt @@ -1,5 +1,6 @@ set(TARGET_PUBLIC_HEADERS libc.include.assert + libc.include.complex libc.include.ctype libc.include.dirent libc.include.dlfcn diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt index e0c04b381492d0..8750100302ea7e 100644 --- a/libc/config/linux/x86_64/headers.txt +++ b/libc/config/linux/x86_64/headers.txt @@ -1,5 +1,6 @@ set(TARGET_PUBLIC_HEADERS libc.include.assert + libc.include.complex libc.include.ctype libc.include.dirent libc.include.dlfcn diff --git a/libc/hdrgen/yaml/complex.yaml b/libc/hdrgen/yaml/complex.yaml new file mode 100644 index 00000000000000..be0d3c9ae59b47 --- /dev/null +++ b/libc/hdrgen/yaml/complex.yaml @@ -0,0 +1,105 @@ +header: complex.h +macros: [] +types: + - type_name: cfloat16 + - type_name: cfloat128 + - type_name: float128 +enums: [] +objects: [] +functions: + - name: cimag + standards: + - stdc + return_type: double + arguments: + - type: _Complex double + - name: cimagf + standards: + - stdc + return_type: float + arguments: + - type: _Complex float + - name: cimagl + standards: + - stdc + return_type: long double + arguments: + - type: _Complex long double + - name: cimagf16 + standards: + - stdc + return_type: _Float16 + arguments: + - type: cfloat16 + guard: LIBC_TYPES_HAS_CFLOAT16 + - name: cimagf128 + standards: + - stdc + return_type: float128 + arguments: + - type: cfloat128 + guard: LIBC_TYPES_HAS_CFLOAT128 + - name: creal + standards: + - stdc + return_type: double + arguments: + - type: _Complex double + - name: crealf + standards: + - stdc + return_type: float + arguments: + - type: _Complex float + - 
name: creall + standards: + - stdc + return_type: long double + arguments: + - type: _Complex long double + - name: crealf16 + standards: + - stdc + return_type: _Float16 + arguments: + - type: cfloat16 + guard: LIBC_TYPES_HAS_CFLOAT16 + - name: crealf128 + standards: + - stdc + return_type: float128 + arguments: + - type: cfloat128 + guard: LIBC_TYPES_HAS_CFLOAT128 + - name: conj + standards: + - stdc + return_type: _Complex double + arguments: + - type: _Complex double + - name: conjf + standards: + - stdc + return_type: _Complex float + arguments: + - type: _Complex float + - name: conjl + standards: + - stdc + return_type: _Complex long double + arguments: + - type: _Complex long double + - name: conjf16 + standards: + - stdc + return_type: cfloat16 + arguments: + - type: cfloat16 + guard: LIBC_TYPES_HAS_CFLOAT16 + - name: conjf128 + standards: + - stdc + return_type: cfloat128 + arguments: + - type: cfloat128 + guard: LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/test/src/strings/CMakeLists.txt b/libc/test/src/strings/CMakeLists.txt index 963a1d6d6d60c0..10f96b8531f68f 100644 --- a/libc/test/src/strings/CMakeLists.txt +++ b/libc/test/src/strings/CMakeLists.txt @@ -20,7 +20,7 @@ add_libc_test( index_test.cpp DEPENDS libc.src.strings.index - libc.test.src.strchr_test_support + libc.test.src.string.strchr_test_support ) add_libc_test( @@ -31,7 +31,7 @@ add_libc_test( rindex_test.cpp DEPENDS libc.src.strings.rindex - libc.test.src.strchr_test_support + libc.test.src.string.strchr_test_support ) add_libc_test( diff --git a/libc/test/src/strings/index_test.cpp b/libc/test/src/strings/index_test.cpp index 88953205009d76..fc4cd2b31c55d1 100644 --- a/libc/test/src/strings/index_test.cpp +++ b/libc/test/src/strings/index_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "StrchrTest.h" +#include "test/src/string/StrchrTest.h" -#include "src/string/index.h" +#include "src/strings/index.h" #include 
"test/UnitTest/Test.h" STRCHR_TEST(Index, LIBC_NAMESPACE::index) diff --git a/libc/test/src/strings/rindex_test.cpp b/libc/test/src/strings/rindex_test.cpp index 10513919cffa2d..d3b756fe5f6e52 100644 --- a/libc/test/src/strings/rindex_test.cpp +++ b/libc/test/src/strings/rindex_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "StrchrTest.h" +#include "test/src/string/StrchrTest.h" -#include "src/string/rindex.h" +#include "src/strings/rindex.h" #include "test/UnitTest/Test.h" STRRCHR_TEST(Rindex, LIBC_NAMESPACE::rindex) diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator index 2488bcfc155a8d..5075de537b6ac2 100644 --- a/libcxx/include/experimental/iterator +++ b/libcxx/include/experimental/iterator @@ -52,23 +52,25 @@ namespace std { */ -#include <__config> -#include <__memory/addressof.h> -#include <__ostream/basic_ostream.h> -#include <__string/char_traits.h> -#include <__type_traits/decay.h> -#include <__utility/forward.h> -#include <__utility/move.h> -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +#if 0 +#else // 0 +# include <__config> +# include <__memory/addressof.h> +# include <__ostream/basic_ostream.h> +# include <__string/char_traits.h> +# include <__type_traits/decay.h> +# include <__utility/forward.h> +# include <__utility/move.h> +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_PUSH_MACROS -#include <__undef_macros> +# include <__undef_macros> -#if _LIBCPP_STD_VER >= 14 +# if _LIBCPP_STD_VER >= 14 _LIBCPP_BEGIN_NAMESPACE_LFTS @@ -117,14 +119,15 @@ make_ostream_joiner(basic_ostream<_CharT, _Traits>& __os, _Delim&& __d) { _LIBCPP_END_NAMESPACE_LFTS -#endif // _LIBCPP_STD_VER >= 14 +# endif // _LIBCPP_STD_VER >= 14 _LIBCPP_POP_MACROS -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -# 
include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include +# endif +#endif // 0 #endif // _LIBCPP_EXPERIMENTAL_ITERATOR diff --git a/libcxx/include/experimental/memory b/libcxx/include/experimental/memory index 48e42a0a88a649..cb02ecb8e2bee7 100644 --- a/libcxx/include/experimental/memory +++ b/libcxx/include/experimental/memory @@ -49,27 +49,29 @@ public: } */ -#include <__config> -#include <__cstddef/nullptr_t.h> -#include <__cstddef/size_t.h> -#include <__functional/hash.h> -#include <__functional/operations.h> -#include <__type_traits/add_lvalue_reference.h> -#include <__type_traits/add_pointer.h> -#include <__type_traits/common_type.h> -#include <__type_traits/enable_if.h> -#include <__type_traits/is_convertible.h> -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -#ifdef _LIBCPP_ENABLE_EXPERIMENTAL +#if 0 +#else // 0 +# include <__config> +# include <__cstddef/nullptr_t.h> +# include <__cstddef/size_t.h> +# include <__functional/hash.h> +# include <__functional/operations.h> +# include <__type_traits/add_lvalue_reference.h> +# include <__type_traits/add_pointer.h> +# include <__type_traits/common_type.h> +# include <__type_traits/enable_if.h> +# include <__type_traits/is_convertible.h> +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +# ifdef _LIBCPP_ENABLE_EXPERIMENTAL _LIBCPP_BEGIN_NAMESPACE_LFTS_V2 -# if _LIBCPP_STD_VER >= 17 +# if _LIBCPP_STD_VER >= 17 template class observer_ptr { @@ -172,7 +174,7 @@ _LIBCPP_HIDE_FROM_ABI bool operator>=(observer_ptr<_W1> __a, observer_ptr<_W2> _ return !(__a < __b); } -# endif // _LIBCPP_STD_VER >= 17 +# endif // _LIBCPP_STD_VER >= 17 _LIBCPP_END_NAMESPACE_LFTS_V2 @@ -180,22 +182,23 @@ _LIBCPP_BEGIN_NAMESPACE_STD // hash -# if _LIBCPP_STD_VER >= 17 +# if _LIBCPP_STD_VER >= 17 template struct hash> { _LIBCPP_HIDE_FROM_ABI size_t operator()(const 
experimental::observer_ptr<_Tp>& __ptr) const noexcept { return hash<_Tp*>()(__ptr.get()); } }; -# endif // _LIBCPP_STD_VER >= 17 +# endif // _LIBCPP_STD_VER >= 17 _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP_ENABLE_EXPERIMENTAL +# endif // _LIBCPP_ENABLE_EXPERIMENTAL -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# endif +#endif // 0 #endif /* _LIBCPP_EXPERIMENTAL_MEMORY */ diff --git a/libcxx/include/experimental/propagate_const b/libcxx/include/experimental/propagate_const index 8466d4e9c7ef3c..4d06e9f844acfa 100644 --- a/libcxx/include/experimental/propagate_const +++ b/libcxx/include/experimental/propagate_const @@ -107,39 +107,41 @@ */ -#include <__config> -#include <__cstddef/nullptr_t.h> -#include <__cstddef/size_t.h> -#include <__functional/operations.h> -#include <__fwd/functional.h> -#include <__type_traits/conditional.h> -#include <__type_traits/decay.h> -#include <__type_traits/enable_if.h> -#include <__type_traits/is_array.h> -#include <__type_traits/is_constructible.h> -#include <__type_traits/is_convertible.h> -#include <__type_traits/is_function.h> -#include <__type_traits/is_pointer.h> -#include <__type_traits/is_reference.h> -#include <__type_traits/is_same.h> -#include <__type_traits/is_swappable.h> -#include <__type_traits/remove_cv.h> -#include <__type_traits/remove_pointer.h> -#include <__type_traits/remove_reference.h> -#include <__utility/declval.h> -#include <__utility/forward.h> -#include <__utility/move.h> -#include <__utility/swap.h> -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +#if 0 +#else // 0 +# include <__config> +# include <__cstddef/nullptr_t.h> +# include <__cstddef/size_t.h> +# include <__functional/operations.h> +# include <__fwd/functional.h> +# include <__type_traits/conditional.h> +# include <__type_traits/decay.h> 
+# include <__type_traits/enable_if.h> +# include <__type_traits/is_array.h> +# include <__type_traits/is_constructible.h> +# include <__type_traits/is_convertible.h> +# include <__type_traits/is_function.h> +# include <__type_traits/is_pointer.h> +# include <__type_traits/is_reference.h> +# include <__type_traits/is_same.h> +# include <__type_traits/is_swappable.h> +# include <__type_traits/remove_cv.h> +# include <__type_traits/remove_pointer.h> +# include <__type_traits/remove_reference.h> +# include <__utility/declval.h> +# include <__utility/forward.h> +# include <__utility/move.h> +# include <__utility/swap.h> +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_PUSH_MACROS -#include <__undef_macros> +# include <__undef_macros> -#if _LIBCPP_STD_VER >= 14 +# if _LIBCPP_STD_VER >= 14 _LIBCPP_BEGIN_NAMESPACE_LFTS_V2 @@ -481,13 +483,14 @@ struct greater_equal> { _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP_STD_VER >= 14 +# endif // _LIBCPP_STD_VER >= 14 _LIBCPP_POP_MACROS -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# endif +#endif // 0 #endif // _LIBCPP_EXPERIMENTAL_PROPAGATE_CONST diff --git a/libcxx/include/experimental/simd b/libcxx/include/experimental/simd index 1a868513d160b6..374ab57cdbca55 100644 --- a/libcxx/include/experimental/simd +++ b/libcxx/include/experimental/simd @@ -75,18 +75,21 @@ inline namespace parallelism_v2 { # pragma GCC system_header #endif -#include <__config> -#include -#include -#include -#include -#include -#include -#include -#include - -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -#endif +#if 0 +#else // 0 +# include <__config> +# include +# include +# include +# include +# include +# include +# include +# include + +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && 
_LIBCPP_STD_VER <= 20 +# include +# endif +#endif // 0 #endif /* _LIBCPP_EXPERIMENTAL_SIMD */ diff --git a/libcxx/include/experimental/type_traits b/libcxx/include/experimental/type_traits index 6980fc3c51e465..0da95b79f8ab77 100644 --- a/libcxx/include/experimental/type_traits +++ b/libcxx/include/experimental/type_traits @@ -68,16 +68,18 @@ inline namespace fundamentals_v1 { */ -#include <__config> +#if 0 +#else // 0 +# include <__config> -#if _LIBCPP_STD_VER >= 14 +# if _LIBCPP_STD_VER >= 14 -# include -# include +# include +# include -# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -# endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_LFTS @@ -148,10 +150,11 @@ constexpr bool is_detected_convertible_v = is_detected_convertible<_To, _Op, _Ar _LIBCPP_END_NAMESPACE_LFTS -# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# endif -#endif /* _LIBCPP_STD_VER >= 14 */ +# endif /* _LIBCPP_STD_VER >= 14 */ +#endif // 0 #endif /* _LIBCPP_EXPERIMENTAL_TYPE_TRAITS */ diff --git a/libcxx/include/experimental/utility b/libcxx/include/experimental/utility index 00151b967e496d..1cb9c6a3617a8e 100644 --- a/libcxx/include/experimental/utility +++ b/libcxx/include/experimental/utility @@ -30,12 +30,14 @@ inline namespace fundamentals_v1 { */ -#include <__config> -#include +#if 0 +#else // 0 +# include <__config> +# include -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_LFTS @@ -43,8 +45,9 @@ struct _LIBCPP_TEMPLATE_VIS erased_type {}; _LIBCPP_END_NAMESPACE_LFTS -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) 
&& _LIBCPP_STD_VER <= 20 +# include +# endif +#endif // 0 #endif /* _LIBCPP_EXPERIMENTAL_UTILITY */ diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index 7b5b31c4081788..c9e35479b88c89 100644 --- a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -201,23 +201,25 @@ template */ -#include <__config> -#include <__hash_table> -#include -#include -#include - -#if defined(__DEPRECATED) && __DEPRECATED -# if defined(_LIBCPP_WARNING) +#if 0 +#else // 0 +# include <__config> +# include <__hash_table> +# include +# include +# include + +# if defined(__DEPRECATED) && __DEPRECATED +# if defined(_LIBCPP_WARNING) _LIBCPP_WARNING("Use of the header is deprecated. Migrate to ") -# else -# warning Use of the header is deprecated. Migrate to +# else +# warning Use of the header is deprecated. Migrate to +# endif # endif -#endif -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif namespace __gnu_cxx { @@ -312,17 +314,17 @@ public: _LIBCPP_HIDE_FROM_ABI explicit __hash_map_node_destructor(allocator_type& __na) : __na_(__na), __first_constructed(false), __second_constructed(false) {} -#ifndef _LIBCPP_CXX03_LANG +# ifndef _LIBCPP_CXX03_LANG _LIBCPP_HIDE_FROM_ABI __hash_map_node_destructor(std::__hash_node_destructor&& __x) : __na_(__x.__na_), __first_constructed(__x.__value_constructed), __second_constructed(__x.__value_constructed) { __x.__value_constructed = false; } -#else // _LIBCPP_CXX03_LANG +# else // _LIBCPP_CXX03_LANG _LIBCPP_HIDE_FROM_ABI __hash_map_node_destructor(const std::__hash_node_destructor& __x) : __na_(__x.__na_), __first_constructed(__x.__value_constructed), __second_constructed(__x.__value_constructed) { const_cast(__x.__value_constructed) = false; } -#endif // _LIBCPP_CXX03_LANG +# endif // _LIBCPP_CXX03_LANG _LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) { if (__second_constructed) @@ -863,10 
+865,11 @@ inline _LIBCPP_HIDE_FROM_ABI bool operator!=(const hash_multimap<_Key, _Tp, _Has } // namespace __gnu_cxx -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include +# endif +#endif // 0 #endif // _LIBCPP_HASH_MAP diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set index 1ab259b59979f3..073fcf2fd9911b 100644 --- a/libcxx/include/ext/hash_set +++ b/libcxx/include/ext/hash_set @@ -192,23 +192,25 @@ template */ -#include <__config> -#include <__hash_table> -#include -#include -#include - -#if defined(__DEPRECATED) && __DEPRECATED -# if defined(_LIBCPP_WARNING) +#if 0 +#else // 0 +# include <__config> +# include <__hash_table> +# include +# include +# include + +# if defined(__DEPRECATED) && __DEPRECATED +# if defined(_LIBCPP_WARNING) _LIBCPP_WARNING("Use of the header is deprecated. Migrate to ") -# else -# warning Use of the header is deprecated. Migrate to +# else +# warning Use of the header is deprecated. 
Migrate to +# endif # endif -#endif -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif namespace __gnu_cxx { @@ -575,10 +577,11 @@ inline _LIBCPP_HIDE_FROM_ABI bool operator!=(const hash_multiset<_Value, _Hash, } // namespace __gnu_cxx -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include +# endif +#endif // 0 #endif // _LIBCPP_HASH_SET diff --git a/libcxx/include/flat_map b/libcxx/include/flat_map index b99eebfa9fc538..1f2c204e2ef215 100644 --- a/libcxx/include/flat_map +++ b/libcxx/include/flat_map @@ -38,11 +38,15 @@ namespace std { */ #if 0 -#else // 0 -# include <__assert> // all public C++ headers provide the assertion handler +#else // 0 # include <__config> -# include <__flat_map/flat_map.h> -# include <__flat_map/sorted_unique.h> + +# if _LIBCPP_STD_VER >= 23 +# include <__flat_map/flat_map.h> +# include <__flat_map/sorted_unique.h> +# endif + +// for feature-test macros # include // standard required includes diff --git a/libcxx/test/libcxx/feature_test_macro/version_header.sh.py b/libcxx/test/libcxx/feature_test_macro/version_header.sh.py index 24d3981f81aebb..1e53d5fd928305 100644 --- a/libcxx/test/libcxx/feature_test_macro/version_header.sh.py +++ b/libcxx/test/libcxx/feature_test_macro/version_header.sh.py @@ -30,8 +30,8 @@ def test(output, expected): // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP_VERSION -#define _LIBCPP_VERSION +#ifndef _LIBCPP_VERSIONH +#define _LIBCPP_VERSIONH #include <__config> @@ -69,6 +69,6 @@ def test(output, expected): // define __cpp_lib_missing_FTM_in_older_standard 2026L #endif // _LIBCPP_STD_VER >= 26 -#endif // _LIBCPP_VERSION +#endif // _LIBCPP_VERSIONH """, ) diff --git 
a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 72fccfd3649325..801f6cbf9060f8 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -674,35 +674,13 @@ filesystem typeinfo filesystem utility filesystem variant filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 72fccfd3649325..801f6cbf9060f8 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -674,35 +674,13 @@ filesystem typeinfo filesystem utility filesystem variant filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index fd36dace19c76c..9146a7079ee1e9 
100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -692,35 +692,13 @@ filesystem typeinfo filesystem utility filesystem variant filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index eaec25f81e5821..a3518f7f62ecb9 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -701,35 +701,13 @@ filesystem utility filesystem variant filesystem vector filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index 89c28e49d6c9de..6de95139279471 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -697,35 +697,13 @@ 
filesystem utility filesystem variant filesystem vector filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 53fd44291b216a..0b5c0f107134ba 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -2230,8 +2230,8 @@ def version_header(self) -> str: // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP_VERSION -#define _LIBCPP_VERSION +#ifndef _LIBCPP_VERSIONH +#define _LIBCPP_VERSIONH #include <__config> @@ -2241,7 +2241,7 @@ def version_header(self) -> str: {feature_test_macros} -#endif // _LIBCPP_VERSION +#endif // _LIBCPP_VERSIONH """ return template.format( feature_test_macros=generate_version_header_implementation( diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index da0e8b286cddc1..158d61ff4fcb25 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -34,6 +34,11 @@ MACRO_ENSURE_OUT_OF_SOURCE_BUILD( build directory and run 'cmake /path/to/${PROJECT_NAME} [options]' there." 
) +if (MSVC) + message(FATAL_ERROR "Libc++abi can't be built for MSVC targets, and doing so is pointless anyway because such " + "targets must use the MS C++ ABI, and libc++abi provides the Itanium C++ ABI.") +endif() + #=============================================================================== # Setup CMake Options #=============================================================================== diff --git a/libcxxabi/src/private_typeinfo.cpp b/libcxxabi/src/private_typeinfo.cpp index 2f631041f74c94..01a1d2603b18d0 100644 --- a/libcxxabi/src/private_typeinfo.cpp +++ b/libcxxabi/src/private_typeinfo.cpp @@ -591,10 +591,9 @@ __base_class_type_info::has_unambiguous_public_base(__dynamic_cast_info* info, // .. and reset the pointer. adjustedPtr = nullptr; } - __base_type->has_unambiguous_public_base( - info, - static_cast(adjustedPtr) + offset_to_base, - (__offset_flags & __public_mask) ? path_below : not_public_path); + __base_type->has_unambiguous_public_base( + info, reinterpret_cast(reinterpret_cast(adjustedPtr) + offset_to_base), + (__offset_flags & __public_mask) ? 
path_below : not_public_path); } void diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index ea06dc8a67b949..3c8499fd334649 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -24,6 +24,11 @@ set(LIBUNWIND_LIBCXX_PATH "${CMAKE_CURRENT_LIST_DIR}/../libcxx" CACHE PATH include(GNUInstallDirs) include(CheckSymbolExists) +if (MSVC) + message(FATAL_ERROR "Libunwind doesn't build for MSVC targets, and that is almost certainly not what you want to do " + "anyway since libunwind is tied to the Itanium C++ ABI, and MSVC targets must use the MS C++ ABI.") +endif() + #=============================================================================== # Setup CMake Options #=============================================================================== diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index c1b3272a1f49ea..1148be09fb10cc 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -845,13 +845,14 @@ MemoryBufferRef LinkerDriver::convertResToCOFF(ArrayRef mbs, // Create OptTable +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + // Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr llvm::opt::OptTable::Info infoTable[] = { @@ -860,7 +861,8 @@ static constexpr llvm::opt::OptTable::Info infoTable[] = { #undef OPTION }; -COFFOptTable::COFFOptTable() : GenericOptTable(infoTable, true) {} +COFFOptTable::COFFOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, infoTable, true) {} // Set color diagnostics according to --color-diagnostics={auto,always,never} // or --no-color-diagnostics flags. 
diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index 4c88723f090d08..6d027c529c19e9 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -33,13 +33,14 @@ using namespace lld::elf; // Create OptTable +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + // Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr opt::OptTable::Info optInfo[] = { @@ -48,7 +49,8 @@ static constexpr opt::OptTable::Info optInfo[] = { #undef OPTION }; -ELFOptTable::ELFOptTable() : GenericOptTable(optInfo) {} +ELFOptTable::ELFOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, optInfo) {} // Set color diagnostics according to --color-diagnostics={auto,always,never} // or --no-color-diagnostics flags. diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 2084fcfd4d651a..c44773d0b7dabe 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1709,7 +1709,6 @@ static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { } static void createBitcodeSymbol(Ctx &ctx, Symbol *&sym, - const std::vector &keptComdats, const lto::InputFile::Symbol &objSym, BitcodeFile &f) { uint8_t binding = objSym.isWeak() ? 
STB_WEAK : STB_GLOBAL; @@ -1726,8 +1725,7 @@ static void createBitcodeSymbol(Ctx &ctx, Symbol *&sym, sym = ctx.symtab->insert(objSym.getName()); } - int c = objSym.getComdatIndex(); - if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) { + if (objSym.isUndefined()) { Undefined newSym(&f, StringRef(), binding, visibility, type); sym->resolve(ctx, newSym); sym->referenced = true; @@ -1766,10 +1764,10 @@ void BitcodeFile::parse() { // ObjFile::initializeSymbols. for (auto [i, irSym] : llvm::enumerate(obj->symbols())) if (!irSym.isUndefined()) - createBitcodeSymbol(ctx, symbols[i], keptComdats, irSym, *this); + createBitcodeSymbol(ctx, symbols[i], irSym, *this); for (auto [i, irSym] : llvm::enumerate(obj->symbols())) if (irSym.isUndefined()) - createBitcodeSymbol(ctx, symbols[i], keptComdats, irSym, *this); + createBitcodeSymbol(ctx, symbols[i], irSym, *this); for (auto l : obj->getDependentLibraries()) addDependentLibrary(ctx, l, this); diff --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp index 308c5eaf8c3178..69d023c23b3c79 100644 --- a/lld/MachO/DriverUtils.cpp +++ b/lld/MachO/DriverUtils.cpp @@ -34,13 +34,14 @@ using namespace llvm::sys; using namespace lld; using namespace lld::macho; +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + // Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr OptTable::Info optInfo[] = { @@ -65,7 +66,8 @@ static constexpr OptTable::Info optInfo[] = { #undef OPTION }; -MachOOptTable::MachOOptTable() : GenericOptTable(optInfo) {} +MachOOptTable::MachOOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, optInfo) {} // 
Set color diagnostics according to --color-diagnostics={auto,always,never} // or --no-color-diagnostics flags. diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index b4371b5b2240d2..a77d86b443a6c3 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -58,13 +58,13 @@ enum { #undef OPTION }; -// Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr opt::OptTable::Info infoTable[] = { @@ -92,7 +92,9 @@ static constexpr opt::OptTable::Info infoTable[] = { namespace { class MinGWOptTable : public opt::GenericOptTable { public: - MinGWOptTable() : opt::GenericOptTable(infoTable, false) {} + MinGWOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, infoTable, + false) {} opt::InputArgList parse(ArrayRef argv); }; } // namespace diff --git a/lld/test/ELF/lto/internalize-exportdyn.ll b/lld/test/ELF/lto/internalize-exportdyn.ll index 7c670a548025f1..25e319269bbdb6 100644 --- a/lld/test/ELF/lto/internalize-exportdyn.ll +++ b/lld/test/ELF/lto/internalize-exportdyn.ll @@ -56,16 +56,19 @@ define linkonce_odr void @baz() { @use_baz = global ptr @baz +;; Test comdat symbols that are prevailing in this module and non-prevailing in the other module. define void @ext_and_ext() local_unnamed_addr comdat { call void @foo(i64 1) ret void } +;; linkonce_odr in this module and external in the other module. define linkonce_odr void @lo_and_ext() local_unnamed_addr comdat { call void @foo(i64 1) ret void } +;; linkonce_odr in this module and weak_odr in the other module. 
define linkonce_odr void @lo_and_wo() local_unnamed_addr comdat { ret void } @@ -92,7 +95,7 @@ define weak_odr void @wo_and_lo() local_unnamed_addr comdat { ; CHECK-NEXT: call void @foo(i64 1) ; CHECK: define internal void @lo_and_ext() comdat ; CHECK-NEXT: call void @foo(i64 1) -; CHECK: define internal void @lo_and_wo() comdat +; CHECK: define weak_odr dso_local void @lo_and_wo() comdat ; CHECK: define weak_odr dso_local void @wo_and_lo() comdat ; DSO: @c = weak_odr constant i32 1 @@ -110,7 +113,7 @@ define weak_odr void @wo_and_lo() local_unnamed_addr comdat { ; DSO: define weak_odr void @baz() ; DSO: define void @ext_and_ext() comdat ; DSO: define internal void @lo_and_ext() comdat -; DSO: define internal void @lo_and_wo() comdat +; DSO: define weak_odr void @lo_and_wo() comdat ; DSO: define weak_odr void @wo_and_lo() comdat ;--- lib.s diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 37a0156c728f6f..00b5c82d9c7777 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -139,13 +139,13 @@ bool link(ArrayRef args, llvm::raw_ostream &stdoutOS, return errorCount() == 0; } -// Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr opt::OptTable::Info optInfo[] = { @@ -173,7 +173,8 @@ static constexpr opt::OptTable::Info optInfo[] = { namespace { class WasmOptTable : public opt::GenericOptTable { public: - WasmOptTable() : opt::GenericOptTable(optInfo) {} + WasmOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, optInfo) {} opt::InputArgList parse(ArrayRef argv); }; } // namespace diff --git 
a/lldb/source/DataFormatters/FormatterBytecode.cpp b/lldb/source/DataFormatters/FormatterBytecode.cpp index f344fbaff6f02a..e49c7506781875 100644 --- a/lldb/source/DataFormatters/FormatterBytecode.cpp +++ b/lldb/source/DataFormatters/FormatterBytecode.cpp @@ -379,7 +379,7 @@ llvm::Error Interpret(std::vector &control, BINOP_CHECKZERO(%); continue; case op_shl: -#define SHIFTOP(OP) \ +#define SHIFTOP(OP, LEFT) \ { \ TYPE_CHECK(Any, UInt); \ uint64_t y = data.Pop(); \ @@ -390,16 +390,18 @@ llvm::Error Interpret(std::vector &control, data.Push(x OP y); \ } else if (std::holds_alternative(data.back())) { \ int64_t x = data.Pop(); \ + if (x < 0 && LEFT) \ + return error("left shift of negative value"); \ if (y > 64) \ return error("shift out of bounds"); \ data.Push(x OP y); \ } else \ return error("unsupported data types"); \ } - SHIFTOP(<<); + SHIFTOP(<<, true); continue; case op_shr: - SHIFTOP(<<); + SHIFTOP(>>, false); continue; case op_and: BINOP(&); diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index 8baf3a8d60c373..2a36f95c94d0ce 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -54,6 +54,10 @@ using namespace lldb; using namespace lldb_private; +#define OPTTABLE_STR_TABLE_CODE +#include "clang/Driver/Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + static Status ExceptionMaskValidator(const char *string, void *unused) { Status error; llvm::StringRef str_ref(string); @@ -1078,8 +1082,8 @@ void PlatformDarwin::AddClangModuleCompilationOptionsForSDKType( // clang has no version-min clang flag for XROS. if (!version.empty() && sdk_type != XcodeSDK::Type::Linux && sdk_type != XcodeSDK::Type::XROS) { -#define OPTION(PREFIX, NAME, VAR, ...) \ - llvm::StringRef opt_##VAR = NAME; \ +#define OPTION(PREFIX_OFFSET, NAME_OFFSET, VAR, ...) 
\ + llvm::StringRef opt_##VAR = &OptionStrTable[NAME_OFFSET]; \ (void)opt_##VAR; #include "clang/Driver/Options.inc" #undef OPTION diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index afb1a1ff95c3a1..98c3643f75c97b 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -61,12 +61,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -76,7 +77,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class LLDBOptTable : public opt::GenericOptTable { public: - LLDBOptTable() : opt::GenericOptTable(InfoTable) {} + LLDBOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // namespace diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 3bfc578806021e..7e8f7b5f6df679 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -92,12 +92,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr llvm::opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -106,7 +107,9 @@ static constexpr llvm::opt::OptTable::Info InfoTable[] = { }; class LLDBDAPOptTable : public llvm::opt::GenericOptTable { public: - LLDBDAPOptTable() : llvm::opt::GenericOptTable(InfoTable, true) {} + LLDBDAPOptTable() + : llvm::opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, + InfoTable, true) {} }; typedef void (*RequestCallback)(const llvm::json::Object &command); diff --git a/lldb/tools/lldb-server/lldb-gdbserver.cpp b/lldb/tools/lldb-server/lldb-gdbserver.cpp index ed10c161b6b2f6..fec868b1fa9a18 100644 --- a/lldb/tools/lldb-server/lldb-gdbserver.cpp +++ b/lldb/tools/lldb-server/lldb-gdbserver.cpp @@ -291,12 +291,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "LLGSOptions.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "LLGSOptions.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -306,7 +307,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class LLGSOptTable : public opt::GenericOptTable { public: - LLGSOptTable() : opt::GenericOptTable(InfoTable) {} + LLGSOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} void PrintHelp(llvm::StringRef Name) { std::string Usage = diff --git a/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp b/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp index 15d9229de00332..7307db650c1629 100644 --- a/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp +++ b/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp @@ -147,9 +147,12 @@ TEST_F(FormatterBytecodeTest, ArithOps) { { DataStack data; unsigned char minus_one = 127; - ASSERT_TRUE( + ASSERT_FALSE( Interpret({op_lit_int, minus_one, op_lit_uint, 2, op_shl}, data)); - ASSERT_EQ(data.Pop(), -4); + unsigned char minus_two = 126; + ASSERT_TRUE( + Interpret({op_lit_int, minus_two, op_lit_uint, 1, op_shr}, data)); + ASSERT_EQ(data.Pop(), -1); } { DataStack data; diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index 8da9b150c13d9f..655bd996806299 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -126,11 +126,6 @@ llvm-dev@redking.me.uk (email), [RKSimon](https://github.com/RKSimon) (GitHub) \ Craig Topper \ craig.topper@sifive.com (email), [topperc](https://github.com/topperc) (GitHub) -#### FastISel - -Chad Rosier \ -mcrosier@codeaurora.org (email) - #### Instruction scheduling Matthias Braun \ @@ -387,7 +382,7 @@ tstellar@redhat.com (email), [tstellar](https://github.com/tstellar) (GitHub) #### MinGW support Martin Storsjö \ -martin@martin.st (email), [mstrorsjo](https://github.com/mstrorsjo) (GitHub) +martin@martin.st (email), [mstorsjo](https://github.com/mstorsjo) (GitHub) #### Windows support in object tools @@ -461,6 +456,7 @@ Justin Bogner (mail@justinbogner.com, [bogner](https://github.com/bogner)) -- Se Evan Cheng (evan.cheng@apple.com) -- 
Parts of code generator not covered by someone else \ Renato Golin (rengolin@systemcall.eu, [rengolin](https://github.com/rengolin)) -- ARM backend \ Anton Korobeynikov (anton@korobeynikov.info, [asl](https://github.com/asl)) -- ARM EABI \ +Chad Rosier (mcrosier@codeaurora.org) -- FastISel \ Hans Wennborg (hans@chromium.org, [zmodem](https://github.com/zmodem)) -- Release management \ Kostya Serebryany ([kcc](https://github.com/kcc)) -- Sanitizers \ Evgeniy Stepanov ([eugenis](https://github.com/eugenis)) -- Sanitizers diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst index 67b6ebb4b04d94..aaa599b787a76d 100644 --- a/llvm/docs/Security.rst +++ b/llvm/docs/Security.rst @@ -36,6 +36,7 @@ meet the criteria for inclusion below. The list is in the format `* ${full_name} (${affiliation}) [${github_username}]`. If a github username for an individual isn't available, the brackets will be empty. +* Abhay Kanhere (Apple) [@AbhayKanhere] * Ahmed Bougacha (Apple) [@ahmedbougacha] * Artur Pilipenko (Azul Systems Inc) [] * Boovaragavan Dasarathan (Nvidia) [@mrragava] diff --git a/llvm/docs/UndefinedBehavior.rst b/llvm/docs/UndefinedBehavior.rst index f68bbbd505330a..82ae20c74f330e 100644 --- a/llvm/docs/UndefinedBehavior.rst +++ b/llvm/docs/UndefinedBehavior.rst @@ -237,6 +237,53 @@ Poison values can be replaced with any value of type (undef, concrete values, or a ``freeze`` instruction). +Propagation of Poison Through Select +------------------------------------ +Most instructions return poison if any of their inputs is poison. +A notable exception is the ``select`` instruction, which is poison if and +only if the condition is poison or the selected value is poison. +This means that ``select`` acts as a barrier for poison propagation, which +impacts which optimizations can be performed. + +For example, consider the following function: + +.. 
code-block:: llvm + + define i1 @fn(i32 %x, i32 %y) { + %cmp1 = icmp ne i32 %x, 0 + %cmp2 = icmp ugt i32 %x, %y + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and + } + +It is not correct to optimize the ``select`` into an ``and`` because when +``%cmp1`` is false, the ``select`` is only poison if ``%x`` is poison, while +the ``and`` below is poison if either ``%x`` or ``%y`` are poison. + +.. code-block:: llvm + + define i1 @fn(i32 %x, i32 %y) { + %cmp1 = icmp ne i32 %x, 0 + %cmp2 = icmp ugt i32 %x, %y + %and = and i1 %cmp1, %cmp2 ;; poison if %x or %y are poison + ret i1 %and + } + +However, the optimization is possible if all operands of the values are used in +the condition (notice the flipped operands in the ``select``): + +.. code-block:: llvm + + define i1 @fn(i32 %x, i32 %y) { + %cmp1 = icmp ne i32 %x, 0 + %cmp2 = icmp ugt i32 %x, %y + %and = select i1 %cmp2, i1 %cmp1, i1 false + ; ok to replace with: + %and = and i1 %cmp1, %cmp2 + ret i1 %and + } + + The Freeze Instruction ====================== Both undef and poison values sometimes propagate too much down an expression diff --git a/llvm/docs/Vectorizers.rst b/llvm/docs/Vectorizers.rst index a4462e53edda09..f134a6df94a69a 100644 --- a/llvm/docs/Vectorizers.rst +++ b/llvm/docs/Vectorizers.rst @@ -399,6 +399,19 @@ small trip counts. .. image:: epilogue-vectorization-cfg.png +Early Exit Vectorization +^^^^^^^^^^^^^^^^^^^^^^^^ + +When vectorizing a loop with a single early exit, the loop blocks following the +early exit are predicated and the vector loop will always exit via the latch. +If the early exit has been taken, the vector loop's successor block +(``middle.split`` below) branches to the early exit block. Otherwise +``middle.block`` selects between the exit block from the latch or the scalar +remainder loop. + +.. 
image:: vplan-early-exit.png + + Performance ----------- diff --git a/llvm/docs/vplan-early-exit.dot b/llvm/docs/vplan-early-exit.dot new file mode 100644 index 00000000000000..63490b0cdb2e43 --- /dev/null +++ b/llvm/docs/vplan-early-exit.dot @@ -0,0 +1,41 @@ +digraph VPlan { +graph [labelloc=t, fontsize=30; label=""] +node [shape=rect, fontname=Courier, fontsize=30] +edge [fontname=Courier, fontsize=30] +compound=true + N1 [label = + "vector.ph" + ] + N1 -> N2 [ label="" lhead=cluster_N3] + subgraph cluster_N3 { + fontname=Courier + label="\ vector loop" + N2 [label = + "vector.body" + ] + } + N2 -> N4 [ label="" ltail=cluster_N3] + N4 [label = + "middle.split" + ] + N4 -> N5 [ label=""] + N4 -> N6 [ label=""] + N5 [label = + "early.exit" + ] + N6 [label = + "middle.block" + ] + N6 -> N9 [ label=""] + N6 -> N7 [ label=""] + N7 [label = + "scalar.ph" + ] + N7 -> N8 [ label=""] + N8 [label = + "loop.header" + ] + N9 [label = + "latch.exit" + ] +} diff --git a/llvm/docs/vplan-early-exit.png b/llvm/docs/vplan-early-exit.png new file mode 100644 index 00000000000000..3cd293bcdbcc82 Binary files /dev/null and b/llvm/docs/vplan-early-exit.png differ diff --git a/llvm/include/llvm/CodeGen/SDNodeProperties.td b/llvm/include/llvm/CodeGen/SDNodeProperties.td index 3cb304f47f4b9d..d32904283a11a9 100644 --- a/llvm/include/llvm/CodeGen/SDNodeProperties.td +++ b/llvm/include/llvm/CodeGen/SDNodeProperties.td @@ -29,5 +29,3 @@ def SDNPMayLoad : SDNodeProperty; // May read memory, sets 'mayLoad'. def SDNPSideEffect : SDNodeProperty; // Sets 'HasUnmodelledSideEffects'. def SDNPMemOperand : SDNodeProperty; // Touches memory, has assoc MemOperand def SDNPVariadic : SDNodeProperty; // Node has variable arguments. 
-def SDNPWantRoot : SDNodeProperty; // ComplexPattern gets the root of match -def SDNPWantParent : SDNodeProperty; // ComplexPattern gets the parent diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 292fa3c94969be..374f9f2e7f5696 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -41,12 +41,10 @@ class RegScavenger; class VirtRegMap; class LiveIntervals; class LiveInterval; - class TargetRegisterClass { public: using iterator = const MCPhysReg *; using const_iterator = const MCPhysReg *; - using sc_iterator = const TargetRegisterClass* const *; // Instance variables filled by tablegen, do not use! const MCRegisterClass *MC; @@ -67,7 +65,8 @@ class TargetRegisterClass { /// Whether a combination of subregisters can cover every register in the /// class. See also the CoveredBySubRegs description in Target.td. const bool CoveredBySubRegs; - const sc_iterator SuperClasses; + const unsigned *SuperClasses; + const uint16_t SuperClassesSize; ArrayRef (*OrderFunc)(const MachineFunction&); /// Return the register class ID number. @@ -175,18 +174,16 @@ class TargetRegisterClass { return SuperRegIndices; } - /// Returns a NULL-terminated list of super-classes. The + /// Returns a list of super-classes. The /// classes are ordered by ID which is also a topological ordering from large /// to small classes. The list does NOT include the current class. - sc_iterator getSuperClasses() const { - return SuperClasses; + ArrayRef superclasses() const { + return ArrayRef(SuperClasses, SuperClassesSize); } /// Return true if this TargetRegisterClass is a subset /// class of at least one other TargetRegisterClass. - bool isASubClass() const { - return SuperClasses[0] != nullptr; - } + bool isASubClass() const { return SuperClasses != nullptr; } /// Returns the preferred order for allocating registers from this register /// class in MF. 
The raw order comes directly from the .td file and may diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index 2eb80f49bedf12..be5e63633097e1 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -16,7 +16,7 @@ /* Indicate that this is LLVM compiled from the amd-gfx branch. */ #define LLVM_HAVE_BRANCH_AMD_GFX -#define LLVM_MAIN_REVISION 521082 +#define LLVM_MAIN_REVISION 521148 /* Define if LLVM_ENABLE_DUMP is enabled */ #cmakedefine LLVM_ENABLE_DUMP diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index bd7fb2361aaeb1..772f60343c6348 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -177,6 +177,7 @@ def OMPC_Exclusive : Clause<"exclusive"> { } def OMPC_Fail : Clause<"fail"> { let clangClass = "OMPFailClause"; + let flangClass = "OmpFailClause"; } def OMPC_Filter : Clause<"filter"> { let clangClass = "OMPFilterClause"; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b2f0aa2f7e4d90..0a1bd4c923b9b8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3872,6 +3872,20 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_fp8_cvtlt1 : SVE2_FP8_Cvt; def int_aarch64_sve_fp8_cvtlt2 : SVE2_FP8_Cvt; + // SVE Narrowing Conversions + class SVE2_FP8_Narrow_Cvt + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; + def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; + + def int_aarch64_sve_fp8_cvtnt + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + class SME2_FP8_CVT_X2_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, 
LLVMMatchType<0>], [llvm_nxv16i8_ty], diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index 8fabc78d81aedf..decb6cb5455ebc 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -53,10 +53,8 @@ class OptTable { public: /// Entry for a single option instance in the option data table. struct Info { - /// A null terminated array of prefix strings to apply to name while - /// matching. - ArrayRef Prefixes; - StringLiteral PrefixedName; + unsigned PrefixesOffset; + unsigned PrefixedNameOffset; const char *HelpText; // Help text for specific visibilities. A list of pairs, where each pair // is a list of visibilities and a specific help string for those @@ -80,15 +78,56 @@ class OptTable { const char *AliasArgs; const char *Values; - StringRef getName() const { - unsigned PrefixLength = Prefixes.empty() ? 0 : Prefixes[0].size(); - return PrefixedName.drop_front(PrefixLength); + bool hasNoPrefix() const { return PrefixesOffset == 0; } + + unsigned getNumPrefixes(ArrayRef PrefixesTable) const { + return PrefixesTable[PrefixesOffset]; + } + + ArrayRef + getPrefixOffsets(ArrayRef PrefixesTable) const { + return hasNoPrefix() ? ArrayRef() + : PrefixesTable.slice(PrefixesOffset + 1, + getNumPrefixes(PrefixesTable)); + } + + void appendPrefixes(const char *StrTable, ArrayRef PrefixesTable, + SmallVectorImpl &Prefixes) const { + for (unsigned PrefixOffset : getPrefixOffsets(PrefixesTable)) + Prefixes.push_back(&StrTable[PrefixOffset]); + } + + StringRef getPrefix(const char *StrTable, ArrayRef PrefixesTable, + unsigned PrefixIndex) const { + return &StrTable[getPrefixOffsets(PrefixesTable)[PrefixIndex]]; + } + + StringRef getPrefixedName(const char *StrTable) const { + return &StrTable[PrefixedNameOffset]; + } + + StringRef getName(const char *StrTable, + ArrayRef PrefixesTable) const { + unsigned PrefixLength = + hasNoPrefix() ? 
0 : getPrefix(StrTable, PrefixesTable, 0).size(); + return getPrefixedName(StrTable).drop_front(PrefixLength); } }; private: + // A unified string table for these options. Individual strings are stored as + // null terminated C-strings at offsets within this table. + const char *StrTable; + + // A table of different sets of prefixes. Each set starts with the number of + // prefixes in that set followed by that many offsets into the string table + // for each of the prefix strings. This is essentially a Pascal-string style + // encoding. + ArrayRef PrefixesTable; + /// The option information table. ArrayRef OptionInfos; + bool IgnoreCase; bool GroupedShortOptions = false; bool DashDashParsing = false; @@ -102,12 +141,12 @@ class OptTable { /// special option like 'input' or 'unknown', and is not an option group). unsigned FirstSearchableIndex = 0; - /// The union of the first element of all option prefixes. - SmallString<8> PrefixChars; - /// The union of all option prefixes. If an argument does not begin with /// one of these, it is an input. - virtual ArrayRef getPrefixesUnion() const = 0; + SmallVector PrefixesUnion; + + /// The union of the first element of all option prefixes. + SmallString<8> PrefixChars; private: const Info &getInfo(OptSpecifier Opt) const { @@ -122,7 +161,8 @@ class OptTable { protected: /// Initialize OptTable using Tablegen'ed OptionInfos. Child class must /// manually call \c buildPrefixChars once they are fully constructed. - OptTable(ArrayRef OptionInfos, bool IgnoreCase = false); + OptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, bool IgnoreCase = false); /// Build (or rebuild) the PrefixChars member. void buildPrefixChars(); @@ -130,6 +170,12 @@ class OptTable { public: virtual ~OptTable(); + /// Return the string table used for option names. + const char *getStrTable() const { return StrTable; } + + /// Return the prefixes table used for option names. 
+ ArrayRef getPrefixesTable() const { return PrefixesTable; } + /// Return the total number of option classes. unsigned getNumOptions() const { return OptionInfos.size(); } @@ -141,7 +187,25 @@ class OptTable { /// Lookup the name of the given option. StringRef getOptionName(OptSpecifier id) const { - return getInfo(id).getName(); + return getInfo(id).getName(StrTable, PrefixesTable); + } + + /// Lookup the prefix of the given option. + StringRef getOptionPrefix(OptSpecifier id) const { + const Info &I = getInfo(id); + return I.hasNoPrefix() ? StringRef() + : I.getPrefix(StrTable, PrefixesTable, 0); + } + + void appendOptionPrefixes(OptSpecifier id, + SmallVectorImpl &Prefixes) const { + const Info &I = getInfo(id); + I.appendPrefixes(StrTable, PrefixesTable, Prefixes); + } + + /// Lookup the prefixed name of the given option. + StringRef getOptionPrefixedName(OptSpecifier id) const { + return getInfo(id).getPrefixedName(StrTable); } /// Get the kind of the given option. @@ -353,28 +417,22 @@ class OptTable { /// Specialization of OptTable class GenericOptTable : public OptTable { - SmallVector PrefixesUnionBuffer; - protected: - GenericOptTable(ArrayRef OptionInfos, bool IgnoreCase = false); - ArrayRef getPrefixesUnion() const final { - return PrefixesUnionBuffer; - } + GenericOptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, bool IgnoreCase = false); }; class PrecomputedOptTable : public OptTable { - ArrayRef PrefixesUnion; - protected: - PrecomputedOptTable(ArrayRef OptionInfos, - ArrayRef PrefixesTable, + PrecomputedOptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, + ArrayRef PrefixesUnionOffsets, bool IgnoreCase = false) - : OptTable(OptionInfos, IgnoreCase), PrefixesUnion(PrefixesTable) { + : OptTable(StrTable, PrefixesTable, OptionInfos, IgnoreCase) { + for (unsigned PrefixOffset : PrefixesUnionOffsets) + PrefixesUnion.push_back(&StrTable[PrefixOffset]); buildPrefixChars(); } - ArrayRef 
getPrefixesUnion() const final { - return PrefixesUnion; - } }; } // end namespace opt @@ -382,31 +440,35 @@ class PrecomputedOptTable : public OptTable { } // end namespace llvm #define LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ - ID_PREFIX, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + ID_PREFIX, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) \ ID_PREFIX##ID -#define LLVM_MAKE_OPT_ID(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, \ - ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, \ - HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ - LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ - OPT_, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUE) +#define LLVM_MAKE_OPT_ID(PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, \ + GROUP, ALIAS, ALIASARGS, FLAGS, VISIBILITY, PARAM, \ + HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(OPT_, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, \ + ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ + VISIBILITY, PARAM, HELPTEXT, \ + HELPTEXTSFORVARIANTS, METAVAR, VALUE) #define LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ - ID_PREFIX, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + ID_PREFIX, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) \ llvm::opt::OptTable::Info { \ - PREFIX, PREFIXED_NAME, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ - ID_PREFIX##ID, llvm::opt::Option::KIND##Class, PARAM, FLAGS, \ + PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, ID_PREFIX##ID, llvm::opt::Option::KIND##Class, PARAM, FLAGS, \ VISIBILITY, 
ID_PREFIX##GROUP, ID_PREFIX##ALIAS, ALIASARGS, VALUES \ } -#define LLVM_CONSTRUCT_OPT_INFO(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, \ - ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, \ - HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ +#define LLVM_CONSTRUCT_OPT_INFO( \ + PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ - OPT_, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) + OPT_, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) #endif // LLVM_OPTION_OPTTABLE_H diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h index 5d16fbdb6b77c8..a0563da15c8edf 100644 --- a/llvm/include/llvm/Option/Option.h +++ b/llvm/include/llvm/Option/Option.h @@ -100,7 +100,8 @@ class Option { /// Get the name of this option without any prefix. StringRef getName() const { assert(Info && "Must have a valid info!"); - return Info->getName(); + assert(Owner && "Must have a valid owner!"); + return Owner->getOptionName(Info->ID); } const Option getGroup() const { @@ -127,15 +128,16 @@ class Option { /// Get the default prefix for this option. StringRef getPrefix() const { - return Info->Prefixes.empty() - ? StringRef() - : static_cast(Info->Prefixes[0]); + assert(Info && "Must have a valid info!"); + assert(Owner && "Must have a valid owner!"); + return Owner->getOptionPrefix(Info->ID); } /// Get the name of this option with the default prefix. 
- StringLiteral getPrefixedName() const { + StringRef getPrefixedName() const { assert(Info && "Must have a valid info!"); - return Info->PrefixedName; + assert(Owner && "Must have a valid owner!"); + return Owner->getOptionPrefixedName(Info->ID); } /// Get the help text for this option. diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 25e13f575296dd..f1010b312ee569 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -21,6 +21,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/ProfileData/MemProf.h" +#include "llvm/ProfileData/MemProfYAML.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ace3c044d8c277..ef96b74c9d400c 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -11,9 +11,7 @@ #include "llvm/Support/BLAKE3.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" -#include "llvm/Support/Format.h" #include "llvm/Support/HashBuilder.h" -#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include @@ -492,23 +490,6 @@ struct MemProfRecord { } }; -// A "typedef" for GUID. See ScalarTraits for how a GUID is -// serialized and deserialized in YAML. -LLVM_YAML_STRONG_TYPEDEF(uint64_t, GUIDHex64) - -// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields -// within MemProfRecord at the same level as if the GUID were part of -// MemProfRecord. -struct GUIDMemProfRecordPair { - GUIDHex64 GUID; - MemProfRecord Record; -}; - -// The top-level data structure, only used with YAML for now. -struct AllMemProfData { - std::vector HeapProfileRecords; -}; - // Reads a memprof schema from a buffer. 
All entries in the buffer are // interpreted as uint64_t. The first entry in the buffer denotes the number of // ids in the schema. Subsequent entries are integers which map to memprof::Meta @@ -1169,131 +1150,6 @@ template class CallStackRadixTreeBuilder { } }; } // namespace memprof - -namespace yaml { -template <> struct ScalarTraits { - static void output(const memprof::GUIDHex64 &Val, void *, raw_ostream &Out) { - // Print GUID as a 16-digit hexadecimal number. - Out << format("0x%016" PRIx64, (uint64_t)Val); - } - static StringRef input(StringRef Scalar, void *, memprof::GUIDHex64 &Val) { - // Reject decimal GUIDs. - if (all_of(Scalar, [](char C) { return std::isdigit(C); })) - return "use a hexadecimal GUID or a function instead"; - - uint64_t Num; - if (Scalar.starts_with_insensitive("0x")) { - // Accept hexadecimal numbers starting with 0x or 0X. - if (Scalar.getAsInteger(0, Num)) - return "invalid hex64 number"; - Val = Num; - } else { - // Otherwise, treat the input as a string containing a function name. - Val = memprof::IndexedMemProfRecord::getGUID(Scalar); - } - return StringRef(); - } - static QuotingType mustQuote(StringRef) { return QuotingType::None; } -}; - -template <> struct MappingTraits { - static void mapping(IO &Io, memprof::Frame &F) { - Io.mapRequired("Function", F.Function); - Io.mapRequired("LineOffset", F.LineOffset); - Io.mapRequired("Column", F.Column); - Io.mapRequired("IsInlineFrame", F.IsInlineFrame); - - // Assert that the definition of Frame matches what we expect. The - // structured bindings below detect changes to the number of fields. - // static_assert checks the type of each field. 
- const auto &[Function, SymbolName, LineOffset, Column, IsInlineFrame] = F; - static_assert( - std::is_same_v, GlobalValue::GUID>); - static_assert(std::is_same_v, - std::unique_ptr>); - static_assert( - std::is_same_v, uint32_t>); - static_assert(std::is_same_v, uint32_t>); - static_assert( - std::is_same_v, bool>); - - // MSVC issues unused variable warnings despite the uses in static_assert - // above. - (void)Function; - (void)SymbolName; - (void)LineOffset; - (void)Column; - (void)IsInlineFrame; - } - - // Request the inline notation for brevity: - // { Function: 123, LineOffset: 11, Column: 10; IsInlineFrame: true } - static const bool flow = true; -}; - -template <> struct CustomMappingTraits { - static void inputOne(IO &Io, StringRef KeyStr, - memprof::PortableMemInfoBlock &MIB) { - // PortableMemInfoBlock keeps track of the set of fields that actually have - // values. We update the set here as we receive a key-value pair from the - // YAML document. - // - // We set MIB.Name via a temporary variable because ScalarTraits - // isn't available on macOS. 
-#define MIBEntryDef(NameTag, Name, Type) \ - if (KeyStr == #Name) { \ - uint64_t Value; \ - Io.mapRequired(KeyStr.str().c_str(), Value); \ - MIB.Name = static_cast(Value); \ - MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name)); \ - return; \ - } -#include "llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - Io.setError("Key is not a valid validation event"); - } - - static void output(IO &Io, memprof::PortableMemInfoBlock &MIB) { - auto Schema = MIB.getSchema(); -#define MIBEntryDef(NameTag, Name, Type) \ - if (Schema.test(llvm::to_underlying(memprof::Meta::Name))) { \ - uint64_t Value = MIB.Name; \ - Io.mapRequired(#Name, Value); \ - } -#include "llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - } -}; - -template <> struct MappingTraits { - static void mapping(IO &Io, memprof::AllocationInfo &AI) { - Io.mapRequired("Callstack", AI.CallStack); - Io.mapRequired("MemInfoBlock", AI.Info); - } -}; - -// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can -// treat the GUID and the fields within MemProfRecord at the same level as if -// the GUID were part of MemProfRecord. 
-template <> struct MappingTraits { - static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) { - Io.mapRequired("GUID", Pair.GUID); - Io.mapRequired("AllocSites", Pair.Record.AllocSites); - Io.mapRequired("CallSites", Pair.Record.CallSites); - } -}; - -template <> struct MappingTraits { - static void mapping(IO &Io, memprof::AllMemProfData &Data) { - Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords); - } -}; -} // namespace yaml } // namespace llvm -LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame) -LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector) -LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo) -LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair) - #endif // LLVM_PROFILEDATA_MEMPROF_H_ diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h new file mode 100644 index 00000000000000..4568385fc6f71f --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfYAML.h @@ -0,0 +1,154 @@ +#ifndef LLVM_PROFILEDATA_MEMPROFYAML_H_ +#define LLVM_PROFILEDATA_MEMPROFYAML_H_ + +#include "llvm/ProfileData/MemProf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/YAMLTraits.h" + +namespace llvm { +namespace memprof { +// A "typedef" for GUID. See ScalarTraits for how a GUID is +// serialized and deserialized in YAML. +LLVM_YAML_STRONG_TYPEDEF(uint64_t, GUIDHex64) + +// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields +// within MemProfRecord at the same level as if the GUID were part of +// MemProfRecord. +struct GUIDMemProfRecordPair { + GUIDHex64 GUID; + MemProfRecord Record; +}; + +// The top-level data structure, only used with YAML for now. +struct AllMemProfData { + std::vector HeapProfileRecords; +}; +} // namespace memprof + +namespace yaml { +template <> struct ScalarTraits { + static void output(const memprof::GUIDHex64 &Val, void *, raw_ostream &Out) { + // Print GUID as a 16-digit hexadecimal number. 
+ Out << format("0x%016" PRIx64, (uint64_t)Val); + } + static StringRef input(StringRef Scalar, void *, memprof::GUIDHex64 &Val) { + // Reject decimal GUIDs. + if (all_of(Scalar, [](char C) { return std::isdigit(C); })) + return "use a hexadecimal GUID or a function instead"; + + uint64_t Num; + if (Scalar.starts_with_insensitive("0x")) { + // Accept hexadecimal numbers starting with 0x or 0X. + if (Scalar.getAsInteger(0, Num)) + return "invalid hex64 number"; + Val = Num; + } else { + // Otherwise, treat the input as a string containing a function name. + Val = memprof::IndexedMemProfRecord::getGUID(Scalar); + } + return StringRef(); + } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +template <> struct MappingTraits { + static void mapping(IO &Io, memprof::Frame &F) { + Io.mapRequired("Function", F.Function); + Io.mapRequired("LineOffset", F.LineOffset); + Io.mapRequired("Column", F.Column); + Io.mapRequired("IsInlineFrame", F.IsInlineFrame); + + // Assert that the definition of Frame matches what we expect. The + // structured bindings below detect changes to the number of fields. + // static_assert checks the type of each field. + const auto &[Function, SymbolName, LineOffset, Column, IsInlineFrame] = F; + static_assert( + std::is_same_v, GlobalValue::GUID>); + static_assert(std::is_same_v, + std::unique_ptr>); + static_assert( + std::is_same_v, uint32_t>); + static_assert(std::is_same_v, uint32_t>); + static_assert( + std::is_same_v, bool>); + + // MSVC issues unused variable warnings despite the uses in static_assert + // above. 
+ (void)Function; + (void)SymbolName; + (void)LineOffset; + (void)Column; + (void)IsInlineFrame; + } + + // Request the inline notation for brevity: + // { Function: 123, LineOffset: 11, Column: 10; IsInlineFrame: true } + static const bool flow = true; +}; + +template <> struct CustomMappingTraits { + static void inputOne(IO &Io, StringRef KeyStr, + memprof::PortableMemInfoBlock &MIB) { + // PortableMemInfoBlock keeps track of the set of fields that actually have + // values. We update the set here as we receive a key-value pair from the + // YAML document. + // + // We set MIB.Name via a temporary variable because ScalarTraits + // isn't available on macOS. +#define MIBEntryDef(NameTag, Name, Type) \ + if (KeyStr == #Name) { \ + uint64_t Value; \ + Io.mapRequired(KeyStr.str().c_str(), Value); \ + MIB.Name = static_cast(Value); \ + MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name)); \ + return; \ + } +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + Io.setError("Key is not a valid validation event"); + } + + static void output(IO &Io, memprof::PortableMemInfoBlock &MIB) { + auto Schema = MIB.getSchema(); +#define MIBEntryDef(NameTag, Name, Type) \ + if (Schema.test(llvm::to_underlying(memprof::Meta::Name))) { \ + uint64_t Value = MIB.Name; \ + Io.mapRequired(#Name, Value); \ + } +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + } +}; + +template <> struct MappingTraits { + static void mapping(IO &Io, memprof::AllocationInfo &AI) { + Io.mapRequired("Callstack", AI.CallStack); + Io.mapRequired("MemInfoBlock", AI.Info); + } +}; + +// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can +// treat the GUID and the fields within MemProfRecord at the same level as if +// the GUID were part of MemProfRecord. 
+template <> struct MappingTraits { + static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) { + Io.mapRequired("GUID", Pair.GUID); + Io.mapRequired("AllocSites", Pair.Record.AllocSites); + Io.mapRequired("CallSites", Pair.Record.CallSites); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &Io, memprof::AllMemProfData &Data) { + Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords); + } +}; +} // namespace yaml +} // namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame) +LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector) +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair) + +#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_ diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 7bb6c3156c43e0..2c58eedce1de0b 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -2086,4 +2086,12 @@ class ComplexPattern RootNodes = roots; list Properties = props; int Complexity = complexity; + + // Set this to true if SelectFunc wants an additional argument + // that is the root of the matched pattern. + bit WantsRoot = false; + + // Set this to true if SelectFunc wants an additional argument + // that is the parent of the matched node. + bit WantsParent = false; } diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 1311329821828f..2dd8469becbb9e 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -272,9 +272,10 @@ bool isX18ReservedByDefault(const Triple &TT); unsigned getFMVPriority(ArrayRef Features); // For given feature names, return a bitmask corresponding to the entries of -// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks -// themselves, they are sequential (0, 1, 2, 3, ...). 
-uint64_t getCpuSupportsMask(ArrayRef FeatureStrs); +// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks themselves, +// they are sequential (0, 1, 2, 3, ...). The resulting bitmask is used at +// runtime to test whether a certain FMV feature is available on the host. +uint64_t getCpuSupportsMask(ArrayRef Features); void PrintSupportedExtensions(); diff --git a/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h b/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h index 7ea50a5584dde0..e1f8065f8011df 100644 --- a/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h +++ b/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h @@ -55,29 +55,49 @@ template struct ShouldRunExtraPasses { /// request additional transformations on demand. An example is extra /// simplifications after loop-vectorization, if runtime checks have been added. template -struct ExtraFunctionPassManager : public FunctionPassManager { +class ExtraFunctionPassManager + : public PassInfoMixin> { + FunctionPassManager InnerFPM; + +public: + template void addPass(PassT &&Pass) { + InnerFPM.addPass(std::move(Pass)); + } + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) { auto PA = PreservedAnalyses::all(); if (AM.getCachedResult(F)) - PA.intersect(FunctionPassManager::run(F, AM)); + PA.intersect(InnerFPM.run(F, AM)); PA.abandon(); return PA; } + + static bool isRequired() { return true; } }; /// A pass manager to run a set of extra loop passes if the MarkerTy analysis is /// present. This allows passes to request additional transformations on demand. /// An example is doing additional runs of SimpleLoopUnswitch. 
template -struct ExtraLoopPassManager : public LoopPassManager { +class ExtraLoopPassManager + : public PassInfoMixin> { + LoopPassManager InnerLPM; + +public: + template void addPass(PassT &&Pass) { + InnerLPM.addPass(std::move(Pass)); + } + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { auto PA = PreservedAnalyses::all(); if (AM.getCachedResult(L)) - PA.intersect(LoopPassManager::run(L, AM, AR, U)); + PA.intersect(InnerLPM.run(L, AM, AR, U)); PA.abandon(); return PA; } + + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index dc7e484a40a452..fbe80eddbae07a 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -395,6 +395,11 @@ class LoopVectorizationLegality { /// Returns the uncountable early exiting block. 
BasicBlock *getUncountableEarlyExitingBlock() const { + if (!HasUncountableEarlyExit) { + assert(getUncountableExitingBlocks().empty() && + "Expected no uncountable exiting blocks"); + return nullptr; + } assert(getUncountableExitingBlocks().size() == 1 && "Expected only a single uncountable exiting block"); return getUncountableExitingBlocks()[0]; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index d1332de4582087..ebae27e37b4fcb 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -313,10 +313,9 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } if (Error) { const Function &Fn = MI->getMF()->getFunction(); - DiagnosticInfoInlineAsm DI(LocCookie, - "invalid operand in inline asm: '" + - Twine(AsmStr) + "'"); - Fn.getContext().diagnose(DI); + Fn.getContext().diagnose(DiagnosticInfoInlineAsm( + LocCookie, + "invalid operand in inline asm: '" + Twine(AsmStr) + "'")); } } break; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 605937503407a8..907c4577d93d39 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -138,6 +138,11 @@ static cl::opt EnableReduceLoadOpStoreWidth( "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence")); +static cl::opt ReduceLoadOpStoreWidthForceNarrowingProfitable( + "combiner-reduce-load-op-store-width-force-narrowing-profitable", + cl::Hidden, cl::init(false), + cl::desc("DAG combiner force override the narrowing profitable check when " + "reducing the width of load/op/store sequences")); static cl::opt EnableShrinkLoadReplaceStoreWithStore( "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), @@ -20351,19 +20356,38 @@ SDValue 
DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); // The narrowing should be profitable, the load/store operation should be // legal (or custom) and the store size should be equal to the NewVT width. - while (NewBW < BitWidth && (NewVT.getStoreSizeInBits() != NewBW || - !TLI.isOperationLegalOrCustom(Opc, NewVT) || - !TLI.isNarrowingProfitable(N, VT, NewVT))) { + while (NewBW < BitWidth && + (NewVT.getStoreSizeInBits() != NewBW || + !TLI.isOperationLegalOrCustom(Opc, NewVT) || + (!ReduceLoadOpStoreWidthForceNarrowingProfitable && + !TLI.isNarrowingProfitable(N, VT, NewVT)))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } if (NewBW >= BitWidth) return SDValue(); - // If the lsb changed does not start at the type bitwidth boundary, - // start at the previous one. - if (ShAmt % NewBW) - ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; + // TODO: For big-endian we probably want to align given the most significant + // bit being modified instead of adjusting ShAmt based on least significant + // bits. This to reduce the risk of failing on the alignment check below. If + // for example VT.getStoreSize()==5 and Imm is 0x0000ffff00, then we want to + // find NewBW=16, and we want to load/store with a PtrOff set to 2. But then + // ShAmt should be set to 8, which isn't a multiple of NewBW. But given + // that isNarrowingProfitable doesn't seem to be overridden for any in-tree + // big-endian target, then the support for big-endian here isn't covered by + // any in-tree lit tests, so it is unfortunately not highly optimized + // either. It should be possible to improve that by using + // ReduceLoadOpStoreWidthForceNarrowingProfitable. + + // If the lsb that is modified does not start at the type bitwidth boundary, + // align to start at the previous boundary. 
+ ShAmt = ShAmt - (ShAmt % NewBW); + + // Make sure we do not access memory outside the memory touched by the + // original load/store. + if (ShAmt + NewBW > VT.getStoreSizeInBits()) + return SDValue(); + APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, std::min(BitWidth, ShAmt + NewBW)); if ((Imm & Mask) == Imm) { diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp index f23f3ed9406bdc..ecf5c0e519caec 100644 --- a/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp @@ -19,21 +19,17 @@ using namespace jitlink; #define DEBUG_TYPE "jitlink" -// Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "COFFOptions.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE -static constexpr const StringLiteral PrefixTable_init[] = -#define PREFIX_UNION(VALUES) VALUES +#define OPTTABLE_PREFIXES_TABLE_CODE #include "COFFOptions.inc" -#undef PREFIX_UNION - ; -static constexpr const ArrayRef - PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1); +#undef OPTTABLE_PREFIXES_TABLE_CODE + +#define OPTTABLE_PREFIXES_UNION_CODE +#include "COFFOptions.inc" +#undef OPTTABLE_PREFIXES_UNION_CODE // Create table mapping all options defined in COFFOptions.td using namespace llvm::opt; @@ -46,7 +42,9 @@ static constexpr opt::OptTable::Info infoTable[] = { class COFFOptTable : public opt::PrecomputedOptTable { public: - COFFOptTable() : PrecomputedOptTable(infoTable, PrefixTable, true) {} + COFFOptTable() + : PrecomputedOptTable(OptionStrTable, OptionPrefixesTable, infoTable, + OptionPrefixesUnion, true) {} }; static COFFOptTable optTable; diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 
9fdafed39b8b6c..87e6f1f12364c2 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -31,46 +31,55 @@ using namespace llvm; using namespace llvm::opt; -namespace llvm::opt { +namespace { +struct OptNameLess { + const char *StrTable; + ArrayRef PrefixesTable; + + explicit OptNameLess(const char *StrTable, ArrayRef PrefixesTable) + : StrTable(StrTable), PrefixesTable(PrefixesTable) {} + #ifndef NDEBUG -static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) { - if (&A == &B) - return false; - - if (int Cmp = StrCmpOptionName(A.getName(), B.getName())) - return Cmp < 0; - - // Note: we are converting ArrayRef to ArrayRef. - // In general, ArrayRef cannot be safely viewed as ArrayRef - // since sizeof(SubClass) may not be same as sizeof(Base). However in this - // case, sizeof(StringLiteral) is same as sizeof(StringRef), so this - // conversion is safe. - static_assert(sizeof(StringRef) == sizeof(StringLiteral)); - ArrayRef APrefixes(A.Prefixes.data(), A.Prefixes.size()); - ArrayRef BPrefixes(B.Prefixes.data(), B.Prefixes.size()); - - if (int Cmp = StrCmpOptionPrefixes(APrefixes, BPrefixes)) - return Cmp < 0; - - // Names are the same, check that classes are in order; exactly one - // should be joined, and it should succeed the other. 
- assert(((A.Kind == Option::JoinedClass) ^ (B.Kind == Option::JoinedClass)) && - "Unexpected classes for options with same name."); - return B.Kind == Option::JoinedClass; -} + inline bool operator()(const OptTable::Info &A, + const OptTable::Info &B) const { + if (&A == &B) + return false; + + if (int Cmp = StrCmpOptionName(A.getName(StrTable, PrefixesTable), + B.getName(StrTable, PrefixesTable))) + return Cmp < 0; + + SmallVector APrefixes, BPrefixes; + A.appendPrefixes(StrTable, PrefixesTable, APrefixes); + B.appendPrefixes(StrTable, PrefixesTable, BPrefixes); + + if (int Cmp = StrCmpOptionPrefixes(APrefixes, BPrefixes)) + return Cmp < 0; + + // Names are the same, check that classes are in order; exactly one + // should be joined, and it should succeed the other. + assert( + ((A.Kind == Option::JoinedClass) ^ (B.Kind == Option::JoinedClass)) && + "Unexpected classes for options with same name."); + return B.Kind == Option::JoinedClass; + } #endif -// Support lower_bound between info and an option name. -static inline bool operator<(const OptTable::Info &I, StringRef Name) { - // Do not fallback to case sensitive comparison. - return StrCmpOptionName(I.getName(), Name, false) < 0; -} -} // namespace llvm::opt + // Support lower_bound between info and an option name. + inline bool operator()(const OptTable::Info &I, StringRef Name) const { + // Do not fallback to case sensitive comparison. 
+ return StrCmpOptionName(I.getName(StrTable, PrefixesTable), Name, false) < + 0; + } +}; +} // namespace OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} -OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) - : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase) { +OptTable::OptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, bool IgnoreCase) + : StrTable(StrTable), PrefixesTable(PrefixesTable), + OptionInfos(OptionInfos), IgnoreCase(IgnoreCase) { // Explicitly zero initialize the error to work around a bug in array // value-initialization on MinGW with gcc 4.3.5. @@ -102,7 +111,7 @@ OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) // Check that options are in order. for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions(); i != e; ++i){ - if (!(getInfo(i) < getInfo(i + 1))) { + if (!(OptNameLess(StrTable, PrefixesTable)(getInfo(i), getInfo(i + 1)))) { getOption(i).dump(); getOption(i + 1).dump(); llvm_unreachable("Options are not in order!"); @@ -115,7 +124,7 @@ void OptTable::buildPrefixChars() { assert(PrefixChars.empty() && "rebuilding a non-empty prefix char"); // Build prefix chars. - for (const StringLiteral &Prefix : getPrefixesUnion()) { + for (StringRef Prefix : PrefixesUnion) { for (char C : Prefix) if (!is_contained(PrefixChars, C)) PrefixChars.push_back(C); @@ -132,7 +141,7 @@ const Option OptTable::getOption(OptSpecifier Opt) const { return Option(&getInfo(id), this); } -static bool isInput(const ArrayRef &Prefixes, StringRef Arg) { +static bool isInput(const ArrayRef &Prefixes, StringRef Arg) { if (Arg == "-") return true; for (const StringRef &Prefix : Prefixes) @@ -142,25 +151,32 @@ static bool isInput(const ArrayRef &Prefixes, StringRef Arg) { } /// \returns Matched size. 0 means no match. 
-static unsigned matchOption(const OptTable::Info *I, StringRef Str, +static unsigned matchOption(const char *StrTable, + ArrayRef PrefixesTable, + const OptTable::Info *I, StringRef Str, bool IgnoreCase) { - for (auto Prefix : I->Prefixes) { + StringRef Name = I->getName(StrTable, PrefixesTable); + for (unsigned PrefixOffset : I->getPrefixOffsets(PrefixesTable)) { + StringRef Prefix = &StrTable[PrefixOffset]; if (Str.starts_with(Prefix)) { StringRef Rest = Str.substr(Prefix.size()); - bool Matched = IgnoreCase ? Rest.starts_with_insensitive(I->getName()) - : Rest.starts_with(I->getName()); + bool Matched = IgnoreCase ? Rest.starts_with_insensitive(Name) + : Rest.starts_with(Name); if (Matched) - return Prefix.size() + StringRef(I->getName()).size(); + return Prefix.size() + Name.size(); } } return 0; } // Returns true if one of the Prefixes + In.Names matches Option -static bool optionMatches(const OptTable::Info &In, StringRef Option) { - for (auto Prefix : In.Prefixes) - if (Option.ends_with(In.getName())) - if (Option.slice(0, Option.size() - In.getName().size()) == Prefix) +static bool optionMatches(const char *StrTable, + ArrayRef PrefixesTable, + const OptTable::Info &In, StringRef Option) { + StringRef Name = In.getName(StrTable, PrefixesTable); + if (Option.consume_back(Name)) + for (unsigned PrefixOffset : In.getPrefixOffsets(PrefixesTable)) + if (Option == &StrTable[PrefixOffset]) return true; return false; } @@ -173,7 +189,7 @@ OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const { // Search all options and return possible values. 
for (size_t I = FirstSearchableIndex, E = OptionInfos.size(); I < E; I++) { const Info &In = OptionInfos[I]; - if (!In.Values || !optionMatches(In, Option)) + if (!In.Values || !optionMatches(StrTable, PrefixesTable, In, Option)) continue; SmallVector Candidates; @@ -194,15 +210,17 @@ OptTable::findByPrefix(StringRef Cur, Visibility VisibilityMask, std::vector Ret; for (size_t I = FirstSearchableIndex, E = OptionInfos.size(); I < E; I++) { const Info &In = OptionInfos[I]; - if (In.Prefixes.empty() || (!In.HelpText && !In.GroupID)) + if (In.hasNoPrefix() || (!In.HelpText && !In.GroupID)) continue; if (!(In.Visibility & VisibilityMask)) continue; if (In.Flags & DisableFlags) continue; - for (auto Prefix : In.Prefixes) { - std::string S = (Prefix + In.getName() + "\t").str(); + StringRef Name = In.getName(StrTable, PrefixesTable); + for (unsigned PrefixOffset : In.getPrefixOffsets(PrefixesTable)) { + StringRef Prefix = &StrTable[PrefixOffset]; + std::string S = (Twine(Prefix) + Name + "\t").str(); if (In.HelpText) S += In.HelpText; if (StringRef(S).starts_with(Cur) && S != std::string(Cur) + "\t") @@ -253,7 +271,7 @@ unsigned OptTable::internalFindNearest( for (const Info &CandidateInfo : ArrayRef(OptionInfos).drop_front(FirstSearchableIndex)) { - StringRef CandidateName = CandidateInfo.getName(); + StringRef CandidateName = CandidateInfo.getName(StrTable, PrefixesTable); // We can eliminate some option prefix/name pairs as candidates right away: // * Ignore option candidates with empty names, such as "--", or names @@ -267,7 +285,7 @@ unsigned OptTable::internalFindNearest( // * Ignore positional argument option candidates (which do not // have prefixes). - if (CandidateInfo.Prefixes.empty()) + if (CandidateInfo.hasNoPrefix()) continue; // Now check if the candidate ends with a character commonly used when @@ -286,7 +304,9 @@ unsigned OptTable::internalFindNearest( // Consider each possible prefix for each candidate to find the most // appropriate one. 
For example, if a user asks for "--helm", suggest // "--help" over "-help". - for (auto CandidatePrefix : CandidateInfo.Prefixes) { + for (unsigned CandidatePrefixOffset : + CandidateInfo.getPrefixOffsets(PrefixesTable)) { + StringRef CandidatePrefix = &StrTable[CandidatePrefixOffset]; // If Candidate and NormalizedName have more than 'BestDistance' // characters of difference, no need to compute the edit distance, it's // going to be greater than BestDistance. Don't bother computing Candidate @@ -332,19 +352,21 @@ std::unique_ptr OptTable::parseOneArgGrouped(InputArgList &Args, // itself. const char *CStr = Args.getArgString(Index); StringRef Str(CStr); - if (isInput(getPrefixesUnion(), Str)) + if (isInput(PrefixesUnion, Str)) return std::make_unique(getOption(InputOptionID), Str, Index++, CStr); const Info *End = OptionInfos.data() + OptionInfos.size(); StringRef Name = Str.ltrim(PrefixChars); const Info *Start = - std::lower_bound(OptionInfos.data() + FirstSearchableIndex, End, Name); + std::lower_bound(OptionInfos.data() + FirstSearchableIndex, End, Name, + OptNameLess(StrTable, PrefixesTable)); const Info *Fallback = nullptr; unsigned Prev = Index; // Search for the option which matches Str. for (; Start != End; ++Start) { - unsigned ArgSize = matchOption(Start, Str, IgnoreCase); + unsigned ArgSize = + matchOption(StrTable, PrefixesTable, Start, Str, IgnoreCase); if (!ArgSize) continue; @@ -417,7 +439,7 @@ std::unique_ptr OptTable::internalParseOneArg( // Anything that doesn't start with PrefixesUnion is an input, as is '-' // itself. - if (isInput(getPrefixesUnion(), Str)) + if (isInput(PrefixesUnion, Str)) return std::make_unique(getOption(InputOptionID), Str, Index++, Str.data()); @@ -426,7 +448,8 @@ std::unique_ptr OptTable::internalParseOneArg( StringRef Name = Str.ltrim(PrefixChars); // Search for the first next option which could be a prefix. 
- Start = std::lower_bound(Start, End, Name); + Start = + std::lower_bound(Start, End, Name, OptNameLess(StrTable, PrefixesTable)); // Options are stored in sorted order, with '\0' at the end of the // alphabet. Since the only options which can accept a string must @@ -440,7 +463,8 @@ std::unique_ptr OptTable::internalParseOneArg( unsigned ArgSize = 0; // Scan for first option which is a proper prefix. for (; Start != End; ++Start) - if ((ArgSize = matchOption(Start, Str, IgnoreCase))) + if ((ArgSize = + matchOption(StrTable, PrefixesTable, Start, Str, IgnoreCase))) break; if (Start == End) break; @@ -763,12 +787,15 @@ void OptTable::internalPrintHelp( OS.flush(); } -GenericOptTable::GenericOptTable(ArrayRef OptionInfos, bool IgnoreCase) - : OptTable(OptionInfos, IgnoreCase) { +GenericOptTable::GenericOptTable(const char *StrTable, + ArrayRef PrefixesTable, + ArrayRef OptionInfos, bool IgnoreCase) + : OptTable(StrTable, PrefixesTable, OptionInfos, IgnoreCase) { - std::set TmpPrefixesUnion; + std::set TmpPrefixesUnion; for (auto const &Info : OptionInfos.drop_front(FirstSearchableIndex)) - TmpPrefixesUnion.insert(Info.Prefixes.begin(), Info.Prefixes.end()); - PrefixesUnionBuffer.append(TmpPrefixesUnion.begin(), TmpPrefixesUnion.end()); + for (unsigned PrefixOffset : Info.getPrefixOffsets(PrefixesTable)) + TmpPrefixesUnion.insert(StringRef(&StrTable[PrefixOffset])); + PrefixesUnion.append(TmpPrefixesUnion.begin(), TmpPrefixesUnion.end()); buildPrefixChars(); } diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp index ecb3e84b1da8bd..738f75bb41e68c 100644 --- a/llvm/lib/Option/Option.cpp +++ b/llvm/lib/Option/Option.cpp @@ -57,10 +57,13 @@ void Option::print(raw_ostream &O, bool AddNewLine) const { #undef P } - if (!Info->Prefixes.empty()) { + if (!Info->hasNoPrefix()) { O << " Prefixes:["; - for (size_t I = 0, N = Info->Prefixes.size(); I != N; ++I) - O << '"' << Info->Prefixes[I] << (I == N - 1 ? 
"\"" : "\", "); + for (size_t I = 0, N = Info->getNumPrefixes(Owner->getPrefixesTable()); + I != N; ++I) + O << '"' + << Info->getPrefix(Owner->getStrTable(), Owner->getPrefixesTable(), I) + << (I == N - 1 ? "\"" : "\", "); O << ']'; } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 2ddebb07017c2a..825f2f7f9a494a 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -366,6 +366,8 @@ FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass(TM)) FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM)) FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass(TM)) FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM)) +FUNCTION_PASS("extra-vector-passes", + ExtraFunctionPassManager()) FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flatten-cfg", FlattenCFGPass()) FUNCTION_PASS("float2int", Float2IntPass()) @@ -651,6 +653,8 @@ LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch", LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass()) LOOP_PASS("dot-ddg", DDGDotPrinterPass()) LOOP_PASS("guard-widening", GuardWideningPass()) +LOOP_PASS("extra-simple-loop-unswitch-passes", + ExtraLoopPassManager()) LOOP_PASS("indvars", IndVarSimplifyPass()) LOOP_PASS("invalidate", InvalidateAllAnalysesPass()) LOOP_PASS("loop-bound-split", LoopBoundSplitPass()) diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index 19ae9a79ea519e..9dd43d34f2a0bb 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -32,6 +32,7 @@ #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/MemProfData.inc" #include "llvm/ProfileData/MemProfReader.h" +#include "llvm/ProfileData/MemProfYAML.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 
088de4328a198d..3a6bef81f4a0d8 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -160,7 +160,7 @@ def FeatureSVE : ExtensionWithMArch<"sve", "SVE", "FEAT_SVE", let ArchExtKindSpelling = "AEK_I8MM" in def FeatureMatMulInt8 : ExtensionWithMArch<"i8mm", "MatMulInt8", "FEAT_I8MM", - "Enable Matrix Multiply Int8 Extension">; + "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>; let ArchExtKindSpelling = "AEK_F32MM" in def FeatureMatMulFP32 : ExtensionWithMArch<"f32mm", "MatMulFP32", "FEAT_F32MM", @@ -207,7 +207,7 @@ def FeatureLSE2 : Extension<"lse2", "LSE2", "FEAT_LSE2", "Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules">; def FeatureFP16FML : ExtensionWithMArch<"fp16fml", "FP16FML", "FEAT_FHM", - "Enable FP16 FML instructions", [FeatureFullFP16]>; + "Enable FP16 FML instructions", [FeatureFullFP16, FeatureNEON]>; def FeatureDotProd : ExtensionWithMArch<"dotprod", "DotProd", "FEAT_DotProd", "Enable dot product support", [FeatureNEON]>; @@ -247,7 +247,8 @@ def FeatureAltFPCmp : Extension<"altnzcv", "AlternativeNZCV", "FEAT_FlagM2", def FeatureFRInt3264 : Extension<"fptoint", "FRInt3264", "FEAT_FRINTTS", "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to " - "an integer (in FP format) forcing it to fit into a 32- or 64-bit int">; + "an integer (in FP format) forcing it to fit into a 32- or 64-bit int", + [FeatureFPARMv8]>; def FeatureSB : ExtensionWithMArch<"sb", "SB", "FEAT_SB", "Enable Armv8.5-A Speculation Barrier">; @@ -279,7 +280,7 @@ def FeatureMTE : ExtensionWithMArch<"mte", "MTE", "FEAT_MTE, FEAT_MTE2", //===----------------------------------------------------------------------===// def FeatureBF16 : ExtensionWithMArch<"bf16", "BF16", "FEAT_BF16", - "Enable BFloat16 Extension">; + "Enable BFloat16 Extension", [FeatureNEON]>; def FeatureAMVS : Extension<"amvs", "AMVS", "FEAT_AMUv1p1", "Enable Armv8.6-A Activity Monitors Virtualization support", @@ -403,7 +404,7 
@@ def FeatureRME : Extension<"rme", "RME", "FEAT_RME", "Enable Realm Management Extension">; def FeatureSME : ExtensionWithMArch<"sme", "SME", "FEAT_SME", - "Enable Scalable Matrix Extension (SME)", [FeatureBF16]>; + "Enable Scalable Matrix Extension (SME)", [FeatureBF16, FeatureFullFP16]>; def FeatureSMEF64F64 : ExtensionWithMArch<"sme-f64f64", "SMEF64F64", "FEAT_SME_F64F64", "Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 424f18ba4d822f..cee609ed1e2f6f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15841,27 +15841,11 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, return getVectorBitwiseReduce(Opcode, HalfVec, VT, DL, DAG); } - // Results of setcc operations get widened to 128 bits for xor reduce if - // their input operands are 128 bits wide, otherwise vectors that are less - // than 64 bits get widened to neatly fit a 64 bit register, so e.g. - // <4 x i1> gets lowered to either <4 x i16> or <4 x i32>. Sign extending to + // Vectors that are less than 64 bits get widened to neatly fit a 64 bit + // register, so e.g. <4 x i1> gets lowered to <4 x i16>. Sign extending to // this element size leads to the best codegen, since e.g. setcc results // might need to be truncated otherwise. - unsigned ExtendedWidth = 64; - if (ScalarOpcode == ISD::XOR && Vec.getOpcode() == ISD::SETCC && - Vec.getOperand(0).getValueSizeInBits() >= 128) { - ExtendedWidth = 128; - } - EVT ExtendedVT = MVT::getIntegerVT(std::max(ExtendedWidth / NumElems, 8u)); - - // Negate the reduced vector value for reduce and operations that use - // fcmp. 
- if (ScalarOpcode == ISD::AND && NumElems < 16) { - Vec = DAG.getNode( - ISD::XOR, DL, VecVT, Vec, - DAG.getSplatVector( - VecVT, DL, DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32))); - } + EVT ExtendedVT = MVT::getIntegerVT(std::max(64u / NumElems, 8u)); // any_ext doesn't work with umin/umax, so only use it for uadd. unsigned ExtendOp = @@ -15870,36 +15854,10 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, ExtendOp, DL, VecVT.changeVectorElementType(ExtendedVT), Vec); switch (ScalarOpcode) { case ISD::AND: - if (NumElems < 16) { - // Check if all lanes of the negated bool vector value are zero by - // comparing against 0.0 with ordered and equal predicate. The only - // non-zero bit pattern that compares ordered and equal to 0.0 is -0.0, - // where only the sign bit is set. However the bool vector is - // sign-extended so that each bit in a lane is either zero or one, - // meaning that it is impossible to get the bit pattern of -0.0. - assert(Extended.getValueSizeInBits() == 64); - Extended = DAG.getBitcast(MVT::f64, Extended); - Result = - DAG.getSetCC(DL, MVT::i32, Extended, - DAG.getConstantFP(0.0, DL, MVT::f64), ISD::SETOEQ); - } else { - Result = DAG.getNode(ISD::VECREDUCE_UMIN, DL, ExtendedVT, Extended); - } + Result = DAG.getNode(ISD::VECREDUCE_UMIN, DL, ExtendedVT, Extended); break; case ISD::OR: - if (NumElems < 16) { - // Check if any lane of the bool vector is set by comparing against 0.0. - // NaN bit patterns are handled by using the 'unordered or not equal' - // predicate. Similarly to the reduce and case, -0.0 doesn't have to be - // handled here (see explanation above). 
- assert(Extended.getValueSizeInBits() == 64); - Extended = DAG.getBitcast(MVT::f64, Extended); - Result = - DAG.getSetCC(DL, MVT::i32, Extended, - DAG.getConstantFP(0.0, DL, MVT::f64), ISD::SETUNE); - } else { - Result = DAG.getNode(ISD::VECREDUCE_UMAX, DL, ExtendedVT, Extended); - } + Result = DAG.getNode(ISD::VECREDUCE_UMAX, DL, ExtendedVT, Extended); break; case ISD::XOR: Result = DAG.getNode(ISD::VECREDUCE_ADD, DL, ExtendedVT, Extended); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index da585dd3a21c88..1a5be28dce4a0c 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4379,10 +4379,11 @@ defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt", nxv8bf16, int_aar defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt2>; // FP8 downconvert -defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>; -defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>; -defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>; -defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>; +defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r, nxv8f16, int_aarch64_sve_fp8_cvtn>; +defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnb>; +defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r, nxv8bf16, int_aarch64_sve_fp8_cvtn>; + +defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single_top<0b11, "fcvtnt", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnt>; } // End HasSVE2orSME2, HasFP8 let Predicates = [HasSVE2orSME2, HasFAMINMAX] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 27995ca5bb701f..a67093b1a58c3d 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ 
b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -32,7 +32,8 @@ def tileslicerange2s4 : ComplexPattern", []>; def tileslicerange1s4 : ComplexPattern", []>; def tileslicerange0s4 : ComplexPattern", []>; -def am_sme_indexed_b4 :ComplexPattern", [], [SDNPWantRoot]>; +let WantsRoot = true in +def am_sme_indexed_b4 : ComplexPattern">; def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore, diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9ae66518dfb4ed..22b56f7f3e9a9e 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -9567,8 +9567,10 @@ multiclass sve_int_perm_bin_perm_128_zz opc, bit P, string asm, SDPatter } /// Addressing modes -def am_sve_indexed_s4 :ComplexPattern", [], [SDNPWantRoot]>; -def am_sve_indexed_s6 :ComplexPattern", [], [SDNPWantRoot]>; +let WantsRoot = true in { + def am_sve_indexed_s4 : ComplexPattern">; + def am_sve_indexed_s6 : ComplexPattern">; +} def am_sve_regreg_lsl0 : ComplexPattern", []>; def am_sve_regreg_lsl1 : ComplexPattern", []>; @@ -10794,10 +10796,45 @@ class sve2_fp8_down_cvt_single opc, string mnemonic, let Inst{5} = 0b0; let Inst{4-0} = Zd; let Uses = [FPMR, FPCR]; + + let mayLoad = 1; + let mayStore = 0; } -multiclass sve2_fp8_down_cvt_single opc, string mnemonic, RegisterOperand src> { +multiclass sve2_fp8_down_cvt_single opc, string mnemonic, RegisterOperand src, + ValueType ty, SDPatternOperator op> { def NAME : sve2_fp8_down_cvt_single; + + def : Pat<(nxv16i8 (op ty:$Zn1, ty:$Zn2)), + (!cast(NAME) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; +} + +class sve2_fp8_down_cvt_single_top opc, string mnemonic, RegisterOperand src_ty> + : I<(outs ZPR8:$Zd), (ins ZPR8:$_Zd, src_ty:$Zn), mnemonic, "\t$Zd, $Zn","", []>, Sched<[]> { + bits<5> Zd; + bits<4> Zn; + + let Inst{31-12} = 0b01100101000010100011; + let 
Inst{11-10} = opc; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = DestructiveOther; + let ElementSize = ZPR8.ElementSize; + + let Uses = [FPMR, FPCR]; + let mayLoad = 1; + let mayStore = 0; +} + +multiclass sve2_fp8_down_cvt_single_top opc, string mnemonic, RegisterOperand src_ty, + ValueType ty, SDPatternOperator op> { + def NAME : sve2_fp8_down_cvt_single_top; + + def : Pat<(nxv16i8 (op nxv16i8:$Zd, ty:$Zn1, ty:$Zn2)), + (!cast(NAME) $Zd, (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; } // FP8 Widening Multiply-Add Long - Indexed Group diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 06e852fe4752e0..21f9c50c352563 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -168,9 +168,18 @@ class AMDGPUInformationCache : public InformationCache { return ST.supportsGetDoorbellID(); } - std::pair getFlatWorkGroupSizes(const Function &F) { + std::optional> + getFlatWorkGroupSizeAttr(const Function &F) const { + auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size"); + if (!R) + return std::nullopt; + return std::make_pair(R->first, *(R->second)); + } + + std::pair + getDefaultFlatWorkGroupSize(const Function &F) const { const GCNSubtarget &ST = TM.getSubtarget(F); - return ST.getFlatWorkGroupSizes(F); + return ST.getDefaultFlatWorkGroupSize(F.getCallingConv()); } std::pair @@ -197,6 +206,19 @@ class AMDGPUInformationCache : public InformationCache { return ST.getWavesPerEU(F, FlatWorkGroupSize); } + std::optional> + getWavesPerEUAttr(const Function &F) { + auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", + /*OnlyFirstRequired=*/true); + if (!Val) + return std::nullopt; + if (!Val->second) { + const GCNSubtarget &ST = TM.getSubtarget(F); + Val->second = ST.getMaxWavesPerEU(); + } + return std::make_pair(Val->first, *(Val->second)); + 
} + std::pair getEffectiveWavesPerEU(const Function &F, std::pair WavesPerEU, @@ -812,17 +834,30 @@ struct AAAMDSizeRangeAttribute return Change; } - ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min, - unsigned Max) { - // Don't add the attribute if it's the implied default. - if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) + /// Clamp the assumed range to the default value ([Min, Max]) and emit the + /// attribute if it is not same as default. + ChangeStatus + emitAttributeIfNotDefaultAfterClamp(Attributor &A, + std::pair Default) { + auto [Min, Max] = Default; + unsigned Lower = getAssumed().getLower().getZExtValue(); + unsigned Upper = getAssumed().getUpper().getZExtValue(); + + // Clamp the range to the default value. + if (Lower < Min) + Lower = Min; + if (Upper > Max + 1) + Upper = Max + 1; + + // No manifest if the value is invalid or same as default after clamp. + if ((Lower == Min && Upper == Max + 1) || (Upper < Lower)) return ChangeStatus::UNCHANGED; Function *F = getAssociatedFunction(); LLVMContext &Ctx = F->getContext(); SmallString<10> Buffer; raw_svector_ostream OS(Buffer); - OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; + OS << Lower << ',' << Upper - 1; return A.manifestAttrs(getIRPosition(), {Attribute::get(Ctx, AttrName, OS.str())}, /*ForceReplace=*/true); @@ -846,13 +881,33 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); - unsigned MinGroupSize, MaxGroupSize; - std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); - intersectKnown( - ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); + bool HasAttr = false; + auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F); + auto MaxRange = 
InfoCache.getMaximumFlatWorkGroupRange(*F); + + if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) { + // We only consider an attribute that is not max range because the front + // end always emits the attribute, unfortunately, and sometimes it emits + // the max range. + if (*Attr != MaxRange) { + Range = *Attr; + HasAttr = true; + } + } + + // We don't want to directly clamp the state if it's the max range because + // that is basically the worst state. + if (Range == MaxRange) + return; + + auto [Min, Max] = Range; + ConstantRange CR(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState IRS(CR); + clampStateAndIndicateChange(this->getState(), IRS); + + if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv())) + indicateOptimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { @@ -866,9 +921,8 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { ChangeStatus manifest(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); - return emitAttributeIfNotDefault(A, Min, Max); + return emitAttributeIfNotDefaultAfterClamp( + A, InfoCache.getMaximumFlatWorkGroupRange(*F)); } /// See AbstractAttribute::getName() @@ -1044,29 +1098,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A) : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {} - bool isValidState() const override { - return !Assumed.isEmptySet() && IntegerRangeState::isValidState(); - } - void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); - if (const auto *AssumedGroupSize = A.getAAFor( - *this, IRPosition::function(*F), DepClassTy::REQUIRED); - AssumedGroupSize->isValidState()) { + auto TakeRange = [&](std::pair R) { + auto [Min, Max] = R; + ConstantRange Range(APInt(32, Min), 
APInt(32, Max + 1)); + IntegerRangeState RangeState(Range); + clampStateAndIndicateChange(this->getState(), RangeState); + indicateOptimisticFixpoint(); + }; - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getWavesPerEU( - *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(), - AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); + std::pair MaxWavesPerEURange{ + 1U, InfoCache.getMaxWavesPerEU(*F)}; - ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); - intersectKnown(Range); + // If the attribute exists, we will honor it if it is not the default. + if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) { + if (*Attr != MaxWavesPerEURange) { + TakeRange(*Attr); + return; + } } - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); + // Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the + // calculation of waves per EU involves flat work group size, we can't + // simply use an assumed flat work group size as a start point, because the + // update of flat work group size is in an inverse direction of waves per + // EU. However, we can still do something if it is an entry function. Since + // an entry function is a terminal node, and flat work group size either + // from attribute or default will be used anyway, we can take that value and + // calculate the waves per EU based on it. This result can't be updated by + // no means, but that could still allow us to propagate it. 
+ if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) { + std::pair FlatWorkGroupSize; + if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) + FlatWorkGroupSize = *Attr; + else + FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F); + TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange, + FlatWorkGroupSize)); + } } ChangeStatus updateImpl(Attributor &A) override { @@ -1115,8 +1187,8 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { ChangeStatus manifest(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); - unsigned Max = InfoCache.getMaxWavesPerEU(*F); - return emitAttributeIfNotDefault(A, 1, Max); + return emitAttributeIfNotDefaultAfterClamp( + A, {1U, InfoCache.getMaxWavesPerEU(*F)}); } /// See AbstractAttribute::getName() diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index a288c58def5cbd..a351f451584f9d 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -9,8 +9,10 @@ def MUBUFAddr64 : ComplexPattern; def MUBUFOffset : ComplexPattern; -def MUBUFScratchOffen : ComplexPattern; -def MUBUFScratchOffset : ComplexPattern; +let WantsParent = true in { + def MUBUFScratchOffen : ComplexPattern; + def MUBUFScratchOffset : ComplexPattern; +} def BUFSOffset : ComplexPattern; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 9244b4a6649861..8fa708b74dde32 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -6,13 +6,15 @@ // //===----------------------------------------------------------------------===// -def FlatOffset : ComplexPattern; -def GlobalOffset : ComplexPattern; -def ScratchOffset : ComplexPattern; +let WantsRoot = true in { + def FlatOffset : ComplexPattern; + def GlobalOffset : ComplexPattern; + def ScratchOffset : ComplexPattern; -def GlobalSAddr : 
ComplexPattern; -def ScratchSAddr : ComplexPattern; -def ScratchSVAddr : ComplexPattern; + def GlobalSAddr : ComplexPattern; + def ScratchSAddr : ComplexPattern; + def ScratchSVAddr : ComplexPattern; +} //===----------------------------------------------------------------------===// // FLAT classes diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 8510f53c03654d..6b78c96c982b9e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1344,22 +1344,33 @@ std::pair getIntegerPairAttribute(const Function &F, StringRef Name, std::pair Default, bool OnlyFirstRequired) { + if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired)) + return {Attr->first, Attr->second ? *(Attr->second) : Default.second}; + return Default; +} + +std::optional>> +getIntegerPairAttribute(const Function &F, StringRef Name, + bool OnlyFirstRequired) { Attribute A = F.getFnAttribute(Name); if (!A.isStringAttribute()) - return Default; + return std::nullopt; LLVMContext &Ctx = F.getContext(); - std::pair Ints = Default; + std::pair> Ints; std::pair Strs = A.getValueAsString().split(','); if (Strs.first.trim().getAsInteger(0, Ints.first)) { Ctx.emitError("can't parse first integer attribute " + Name); - return Default; + return std::nullopt; } - if (Strs.second.trim().getAsInteger(0, Ints.second)) { + unsigned Second = 0; + if (Strs.second.trim().getAsInteger(0, Second)) { if (!OnlyFirstRequired || !Strs.second.trim().empty()) { Ctx.emitError("can't parse second integer attribute " + Name); - return Default; + return std::nullopt; } + } else { + Ints.second = Second; } return Ints; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 667d8600a589d9..21ed61a9426fcf 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -936,6 +936,19 @@ 
getIntegerPairAttribute(const Function &F, StringRef Name, std::pair Default, bool OnlyFirstRequired = false); +/// \returns A pair of integer values requested using \p F's \p Name attribute +/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired +/// is false). +/// +/// \returns \p std::nullopt if attribute is not present. +/// +/// \returns \p std::nullopt and emits error if one of the requested values +/// cannot be converted to integer, or \p OnlyFirstRequired is false and +/// "second" value is not present. +std::optional>> +getIntegerPairAttribute(const Function &F, StringRef Name, + bool OnlyFirstRequired = false); + /// \returns Generate a vector of integer values requested using \p F's \p Name /// attribute. /// diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a1f068f0e049bd..291bfc0610f85d 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -262,30 +262,31 @@ bool ARMBaseRegisterInfo::isInlineAsmReadOnlyReg(const MachineFunction &MF, const TargetRegisterClass * ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const { - const TargetRegisterClass *Super = RC; - TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); + unsigned SuperID = RC->getID(); + auto I = RC->superclasses().begin(); + auto E = RC->superclasses().end(); do { - switch (Super->getID()) { + switch (SuperID) { case ARM::GPRRegClassID: case ARM::SPRRegClassID: case ARM::DPRRegClassID: case ARM::GPRPairRegClassID: - return Super; + return getRegClass(SuperID); case ARM::QPRRegClassID: case ARM::QQPRRegClassID: case ARM::QQQQPRRegClassID: if (MF.getSubtarget().hasNEON()) - return Super; + return getRegClass(SuperID); break; case ARM::MQPRRegClassID: case ARM::MQQPRRegClassID: case ARM::MQQQQPRRegClassID: if (MF.getSubtarget().hasMVEIntegerOps()) - return Super; + return getRegClass(SuperID); 
break; } - Super = *I++; - } while (Super); + SuperID = (I != E) ? *I++ : ~0U; + } while (SuperID != ~0U); return RC; } diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 718cb964ab7c3b..1cb6589184b603 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -1225,25 +1225,25 @@ def PostIdxRegShiftedAsmOperand : AsmOperandClass { let ParserMethod = "parsePostIdxReg"; } def am2offset_reg : MemOperand, - ComplexPattern { + ComplexPattern { let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; // When using this for assembly, it's always as a post-index offset. let ParserMatchClass = PostIdxRegShiftedAsmOperand; let MIOperandInfo = (ops GPRnopc, i32imm); + let WantsRoot = true; } // FIXME: am2offset_imm should only need the immediate, not the GPR. Having // the GPR is purely vestigal at this point. def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; } def am2offset_imm : MemOperand, - ComplexPattern { + ComplexPattern { let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; let ParserMatchClass = AM2OffsetImmAsmOperand; let MIOperandInfo = (ops GPRnopc, i32imm); + let WantsRoot = true; } @@ -1275,13 +1275,12 @@ def AM3OffsetAsmOperand : AsmOperandClass { let Name = "AM3Offset"; let ParserMethod = "parseAM3Offset"; } -def am3offset : MemOperand, - ComplexPattern { +def am3offset : MemOperand, ComplexPattern { let EncoderMethod = "getAddrMode3OffsetOpValue"; let PrintMethod = "printAddrMode3OffsetOperand"; let ParserMatchClass = AM3OffsetAsmOperand; let MIOperandInfo = (ops GPR, i32imm); + let WantsRoot = true; } // ldstm_mode := {ia, ib, da, db} @@ -1328,40 +1327,39 @@ def addrmode5fp16 : AddrMode5FP16 { // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } -def addrmode6 : MemOperand, - ComplexPattern{ +def addrmode6 
: MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); let EncoderMethod = "getAddrMode6AddressOpValue"; let DecoderMethod = "DecodeAddrMode6Operand"; let ParserMatchClass = AddrMode6AsmOperand; + let WantsParent = true; } -def am6offset : MemOperand, - ComplexPattern { +def am6offset : MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6OffsetOperand"; let MIOperandInfo = (ops GPR); let EncoderMethod = "getAddrMode6OffsetOpValue"; let DecoderMethod = "DecodeGPRRegisterClass"; + let WantsRoot = true; } // Special version of addrmode6 to handle alignment encoding for VST1/VLD1 // (single element from one lane) for size 32. -def addrmode6oneL32 : MemOperand, - ComplexPattern{ +def addrmode6oneL32 : MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6OneLane32AddressOpValue"; + let WantsParent = true; } // Base class for addrmode6 with specific alignment restrictions. -class AddrMode6Align : MemOperand, - ComplexPattern{ +class AddrMode6Align : MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); let EncoderMethod = "getAddrMode6AddressOpValue"; let DecoderMethod = "DecodeAddrMode6Operand"; + let WantsParent = true; } // Special version of addrmode6 to handle no allowed alignment encoding for @@ -1432,22 +1430,23 @@ def addrmode6align64or128or256 : AddrMode6Align { // Special version of addrmode6 to handle alignment encoding for VLD-dup // instructions, specifically VLD4-dup. -def addrmode6dup : MemOperand, - ComplexPattern{ +def addrmode6dup : MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6DupAddressOpValue"; // FIXME: This is close, but not quite right. The alignment specifier is // different. 
let ParserMatchClass = AddrMode6AsmOperand; + let WantsParent = true; } // Base class for addrmode6dup with specific alignment restrictions. class AddrMode6DupAlign : MemOperand, - ComplexPattern{ + ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6DupAddressOpValue"; + let WantsParent = true; } // Special version of addrmode6 to handle no allowed alignment encoding for diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index bdd0d739a05684..6dd8a374a92af4 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -184,15 +184,16 @@ def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>; def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>; def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>; -class t2am_imm7_offset : MemOperand, - ComplexPattern", - [], [SDNPWantRoot]> { +class t2am_imm7_offset + : MemOperand, + ComplexPattern"> { // They are printed the same way as the imm8 version let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let ParserMatchClass = !cast("t2am_imm7shift"#shift#"OffsetAsmOperand"); let EncoderMethod = "getT2ScaledImmOpValue<7,"#shift#">"; let DecoderMethod = "DecodeT2Imm7<"#shift#">"; + let WantsRoot = true; } // Operands for gather/scatter loads of the form [Rbase, Qoffsets] diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index cc7fc743fe4f92..b69bc601a0cdce 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -287,8 +287,8 @@ def t_addrmode_sp : MemOperand, // Inspects parent to determine whether an or instruction can be implemented as // an add (i.e. whether we know overflow won't occur in the add). 
-def AddLikeOrOp : ComplexPattern; +let WantsParent = true in +def AddLikeOrOp : ComplexPattern; // Pattern to exclude immediates from matching def non_imm32 : PatLeaf<(i32 GPR), [{ return !isa(N); }]>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 99617e53d657a9..9ff056f9c06a16 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -267,11 +267,11 @@ def t2addrmode_imm8_pre : T2AddrMode_Imm8 { } def t2am_imm8_offset : MemOperand, - ComplexPattern { + ComplexPattern { let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let EncoderMethod = "getT2AddrModeImm8OffsetOpValue"; let DecoderMethod = "DecodeT2Imm8"; + let WantsRoot = true; } // t2addrmode_imm8s4 := reg +/- (imm8 << 2) diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index 61c17090e3fbe8..5474a42e58848d 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -232,7 +232,8 @@ def imm_port6 : Operand { } // Addressing mode pattern reg+imm6 -def addr : ComplexPattern; +let WantsRoot = true in +def addr : ComplexPattern; //===----------------------------------------------------------------------===// // AVR predicates for subtarget features diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp index d4d121e4380089..2731c523963e5d 100644 --- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -431,8 +431,9 @@ unsigned HexagonRegisterInfo::getHexagonSubRegIndex( return WSub[GenIdx]; } - if (const TargetRegisterClass *SuperRC = *RC.getSuperClasses()) - return getHexagonSubRegIndex(*SuperRC, GenIdx); + if (!RC.superclasses().empty()) + return getHexagonSubRegIndex(*getRegClass(*RC.superclasses().begin()), + GenIdx); llvm_unreachable("Invalid register class"); } diff --git a/llvm/lib/Target/M68k/M68kInstrControl.td 
b/llvm/lib/Target/M68k/M68kInstrControl.td index 6e116d7cfe4019..4e94b2ed3a0645 100644 --- a/llvm/lib/Target/M68k/M68kInstrControl.td +++ b/llvm/lib/Target/M68k/M68kInstrControl.td @@ -179,6 +179,8 @@ class MxBcc (descend 0b0110, !cast("MxCC"#cc).Value, disp_8), disp_16_32 ); + + let Predicates = !if(!eq(TARGET, MxBrTarget32), [AtLeastM68020], []); } foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge", @@ -190,6 +192,10 @@ foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge", def B#cc#"16" : MxBcc"))>; + + def B#cc#"32" + : MxBcc"))>; } foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge", @@ -215,6 +221,8 @@ class MxBra (descend 0b0110, 0b0000, disp_8), disp_16_32 ); + + let Predicates = !if(!eq(TARGET, MxBrTarget32), [AtLeastM68020], []); } def BRA8 : MxBra"))>; +def BRA32 : MxBra"), + (decoder "DecodeImm32"))>; + def : Pat<(br bb:$target), (BRA8 MxBrTarget8:$target)>; /// ------------------------------------------------- @@ -242,6 +254,7 @@ class MxBsr (descend 0b0110, 0b0001, disp_8), disp_16_32 ); + let Predicates = !if(!eq(TARGET, MxBrTarget32), [AtLeastM68020], []); } def BSR8 : MxBsr; // Less or Equal // NOTE Though this CP is not strictly necessarily it will simplify instruciton // definitions -def MxCP_ARI : ComplexPattern; +let WantsParent = true in { + def MxCP_ARI : ComplexPattern; -def MxCP_ARIPI : ComplexPattern; + def MxCP_ARIPI : ComplexPattern; -def MxCP_ARIPD : ComplexPattern; + def MxCP_ARIPD : ComplexPattern; -def MxCP_ARID : ComplexPattern; + def MxCP_ARID : ComplexPattern; -def MxCP_ARII : ComplexPattern; + def MxCP_ARII : ComplexPattern; -def MxCP_AL : ComplexPattern; + def MxCP_AL : ComplexPattern; -def MxCP_PCD : ComplexPattern; - -def MxCP_PCI : ComplexPattern; + def MxCP_PCD : ComplexPattern; + def MxCP_PCI : ComplexPattern; +} //===----------------------------------------------------------------------===// // Pattern Fragments diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp 
b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp index 2c52fe07bb1119..04777677cbf218 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp @@ -29,19 +29,28 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCValue.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "M68k-asm-backend" + namespace { class M68kAsmBackend : public MCAsmBackend { + bool Allows32BitBranch; public: - M68kAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::big) {} + M68kAsmBackend(const Target &T, const MCSubtargetInfo &STI) + : MCAsmBackend(llvm::endianness::big), + Allows32BitBranch(llvm::StringSwitch(STI.getCPU()) + .CasesLower("m68020", "m68030", "m68040", true) + .Default(false)) {} unsigned getNumFixupKinds() const override { return 0; } @@ -51,18 +60,34 @@ class M68kAsmBackend : public MCAsmBackend { const MCSubtargetInfo *STI) const override { unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); + if (Fixup.getOffset() + Size > Data.size()) { + LLVM_DEBUG(dbgs() << "Fixup.getOffset(): " << Fixup.getOffset() << '\n'); + LLVM_DEBUG(dbgs() << "Size: " << Size << '\n'); + LLVM_DEBUG(dbgs() << "Data.size(): " << Data.size() << '\n'); + assert(Fixup.getOffset() + Size <= Data.size() && + "Invalid fixup offset!"); + } // Check that uppper bits are either all zeros or all ones. // Specifically ignore overflow/underflow as long as the leakage is // limited to the lower bits. This is to remain compatible with // other assemblers. 
- assert(isIntN(Size * 8 + 1, Value) && - "Value does not fit in the Fixup field"); + if (!(isIntN(Size * 8 + 1, static_cast(Value)) || IsResolved)) { + LLVM_DEBUG(dbgs() << "Fixup.getOffset(): " << Fixup.getOffset() << '\n'); + LLVM_DEBUG(dbgs() << "Size: " << Size << '\n'); + LLVM_DEBUG(dbgs() << "Data.size(): " << Data.size() << '\n'); + LLVM_DEBUG(dbgs() << "Value: " << Value << '\n'); + LLVM_DEBUG(dbgs() << "Target: "); + LLVM_DEBUG(Target.print(dbgs())); + LLVM_DEBUG(dbgs() << '\n'); + assert(isIntN(Size * 8 + 1, static_cast(Value)) && + "Value does not fit in the Fixup field"); + } // Write in Big Endian for (unsigned i = 0; i != Size; ++i) - Data[Fixup.getOffset() + i] = uint8_t(Value >> ((Size - i - 1) * 8)); + Data[Fixup.getOffset() + i] = + uint8_t(static_cast(Value) >> ((Size - i - 1) * 8)); } bool mayNeedRelaxation(const MCInst &Inst, @@ -99,6 +124,8 @@ static unsigned getRelaxedOpcodeBranch(const MCInst &Inst) { switch (Op) { default: return Op; + + // 8 -> 16 case M68k::BRA8: return M68k::BRA16; case M68k::Bcc8: @@ -129,6 +156,38 @@ static unsigned getRelaxedOpcodeBranch(const MCInst &Inst) { return M68k::Ble16; case M68k::Bvs8: return M68k::Bvs16; + + // 16 -> 32 + case M68k::BRA16: + return M68k::BRA32; + case M68k::Bcc16: + return M68k::Bcc32; + case M68k::Bls16: + return M68k::Bls32; + case M68k::Blt16: + return M68k::Blt32; + case M68k::Beq16: + return M68k::Beq32; + case M68k::Bmi16: + return M68k::Bmi32; + case M68k::Bne16: + return M68k::Bne32; + case M68k::Bge16: + return M68k::Bge32; + case M68k::Bcs16: + return M68k::Bcs32; + case M68k::Bpl16: + return M68k::Bpl32; + case M68k::Bgt16: + return M68k::Bgt32; + case M68k::Bhi16: + return M68k::Bhi32; + case M68k::Bvc16: + return M68k::Bvc32; + case M68k::Ble16: + return M68k::Ble32; + case M68k::Bvs16: + return M68k::Bvs32; } } @@ -166,26 +225,35 @@ bool M68kAsmBackend::mayNeedRelaxation(const MCInst &Inst, } bool M68kAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value) 
const { - // TODO Newer CPU can use 32 bit offsets, so check for this when ready - if (!isInt<16>(Value)) { + uint64_t UnsignedValue) const { + int64_t Value = static_cast(UnsignedValue); + + if (!isInt<32>(Value) || (!Allows32BitBranch && !isInt<16>(Value))) llvm_unreachable("Cannot relax the instruction, value does not fit"); - } - // Relax if the value is too big for a (signed) i8. This means that byte-wide - // instructions have to matched by default - // + + // Relax if the value is too big for a (signed) i8 + // (or signed i16 if 32 bit branches can be used). This means + // that byte-wide instructions have to matched by default + unsigned KindLog2Size = getFixupKindLog2Size(Fixup.getKind()); + bool FixupFieldTooSmall = false; + if (!isInt<8>(Value) && KindLog2Size == 0) + FixupFieldTooSmall = true; + else if (!isInt<16>(Value) && KindLog2Size <= 1) + FixupFieldTooSmall = true; + // NOTE // A branch to the immediately following instruction automatically // uses the 16-bit displacement format because the 8-bit // displacement field contains $00 (zero offset). - return Value == 0 || !isInt<8>(Value); + bool ZeroDisplacementNeedsFixup = Value == 0 && KindLog2Size == 0; + + return ZeroDisplacementNeedsFixup || FixupFieldTooSmall; } // NOTE Can tblgen help at all here to verify there aren't other instructions // we can relax? void M68kAsmBackend::relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const { - // The only relaxations M68k does is from a 1byte pcrel to a 2byte PCRel. 
unsigned RelaxedOp = getRelaxedOpcode(Inst); if (RelaxedOp == Inst.getOpcode()) { @@ -218,8 +286,8 @@ namespace { class M68kELFAsmBackend : public M68kAsmBackend { public: uint8_t OSABI; - M68kELFAsmBackend(const Target &T, uint8_t OSABI) - : M68kAsmBackend(T), OSABI(OSABI) {} + M68kELFAsmBackend(const Target &T, const MCSubtargetInfo &STI, uint8_t OSABI) + : M68kAsmBackend(T, STI), OSABI(OSABI) {} std::unique_ptr createObjectTargetWriter() const override { @@ -235,5 +303,5 @@ MCAsmBackend *llvm::createM68kAsmBackend(const Target &T, const MCTargetOptions &Options) { const Triple &TheTriple = STI.getTargetTriple(); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - return new M68kELFAsmBackend(T, OSABI); + return new M68kELFAsmBackend(T, STI, OSABI); } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 66684dbff6b335..a7836ccc45f476 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1907,10 +1907,10 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>; // Data Movement (Load / Store, Move) //----------------------------------- -def ADDRri : ComplexPattern; -def ADDRri64 : ComplexPattern; +let WantsRoot = true in { + def ADDRri : ComplexPattern; + def ADDRri64 : ComplexPattern; +} def ADDRvar : ComplexPattern; def MEMri : Operand { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 0d3c07aa87ff25..be90a5c562c570 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -686,13 +686,15 @@ def addr : ComplexPattern; def iaddroff : ComplexPattern; // Load and Store Instruction Selection addressing modes. 
-def DForm : ComplexPattern; -def DSForm : ComplexPattern; -def DQForm : ComplexPattern; -def XForm : ComplexPattern; -def ForceXForm : ComplexPattern; -def PCRelForm : ComplexPattern; -def PDForm : ComplexPattern; +let WantsParent = true in { + def DForm : ComplexPattern; + def DSForm : ComplexPattern; + def DQForm : ComplexPattern; + def XForm : ComplexPattern; + def ForceXForm : ComplexPattern; + def PCRelForm : ComplexPattern; + def PDForm : ComplexPattern; +} //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 43dfc4108f8384..019d4cfa33fbaf 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -692,21 +692,23 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, InflateGPRC++; } - for (const auto *I = RC->getSuperClasses(); *I; ++I) { - if (getRegSizeInBits(**I) != getRegSizeInBits(*RC)) + for (unsigned SuperID : RC->superclasses()) { + if (getRegSizeInBits(*getRegClass(SuperID)) != getRegSizeInBits(*RC)) continue; - switch ((*I)->getID()) { + switch (SuperID) { case PPC::VSSRCRegClassID: - return Subtarget.hasP8Vector() ? *I : DefaultSuperclass; + return Subtarget.hasP8Vector() ? getRegClass(SuperID) + : DefaultSuperclass; case PPC::VSFRCRegClassID: case PPC::VSRCRegClassID: - return *I; + return getRegClass(SuperID); case PPC::VSRpRCRegClassID: - return Subtarget.pairedVectorMemops() ? *I : DefaultSuperclass; + return Subtarget.pairedVectorMemops() ? getRegClass(SuperID) + : DefaultSuperclass; case PPC::ACCRCRegClassID: case PPC::UACCRCRegClassID: - return Subtarget.hasMMA() ? *I : DefaultSuperclass; + return Subtarget.hasMMA() ? 
getRegClass(SuperID) : DefaultSuperclass; } } } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 1028149bf513f4..ffc477e9cc252b 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1785,7 +1785,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI); const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI); - auto storeRegToStackSlot = [&](decltype(UnmanagedCSI) CSInfo) { + auto storeRegsToStackSlots = [&](decltype(UnmanagedCSI) CSInfo) { for (auto &CS : CSInfo) { // Insert the spill to the stack frame. Register Reg = CS.getReg(); @@ -1794,8 +1794,8 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( CS.getFrameIdx(), RC, TRI, Register()); } }; - storeRegToStackSlot(UnmanagedCSI); - storeRegToStackSlot(RVVCSI); + storeRegsToStackSlots(UnmanagedCSI); + storeRegsToStackSlots(RVVCSI); return true; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index dcd3598f658f6a..4bcb0edc4b093d 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -51,13 +51,6 @@ static cl::opt EnableGlobalMerge("riscv-enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); -static cl::opt ForceEnableGlobalMergeExternalGlobals( - "riscv-force-enable-global-merge-external-globals", cl::Hidden, - cl::init(false), - cl::desc( - "If the global merge pass is enabled, force enable global merging of " - "external globals (overriding any logic that might disable it)")); - static cl::opt EnableMachineCombiner("riscv-enable-machine-combiner", cl::desc("Enable the machine combiner pass"), @@ -494,8 +487,7 @@ bool RISCVPassConfig::addPreISel() { // Investigating and addressing both items are TODO. 
addPass(createGlobalMergePass(TM, /* MaxOffset */ 2047, /* OnlyOptimizeForSize */ false, - /* MergeExternalByDefault */ - ForceEnableGlobalMergeExternalGlobals)); + /* MergeExternalByDefault */ true)); } return false; diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index dabf36480f1dcf..0df4c451894bec 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -247,6 +247,23 @@ static OperandInfo getOperandInfo(const MachineInstr &MI, llvm_unreachable("Configuration setting instructions do not read or write " "vector registers"); + // Vector Loads and Stores + // Vector Unit-Stride Instructions + // Vector Strided Instructions + /// Dest EEW encoded in the instruction and EMUL=(EEW/SEW)*LMUL + case RISCV::VSE8_V: + case RISCV::VSSE8_V: + return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3); + case RISCV::VSE16_V: + case RISCV::VSSE16_V: + return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4); + case RISCV::VSE32_V: + case RISCV::VSSE32_V: + return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5); + case RISCV::VSE64_V: + case RISCV::VSSE64_V: + return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6); + // Vector Integer Arithmetic Instructions // Vector Single-Width Integer Add and Subtract case RISCV::VADD_VI: @@ -546,8 +563,12 @@ static bool isSupportedInstr(const MachineInstr &MI) { // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions // FIXME: Add support // Vector Narrowing Integer Right Shift Instructions - // FIXME: Add support + case RISCV::VNSRL_WX: case RISCV::VNSRL_WI: + case RISCV::VNSRL_WV: + case RISCV::VNSRA_WI: + case RISCV::VNSRA_WV: + case RISCV::VNSRA_WX: // Vector Integer Compare Instructions // FIXME: Add support // Vector Integer Min/Max Instructions diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index 
ba9779cdc335d6..ea7af893ce103f 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -352,7 +352,8 @@ def X86cmpccxadd : SDNode<"X86ISD::CMPCCXADD", SDTX86Cmpccxadd, SDNPMemOperand]>; // Define X86-specific addressing mode. -def addr : ComplexPattern; +let WantsParent = true in +def addr : ComplexPattern; def lea32addr : ComplexPattern; @@ -378,7 +379,8 @@ def tls64addr : ComplexPattern; -def vectoraddr : ComplexPattern; +let WantsParent = true in +def vectoraddr : ComplexPattern; // A relocatable immediate is an operand that can be relocated by the linker to // an immediate, such as a regular symbol in non-PIC code. diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 50db211c99d882..164d4205955166 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -123,7 +123,8 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const X86Subtarget &Subtarget = MF.getSubtarget(); const TargetRegisterClass *Super = RC; - TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); + auto I = RC->superclasses().begin(); + auto E = RC->superclasses().end(); do { switch (Super->getID()) { case X86::FR32RegClassID: @@ -172,7 +173,12 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; } - Super = *I++; + if (I != E) { + Super = getRegClass(*I); + ++I; + } else { + Super = nullptr; + } } while (Super); return RC; } diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 5865eb7b70b608..7e43c03ee72cac 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -14,6 +14,7 @@ #include "XtensaISelLowering.h" #include "XtensaConstantPoolValue.h" #include "XtensaInstrInfo.h" +#include "XtensaMachineFunctionInfo.h" 
#include "XtensaSubtarget.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -133,6 +134,13 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); + // VASTART, VAARG and VACOPY need to deal with the Xtensa-specific varargs + // structure, but VAEND is a no-op. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + // Compute derived properties from the register classes computeRegisterProperties(STI.getRegisterInfo()); } @@ -217,12 +225,12 @@ void XtensaTargetLowering::LowerAsmOperandForConstraint( #include "XtensaGenCallingConv.inc" +static const MCPhysReg IntRegs[] = {Xtensa::A2, Xtensa::A3, Xtensa::A4, + Xtensa::A5, Xtensa::A6, Xtensa::A7}; + static bool CC_Xtensa_Custom(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const MCPhysReg IntRegs[] = {Xtensa::A2, Xtensa::A3, Xtensa::A4, - Xtensa::A5, Xtensa::A6, Xtensa::A7}; - if (ArgFlags.isByVal()) { Align ByValAlign = ArgFlags.getNonZeroByValAlign(); unsigned ByValSize = ArgFlags.getByValSize(); @@ -304,13 +312,11 @@ SDValue XtensaTargetLowering::LowerFormalArguments( SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + XtensaMachineFunctionInfo *XtensaFI = MF.getInfo(); // Used with vargs to acumulate store chains. std::vector OutChains; - if (IsVarArg) - report_fatal_error("Var arg not supported by FormalArguments Lowering"); - // Assign locations to all of the incoming arguments. 
SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, @@ -323,17 +329,14 @@ SDValue XtensaTargetLowering::LowerFormalArguments( // Arguments stored on registers if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); - const TargetRegisterClass *RC; - if (RegVT == MVT::i32) - RC = &Xtensa::ARRegClass; - else + if (RegVT != MVT::i32) report_fatal_error("RegVT not supported by FormalArguments Lowering"); // Transform the arguments stored on // physical registers into virtual ones - unsigned Register = MF.addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Register, RegVT); + Register Reg = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then @@ -378,6 +381,56 @@ SDValue XtensaTargetLowering::LowerFormalArguments( } } + if (IsVarArg) { + unsigned Idx = CCInfo.getFirstUnallocated(IntRegs); + unsigned ArgRegsNum = std::size(IntRegs); + const TargetRegisterClass *RC = &Xtensa::ARRegClass; + MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned RegSize = 4; + MVT RegTy = MVT::i32; + MVT FITy = getFrameIndexTy(DAG.getDataLayout()); + + XtensaFI->setVarArgsFirstGPR(Idx + 2); // 2 - number of a2 register + + XtensaFI->setVarArgsOnStackFrameIndex( + MFI.CreateFixedObject(4, CCInfo.getStackSize(), true)); + + // Offset of the first variable argument from stack pointer, and size of + // the vararg save area. For now, the varargs save area is either zero or + // large enough to hold a0-a7. + int VaArgOffset, VarArgsSaveSize; + + // If all registers are allocated, then all varargs must be passed on the + // stack and we don't need to save any argregs. 
+ if (ArgRegsNum == Idx) { + VaArgOffset = CCInfo.getStackSize(); + VarArgsSaveSize = 0; + } else { + VarArgsSaveSize = RegSize * (ArgRegsNum - Idx); + VaArgOffset = -VarArgsSaveSize; + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + XtensaFI->setVarArgsInRegsFrameIndex(FI); + + // Copy the integer registers that may have been used for passing varargs + // to the vararg save area. + for (unsigned I = Idx; I < ArgRegsNum; ++I, VaArgOffset += RegSize) { + const Register Reg = RegInfo.createVirtualRegister(RC); + RegInfo.addLiveIn(IntRegs[I], Reg); + + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); + FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + SDValue PtrOff = DAG.getFrameIndex(FI, FITy); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo::getFixedStack(MF, FI)); + OutChains.push_back(Store); + } + } + } + // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions if (!OutChains.empty()) { @@ -579,9 +632,6 @@ XtensaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - if (IsVarArg) - report_fatal_error("VarArg not supported"); - MachineFunction &MF = DAG.getMachineFunction(); // Assign locations to each returned value. 
@@ -859,6 +909,156 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue XtensaTargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + XtensaMachineFunctionInfo *XtensaFI = MF.getInfo(); + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(1); + EVT PtrVT = Addr.getValueType(); + SDLoc DL(Op); + + // Struct va_list_tag + // int32 *va_stk - points to the arguments passed in memory + // int32 *va_reg - points to the registers with arguments saved in memory + // int32 va_ndx - offset from va_stk or va_reg pointers which points to the + // next variable argument + + SDValue VAIndex; + SDValue StackOffsetFI = + DAG.getFrameIndex(XtensaFI->getVarArgsOnStackFrameIndex(), PtrVT); + unsigned ArgWords = XtensaFI->getVarArgsFirstGPR() - 2; + + // If first variable argument passed in registers (maximum words in registers + // is 6) then set va_ndx to the position of this argument in registers area + // stored in memory (va_reg pointer). Otherwise va_ndx should point to the + // position of the first variable argument on stack (va_stk pointer). 
+ if (ArgWords < 6) { + VAIndex = DAG.getConstant(ArgWords * 4, DL, MVT::i32); + } else { + VAIndex = DAG.getConstant(32, DL, MVT::i32); + } + + SDValue FrameIndex = + DAG.getFrameIndex(XtensaFI->getVarArgsInRegsFrameIndex(), PtrVT); + uint64_t FrameOffset = PtrVT.getStoreSize(); + const Value *SV = cast(Op.getOperand(2))->getValue(); + + // Store pointer to arguments given on stack (va_stk) + SDValue StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackOffsetFI, + DAG.getConstant(32, DL, PtrVT)); + + SDValue StoreStackPtr = + DAG.getStore(Chain, DL, StackPtr, Addr, MachinePointerInfo(SV)); + + uint64_t NextOffset = FrameOffset; + SDValue NextPtr = + DAG.getObjectPtrOffset(DL, Addr, TypeSize::getFixed(NextOffset)); + + // Store pointer to arguments given on registers (va_reg) + SDValue StoreRegPtr = DAG.getStore(StoreStackPtr, DL, FrameIndex, NextPtr, + MachinePointerInfo(SV, NextOffset)); + NextOffset += FrameOffset; + NextPtr = DAG.getObjectPtrOffset(DL, Addr, TypeSize::getFixed(NextOffset)); + + // Store third word : position in bytes of the first VA argument (va_ndx) + return DAG.getStore(StoreRegPtr, DL, VAIndex, NextPtr, + MachinePointerInfo(SV, NextOffset)); +} + +SDValue XtensaTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { + // Size of the va_list_tag structure + constexpr unsigned VAListSize = 3 * 4; + SDValue Chain = Op.getOperand(0); + SDValue DstPtr = Op.getOperand(1); + SDValue SrcPtr = Op.getOperand(2); + const Value *DstSV = cast(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast(Op.getOperand(4))->getValue(); + SDLoc DL(Op); + + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, + DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32), + Align(4), /*isVolatile*/ false, /*AlwaysInline*/ true, + /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV), + MachinePointerInfo(SrcSV)); +} + +SDValue XtensaTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + Type 
*Ty = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = Op.getValueType(); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + const Value *SV = cast(Node->getOperand(2))->getValue(); + SDLoc DL(Node); + auto &TD = DAG.getDataLayout(); + Align ArgAlignment = TD.getABITypeAlign(Ty); + unsigned ArgAlignInBytes = ArgAlignment.value(); + unsigned ArgSizeInBytes = TD.getTypeAllocSize(Ty); + unsigned VASizeInBytes = llvm::alignTo(ArgSizeInBytes, 4); + + // va_stk + SDValue VAStack = + DAG.getLoad(MVT::i32, DL, InChain, VAListPtr, MachinePointerInfo()); + InChain = VAStack.getValue(1); + + // va_reg + SDValue VARegPtr = + DAG.getObjectPtrOffset(DL, VAListPtr, TypeSize::getFixed(4)); + SDValue VAReg = + DAG.getLoad(MVT::i32, DL, InChain, VARegPtr, MachinePointerInfo()); + InChain = VAReg.getValue(1); + + // va_ndx + SDValue VarArgIndexPtr = + DAG.getObjectPtrOffset(DL, VARegPtr, TypeSize::getFixed(4)); + SDValue VAIndex = + DAG.getLoad(MVT::i32, DL, InChain, VarArgIndexPtr, MachinePointerInfo()); + InChain = VAIndex.getValue(1); + + SDValue OrigIndex = VAIndex; + + if (ArgAlignInBytes > 4) { + OrigIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex, + DAG.getConstant(ArgAlignInBytes - 1, DL, MVT::i32)); + OrigIndex = + DAG.getNode(ISD::AND, DL, PtrVT, OrigIndex, + DAG.getSignedConstant(-ArgAlignInBytes, DL, MVT::i32)); + } + + VAIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex, + DAG.getConstant(VASizeInBytes, DL, MVT::i32)); + + SDValue CC = DAG.getSetCC(DL, MVT::i32, OrigIndex, + DAG.getConstant(6 * 4, DL, MVT::i32), ISD::SETLE); + + SDValue StkIndex = + DAG.getNode(ISD::ADD, DL, PtrVT, VAIndex, + DAG.getConstant(32 + VASizeInBytes, DL, MVT::i32)); + + CC = DAG.getSetCC(DL, MVT::i32, VAIndex, DAG.getConstant(6 * 4, DL, MVT::i32), + ISD::SETLE); + + SDValue Array = DAG.getNode(ISD::SELECT, DL, MVT::i32, CC, VAReg, VAStack); + + VAIndex = DAG.getNode(ISD::SELECT, DL, MVT::i32, CC, VAIndex, StkIndex); + + CC = DAG.getSetCC(DL, MVT::i32, 
VAIndex, DAG.getConstant(6 * 4, DL, MVT::i32), + ISD::SETLE); + + SDValue VAIndexStore = DAG.getStore(InChain, DL, VAIndex, VarArgIndexPtr, + MachinePointerInfo(SV)); + InChain = VAIndexStore; + + SDValue Addr = DAG.getNode(ISD::SUB, DL, PtrVT, VAIndex, + DAG.getConstant(VASizeInBytes, DL, MVT::i32)); + + Addr = DAG.getNode(ISD::ADD, DL, PtrVT, Array, Addr); + + return DAG.getLoad(VT, DL, InChain, Addr, MachinePointerInfo()); +} + SDValue XtensaTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -1001,6 +1201,12 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, return LowerFRAMEADDR(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case ISD::VACOPY: + return LowerVACOPY(Op, DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index f1cd00c41437a4..cebd7d2016c8ee 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -148,6 +148,12 @@ class XtensaTargetLowering : public TargetLowering { SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h index c38c060b9387ff..c430562091ba75 100644 --- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h @@ -24,10 +24,15 @@ namespace llvm { class XtensaMachineFunctionInfo : 
public MachineFunctionInfo { /// FrameIndex of the spill slot for the scratch register in BranchRelaxation. int BranchRelaxationScratchFrameIndex = -1; + unsigned VarArgsFirstGPR; + int VarArgsOnStackFrameIndex; + int VarArgsInRegsFrameIndex; public: explicit XtensaMachineFunctionInfo(const Function &F, - const TargetSubtargetInfo *STI) {} + const TargetSubtargetInfo *STI) + : VarArgsFirstGPR(0), VarArgsOnStackFrameIndex(0), + VarArgsInRegsFrameIndex(0) {} int getBranchRelaxationScratchFrameIndex() const { return BranchRelaxationScratchFrameIndex; @@ -35,6 +40,16 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo { void setBranchRelaxationScratchFrameIndex(int Index) { BranchRelaxationScratchFrameIndex = Index; } + + unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } + void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } + + int getVarArgsOnStackFrameIndex() const { return VarArgsOnStackFrameIndex; } + void setVarArgsOnStackFrameIndex(int FI) { VarArgsOnStackFrameIndex = FI; } + + // Get and set the frame index of the first stack vararg. + int getVarArgsInRegsFrameIndex() const { return VarArgsInRegsFrameIndex; } + void setVarArgsInRegsFrameIndex(int FI) { VarArgsInRegsFrameIndex = FI; } }; } // namespace llvm diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index fe5ab0fabefa6e..50c9a565e7ae25 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -61,12 +61,20 @@ unsigned AArch64::getFMVPriority(ArrayRef Features) { return Priority + MaxFMVPriority * NumFeatures; } -uint64_t AArch64::getCpuSupportsMask(ArrayRef FeatureStrs) { +uint64_t AArch64::getCpuSupportsMask(ArrayRef Features) { + // Transitively enable the Arch Extensions which correspond to each feature. 
+ ExtensionSet FeatureBits; + for (const StringRef Feature : Features) + if (std::optional Info = parseFMVExtension(Feature)) + if (Info->ID) + FeatureBits.enable(*Info->ID); + + // Construct a bitmask for all the transitively enabled Arch Extensions. uint64_t FeaturesMask = 0; - for (const StringRef &FeatureStr : FeatureStrs) { - if (auto Ext = parseFMVExtension(FeatureStr)) - FeaturesMask |= (1ULL << Ext->Bit); - } + for (const FMVInfo &Info : getFMVInfo()) + if (Info.ID && FeatureBits.Enabled.test(*Info.ID)) + FeaturesMask |= (1ULL << Info.Bit); + return FeaturesMask; } diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index b3dcc0f9866842..58ff720516f384 100644 --- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -31,6 +31,10 @@ using namespace llvm::COFF; namespace { +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + enum { OPT_INVALID = 0, #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), @@ -38,12 +42,9 @@ enum { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -54,7 +55,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DllOptTable : public opt::GenericOptTable { public: - DllOptTable() : opt::GenericOptTable(InfoTable, false) {} + DllOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + false) {} }; // Opens a file. Path has to be resolved already. 
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 2e0841ba02b543..319aebffdbbba2 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -37,6 +37,10 @@ using namespace llvm::object; namespace { +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + enum { OPT_INVALID = 0, #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), @@ -44,12 +48,9 @@ enum { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -60,7 +61,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class LibOptTable : public opt::GenericOptTable { public: - LibOptTable() : opt::GenericOptTable(InfoTable, true) {} + LibOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + true) {} }; } // namespace diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 778f83b7925691..09e8301b772d96 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -164,9 +164,9 @@ static cl::opt OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses.")); -// TODO: remove this flag. +// TODO: turn on and remove this flag. 
static cl::opt EnableInitializesImprovement( - "enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden, + "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE")); //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index f1568781252c06..555c8435dd330d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1375,6 +1375,16 @@ bool LoopVectorizationLegality::isFixedOrderRecurrence( } bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const { + // When vectorizing early exits, create predicates for the latch block only. + // The early exiting block must be a direct predecessor of the latch at the + // moment. + BasicBlock *Latch = TheLoop->getLoopLatch(); + if (hasUncountableEarlyExit()) { + assert( + is_contained(predecessors(Latch), getUncountableEarlyExitingBlock()) && + "Uncountable exiting block must be a direct predecessor of latch"); + return BB == Latch; + } return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } @@ -1788,13 +1798,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { HasUncountableEarlyExit = false; if (isa(PSE.getBackedgeTakenCount())) { + HasUncountableEarlyExit = true; if (!isVectorizableEarlyExitLoop()) { + UncountableExitingBlocks.clear(); + HasUncountableEarlyExit = false; if (DoExtraAnalysis) Result = false; else return false; - } else - HasUncountableEarlyExit = true; + } } // Go over each instruction and look at memory deps. 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index de164ee434d647..ed00c844285c62 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -385,6 +385,11 @@ static cl::opt UseWiderVFIfCallVariantsPresent( cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants")); +static cl::opt EnableEarlyExitVectorization( + "enable-early-exit-vectorization", cl::init(false), cl::Hidden, + cl::desc( + "Enable vectorization of early exit loops with uncountable exits.")); + // Likelyhood of bypassing the vectorized loop because assumptions about SCEV // variables not overflowing do not hold. See `emitSCEVChecks`. static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127}; @@ -1382,9 +1387,10 @@ class LoopVectorizationCostModel { LLVM_DEBUG(dbgs() << "LV: Loop does not require scalar epilogue\n"); return false; } - // If we might exit from anywhere but the latch, must run the exiting - // iteration in scalar form. - if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { + // If we might exit from anywhere but the latch and early exit vectorization + // is disabled, we must run the exiting iteration in scalar form. + if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch() && + !(EnableEarlyExitVectorization && Legal->hasUncountableEarlyExit())) { LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: not exiting " "from latch block\n"); return true; @@ -3656,10 +3662,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { // Start with the conditional branches exiting the loop. If the branch // condition is an instruction contained in the loop that is only used by the - // branch, it is uniform. + // branch, it is uniform. Note conditions from uncountable early exits are not + // uniform. 
SmallVector Exiting; TheLoop->getExitingBlocks(Exiting); for (BasicBlock *E : Exiting) { + if (Legal->hasUncountableEarlyExit() && TheLoop->getLoopLatch() != E) + continue; auto *Cmp = dyn_cast(E->getTerminator()->getOperand(0)); if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) AddToWorklistIfAllowed(Cmp); @@ -8239,8 +8248,11 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { // If source is an exiting block, we know the exit edge is dynamically dead // in the vector loop, and thus we don't need to restrict the mask. Avoid - // adding uses of an otherwise potentially dead instruction. - if (OrigLoop->isLoopExiting(Src)) + // adding uses of an otherwise potentially dead instruction unless we are + // vectorizing a loop with uncountable exits. In that case, we always + // materialize the mask. + if (OrigLoop->isLoopExiting(Src) && + Src != Legal->getUncountableEarlyExitingBlock()) return EdgeMaskCache[Edge] = SrcMask; VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition()); @@ -8931,14 +8943,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { static SetVector collectUsersInExitBlocks( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector &Inductions) { + auto *MiddleVPBB = Plan.getMiddleBlock(); SetVector ExitUsersToFix; for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { - BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock(); - BasicBlock *ExitingBB = find_singleton( - to_vector(predecessors(ExitBB)), - [OrigLoop](BasicBlock *Pred, bool AllowRepeats) { - return OrigLoop->contains(Pred) ? 
Pred : nullptr; - }); for (VPRecipeBase &R : *ExitVPBB) { auto *ExitIRI = dyn_cast(&R); if (!ExitIRI) @@ -8946,35 +8953,48 @@ static SetVector collectUsersInExitBlocks( auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); if (!ExitPhi) break; - Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); - VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); - // Exit values for inductions are computed and updated outside of VPlan - // and independent of induction recipes. - // TODO: Compute induction exit values in VPlan. - if ((isa(V) && - !cast(V)->getTruncInst()) || - isa(V) || - (isa(IncomingValue) && - OrigLoop->contains(cast(IncomingValue)) && - any_of(IncomingValue->users(), [&Inductions](User *U) { - auto *P = dyn_cast(U); - return P && Inductions.contains(P); - }))) - continue; - ExitUsersToFix.insert(ExitIRI); - ExitIRI->addOperand(V); + for (VPBlockBase *PredVPBB : ExitVPBB->getPredecessors()) { + BasicBlock *ExitingBB = OrigLoop->getLoopLatch(); + if (PredVPBB != MiddleVPBB) { + SmallVector ExitingBlocks; + OrigLoop->getExitingBlocks(ExitingBlocks); + assert(ExitingBlocks.size() == 2 && "only support 2 exiting blocks"); + ExitingBB = ExitingBB == ExitingBlocks[0] ? ExitingBlocks[1] + : ExitingBlocks[0]; + } + Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); + VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); + // Exit values for inductions are computed and updated outside of VPlan + // and independent of induction recipes. + // TODO: Compute induction exit values in VPlan. 
+ if ((isa(V) && + !cast(V)->getTruncInst()) || + isa(V) || + (isa(IncomingValue) && + OrigLoop->contains(cast(IncomingValue)) && + any_of(IncomingValue->users(), [&Inductions](User *U) { + auto *P = dyn_cast(U); + return P && Inductions.contains(P); + }))) { + if (ExitVPBB->getSinglePredecessor() == MiddleVPBB) + continue; + } + ExitUsersToFix.insert(ExitIRI); + ExitIRI->addOperand(V); + } } } return ExitUsersToFix; } // Add exit values to \p Plan. Extracts are added for each entry in \p -// ExitUsersToFix if needed and their operands are updated. -static void +// ExitUsersToFix if needed and their operands are updated. Returns true if all +// exit users can be handled, otherwise return false. +static bool addUsersInExitBlocks(VPlan &Plan, const SetVector &ExitUsersToFix) { if (ExitUsersToFix.empty()) - return; + return true; auto *MiddleVPBB = Plan.getMiddleBlock(); VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); @@ -8988,14 +9008,18 @@ addUsersInExitBlocks(VPlan &Plan, if (V->isLiveIn()) continue; - assert(ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB && - "Exit value not handled yet for this edge."); + // Currently only live-ins can be used by exit values from blocks not + // exiting via the vector latch through to the middle block. 
+ if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB) + return false; + LLVMContext &Ctx = ExitIRI->getInstruction().getContext(); VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd, {V, Plan.getOrAddLiveIn(ConstantInt::get( IntegerType::get(Ctx, 32), 1))}); ExitIRI->setOperand(0, Ext); } + return true; } /// Handle users in the exit block for first order reductions in the original @@ -9268,11 +9292,23 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); + if (auto *UncountableExitingBlock = + Legal->getUncountableEarlyExitingBlock()) { + VPlanTransforms::handleUncountableEarlyExit( + *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder); + } addScalarResumePhis(RecipeBuilder, *Plan); SetVector ExitUsersToFix = collectUsersInExitBlocks( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); - addUsersInExitBlocks(*Plan, ExitUsersToFix); + if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) { + reportVectorizationFailure( + "Some exit values in loop with uncountable exit not supported yet", + "Some exit values in loop with uncountable exit not supported yet", + "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop); + return nullptr; + } + // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. 
@@ -10138,12 +10174,12 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } - if (LVL.hasUncountableEarlyExit()) { + if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) { reportVectorizationFailure("Auto-vectorization of loops with uncountable " - "early exit is not yet supported", + "early exit is not enabled", "Auto-vectorization of loops with uncountable " - "early exit is not yet supported", - "UncountableEarlyExitLoopsUnsupported", ORE, L); + "early exit is not enabled", + "UncountableEarlyExitLoopsDisabled", ORE, L); return false; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 5122232ffe9b8e..81c76bc99fbf74 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -861,14 +861,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, auto Plan = std::make_unique(Entry, VecPreheader, ScalarHeader); // Create SCEV and VPValue for the trip count. - - // Currently only loops with countable exits are vectorized, but calling - // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with - // uncountable exits whilst also ensuring the symbolic maximum and known - // back-edge taken count remain identical for loops with countable exits. + // We use the symbolic max backedge-taken-count, which works also when + // vectorizing loops with uncountable early exits. const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); - assert((!isa(BackedgeTakenCountSCEV) && - BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) && + assert(!isa(BackedgeTakenCountSCEV) && "Invalid loop count"); ScalarEvolution &SE = *PSE.getSE(); const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, @@ -903,7 +899,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // 2) If we require a scalar epilogue, there is no conditional branch as // we unconditionally branch to the scalar preheader. Do nothing. 
// 3) Otherwise, construct a runtime check. - BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); + BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); // The connection order corresponds to the operands of the conditional branch. VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8794517b777f3b..7440a3a386fd2d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -621,6 +621,14 @@ class VPBlockBase { /// Remove all the successors of this block. void clearSuccessors() { Successors.clear(); } + /// Swap successors of the block. The block must have exactly 2 successors. + // TODO: This should be part of introducing conditional branch recipes rather + // than being independent. + void swapSuccessors() { + assert(Successors.size() == 2 && "must have 2 successors to swap"); + std::swap(Successors[0], Successors[1]); + } + /// The method which generates the output IR that correspond to this /// VPBlockBase, thereby "executing" the VPlan. virtual void execute(VPTransformState *State) = 0; @@ -1232,6 +1240,9 @@ class VPInstruction : public VPRecipeWithIRFlags, // operand). Only generates scalar values (either for the first lane only or // for all lanes, depending on its uses). PtrAdd, + // Returns a scalar boolean value, which is true if any lane of its single + // operand is true. + AnyOf, }; private: @@ -3884,10 +3895,10 @@ class VPlan { /// whether to execute the scalar tail loop or the exit block from the loop /// latch. 
const VPBasicBlock *getMiddleBlock() const { - return cast(getVectorLoopRegion()->getSingleSuccessor()); + return cast(getScalarPreheader()->getSinglePredecessor()); } VPBasicBlock *getMiddleBlock() { - return cast(getVectorLoopRegion()->getSingleSuccessor()); + return cast(getScalarPreheader()->getSinglePredecessor()); } /// Return the VPBasicBlock for the preheader of the scalar loop. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index e882368544e815..8fea2ca9461047 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -57,6 +57,7 @@ bool VPRecipeBase::mayWriteToMemory() const { case Instruction::Or: case Instruction::ICmp: case Instruction::Select: + case VPInstruction::AnyOf: case VPInstruction::Not: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: @@ -361,6 +362,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::PtrAdd: case VPInstruction::ExplicitVectorLength: + case VPInstruction::AnyOf: return true; default: return false; @@ -639,6 +641,10 @@ Value *VPInstruction::generate(VPTransformState &State) { } return NewPhi; } + case VPInstruction::AnyOf: { + Value *A = State.get(getOperand(0)); + return Builder.CreateOrReduce(A); + } default: llvm_unreachable("Unsupported opcode for instruction"); @@ -647,7 +653,8 @@ Value *VPInstruction::generate(VPTransformState &State) { bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractFromEnd || - getOpcode() == VPInstruction::ComputeReductionResult; + getOpcode() == VPInstruction::ComputeReductionResult || + getOpcode() == VPInstruction::AnyOf; } bool VPInstruction::isSingleScalar() const { @@ -710,6 +717,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { return false; case Instruction::ICmp: case 
Instruction::Select: + case Instruction::Or: case VPInstruction::PtrAdd: // TODO: Cover additional opcodes. return vputils::onlyFirstLaneUsed(this); @@ -805,6 +813,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::PtrAdd: O << "ptradd"; break; + case VPInstruction::AnyOf: + O << "any-of"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -822,12 +833,13 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, void VPIRInstruction::execute(VPTransformState &State) { assert((isa(&I) || getNumOperands() == 0) && "Only PHINodes can have extra operands"); - if (getNumOperands() == 1) { - VPValue *ExitValue = getOperand(0); + for (const auto &[Idx, Op] : enumerate(operands())) { + VPValue *ExitValue = Op; auto Lane = vputils::isUniformAfterVectorization(ExitValue) ? VPLane::getFirstLane() : VPLane::getLastLaneForVF(State.VF); - auto *PredVPBB = cast(getParent()->getSinglePredecessor()); + VPBlockBase *Pred = getParent()->getPredecessors()[Idx]; + auto *PredVPBB = Pred->getExitingBasicBlock(); BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; // Set insertion point in PredBB in case an extract needs to be generated. // TODO: Model extracts explicitly. @@ -860,11 +872,13 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, O << Indent << "IR " << I; if (getNumOperands() != 0) { - assert(getNumOperands() == 1 && "can have at most 1 operand"); - O << " (extra operand: "; - getOperand(0)->printAsOperand(O, SlotTracker); - O << " from "; - getParent()->getPredecessors()[0]->printAsOperand(O); + O << " (extra operand" << (getNumOperands() > 1 ? 
"s" : "") << ": "; + interleaveComma( + enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) { + Op.value()->printAsOperand(O, SlotTracker); + O << " from "; + getParent()->getPredecessors()[Op.index()]->printAsOperand(O); + }); O << ")"; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 922cba7831f4e9..e27c1bfba93525 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1858,3 +1858,62 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { } } } + +void VPlanTransforms::handleUncountableEarlyExit( + VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, + BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) { + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + auto *LatchVPBB = cast(LoopRegion->getExiting()); + VPBuilder Builder(LatchVPBB->getTerminator()); + auto *MiddleVPBB = Plan.getMiddleBlock(); + VPValue *IsEarlyExitTaken = nullptr; + + // Process the uncountable exiting block. Update IsEarlyExitTaken, which + // tracks if the uncountable early exit has been taken. Also split the middle + // block and have it conditionally branch to the early exit block if + // EarlyExitTaken. + auto *EarlyExitingBranch = + cast(UncountableExitingBlock->getTerminator()); + BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0); + BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1); + + // The early exit block may or may not be the same as the "countable" exit + // block. Creates a new VPIRBB for the early exit block in case it is distinct + // from the countable exit block. + // TODO: Introduce both exit blocks during VPlan skeleton construction. + VPIRBasicBlock *VPEarlyExitBlock; + if (OrigLoop->getUniqueExitBlock()) { + VPEarlyExitBlock = cast(MiddleVPBB->getSuccessors()[0]); + } else { + VPEarlyExitBlock = VPIRBasicBlock::fromBasicBlock( + !OrigLoop->contains(TrueSucc) ? 
TrueSucc : FalseSucc); + } + + VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask( + OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); + auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond); + IsEarlyExitTaken = + Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond}); + + VPBasicBlock *NewMiddle = new VPBasicBlock("middle.split"); + VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle); + VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock); + NewMiddle->swapSuccessors(); + + VPBuilder MiddleBuilder(NewMiddle); + MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken}); + + // Replace the condition controlling the non-early exit from the vector loop + // with one exiting if either the original condition of the vector latch is + // true or the early exit has been taken. + auto *LatchExitingBranch = cast(LatchVPBB->getTerminator()); + assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount && + "Unexpected terminator"); + auto *IsLatchExitTaken = + Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0), + LatchExitingBranch->getOperand(1)); + auto *AnyExitTaken = Builder.createNaryOp( + Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken}); + Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken); + LatchExitingBranch->eraseFromParent(); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 9cf314a6a9f447..fddde868911665 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -124,6 +124,17 @@ struct VPlanTransforms { /// Remove dead recipes from \p Plan. 
static void removeDeadRecipes(VPlan &Plan); + /// Update \p Plan to account for the uncountable early exit block in \p + /// UncountableExitingBlock by + /// * updating the condition exiting the vector loop to include the early + /// exit conditions + /// * splitting the original middle block to branch to the early exit block + /// if taken. + static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, + Loop *OrigLoop, + BasicBlock *UncountableExitingBlock, + VPRecipeBuilder &RecipeBuilder); + /// Lower abstract recipes to concrete ones, that can be codegen'd. static void convertToConcreteRecipes(VPlan &Plan); }; diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 033900de55278c..9003642f1f93b2 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2848,6 +2848,12 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { if (OldCost < NewCost) return false; + // Canonicalize undef param to RHS to help further folds. 
+ if (isa(DstVec) && !isa(SrcVec)) { + ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); + std::swap(DstVec, SrcVec); + } + Value *Shuf = Builder.CreateShuffleVector(DstVec, SrcVec, Mask); replaceValue(I, *Shuf); diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll index fb366564723db6..a48a4e0e723ebc 100644 --- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll @@ -5,8 +5,10 @@ define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %cmp1 = icmp eq <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 @@ -71,8 +73,9 @@ define i1 @combine_setcc_ne_vecreduce_or_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp ne <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 @@ -129,9 +132,10 @@ define i1 @combine_setcc_ne_vecreduce_or_v64i1(<64 x i8> %a) { define i1 @combine_setcc_eq_vecreduce_and_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_and_v8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 +; CHECK-NEXT: uminv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp eq <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 @@ -188,9 +192,11 @@ define i1 @combine_setcc_eq_vecreduce_and_v64i1(<64 x i8> %a) { 
define i1 @combine_setcc_ne_vecreduce_and_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: uminv b0, v0.8b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %cmp1 = icmp ne <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll new file mode 100644 index 00000000000000..2ffba10e211007 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+sve2,+fp8 < %s | FileCheck %s +; RUN: llc -mattr=+sme2,+fp8 --force-streaming < %s | FileCheck %s + +target triple = "aarch64-linux" + +define @cvtn_bf16( %s1, %s2) { +; CHECK-LABEL: cvtn_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfcvtn z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtn.nxv8bf16( %s1, %s2) + ret %r +} + +define @cvtn_f16( %s1, %s2) { +; CHECK-LABEL: cvtn_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtn z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtn.nxv8f16( %s1, %s2) + ret %r +} + +define @cvtnb_f32( %s1, %s2) { +; CHECK-LABEL: cvtnb_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtnb z0.b, { z0.s, z1.s } +; CHECK-NEXT: ret + %r = call 
@llvm.aarch64.sve.fp8.cvtnb.nxv4f32( %s1, %s2) + ret %r +} + +define @cvtnt_f32( %d, %s1, %s2) { +; CHECK-LABEL: cvtnt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: fcvtnt z0.b, { z2.s, z3.s } +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtnt.nxv4f32( %d, %s1, %s2) + ret %r +} diff --git a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll index 5374d4823034ff..767ca91a58bb10 100644 --- a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll +++ b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll @@ -9,11 +9,13 @@ define i1 @unordered_floating_point_compare_on_v8f32(<8 x float> %a_vec) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 ; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %a_cmp = fcmp ule <8 x float> %a_vec, zeroinitializer %cmp_result = bitcast <8 x i1> %a_cmp to i8 diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll index 62f3e8d184d24d..8ca521327c2e31 100644 --- a/llvm/test/CodeGen/AArch64/reduce-and.ll +++ b/llvm/test/CodeGen/AArch64/reduce-and.ll @@ -20,11 +20,11 @@ define i1 @test_redand_v1i1(<1 x i1> %a) { define i1 @test_redand_v2i1(<2 x i1> %a) { ; CHECK-LABEL: test_redand_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: shl v0.2s, v0.2s, #31 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v2i1: @@ -42,11 +42,11 @@ define 
i1 @test_redand_v2i1(<2 x i1> %a) { define i1 @test_redand_v4i1(<4 x i1> %a) { ; CHECK-LABEL: test_redand_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: uminv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v4i1: @@ -68,11 +68,11 @@ define i1 @test_redand_v4i1(<4 x i1> %a) { define i1 @test_redand_v8i1(<8 x i1> %a) { ; CHECK-LABEL: test_redand_v8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: shl v0.8b, v0.8b, #7 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: uminv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v8i1: diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll index 485cb7c916140c..aac31ce8b71b75 100644 --- a/llvm/test/CodeGen/AArch64/reduce-or.ll +++ b/llvm/test/CodeGen/AArch64/reduce-or.ll @@ -22,8 +22,9 @@ define i1 @test_redor_v2i1(<2 x i1> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.2s, v0.2s, #31 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v2i1: @@ -43,8 +44,9 @@ define i1 @test_redor_v4i1(<4 x i1> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: umaxv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v4i1: @@ -68,8 +70,9 @@ define i1 @test_redor_v8i1(<8 x i1> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.8b, v0.8b, #7 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp 
d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v8i1: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll index f402463de7be81..0d291e0bf07983 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll @@ -49,14 +49,6 @@ define half @t3(half %x) { ; CHECK-NEXT: scvtf s0, w8 ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ret -; -; USE-NEON-NO-GPRS-LABEL: t3: -; USE-NEON-NO-GPRS: // %bb.0: // %entry -; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0 -; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0 -; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0 -; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0 -; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptosi half %x to i32 %conv1 = sitofp i32 %conv to half @@ -107,14 +99,6 @@ define half @t6(half %x) { ; CHECK-NEXT: ucvtf s0, w8 ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ret -; -; USE-NEON-NO-GPRS-LABEL: t6: -; USE-NEON-NO-GPRS: // %bb.0: // %entry -; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0 -; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0 -; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0 -; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0 -; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptoui half %x to i32 %conv1 = uitofp i32 %conv to half diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll index fd81deeb7d913b..7fa416e0dbcd5c 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll @@ -139,11 +139,11 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind { define i1 @test_v4i1(<4 x i1> %a) nounwind { ; CHECK-LABEL: test_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: 
cset w0, eq +; CHECK-NEXT: uminv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a) ret i1 %b diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll index 10a3ef1658a965..58020d28702b2f 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll @@ -15,15 +15,8 @@ declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a) declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a) declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a) - -define i32 @reduce_and_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v1i8: +define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w8, v0.b[0] @@ -36,14 +29,16 @@ define i32 @reduce_and_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v2i8: +define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v2: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 -; CHECK-NEXT: cmge v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq +; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 +; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 +; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <2 x i8> %a0, zeroinitializer %y = call i1 
@llvm.vector.reduce.and.v2i1(<2 x i1> %x) @@ -51,14 +46,16 @@ define i32 @reduce_and_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v4i8: +define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v4: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: cmge v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq +; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 +; CHECK-NEXT: uminv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 +; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <4 x i8> %a0, zeroinitializer %y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x) @@ -66,12 +63,14 @@ define i32 @reduce_and_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v8i8: +define i32 @reduce_and_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v8: ; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq +; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 +; CHECK-NEXT: uminv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 +; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <8 x i8> %a0, zeroinitializer %y = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %x) @@ -79,8 +78,8 @@ define i32 @reduce_and_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v16i8: +define i32 @reduce_and_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v16: ; CHECK: // %bb.0: ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: uminv b0, v0.16b @@ -94,8 +93,8 @@ define i32 @reduce_and_v16i8(<16 x i8> %a0, i32 %a1, i32 
%a2) nounwind { ret i32 %z } -define i32 @reduce_and_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v32i8: +define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v32: ; CHECK: // %bb.0: ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 @@ -110,182 +109,8 @@ define i32 @reduce_and_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[0] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: cmge v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <2 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <4 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.8h, v0.8h, #0 -; CHECK-NEXT: xtn v0.8b, v0.8h -; 
CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <8 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-NEXT: uminv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <16 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v1i32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <2 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <4 x i32> %a0, zeroinitializer - %y = call i1 
@llvm.vector.reduce.and.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v1.4s, v1.4s, #0 -; CHECK-NEXT: cmge v0.4s, v0.4s, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <8 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.2d, v0.2d, #0 -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <2 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v1.2d, v1.2d, #0 -; CHECK-NEXT: cmge v0.2d, v0.2d, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <4 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 
- ret i32 %z -} - -define i32 @reduce_or_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v1i8: +define i32 @reduce_or_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w8, v0.b[0] @@ -298,13 +123,15 @@ define i32 @reduce_or_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v2i8: +define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v2: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <2 x i8> %a0, zeroinitializer @@ -313,13 +140,15 @@ define i32 @reduce_or_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v4i8: +define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v4: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: umaxv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <4 x i8> %a0, zeroinitializer @@ -328,11 +157,13 @@ define i32 @reduce_or_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v8i8: +define i32 @reduce_or_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v8: ; CHECK: // %bb.0: ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-NEXT: 
fcmp d0, #0.0 +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <8 x i8> %a0, zeroinitializer @@ -341,8 +172,8 @@ define i32 @reduce_or_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v16i8: +define i32 @reduce_or_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v16: ; CHECK: // %bb.0: ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: umaxv b0, v0.16b @@ -356,8 +187,8 @@ define i32 @reduce_or_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v32i8: +define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v32: ; CHECK: // %bb.0: ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 @@ -371,457 +202,3 @@ define i32 @reduce_or_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { %z = select i1 %y, i32 %a1, i32 %a2 ret i32 %z } - -define i32 @reduce_or_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[0] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i16> %a0, zeroinitializer - %y = call i1 
@llvm.vector.reduce.or.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-NEXT: umaxv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <16 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v1i32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) 
nounwind { -; CHECK-LABEL: reduce_or_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; 
CHECK-NEXT: ret - %x = icmp slt <2 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.2d, v1.2d, #0 -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v1i8: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.b[0] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sshr v0.2s, v0.2s, #24 -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: addv h0, v0.4h -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, 
#0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-NEXT: addv b0, v0.8b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v16i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 -; CHECK-NEXT: addv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <16 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.16b, v1.16b, #0 -; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b -; CHECK-NEXT: addv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <32 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[0] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp 
slt <1 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: addv h0, v0.4h -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: addv h0, v0.8h -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-NEXT: addv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; 
CHECK-NEXT: ret - %x = icmp slt <16 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v1i32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-NEXT: addv s0, v0.4s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: addv h0, v0.8h -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i32> %a0, zeroinitializer - %y = 
call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-NEXT: addp d0, v0.2d -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.2d, v1.2d, #0 -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: addv s0, v0.4s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll index 2a21cc8d7c611d..809a6d6556a7be 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -202,8 +202,9 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-SD-NEXT: 
cmlt v0.4h, v0.4h, #0 -; CHECK-SD-NEXT: fcmp d0, #0.0 -; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: umaxv h0, v0.4h +; CHECK-SD-NEXT: fmov w8, s0 +; CHECK-SD-NEXT: and w0, w8, #0x1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i1: diff --git a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll index 593c9db090a26d..c0f1720e1cf8b3 100644 --- a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll +++ b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll @@ -62,11 +62,13 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) { ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.8b, v1.8h ; NEON-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxv b1, v1.8b ; NEON-FIXED-NEXT: umaxv b2, v2.8b ; NEON-FIXED-NEXT: fmov w8, s2 ; NEON-FIXED-NEXT: bfi x9, x8, #1, #3 ; NEON-FIXED-NEXT: ldrh w8, [x9] +; NEON-FIXED-NEXT: fmov w9, s1 +; NEON-FIXED-NEXT: tst w9, #0x1 ; NEON-FIXED-NEXT: csel w0, w8, w0, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -81,11 +83,13 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) { ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.8b, v1.8h ; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxv b1, v1.8b ; SVE-FIXED-NEXT: umaxv b2, v2.8b ; SVE-FIXED-NEXT: fmov w8, s2 ; SVE-FIXED-NEXT: bfi x9, x8, #1, #3 ; SVE-FIXED-NEXT: ldrh w8, [x9] +; SVE-FIXED-NEXT: fmov w9, s1 +; SVE-FIXED-NEXT: tst w9, #0x1 ; SVE-FIXED-NEXT: csel w0, w8, w0, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; SVE-FIXED-NEXT: ret @@ -106,11 +110,13 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) { ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.4h, v1.4s ; NEON-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxv h1, v1.4h ; NEON-FIXED-NEXT: umaxv h2, v2.4h ; 
NEON-FIXED-NEXT: fmov w8, s2 ; NEON-FIXED-NEXT: bfi x9, x8, #2, #2 ; NEON-FIXED-NEXT: ldr w8, [x9] +; NEON-FIXED-NEXT: fmov w9, s1 +; NEON-FIXED-NEXT: tst w9, #0x1 ; NEON-FIXED-NEXT: csel w0, w8, w0, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -125,11 +131,13 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) { ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.4h, v1.4s ; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxv h1, v1.4h ; SVE-FIXED-NEXT: umaxv h2, v2.4h ; SVE-FIXED-NEXT: fmov w8, s2 ; SVE-FIXED-NEXT: bfi x9, x8, #2, #2 ; SVE-FIXED-NEXT: ldr w8, [x9] +; SVE-FIXED-NEXT: fmov w9, s1 +; SVE-FIXED-NEXT: tst w9, #0x1 ; SVE-FIXED-NEXT: csel w0, w8, w0, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; SVE-FIXED-NEXT: ret @@ -150,11 +158,13 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.2s, v1.2d ; NEON-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s ; NEON-FIXED-NEXT: umaxp v2.2s, v2.2s, v2.2s ; NEON-FIXED-NEXT: fmov w8, s2 ; NEON-FIXED-NEXT: bfi x9, x8, #3, #1 ; NEON-FIXED-NEXT: ldr x8, [x9] +; NEON-FIXED-NEXT: fmov w9, s1 +; NEON-FIXED-NEXT: tst w9, #0x1 ; NEON-FIXED-NEXT: csel x0, x8, x0, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -169,11 +179,13 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.2s, v1.2d ; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s ; SVE-FIXED-NEXT: umaxp v2.2s, v2.2s, v2.2s ; SVE-FIXED-NEXT: fmov w8, s2 ; SVE-FIXED-NEXT: bfi x9, x8, #3, #1 ; SVE-FIXED-NEXT: ldr x8, [x9] +; SVE-FIXED-NEXT: fmov w9, s1 +; SVE-FIXED-NEXT: tst w9, #0x1 ; SVE-FIXED-NEXT: csel x0, x8, x0, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; 
SVE-FIXED-NEXT: ret @@ -194,11 +206,13 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.4h, v1.4s ; NEON-FIXED-NEXT: and v3.8b, v1.8b, v3.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxv h1, v1.4h ; NEON-FIXED-NEXT: umaxv h3, v3.4h ; NEON-FIXED-NEXT: fmov w8, s3 ; NEON-FIXED-NEXT: bfi x9, x8, #2, #2 +; NEON-FIXED-NEXT: fmov w8, s1 ; NEON-FIXED-NEXT: ldr s0, [x9] +; NEON-FIXED-NEXT: tst w8, #0x1 ; NEON-FIXED-NEXT: fcsel s0, s0, s2, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -213,11 +227,13 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.4h, v1.4s ; SVE-FIXED-NEXT: and v3.8b, v1.8b, v3.8b -; SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxv h1, v1.4h ; SVE-FIXED-NEXT: umaxv h3, v3.4h ; SVE-FIXED-NEXT: fmov w8, s3 ; SVE-FIXED-NEXT: bfi x9, x8, #2, #2 +; SVE-FIXED-NEXT: fmov w8, s1 ; SVE-FIXED-NEXT: ldr s0, [x9] +; SVE-FIXED-NEXT: tst w8, #0x1 ; SVE-FIXED-NEXT: fcsel s0, s0, s2, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; SVE-FIXED-NEXT: ret @@ -238,11 +254,13 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.2s, v1.2d ; NEON-FIXED-NEXT: and v3.8b, v1.8b, v3.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s ; NEON-FIXED-NEXT: umaxp v3.2s, v3.2s, v3.2s ; NEON-FIXED-NEXT: fmov w8, s3 ; NEON-FIXED-NEXT: bfi x9, x8, #3, #1 +; NEON-FIXED-NEXT: fmov w8, s1 ; NEON-FIXED-NEXT: ldr d0, [x9] +; NEON-FIXED-NEXT: tst w8, #0x1 ; NEON-FIXED-NEXT: fcsel d0, d0, d2, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -257,11 +275,13 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.2s, v1.2d ; SVE-FIXED-NEXT: and v3.8b, v1.8b, v3.8b -; 
SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s ; SVE-FIXED-NEXT: umaxp v3.2s, v3.2s, v3.2s ; SVE-FIXED-NEXT: fmov w8, s3 ; SVE-FIXED-NEXT: bfi x9, x8, #3, #1 +; SVE-FIXED-NEXT: fmov w8, s1 ; SVE-FIXED-NEXT: ldr d0, [x9] +; SVE-FIXED-NEXT: tst w8, #0x1 ; SVE-FIXED-NEXT: fcsel d0, d0, d2, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; SVE-FIXED-NEXT: ret diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index 96bbcb7ed2149a..d316e10037757b 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -217,7 +217,7 @@ define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { ; AKF_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group -; ATTRIBUTOR_HSA-SAME: () #[[ATTR3:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { ; ATTRIBUTOR_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ; ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) @@ -235,7 +235,6 @@ attributes #1 = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" 
"amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll index 0e4a9791b6f57d..33e7e7a7a019e3 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll @@ -73,7 +73,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() { define void @func_uses_asm_virtreg_agpr() { ; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr( -; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: ret void ; @@ -83,7 +83,7 @@ define void @func_uses_asm_virtreg_agpr() { define void @func_uses_asm_physreg_agpr() { ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr( -; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: ret void ; @@ -93,7 +93,7 @@ define void @func_uses_asm_physreg_agpr() { define void @func_uses_asm_physreg_agpr_tuple() { ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple( -; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: ret void ; @@ -105,7 +105,7 @@ declare void @unknown() define amdgpu_kernel void @kernel_calls_extern() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern( -; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: ret void ; @@ -115,8 +115,8 @@ define amdgpu_kernel void @kernel_calls_extern() { define amdgpu_kernel void @kernel_calls_extern_marked_callsite() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite( -; CHECK-SAME: ) #[[ATTR4]] { -; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]] +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; call void @unknown() #0 @@ -125,7 +125,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() 
{ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect( -; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] { +; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: call void [[INDIRECT]]() ; CHECK-NEXT: ret void ; @@ -135,8 +135,8 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) { define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite( -; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]] +; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]] ; CHECK-NEXT: ret void ; call void %indirect() #0 @@ -155,7 +155,7 @@ define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() { define void @empty() { ; CHECK-LABEL: define void @empty( -; CHECK-SAME: ) #[[ATTR5:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR1]] { ; CHECK-NEXT: ret void ; ret void @@ -163,7 +163,7 @@ define void @empty() { define void @also_empty() { ; CHECK-LABEL: define void @also_empty( -; CHECK-SAME: ) #[[ATTR5]] { +; CHECK-SAME: ) #[[ATTR1]] { ; CHECK-NEXT: ret void ; ret void @@ -256,12 +256,9 @@ attributes #0 = { "amdgpu-no-agpr" } ;. 
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" } +; CHECK: attributes #[[ATTR2]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-agpr" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll b/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll index 28722021e0448f..7e0208cd1f45aa 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll @@ -117,14 +117,14 @@ define void @call_no_dispatch_id() { ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { 
"amdgpu-no-queue-ptr" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index ed136c58379cb3..ea3f08ede2c5dc 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -593,7 +593,7 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr -; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { ; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; ATTRIBUTOR_HSA-NEXT: store volatile ptr addrspace(4) [[IMPLICITARG_PTR]], ptr addrspace(1) undef, align 8 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -645,7 +645,7 @@ define internal void @defined.func() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@defined.func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -658,7 +658,7 @@ define void @func_call_external() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_external -; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @external.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -673,7 +673,7 @@ define void @func_call_defined() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_defined -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { ; ATTRIBUTOR_HSA-NEXT: call void @defined.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ 
-687,8 +687,8 @@ define void @func_call_asm() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { -; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR28:[0-9]+]] +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { +; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR26:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void asm sideeffect "", ""() #3 @@ -702,7 +702,7 @@ define amdgpu_kernel void @kern_call_external() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_external -; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: call void @external.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -717,7 +717,7 @@ define amdgpu_kernel void @func_kern_defined() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @defined.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -769,7 +769,7 @@ define float @func_indirect_call(ptr %fptr) #3 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call -; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] { +; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -788,7 +788,7 @@ define float @func_extern_call() #3 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_extern_call -; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @extern() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -806,7 +806,7 @@ define float 
@func_null_call(ptr %fptr) #3 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_null_call -; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] { +; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float null() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -827,7 +827,7 @@ define float @func_other_intrinsic_call(float %arg) #3 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call -; ATTRIBUTOR_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR17]] { +; ATTRIBUTOR_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR16]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]]) ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -845,7 +845,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -861,7 +861,7 @@ define void @func_sanitize_address() #4 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -877,7 +877,7 @@ define void @func_indirect_sanitize_address() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -893,7 +893,7 @@ 
define amdgpu_kernel void @kern_indirect_sanitize_address() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -912,7 +912,7 @@ define amdgpu_kernel void @kern_decl_sanitize_address() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_decl_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: call void @extern_func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -928,7 +928,7 @@ define internal void @enqueue_block_def() #6 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -941,7 +941,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl -; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -956,7 +956,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR30:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR24:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -969,7 +969,7 @@ define void @unused_enqueue_block() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR30]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void 
@@ -980,7 +980,7 @@ define internal void @known_func() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR30]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR25]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -994,8 +994,8 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR30]] { -; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR29:[0-9]+]] +; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] { +; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR27:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @known_func() #6 @@ -1024,37 +1024,34 @@ attributes #6 = { "enqueued-block" } ; AKF_HSA: attributes #[[ATTR7]] = { "enqueued-block" } ; AKF_HSA: attributes #[[ATTR8]] = { "amdgpu-calls" } ;. - ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-agpr" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { 
nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" 
"amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; 
ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" 
"amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-agpr" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR27:[0-9]+]] = { 
"amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR28]] = { nounwind } -; ATTRIBUTOR_HSA: attributes #[[ATTR29]] = { "enqueued-block" } +; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR21:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "amdgpu-no-agpr" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { nounwind } +; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "enqueued-block" } ;. ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 239bdfde323cf5..6896ac8d2e5dbd 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -478,17 +478,11 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 { ; No-op addrspacecast should not use queue ptr define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #1 { -; AKF_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast -; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr -; AKF_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 -; AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast -; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr -; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 -; ATTRIBUTOR_HSA-NEXT: ret void +; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast +; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 +; HSA-NEXT: ret void ; %stof = addrspacecast ptr addrspace(1) %ptr to ptr store volatile i32 0, ptr %stof @@ -496,17 +490,11 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt } define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #1 { -; AKF_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast -; AKF_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr -; AKF_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4 -; 
AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast -; ATTRIBUTOR_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr -; ATTRIBUTOR_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4 -; ATTRIBUTOR_HSA-NEXT: ret void +; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast +; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr +; HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4 +; HSA-NEXT: ret void ; %stof = addrspacecast ptr addrspace(4) %ptr to ptr %ld = load volatile i32, ptr %stof @@ -633,7 +621,7 @@ define void @use_alloca_func() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR14:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { ; ATTRIBUTOR_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) ; ATTRIBUTOR_HSA-NEXT: store i32 0, ptr addrspace(5) [[ALLOCA]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -668,7 +656,6 @@ attributes #1 = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll index 366432e0fc6cb1..678c3a0158ec17 100644 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll @@ -27,7 +27,7 @@ define internal void @callee_1_2_3() { define amdgpu_kernel void @kernel_1_2_3() #0 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2_3( -; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: call void @callee_1_2_3() ; CHECK-NEXT: call void @extern_callee() ; CHECK-NEXT: call void @dummy() @@ -44,7 +44,7 @@ attributes #0 = {"amdgpu-max-num-workgroups"="1,2,3"} ; -> 100,10,99 define internal void @callee_merge_100_8_32__16_10_99() { ; CHECK-LABEL: define internal void @callee_merge_100_8_32__16_10_99( -; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void ; @@ -54,7 +54,7 @@ define internal void @callee_merge_100_8_32__16_10_99() { define amdgpu_kernel void @kernel_100_8_32() #1 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_100_8_32( -; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: call void @callee_merge_100_8_32__16_10_99() ; CHECK-NEXT: ret void ; @@ -64,7 +64,7 @@ define amdgpu_kernel void @kernel_100_8_32() #1 { define amdgpu_cs void @amdgpu_cs_100_8_32() #1 { ; CHECK-LABEL: define amdgpu_cs void @amdgpu_cs_100_8_32( -; CHECK-SAME: ) #[[ATTR3]] { +; CHECK-SAME: ) #[[ATTR4]] { ; CHECK-NEXT: call void @callee_merge_100_8_32__16_10_99() ; CHECK-NEXT: ret void ; @@ -76,7 +76,7 @@ attributes #1 = {"amdgpu-max-num-workgroups"="100,8,32"} define amdgpu_kernel void @kernel_16_10_99() #2 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_16_10_99( -; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: call void 
@callee_merge_100_8_32__16_10_99() ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void @@ -110,7 +110,7 @@ define internal void @callee_x_worst_case() { define amdgpu_kernel void @kernel_x_maximum() #3 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_x_maximum( -; CHECK-SAME: ) #[[ATTR5:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: call void @merge_to_worst_case() ; CHECK-NEXT: call void @callee_x_worst_case() ; CHECK-NEXT: call void @dummy() @@ -126,7 +126,7 @@ attributes #3 = {"amdgpu-max-num-workgroups"="4294967295,1,1"} define amdgpu_kernel void @kernel_y_maximum() #4 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_y_maximum( -; CHECK-SAME: ) #[[ATTR6:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: call void @merge_to_worst_case() ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void @@ -140,7 +140,7 @@ attributes #4 = {"amdgpu-max-num-workgroups"="1,4294967295,1"} define amdgpu_kernel void @kernel_z_maximum() #5 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_z_maximum( -; CHECK-SAME: ) #[[ATTR7:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR8:[0-9]+]] { ; CHECK-NEXT: call void @merge_to_worst_case() ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void @@ -155,7 +155,7 @@ attributes #5 = {"amdgpu-max-num-workgroups"="1,1,4294967295"} ; Make sure the attribute isn't lost from the callee. 
define internal void @annotated_callee_from_unannotated_kernel() #6 { ; CHECK-LABEL: define internal void @annotated_callee_from_unannotated_kernel( -; CHECK-SAME: ) #[[ATTR8:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR9:[0-9]+]] { ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void ; @@ -167,7 +167,7 @@ attributes #6 = {"amdgpu-max-num-workgroups"="42,99,123"} define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee() { ; CHECK-LABEL: define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee( -; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-SAME: ) #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: call void @annotated_callee_from_unannotated_kernel() ; CHECK-NEXT: ret void ; @@ -178,7 +178,7 @@ define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee() { define internal void @annotated_callee_merge_caller() #7 { ; CHECK-LABEL: define internal void @annotated_callee_merge_caller( -; CHECK-SAME: ) #[[ATTR9:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR11:[0-9]+]] { ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void ; @@ -190,7 +190,7 @@ attributes #7 = {"amdgpu-max-num-workgroups"="512,256,1024"} define amdgpu_kernel void @call_annotated_callee_merge_caller() #8 { ; CHECK-LABEL: define amdgpu_kernel void @call_annotated_callee_merge_caller( -; CHECK-SAME: ) #[[ATTR10:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR12:[0-9]+]] { ; CHECK-NEXT: call void @annotated_callee_merge_caller() ; CHECK-NEXT: ret void ; @@ -212,7 +212,7 @@ define internal void @called_by_explicit_worst_case() { define amdgpu_kernel void @kernel_explicit_worst_case() #9 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_explicit_worst_case( -; CHECK-SAME: ) #[[ATTR11:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: call void @called_by_explicit_worst_case() ; CHECK-NEXT: ret void ; @@ -223,16 +223,18 @@ define amdgpu_kernel void @kernel_explicit_worst_case() #9 { attributes #9 = {"amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295"} ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="1,2,3" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="100,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="100,8,32" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-max-num-workgroups"="16,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-max-num-workgroups"="4294967295,1,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-max-num-workgroups"="1,4294967295,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-max-num-workgroups"="1,1,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-max-num-workgroups"="42,99,123" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR9]] = { "amdgpu-max-num-workgroups"="256,128,1024" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR10]] = { "amdgpu-max-num-workgroups"="256,128,2048" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR11]] = { "amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="1,2,3" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="1,2,3" "amdgpu-waves-per-eu"="4,10" 
"uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="100,10,99" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-max-num-workgroups"="100,8,32" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-max-num-workgroups"="16,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-max-num-workgroups"="4294967295,1,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-max-num-workgroups"="1,4294967295,1" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-max-num-workgroups"="1,1,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-max-num-workgroups"="42,99,123" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR10]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR11]] = { "amdgpu-max-num-workgroups"="256,128,1024" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR12]] = { "amdgpu-max-num-workgroups"="256,128,2048" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR13]] = { "amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll index bcc960f6a56c2c..25da00e6bde35d 100644 --- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll +++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll @@ -27,12 +27,12 @@ define void @without_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) { define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -60,13 +60,13 @@ define void @with_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) { define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void 
@@ -93,12 +93,12 @@ define void @without_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) { define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(2) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(2) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -126,13 +126,13 @@ define void @with_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) { define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -159,12 +159,12 @@ define void @without_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) { define amdgpu_kernel void @without_group_to_flat_addrspacecast_cc_kernel(ptr addrspace(3) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_group_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr 
addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_group_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -192,13 +192,13 @@ define void @with_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) { define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(ptr addrspace(3) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -225,12 +225,12 @@ define void @without_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) { define amdgpu_kernel void @without_constant_to_flat_addrspacecast_cc_kernel(ptr addrspace(4) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_constant_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(4) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_constant_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr 
addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(4) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -258,13 +258,13 @@ define void @with_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) { define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(ptr addrspace(4) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -291,12 +291,12 @@ define void @without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { define amdgpu_kernel void @without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -306,13 +306,13 @@ define amdgpu_kernel void 
@without_private_to_flat_addrspacecast_cc_kernel(ptr a define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @with_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @with_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -324,13 +324,13 @@ define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -357,12 +357,12 @@ define void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { define amdgpu_kernel void @call_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void 
@call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -372,12 +372,12 @@ define amdgpu_kernel void @call_without_private_to_flat_addrspacecast_cc_kernel( define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @call_with_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_with_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -387,12 +387,12 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: 
ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -402,13 +402,13 @@ define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(ptr define void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @call_both_with_and_without_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_both_with_and_without_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void @@ -420,13 +420,13 @@ define void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrsp define amdgpu_kernel void @call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) 
[[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void @@ -453,12 +453,12 @@ define void @call_call_without_private_to_flat_addrspacecast(ptr addrspace(5) %p define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -468,12 +468,12 @@ define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_ke define void @call_call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @call_call_with_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_call_with_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -483,12 +483,12 @@ define void 
@call_call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -498,12 +498,12 @@ define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kerne define void @call_call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @call_call_both_with_and_without_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_call_both_with_and_without_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -513,12 +513,12 @@ define void @call_call_both_with_and_without_private_to_flat_addrspacecast(ptr a define amdgpu_kernel void @call_call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void 
@call_call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -528,14 +528,14 @@ define amdgpu_kernel void @call_call_both_with_and_without_private_to_flat_addrs define void @with_cast_call_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) @@ -549,14 +549,14 @@ define void @with_cast_call_without_private_to_flat_addrspacecast(ptr addrspace( define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void 
@with_cast_call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) @@ -570,14 +570,14 @@ define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_ define void @with_cast_call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) @@ -591,14 +591,14 @@ define void @with_cast_call_with_private_to_flat_addrspacecast(ptr 
addrspace(5) define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) @@ -614,12 +614,12 @@ define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_ define amdgpu_kernel void @private_constant_expression_use(ptr addrspace(1) nocapture %out) { ; GFX9-LABEL: define amdgpu_kernel void @private_constant_expression_use( -; GFX9-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @private_constant_expression_use( -; GFX10-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8 ; GFX10-NEXT: ret void ; @@ -633,13 +633,13 @@ 
define amdgpu_kernel void @private_constant_expression_use(ptr addrspace(1) noca define void @with_indirect_call() { ; GFX9-LABEL: define void @with_indirect_call( -; GFX9-SAME: ) #[[ATTR4:[0-9]+]] { +; GFX9-SAME: ) #[[ATTR2:[0-9]+]] { ; GFX9-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8 ; GFX9-NEXT: call void [[FPTR]]() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @with_indirect_call( -; GFX10-SAME: ) #[[ATTR4:[0-9]+]] { +; GFX10-SAME: ) #[[ATTR2:[0-9]+]] { ; GFX10-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8 ; GFX10-NEXT: call void [[FPTR]]() ; GFX10-NEXT: ret void @@ -651,13 +651,13 @@ define void @with_indirect_call() { define amdgpu_kernel void @with_indirect_call_cc_kernel() { ; GFX9-LABEL: define amdgpu_kernel void @with_indirect_call_cc_kernel( -; GFX9-SAME: ) #[[ATTR5:[0-9]+]] { +; GFX9-SAME: ) #[[ATTR2]] { ; GFX9-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8 ; GFX9-NEXT: call void [[FPTR]]() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_indirect_call_cc_kernel( -; GFX10-SAME: ) #[[ATTR5:[0-9]+]] { +; GFX10-SAME: ) #[[ATTR2]] { ; GFX10-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8 ; GFX10-NEXT: call void [[FPTR]]() ; GFX10-NEXT: ret void @@ -669,12 +669,12 @@ define amdgpu_kernel void @with_indirect_call_cc_kernel() { define void @call_with_indirect_call() { ; GFX9-LABEL: define void @call_with_indirect_call( -; GFX9-SAME: ) #[[ATTR4]] { +; GFX9-SAME: ) #[[ATTR2]] { ; GFX9-NEXT: call void @with_indirect_call() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_with_indirect_call( -; GFX10-SAME: ) #[[ATTR4]] { +; GFX10-SAME: ) #[[ATTR2]] { ; GFX10-NEXT: call void @with_indirect_call() ; GFX10-NEXT: ret void ; @@ -684,12 +684,12 @@ define void @call_with_indirect_call() { define amdgpu_kernel void @call_with_indirect_call_cc_kernel() { ; GFX9-LABEL: define amdgpu_kernel void @call_with_indirect_call_cc_kernel( -; GFX9-SAME: ) #[[ATTR5]] { 
+; GFX9-SAME: ) #[[ATTR2]] { ; GFX9-NEXT: call void @with_indirect_call() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_with_indirect_call_cc_kernel( -; GFX10-SAME: ) #[[ATTR5]] { +; GFX10-SAME: ) #[[ATTR2]] { ; GFX10-NEXT: call void @with_indirect_call() ; GFX10-NEXT: ret void ; @@ -723,7 +723,7 @@ define void @also_empty() { define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) { ; GFX9-LABEL: define amdgpu_kernel void @indirect_call_known_callees( -; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR6:[0-9]+]] { +; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; GFX9-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty ; GFX9-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty ; GFX9-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]] @@ -741,7 +741,7 @@ define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) { ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @indirect_call_known_callees( -; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR6:[0-9]+]] { +; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; GFX10-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty ; GFX10-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty ; GFX10-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]] @@ -767,13 +767,13 @@ declare i32 @llvm.amdgcn.workgroup.id.x() define void @use_intrinsic_workitem_id_x() { ; GFX9-LABEL: define void @use_intrinsic_workitem_id_x( -; GFX9-SAME: ) #[[ATTR8:[0-9]+]] { +; GFX9-SAME: ) #[[ATTR5:[0-9]+]] { ; GFX9-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; GFX9-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @use_intrinsic_workitem_id_x( -; GFX10-SAME: ) #[[ATTR8:[0-9]+]] { +; GFX10-SAME: ) #[[ATTR5:[0-9]+]] { ; GFX10-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; GFX10-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4 ; GFX10-NEXT: ret void @@ -785,13 +785,13 
@@ define void @use_intrinsic_workitem_id_x() { define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel() { ; GFX9-LABEL: define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel( -; GFX9-SAME: ) #[[ATTR1]] { +; GFX9-SAME: ) #[[ATTR0]] { ; GFX9-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; GFX9-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel( -; GFX10-SAME: ) #[[ATTR1]] { +; GFX10-SAME: ) #[[ATTR0]] { ; GFX10-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; GFX10-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4 ; GFX10-NEXT: ret void @@ -803,12 +803,12 @@ define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel() { define void @call_use_intrinsic_workitem_id_x() { ; GFX9-LABEL: define void @call_use_intrinsic_workitem_id_x( -; GFX9-SAME: ) #[[ATTR8]] { +; GFX9-SAME: ) #[[ATTR5]] { ; GFX9-NEXT: call void @use_intrinsic_workitem_id_x() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_use_intrinsic_workitem_id_x( -; GFX10-SAME: ) #[[ATTR8]] { +; GFX10-SAME: ) #[[ATTR5]] { ; GFX10-NEXT: call void @use_intrinsic_workitem_id_x() ; GFX10-NEXT: ret void ; @@ -818,12 +818,12 @@ define void @call_use_intrinsic_workitem_id_x() { define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel() { ; GFX9-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel( -; GFX9-SAME: ) #[[ATTR9:[0-9]+]] { +; GFX9-SAME: ) #[[ATTR5]] { ; GFX9-NEXT: call void @use_intrinsic_workitem_id_x() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel( -; GFX10-SAME: ) #[[ATTR9:[0-9]+]] { +; GFX10-SAME: ) #[[ATTR5]] { ; GFX10-NEXT: call void @use_intrinsic_workitem_id_x() ; GFX10-NEXT: ret void ; @@ -833,13 +833,13 @@ define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel() { define amdgpu_kernel void 
@calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel( -; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]]) ; GFX9-NEXT: store volatile i32 7, ptr [[TMP1]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel( -; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]]) ; GFX10-NEXT: store volatile i32 7, ptr [[TMP1]], align 4 ; GFX10-NEXT: ret void @@ -851,12 +851,12 @@ define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel( -; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel( -; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -866,12 +866,12 @@ define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) % define amdgpu_kernel void @with_inline_asm() { ; GFX9-LABEL: define amdgpu_kernel void @with_inline_asm( -; GFX9-SAME: ) #[[ATTR6]] { +; GFX9-SAME: ) #[[ATTR3]] { ; GFX9-NEXT: call void asm sideeffect " ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_inline_asm( -; GFX10-SAME: ) 
#[[ATTR6]] { +; GFX10-SAME: ) #[[ATTR3]] { ; GFX10-NEXT: call void asm sideeffect " ; GFX10-NEXT: ret void ; @@ -880,25 +880,17 @@ define amdgpu_kernel void @with_inline_asm() { } ;. -; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR4]] = { "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR5]] = { "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR6]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" } -; GFX9: attributes #[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR2]] = { "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" 
"amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" } +; GFX9: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } ;. -; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: 
attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR4]] = { "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR5]] = { "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR6]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" } -; GFX10: attributes 
#[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" 
"amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR2]] = { "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" } +; GFX10: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll index 2b9f579e6a1839..8481cea4d7c353 100644 --- a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll +++ b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll @@ -51,7 +51,7 @@ bb5: ; preds = %bb5, %bb3 define amdgpu_kernel void @entry() { ; CHECK-LABEL: define {{[^@]+}}@entry -; CHECK-SAME: () #[[ATTR0]] { +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5) ; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr ; CHECK-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]]) @@ -63,5 +63,6 @@ define amdgpu_kernel void @entry() { ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll index c108b93766bcc8..510ee9c1a23fd2 100644 --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -27,7 +27,7 @@ define internal void @direct() { define amdgpu_kernel void @test_direct_indirect_call() { ; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call -; CHECK-SAME: () #[[ATTR1]] { +; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: call void @direct() ; CHECK-NEXT: ret void ; @@ -35,6 +35,7 @@ define amdgpu_kernel void @test_direct_indirect_call() { ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll index 908746a0c7784f..c430c41f59143b 100644 --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -42,6 +42,6 @@ attributes #0 = { "amdgpu-no-dispatch-id" } ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" } ;. 
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll index 5979f1bbacdd14..d5f45d70fb970b 100644 --- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll +++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll @@ -70,14 +70,32 @@ define void @use_everything_else() { } define amdgpu_kernel void @test_default_queue_offset_v4_0(ptr addrspace(1) %kernarg) { -; CHECK-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 -; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: call void @use_everything_else() -; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 -; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; CHECK-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; CHECK-NEXT: ret void +; V4-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 +; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { +; V4-NEXT: call void @use_everything_else() +; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 +; V4-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V4-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V4-NEXT: ret void +; +; V5-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 +; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { +; V5-NEXT: call void @use_everything_else() +; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 +; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) 
[[KERNARG]], align 8 +; V5-NEXT: ret void +; +; V6-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { +; V6-NEXT: call void @use_everything_else() +; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 +; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V6-NEXT: ret void ; call void @use_everything_else() %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -88,14 +106,32 @@ define amdgpu_kernel void @test_default_queue_offset_v4_0(ptr addrspace(1) %kern } define amdgpu_kernel void @test_default_queue_offset_v5_0(ptr addrspace(1) %kernarg) { -; CHECK-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 -; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] { -; CHECK-NEXT: call void @use_everything_else() -; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 -; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; CHECK-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; CHECK-NEXT: ret void +; V4-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 +; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] { +; V4-NEXT: call void @use_everything_else() +; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 +; V4-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V4-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V4-NEXT: ret void +; +; V5-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 
+; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { +; V5-NEXT: call void @use_everything_else() +; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 +; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V5-NEXT: ret void +; +; V6-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { +; V6-NEXT: call void @use_everything_else() +; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 +; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V6-NEXT: ret void ; call void @use_everything_else() %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -116,7 +152,7 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) % ; V4-NEXT: ret void ; ; V5-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { ; V5-NEXT: call void @use_everything_else() ; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40 @@ -125,7 +161,7 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) % ; V5-NEXT: ret void ; ; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0 -; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { ; V6-NEXT: call void @use_everything_else() ; V6-NEXT: 
[[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40 @@ -142,32 +178,14 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) % } define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %kernarg) { -; V4-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0 -; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3]] { -; V4-NEXT: call void @use_everything_else() -; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 -; V4-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V4-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V4-NEXT: ret void -; -; V5-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { -; V5-NEXT: call void @use_everything_else() -; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 -; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V5-NEXT: ret void -; -; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0 -; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { -; V6-NEXT: call void @use_everything_else() -; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 -; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V6-NEXT: ret void +; CHECK-LABEL: define 
{{[^@]+}}@test_completion_action_offset_v5_0 +; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @use_everything_else() +; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; CHECK-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; CHECK-NEXT: ret void ; call void @use_everything_else() %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -188,7 +206,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr ; V4-NEXT: ret void ; ; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { ; V5-NEXT: call void @use_everything_else() ; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 @@ -197,7 +215,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr ; V5-NEXT: ret void ; ; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0 -; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { ; V6-NEXT: call void @use_everything_else() ; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 @@ -224,7 +242,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr ; V4-NEXT: ret void ; ; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) 
#[[ATTR5:[0-9]+]] { +; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { ; V5-NEXT: call void @use_everything_else() ; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 @@ -233,7 +251,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr ; V5-NEXT: ret void ; ; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0 -; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] { +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { ; V6-NEXT: call void @use_everything_else() ; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 @@ -258,25 +276,23 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo ;. ; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; V4: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; V4: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V4: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V4: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" 
"amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V4: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V4: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ;. ; V5: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; V5: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V5: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" 
"amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V5: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V5: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V5: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ;. ; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; V6: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; V6: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V6: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V6: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V6: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" 
"uniform-work-group-size"="false" } +; V6: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ;. ; V4: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400} ;. diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll index b283a8fca8a394..a27cf3c18a70cd 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll @@ -68,6 +68,6 @@ if.end: ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll index e7a7b8a335d0d3..22e3cc4b047b1a 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll @@ -6,14 +6,14 @@ ; GCN: define amdgpu_kernel void @caller(ptr addrspace(1) nocapture %p) local_unnamed_addr #1 { ; GCN: %mul.i = fmul float %load, 1.500000e+01 -; UNSAFE: attributes #0 = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } +; UNSAFE: attributes #0 = { nounwind "uniform-work-group-size"="false" "unsafe-fp-math"="true" } ; UNSAFE: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } -; NOINFS: attributes #0 = { nounwind "amdgpu-waves-per-eu"="4,10" "no-infs-fp-math"="true" "uniform-work-group-size"="false" } -; NOINFS: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="false" } +; NOINFS: attributes #0 = { nounwind "no-infs-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } +; NOINFS: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } -; NONANS: attributes #0 = { nounwind "amdgpu-waves-per-eu"="4,10" "no-nans-fp-math"="true" "uniform-work-group-size"="false" } -; NONANS: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="false" } +; NONANS: attributes #0 = { nounwind "no-nans-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } +; NONANS: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "uniform-work-group-size"="false" 
"unsafe-fp-math"="true" } declare void @extern() #0 @@ -32,5 +32,5 @@ entry: ret void } -attributes #0 = { nounwind } +attributes #0 = { nounwind "uniform-work-group-size"="false" "unsafe-fp-math"="true"} attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll index 51e0eb6d497945..1afd31c6d45e7d 100644 --- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll @@ -98,7 +98,7 @@ define amdgpu_kernel void @kernel_128_512() #5 { define amdgpu_kernel void @kernel_512_512() #6 { ; CHECK-LABEL: define {{[^@]+}}@kernel_512_512 -; CHECK-SAME: () #[[ATTR5]] { +; CHECK-SAME: () #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: call void @default_to_128_512() ; CHECK-NEXT: call void @flat_group_512_1024() ; CHECK-NEXT: ret void @@ -111,7 +111,7 @@ define amdgpu_kernel void @kernel_512_512() #6 { ; Called from kernels with 128,256 and 64,128 => 64,256 define internal void @default_to_64_256() { ; CHECK-LABEL: define {{[^@]+}}@default_to_64_256 -; CHECK-SAME: () #[[ATTR6:[0-9]+]] { +; CHECK-SAME: () #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -121,7 +121,7 @@ define internal void @default_to_64_256() { ; this should probably be illegal. 
define amdgpu_kernel void @kernel_128_256() #3 { ; CHECK-LABEL: define {{[^@]+}}@kernel_128_256 -; CHECK-SAME: () #[[ATTR7:[0-9]+]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @default_to_64_256() ; CHECK-NEXT: ret void ; @@ -153,7 +153,7 @@ define internal void @merge_cycle_1() #3 { define amdgpu_kernel void @kernel_64_256() #7 { ; CHECK-LABEL: define {{[^@]+}}@kernel_64_256 -; CHECK-SAME: () #[[ATTR8:[0-9]+]] { +; CHECK-SAME: () #[[ATTR7]] { ; CHECK-NEXT: call void @merge_cycle_0() ; CHECK-NEXT: call void @default_captured_address() ; CHECK-NEXT: call void @externally_visible_default() @@ -188,7 +188,7 @@ define void @externally_visible_default() { ; 1,1024 -> 64,256 define internal i32 @bitcasted_function() { ; CHECK-LABEL: define {{[^@]+}}@bitcasted_function -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR7]] { ; CHECK-NEXT: ret i32 0 ; ret i32 0 @@ -207,9 +207,9 @@ attributes #7 = { "amdgpu-flat-work-group-size"="64,256" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" 
"amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-agpr" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" 
"amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" 
"uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll index eb4cf5c063d101..6a909f52082d67 100644 --- a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll @@ -117,7 +117,7 @@ define amdgpu_kernel void @kernel_2_9() #6 { define amdgpu_kernel void @kernel_9_9() #7 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_9_9 -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: call void @default_to_2_9() ; CHECK-NEXT: call void @flat_group_9_10() ; CHECK-NEXT: ret void @@ -140,7 +140,7 @@ define internal void @default_to_1_8_b() { ; this should probably be illegal. 
define amdgpu_kernel void @kernel_2_8() #4 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_2_8 -; CHECK-SAME: () #[[ATTR7:[0-9]+]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: call void @default_to_1_8_a() ; CHECK-NEXT: call void @default_to_1_8_b() ; CHECK-NEXT: ret void @@ -153,7 +153,7 @@ define amdgpu_kernel void @kernel_2_8() #4 { ; 1,2 -> 2,2 define internal void @merge_cycle_0() #1 { ; CHECK-LABEL: define internal void @merge_cycle_0 -; CHECK-SAME: () #[[ATTR5]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: call void @merge_cycle_1() ; CHECK-NEXT: ret void ; @@ -165,7 +165,7 @@ define internal void @merge_cycle_0() #1 { ; 2,8 -> 2,8 define internal void @merge_cycle_1() #4 { ; CHECK-LABEL: define internal void @merge_cycle_1 -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: call void @merge_cycle_0() ; CHECK-NEXT: ret void ; @@ -210,7 +210,7 @@ define void @externally_visible_default() { ; 1,10 -> 3,8 define internal i32 @bitcasted_function() { ; CHECK-LABEL: define internal i32 @bitcasted_function -; CHECK-SAME: () #[[ATTR10:[0-9]+]] { +; CHECK-SAME: () #[[ATTR8]] { ; CHECK-NEXT: ret i32 0 ; ret i32 0 @@ -235,7 +235,7 @@ define internal void @called_from_invalid_bounds_1() { ; Invalid range for amdgpu-waves-per-eu define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_0_8 -; CHECK-SAME: () #[[ATTR11:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @called_from_invalid_bounds_0() ; CHECK-NEXT: ret void ; @@ -246,7 +246,7 @@ define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 { ; Invalid range for amdgpu-waves-per-eu define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_1_123 -; CHECK-SAME: () #[[ATTR12:[0-9]+]] { +; CHECK-SAME: () #[[ATTR11:[0-9]+]] { ; CHECK-NEXT: call void @called_from_invalid_bounds_1() ; CHECK-NEXT: ret void ; @@ -260,7 +260,7 @@ define 
amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 { ; -> 2,10 define void @larger_group_size_implies_lower_minimum() #11 { ; CHECK-LABEL: define void @larger_group_size_implies_lower_minimum -; CHECK-SAME: () #[[ATTR13:[0-9]+]] { +; CHECK-SAME: () #[[ATTR12:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -268,7 +268,7 @@ define void @larger_group_size_implies_lower_minimum() #11 { define amdgpu_kernel void @kernel_3_6() #12 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_3_6 -; CHECK-SAME: () #[[ATTR14:[0-9]+]] { +; CHECK-SAME: () #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: call void @larger_group_size_implies_lower_minimum() ; CHECK-NEXT: ret void ; @@ -279,7 +279,7 @@ define amdgpu_kernel void @kernel_3_6() #12 { ; 3,6 -> 6,9 define internal void @refine_upper_func_3_6() #13 { ; CHECK-LABEL: define internal void @refine_upper_func_3_6 -; CHECK-SAME: () #[[ATTR15:[0-9]+]] { +; CHECK-SAME: () #[[ATTR14:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -288,7 +288,7 @@ define internal void @refine_upper_func_3_6() #13 { ; 4,8 -> 6,8 define internal void @refine_lower_func_4_8() #14 { ; CHECK-LABEL: define internal void @refine_lower_func_4_8 -; CHECK-SAME: () #[[ATTR16:[0-9]+]] { +; CHECK-SAME: () #[[ATTR15:[0-9]+]] { ; CHECK-NEXT: call void @refine_upper_func_3_6() ; CHECK-NEXT: ret void ; @@ -298,7 +298,7 @@ define internal void @refine_lower_func_4_8() #14 { define amdgpu_kernel void @kernel_foo_6_8() #15 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_foo_6_8 -; CHECK-SAME: () #[[ATTR16]] { +; CHECK-SAME: () #[[ATTR16:[0-9]+]] { ; CHECK-NEXT: call void @refine_upper_func_3_6() ; CHECK-NEXT: call void @refine_lower_func_4_8() ; CHECK-NEXT: call void @func_9_10_a() @@ -340,7 +340,7 @@ define internal void @func_9_10_a() #18 { ; 9,10 -> 9,9 define internal void @func_9_10_b() #18 { ; CHECK-LABEL: define internal void @func_9_10_b -; CHECK-SAME: () #[[ATTR20:[0-9]+]] { +; CHECK-SAME: () #[[ATTR19]] { ; CHECK-NEXT: ret void ; ret void @@ -348,7 +348,7 @@ define 
internal void @func_9_10_b() #18 { define amdgpu_kernel void @kernel_bar_8_9() #19 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_bar_8_9 -; CHECK-SAME: () #[[ATTR21:[0-9]+]] { +; CHECK-SAME: () #[[ATTR20:[0-9]+]] { ; CHECK-NEXT: call void @refine_upper_func_3_6() ; CHECK-NEXT: call void @func_5_5() ; CHECK-NEXT: call void @func_9_10_b() @@ -404,21 +404,20 @@ attributes #19 = { "amdgpu-waves-per-eu"="8,9" } ; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" 
"amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,2" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" 
"uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" 
"amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="0,8" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR14]] = { 
"amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,9" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" 
"amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" 
"amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" 
"amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR21]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" 
"uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll index 9758afa3b49912..72ae3966416f4e 100644 --- a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll @@ -7,7 +7,7 @@ @global.2 = internal addrspace(1) global %struct.foo { %struct.pluto zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 } ;. -; CHECK: @[[GLOBAL_2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(1) global [[STRUCT_FOO:%.*]] { [[STRUCT_PLUTO:%.*]] zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 } +; CHECK: @global.2 = internal addrspace(1) global %struct.foo { %struct.pluto zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 } ;. define void @hoge() { ; CHECK-LABEL: define void @hoge @@ -19,5 +19,5 @@ define void @hoge() { ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll index 8792e60bb0ca16..2850612d700817 100644 --- a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll @@ -178,7 +178,7 @@ define internal void @mutual_recursion_1(i16 %arg) { define amdgpu_kernel void @kernel_lds_recursion() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_lds_recursion( -; CHECK-SAME: ) #[[ATTR2]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] { +; CHECK-SAME: ) #[[ATTR5:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds) ] ; CHECK-NEXT: call void @mutual_recursion_0(i16 0) ; CHECK-NEXT: ret void @@ -191,13 +191,14 @@ define amdgpu_kernel void @kernel_lds_recursion() { !1 = !{i32 1, !"amdhsa_code_object_version", i32 400} ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" 
"uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" 
"amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. 
; CHECK: [[META0]] = !{i32 0, i32 1} ; CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll index 5326c9e2c12f08..b2006d1a1f302a 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll @@ -100,8 +100,13 @@ entry: } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; NO: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; NO: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" 
"amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +;. +; OW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } +;. +; CW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
; NO: [[META0]] = !{ptr @bar1, ptr @bar2} diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index fabdfff5272940..59d7fe107ee537 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -81,7 +81,7 @@ define amdgpu_kernel void @test_simple_indirect_call() { ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" 
"amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. ; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index c8e3f97f61515f..4993df7e1ba487 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -826,5 +826,5 @@ entry: ; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]] ; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind } -; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nounwind memory(read) "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nounwind memory(read) "uniform-work-group-size"="false" } attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll index 2ab48479fa1b08..ab5306bc2452da 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -7,7 +7,7 @@ @x = global i32 0 ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. define void @foo() #0 { ; CHECK-LABEL: define {{[^@]+}}@foo @@ -21,7 +21,7 @@ define void @foo() #0 { define amdgpu_kernel void @kernel1() #1 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: ret void ; @@ -31,6 +31,5 @@ define amdgpu_kernel void @kernel1() #1 { attributes #0 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll index cc58d34a8b255b..da0234c90363d3 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -2,8 +2,8 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor < %s | FileCheck %s ;. -; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr null -; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @G1 = global ptr null +; CHECK: @G2 = global i32 0 ;. define weak void @weak() { ; CHECK-LABEL: define {{[^@]+}}@weak @@ -87,7 +87,7 @@ define internal void @internal2() { define amdgpu_kernel void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 -; CHECK-SAME: () #[[ATTR3]] { +; CHECK-SAME: () #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: call void @internal2() ; CHECK-NEXT: ret void ; @@ -97,8 +97,9 @@ define amdgpu_kernel void @kernel2() #0 { attributes #0 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" 
"amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll index 33298bde89b975..9fe753fec0f913 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -7,7 +7,7 @@ @x = global i32 0 ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. define void @func1() #0 { ; CHECK-LABEL: define {{[^@]+}}@func1 @@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 { attributes #2 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll index 6fe85cd7c9e654..f0e0df00feff6b 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -7,7 +7,7 @@ @x = global i32 0 ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. define void @func() #0 { ; CHECK-LABEL: define {{[^@]+}}@func @@ -31,7 +31,7 @@ define amdgpu_kernel void @kernel1() #1 { define amdgpu_kernel void @kernel2() #2 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 -; CHECK-SAME: () #[[ATTR2:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @func() ; CHECK-NEXT: ret void ; @@ -41,7 +41,6 @@ define amdgpu_kernel void @kernel2() #2 { attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" 
"amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll index f6ab402e012325..de83f91fce261d 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll @@ -5,7 +5,7 @@ ; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. define void @func() #0 { ; CHECK-LABEL: define {{[^@]+}}@func @@ -52,8 +52,8 @@ attributes #0 = { nounwind } attributes #1 = { "uniform-work-group-size"="false" } attributes #2 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { nounwind "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size"="true" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll index 37e3376ef9d500..dc19f4d879e86d 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 { attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll index 7f83686bc756e8..51f060a342fa1e 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -3,7 +3,7 @@ @x = global i32 0 ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. 
define void @func1() { ; CHECK-LABEL: define {{[^@]+}}@func1 @@ -49,7 +49,7 @@ define void @func3() { define amdgpu_kernel void @kernel3() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel3 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @func2() ; CHECK-NEXT: call void @func3() ; CHECK-NEXT: ret void @@ -61,6 +61,5 @@ define amdgpu_kernel void @kernel3() #0 { attributes #0 = { "uniform-work-group-size"="false" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" 
"amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/ARM/dagcombine-ld-op-st.ll b/llvm/test/CodeGen/ARM/dagcombine-ld-op-st.ll new file mode 100644 index 00000000000000..efdfa10f7ca07f --- /dev/null +++ b/llvm/test/CodeGen/ARM/dagcombine-ld-op-st.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple armv7 -O1 | FileCheck %s -check-prefix=CHECK-LE-NORMAL +; RUN: llc < %s -mtriple armv7 -O1 -combiner-reduce-load-op-store-width-force-narrowing-profitable=1 | FileCheck %s -check-prefix=CHECK-LE-NARROW +; RUN: llc < %s -mtriple armv7eb -O1 | FileCheck %s -check-prefix=CHECK-BE-NORMAL +; RUN: llc < %s -mtriple armv7eb -O1 -combiner-reduce-load-op-store-width-force-narrowing-profitable=1 | FileCheck %s -check-prefix=CHECK-BE-NARROW + +; This is a reproducer for a bug when DAGCombiner::ReduceLoadOpStoreWidth +; would end up narrowing the load-op-store sequence into this SDNode sequence +; for little-endian +; +; t18: i32,ch = load<(load (s32) from %ir.p1 + 8, align 8)> t0, t17, undef:i32 +; t20: i32 = or t18, Constant:i32<65534> +; t21: ch = store<(store (s32) into %ir.p1 + 8, align 8)> t18:1, t20, t17, undef:i32 +; +; This was wrong since it accesses memory above %ir.p1+9 which is outside the +; "store size" for the original store. +; +; For big-endian we used to hit an assertion due to passing a negative offset +; to getMemBasePlusOffset (at least after commit 3e1b55cafc95d4ef4, while +; before that commit we got load/store instructions that accessed memory at a +; negative offset from %p1). 
+; +define i16 @test(ptr %p1) { +; CHECK-LE-NORMAL-LABEL: test: +; CHECK-LE-NORMAL: @ %bb.0: @ %entry +; CHECK-LE-NORMAL-NEXT: ldrh r1, [r0, #8] +; CHECK-LE-NORMAL-NEXT: movw r2, #65534 +; CHECK-LE-NORMAL-NEXT: orr r1, r1, r2 +; CHECK-LE-NORMAL-NEXT: strh r1, [r0, #8] +; CHECK-LE-NORMAL-NEXT: mov r0, #0 +; CHECK-LE-NORMAL-NEXT: bx lr +; +; CHECK-LE-NARROW-LABEL: test: +; CHECK-LE-NARROW: @ %bb.0: @ %entry +; CHECK-LE-NARROW-NEXT: ldrh r1, [r0, #8] +; CHECK-LE-NARROW-NEXT: movw r2, #65534 +; CHECK-LE-NARROW-NEXT: orr r1, r1, r2 +; CHECK-LE-NARROW-NEXT: strh r1, [r0, #8] +; CHECK-LE-NARROW-NEXT: mov r0, #0 +; CHECK-LE-NARROW-NEXT: bx lr +; +; CHECK-BE-NORMAL-LABEL: test: +; CHECK-BE-NORMAL: @ %bb.0: @ %entry +; CHECK-BE-NORMAL-NEXT: ldrh r1, [r0] +; CHECK-BE-NORMAL-NEXT: movw r2, #65534 +; CHECK-BE-NORMAL-NEXT: orr r1, r1, r2 +; CHECK-BE-NORMAL-NEXT: strh r1, [r0] +; CHECK-BE-NORMAL-NEXT: mov r0, #0 +; CHECK-BE-NORMAL-NEXT: bx lr +; +; CHECK-BE-NARROW-LABEL: test: +; CHECK-BE-NARROW: @ %bb.0: @ %entry +; CHECK-BE-NARROW-NEXT: ldrh r1, [r0] +; CHECK-BE-NARROW-NEXT: movw r2, #65534 +; CHECK-BE-NARROW-NEXT: orr r1, r1, r2 +; CHECK-BE-NARROW-NEXT: strh r1, [r0] +; CHECK-BE-NARROW-NEXT: mov r0, #0 +; CHECK-BE-NARROW-NEXT: bx lr +entry: + %load = load i80, ptr %p1, align 32 + %mask = shl i80 -1, 65 + %op = or i80 %load, %mask + store i80 %op, ptr %p1, align 32 + ret i16 0 +} + diff --git a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll index 156a34db827456..77fe1783bb5d5e 100644 --- a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll +++ b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -riscv-force-enable-global-merge-external-globals \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=SMALL-DATA -; RUN: llc 
-mtriple=riscv64 -global-merge-min-data-size=0 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=SMALL-DATA +; RUN: llc -mtriple=riscv64 -global-merge-min-data-size=0 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=MINSIZE @ig1 = internal global i32 0, align 4 diff --git a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll index e41f14d394b7ca..c29749c17a5b5c 100644 --- a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll +++ b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -riscv-force-enable-global-merge-external-globals \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=SMALL-DATA -; RUN: llc -mtriple=riscv64 -global-merge-min-data-size=5 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=SMALL-DATA +; RUN: llc -mtriple=riscv64 -global-merge-min-data-size=5 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=MINSIZE @ig1 = internal global i32 0, align 4 diff --git a/llvm/test/CodeGen/RISCV/global-merge-minsize.ll b/llvm/test/CodeGen/RISCV/global-merge-minsize.ll index 1d65d9d1732ba3..915dde388cffdc 100644 --- a/llvm/test/CodeGen/RISCV/global-merge-minsize.ll +++ b/llvm/test/CodeGen/RISCV/global-merge-minsize.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -riscv-force-enable-global-merge-external-globals \ +; RUN: llc -mtriple=riscv32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32 ; RUN: llc -mtriple=riscv32 
-global-merge-min-data-size=5 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32-MINSIZE +; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32-MINSIZE @ig1 = internal global i32 0, align 4 @ig2 = internal global i32 0, align 4 diff --git a/llvm/test/CodeGen/RISCV/global-merge-offset.ll b/llvm/test/CodeGen/RISCV/global-merge-offset.ll index bb8264ee438545..c1074bc8ca97ed 100644 --- a/llvm/test/CodeGen/RISCV/global-merge-offset.ll +++ b/llvm/test/CodeGen/RISCV/global-merge-offset.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: sed 's/ArrSize/100/g' %s | llc -mtriple=riscv32 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs | FileCheck %s +; RUN: -verify-machineinstrs | FileCheck %s ; RUN: sed 's/ArrSize/100/g' %s | llc -mtriple=riscv64 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs | FileCheck %s +; RUN: -verify-machineinstrs | FileCheck %s ; RUN: sed 's/ArrSize/101/g' %s | llc -mtriple=riscv32 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG +; RUN: -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG ; RUN: sed 's/ArrSize/101/g' %s | llc -mtriple=riscv64 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG +; RUN: -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG ; This test demonstrates that the MaxOffset is set correctly for RISC-V by ; constructing an input that is at the limit and comparing. 
diff --git a/llvm/test/CodeGen/RISCV/global-merge.ll b/llvm/test/CodeGen/RISCV/global-merge.ll index 327a6b54f4be25..31b3aa81b58dd9 100644 --- a/llvm/test/CodeGen/RISCV/global-merge.ll +++ b/llvm/test/CodeGen/RISCV/global-merge.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -global-merge-on-external=false \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -global-merge-on-external=false \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=CHECK-WEXTERN %s -; RUN: llc -mtriple=riscv64 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=CHECK-WEXTERN %s @ig1 = internal global i32 0, align 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 1c6e1a37fa8af5..ebcea741a2e8bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -1021,3 +1021,55 @@ define <8 x i32> @shuffle_repeat4_singlesrc_e32(<8 x i32> %v) { %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> ret <8 x i32> %out } + +define <8 x i32> @shuffle_zipeven_v8i32(<8 x i32> %v1, <8 x i32> %v2) { +; CHECK-LABEL: shuffle_zipeven_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v1, 
<8 x i32> %v2, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i32> @shuffle_zipodd_v8i32(<8 x i32> %v1, <8 x i32> %v2) { +; CHECK-LABEL: shuffle_zipodd_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v10, v8, 1, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> + ret <8 x i32> %out +} + +define <16 x i64> @shuffle_zipeven_v16i64(<16 x i64> %v1, <16 x i64> %v2) { +; CHECK-LABEL: shuffle_zipeven_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 11 +; CHECK-NEXT: addi a0, a0, -1366 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t +; CHECK-NEXT: ret + %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> + ret <16 x i64> %out +} + +define <16 x i64> @shuffle_zipodd_v16i64(<16 x i64> %v1, <16 x i64> %v2) { +; CHECK-LABEL: shuffle_zipodd_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v16, v8, 1, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> + ret <16 x i64> %out +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 0215e6a80d09a4..e13482d23a26f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -982,6 +982,107 @@ define @vnsrl_wi( %a, %b ret %2 } +define @vnsrl_wx( %a, %b, iXLen %c, iXLen %vl) { +; NOVLOPT-LABEL: vnsrl_wx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsrl.wx v11, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsrl_wx: +; 
VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLOPT-NEXT: vnsrl.wx v11, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsrl.nxv4i16.nxv4i32( poison, %a, iXLen %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vnsrl_wv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vnsrl_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsrl.wv v12, v8, v11 +; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsrl_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLOPT-NEXT: vnsrl.wv v12, v8, v11 +; VLOPT-NEXT: vadd.vv v8, v12, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsrl.nxv4i16.nxv4i32.nxv4i16( poison, %a, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vnsra_wi( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vnsra_wi: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsra.wi v11, v8, 5 +; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsra_wi: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLOPT-NEXT: vnsra.wi v11, v8, 5 +; VLOPT-NEXT: vadd.vv v8, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsra.nxv4i16.nxv4i32( poison, %a, iXLen 5, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vnsra_wx( %a, %b, iXLen %c, iXLen %vl) { +; NOVLOPT-LABEL: vnsra_wx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsra.wx v11, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsra_wx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e16, m1, 
ta, ma +; VLOPT-NEXT: vnsra.wx v11, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsra.nxv4i16.nxv4i32( poison, %a, iXLen %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vnsra_wv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vnsra_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsra.wv v12, v8, v11 +; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsra_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLOPT-NEXT: vnsra.wv v12, v8, v11 +; VLOPT-NEXT: vadd.vv v8, v12, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsra.nxv4i16.nxv4i32.nxv4i16( poison, %a, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + + define @vminu_vv( %a, %b, iXLen %vl) { ; NOVLOPT-LABEL: vminu_vv: ; NOVLOPT: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir index 1071ee53610854..814894f4acea3c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir @@ -392,13 +392,12 @@ body: | %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVZEXT_VF8_M1 $noreg, %x, 1, 6 /* e64 */, 0 ... 
-# TODO: VNSRL_WV isn't yet a supported instruction for VL reduction --- name: vnop_wv_vd body: | bb.0: ; CHECK-LABEL: name: vnop_wv_vd - ; CHECK: early-clobber %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: early-clobber %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 @@ -483,3 +482,64 @@ body: | %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 ... +--- +name: vseN_v +body: | + bb.0: + ; CHECK-LABEL: name: vseN_v + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSE8_V_M1 %x, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + PseudoVSE8_V_M1 %x, $noreg, 1, 3 /* e8 */ +... +--- +name: vseN_v_incompatible_eew +body: | + bb.0: + ; CHECK-LABEL: name: vseN_v_incompatible_eew + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSE8_V_M1 %x, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 + PseudoVSE8_V_M1 %x, $noreg, 1, 3 /* e8 */ +... +--- +name: vseN_v_incompatible_emul +body: | + bb.0: + ; CHECK-LABEL: name: vseN_v_incompatible_emul + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSE8_V_MF2 %x, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + PseudoVSE8_V_MF2 %x, $noreg, 1, 3 /* e8 */ +... 
+--- +name: vsseN_v +body: | + bb.0: + ; CHECK-LABEL: name: vsseN_v + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSSE8_V_M1 %x, $noreg, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + PseudoVSSE8_V_M1 %x, $noreg, $noreg, 1, 3 /* e8 */ +... +--- +name: vsseN_v_incompatible_eew +body: | + bb.0: + ; CHECK-LABEL: name: vsseN_v_incompatible_eew + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSSE8_V_M1 %x, $noreg, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 + PseudoVSSE8_V_M1 %x, $noreg, $noreg, 1, 3 /* e8 */ +... +--- +name: vsseN_v_incompatible_emul +body: | + bb.0: + ; CHECK-LABEL: name: vsseN_v_incompatible_emul + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSSE8_V_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + PseudoVSSE8_V_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */ +... 
+ diff --git a/llvm/test/CodeGen/SPIRV/constant/local-arbitrary-width-integers-constants-type-promotion.ll b/llvm/test/CodeGen/SPIRV/constant/local-arbitrary-width-integers-constants-type-promotion.ll index 06ab469e700778..432fbe0a22975d 100644 --- a/llvm/test/CodeGen/SPIRV/constant/local-arbitrary-width-integers-constants-type-promotion.ll +++ b/llvm/test/CodeGen/SPIRV/constant/local-arbitrary-width-integers-constants-type-promotion.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + define i4 @getConstantI4() { ret i4 2 ; i4 => OpTypeInt 8 } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp-simple-hierarchy.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp-simple-hierarchy.ll index d5a8fb3e7baafa..368c5d4a32980e 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp-simple-hierarchy.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp-simple-hierarchy.ll @@ -1,6 +1,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-DAG: OpName %[[I9:.*]] "_ZN13BaseIncrement9incrementEPi" ; CHECK-DAG: OpName %[[I29:.*]] "_ZN12IncrementBy29incrementEPi" ; CHECK-DAG: OpName %[[I49:.*]] "_ZN12IncrementBy49incrementEPi" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll index 6aeb29df9f7bd4..75ad382f05ffd5 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll +++ 
b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll @@ -1,6 +1,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-DAG: OpCapability FunctionPointersINTEL ; CHECK-DAG: OpCapability Int64 ; CHECK: OpExtension "SPV_INTEL_function_pointers" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll index b238b07bbc04ec..d38de216c22301 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll @@ -5,6 +5,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - --spirv-ext=+SPV_INTEL_function_pointers | FileCheck %s ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; Running with -verify-machineinstrs would lead to "Reading virtual register without a def" ; error, because OpConstantFunctionPointerINTEL forward-refers to a function definition. 
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_cooperative_matrix/cooperative_matrix.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_cooperative_matrix/cooperative_matrix.ll index e290c1eaeabad8..45e71d44fbf60d 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_cooperative_matrix/cooperative_matrix.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_cooperative_matrix/cooperative_matrix.ll @@ -2,6 +2,9 @@ ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_cooperative_matrix %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_cooperative_matrix %s -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-ERROR: LLVM ERROR: OpTypeCooperativeMatrixKHR type requires the following SPIR-V extension: SPV_KHR_cooperative_matrix ; CHECK: OpCapability CooperativeMatrixKHR diff --git a/llvm/test/CodeGen/SPIRV/instructions/vector-shuffle.ll b/llvm/test/CodeGen/SPIRV/instructions/vector-shuffle.ll index e22e02f76d01d5..9c35d4ee9b1c5d 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/vector-shuffle.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/vector-shuffle.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-DAG: OpName [[SHFv4:%.+]] "shuffle_v4" ; CHECK-DAG: OpName [[INSv4:%.+]] "insert_v4" ; CHECK-DAG: OpName [[EXTv4:%.+]] "extract_v4" diff --git a/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll b/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll index 485da19fd6da0a..d897ccd02ed964 100644 --- a/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll +++ b/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; TODO: This test currently fails with 
LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK: OpDecorate %[[#SpecConst:]] SpecId 0 ; CHECK: %[[#SpecConst]] = OpSpecConstant %[[#]] 70 ; CHECK: %[[#]] = OpPhi %[[#]] %[[#]] %[[#]] %[[#SpecConst]] %[[#]] diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll index fe71ce862dfc30..481bad9a26b7bc 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll @@ -7,6 +7,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_KHR_bit_instructions -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_KHR_bit_instructions -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-SPIRV: OpCapability BitInstructions ; CHECK-SPIRV: OpExtension "SPV_KHR_bit_instructions" ; CHECK-SPIRV: %[[#CharTy:]] = OpTypeInt 8 0 diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse_i2.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse_i2.ll index 1840ad5411f477..9d6a5e0dc0d324 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse_i2.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse_i2.ll @@ -7,6 +7,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_KHR_bit_instructions -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_KHR_bit_instructions -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-SPIRV: OpCapability BitInstructions ; CHECK-SPIRV: OpExtension "SPV_KHR_bit_instructions" ; CHECK-SPIRV: %[[#CharTy:]] = OpTypeInt 8 0 diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy.ll 
b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy.ll index d2d5483ed36185..11b613b956e06d 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-SPIRV-DAG: %[[#]] = OpGroupAsyncCopy %[[#]] %[[#Scope:]] ; CHECK-SPIRV-DAG: %[[#Scope]] = OpConstant %[[#]] diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpVectorExtractDynamic.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpVectorExtractDynamic.ll index c770ab7f8fb5b8..03731b6d67565f 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpVectorExtractDynamic.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpVectorExtractDynamic.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-SPIRV: OpName %[[#vec:]] "vec" ; CHECK-SPIRV: OpName %[[#index:]] "index" ; CHECK-SPIRV: OpName %[[#res:]] "res" diff --git a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll index 437e161864eca5..79c2824c3dde1f 100644 --- a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll +++ b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll @@ -8,6 +8,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-DAG: OpName %[[#Struct:]] "struct" ; CHECK-DAG: OpName %[[#Arg:]] "arg" ; CHECK-DAG: OpName %[[#QArg:]] "qarg" diff --git a/llvm/test/CodeGen/X86/store_op_load_fold.ll b/llvm/test/CodeGen/X86/store_op_load_fold.ll index 
bd7068535eabcc..2915d1a7d7ae12 100644 --- a/llvm/test/CodeGen/X86/store_op_load_fold.ll +++ b/llvm/test/CodeGen/X86/store_op_load_fold.ll @@ -23,7 +23,10 @@ define void @test2() nounwind uwtable ssp { ; CHECK-LABEL: test2: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl L_s2$non_lazy_ptr, %eax -; CHECK-NEXT: andl $-262144, 20(%eax) ## imm = 0xFFFC0000 +; CHECK-NEXT: movzbl 22(%eax), %ecx +; CHECK-NEXT: andl $-4, %ecx +; CHECK-NEXT: movb %cl, 22(%eax) +; CHECK-NEXT: movw $0, 20(%eax) ; CHECK-NEXT: retl %bf.load35 = load i56, ptr getelementptr inbounds (%struct.S2, ptr @s2, i32 0, i32 5), align 16 %bf.clear36 = and i56 %bf.load35, -1125895611875329 diff --git a/llvm/test/CodeGen/Xtensa/vararg.ll b/llvm/test/CodeGen/Xtensa/vararg.ll new file mode 100644 index 00000000000000..d85752e11fa6bb --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/vararg.ll @@ -0,0 +1,532 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s --mtriple=xtensa | FileCheck %s + +define void @vararg(...) { +; CHECK-LABEL: vararg: +; CHECK: .cfi_startproc +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: s32i a7, a1, 20 +; CHECK-NEXT: s32i a6, a1, 16 +; CHECK-NEXT: s32i a5, a1, 12 +; CHECK-NEXT: s32i a4, a1, 8 +; CHECK-NEXT: s32i a3, a1, 4 +; CHECK-NEXT: s32i a2, a1, 0 +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + ret void +} + +declare void @llvm.va_start(ptr) nounwind +declare void @llvm.va_end(ptr) nounwind +declare void @f_i32(i32) nounwind +declare void @f_i64(i64) nounwind + +define void @vararg_fixed_0(...) 
nounwind { +; CHECK-LABEL: vararg_fixed_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -48 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 32 +; CHECK-NEXT: s32i a6, a1, 28 +; CHECK-NEXT: s32i a5, a1, 24 +; CHECK-NEXT: s32i a4, a1, 20 +; CHECK-NEXT: s32i a3, a1, 16 +; CHECK-NEXT: s32i a2, a1, 12 +; CHECK-NEXT: addi a10, a1, 12 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 48 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 4 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 40 +; CHECK-NEXT: .LBB1_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a2, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI1_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB1_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB1_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB1_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB1_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB1_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB1_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB1_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB1_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; 
CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI1_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 48 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %0 = va_arg ptr %list, i32 + call void @f_i32(i32 %0) + %1 = va_arg ptr %list, i64 + call void @f_i64(i64 %1) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_1(i32 %a1, ...) nounwind { +; CHECK-LABEL: vararg_fixed_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 28 +; CHECK-NEXT: s32i a6, a1, 24 +; CHECK-NEXT: s32i a5, a1, 20 +; CHECK-NEXT: s32i a4, a1, 16 +; CHECK-NEXT: s32i a3, a1, 12 +; CHECK-NEXT: addi a10, a1, 12 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 8 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB2_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 44 +; CHECK-NEXT: .LBB2_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI2_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB2_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB2_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB2_8 +; CHECK-NEXT: # %bb.7: 
+; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB2_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB2_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB2_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB2_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB2_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI2_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_4(i32 %a1, i32 %a2, i32 %a3, i32 %a4, ...) 
nounwind { +; CHECK-LABEL: vararg_fixed_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 16 +; CHECK-NEXT: s32i a6, a1, 12 +; CHECK-NEXT: addi a10, a1, 12 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 20 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB3_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB3_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 56 +; CHECK-NEXT: .LBB3_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI3_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB3_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB3_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB3_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB3_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB3_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB3_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB3_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB3_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI3_1 +; 
CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_5(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind { +; CHECK-LABEL: vararg_fixed_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 12 +; CHECK-NEXT: addi a9, a1, 12 +; CHECK-NEXT: s32i a9, a1, 4 +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a12, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB4_2: # %entry +; CHECK-NEXT: blt a12, a12, .LBB4_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: or a9, a12, a12 +; CHECK-NEXT: j .LBB4_5 +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: movi a9, 60 +; CHECK-NEXT: .LBB4_5: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI4_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB4_7 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB4_7: # %entry +; CHECK-NEXT: bge a12, a10, .LBB4_9 +; CHECK-NEXT: # %bb.8: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB4_9: # %entry +; 
CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB4_11 +; CHECK-NEXT: # %bb.10: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB4_11: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB4_13 +; CHECK-NEXT: # %bb.12: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB4_13: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI4_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_6(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, ...) 
nounwind { +; CHECK-LABEL: vararg_fixed_6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: addi a10, a1, 0 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 36 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB5_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB5_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB5_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 72 +; CHECK-NEXT: .LBB5_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI5_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB5_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB5_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB5_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB5_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB5_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB5_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB5_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB5_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI5_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded 
Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_7(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, ...) nounwind { +; CHECK-LABEL: vararg_fixed_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: addi a10, a1, 0 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 20 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 36 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB6_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB6_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB6_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 72 +; CHECK-NEXT: .LBB6_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI6_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB6_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB6_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB6_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB6_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, 
a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB6_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB6_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB6_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB6_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI6_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} diff --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s index 512906e8a669ae..b0dba632e6ec5c 100644 --- a/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s +++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme 2>&1 < %s| FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,-neon 2>&1 < %s| FileCheck %s // ------------------------------------------------------------------------- // // Check FABD is illegal in streaming mode @@ -12,7 +12,7 @@ fabd s0, s1, s2 // Check non-scalar v8.6a BFloat16 instructions are illegal in streaming mode bfcvtn v5.4h, v5.4s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: bf16 neon // CHECK-NEXT: bfcvtn v5.4h, v5.4s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME/streaming-sve-feature.s 
b/llvm/test/MC/AArch64/SME/streaming-sve-feature.s index 90b1b6bd64ad6c..e4fdd09889f370 100644 --- a/llvm/test/MC/AArch64/SME/streaming-sve-feature.s +++ b/llvm/test/MC/AArch64/SME/streaming-sve-feature.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -triple=aarch64 -mattr=+sme < %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -triple=aarch64 -mattr=-neon,+sme < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme,-neon < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR // Verify NEON is disabled when targeting streaming mode, if it's not // explicitly requested. diff --git a/llvm/test/MC/AArch64/armv8a-fpmul.s b/llvm/test/MC/AArch64/armv8a-fpmul.s index 6f06ad52e0e6bd..3a451a91e53d80 100644 --- a/llvm/test/MC/AArch64/armv8a-fpmul.s +++ b/llvm/test/MC/AArch64/armv8a-fpmul.s @@ -3,7 +3,7 @@ // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOFP16FML // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,+fullfp16 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOFP16FML // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,+fp16fml,-fullfp16 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOFP16FML -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-neon,+fp16fml < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-NEON +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,+fp16fml,-neon < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-FP16FML-NOR-NEON // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-neon < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-FP16FML-NOR-NEON //------------------------------------------------------------------------------ @@ -37,15 +37,6 @@ FMLSL2 V0.4S, V1.4H, V2.4H //CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}} //CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}} -//CHECK-NO-NEON: 
error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} - //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} @@ -111,23 +102,6 @@ fmlsl2 V0.4s, v1.4h, v2.h[5] //CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}} //CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} - //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} diff --git 
a/llvm/test/MC/M68k/Control/bsr.s b/llvm/test/MC/M68k/Control/bsr.s index a70c7fb9a96edf..6b7a7d5c6ecd94 100644 --- a/llvm/test/MC/M68k/Control/bsr.s +++ b/llvm/test/MC/M68k/Control/bsr.s @@ -8,10 +8,6 @@ ; CHECK-SAME: encoding: [0x61,0x00,A,A] ; CHECK: fixup A - offset: 2, value: .LBB0_2, kind: FK_PCRel_2 bsr.w .LBB0_2 - ; CHECK: bsr.l .LBB0_3 - ; CHECK-SAME: encoding: [0x61,0xff,A,A,A,A] - ; CHECK: fixup A - offset: 2, value: .LBB0_3, kind: FK_PCRel_4 - bsr.l .LBB0_3 .LBB0_1: ; CHECK: add.l #0, %d0 ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x00] @@ -26,10 +22,3 @@ ; CHECK: rts ; CHECK-SAME: encoding: [0x4e,0x75] rts -.LBB0_3: - ; CHECK: add.l #1, %d0 - ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x01] - add.l #1, %d0 - ; CHECK: rts - ; CHECK-SAME: encoding: [0x4e,0x75] - rts diff --git a/llvm/test/MC/M68k/Control/bsr32.s b/llvm/test/MC/M68k/Control/bsr32.s new file mode 100644 index 00000000000000..58ed4a29a35318 --- /dev/null +++ b/llvm/test/MC/M68k/Control/bsr32.s @@ -0,0 +1,35 @@ +; RUN: llvm-mc -triple=m68k --mcpu=M68020 -show-encoding %s | FileCheck %s + + ; CHECK: bsr.b .LBB0_1 + ; CHECK-SAME: encoding: [0x61,A] + ; CHECK: fixup A - offset: 1, value: .LBB0_1-1, kind: FK_PCRel_1 + bsr.b .LBB0_1 + ; CHECK: bsr.w .LBB0_2 + ; CHECK-SAME: encoding: [0x61,0x00,A,A] + ; CHECK: fixup A - offset: 2, value: .LBB0_2, kind: FK_PCRel_2 + bsr.w .LBB0_2 + ; CHECK: bsr.l .LBB0_3 + ; CHECK-SAME: encoding: [0x61,0xff,A,A,A,A] + ; CHECK: fixup A - offset: 2, value: .LBB0_3, kind: FK_PCRel_4 + bsr.l .LBB0_3 +.LBB0_1: + ; CHECK: add.l #0, %d0 + ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x00] + add.l #0, %d0 + ; CHECK: rts + ; CHECK-SAME: encoding: [0x4e,0x75] + rts +.LBB0_2: + ; CHECK: add.l #1, %d0 + ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x01] + add.l #1, %d0 + ; CHECK: rts + ; CHECK-SAME: encoding: [0x4e,0x75] + rts +.LBB0_3: + ; CHECK: add.l #1, %d0 + ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x01] + add.l #1, %d0 + ; CHECK: rts + ; 
CHECK-SAME: encoding: [0x4e,0x75] + rts diff --git a/llvm/test/MC/M68k/Relaxations/PIC/branch.s b/llvm/test/MC/M68k/Relaxations/PIC/branch.s new file mode 100644 index 00000000000000..5035ca3ce50a7b --- /dev/null +++ b/llvm/test/MC/M68k/Relaxations/PIC/branch.s @@ -0,0 +1,38 @@ +; RUN: llvm-mc -triple=m68k -motorola-integers -filetype=obj --position-independent < %s \ +; RUN: | llvm-objdump -d - | FileCheck %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; CHECK-LABEL: : +TIGHT: + ; CHECK: bra $7f + ; INSTR: bra .LBB0_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 + bra .LBB0_2 + .space 0x7F ; i8::MAX +.LBB0_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED: + ; CHECK: bra $82 + ; INSTR: bra .LBB1_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 + bra .LBB1_2 + .space 0x80 ; Greater than i8::MAX +.LBB1_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +ZERO: + ; CHECK: bra $2 + ; INSTR: bra .LBB3_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 + bra .LBB3_1 +.LBB3_1: + add.l #0, %d0 + rts + + diff --git a/llvm/test/MC/M68k/Relaxations/PIC/branch32.s b/llvm/test/MC/M68k/Relaxations/PIC/branch32.s new file mode 100644 index 00000000000000..7e40f055aff13d --- /dev/null +++ b/llvm/test/MC/M68k/Relaxations/PIC/branch32.s @@ -0,0 +1,53 @@ +; RUN: llvm-mc -triple=m68k --mcpu=M68020 -motorola-integers -filetype=obj --position-independent < %s \ +; RUN: | llvm-objdump -d - | FileCheck %s +; RUN: llvm-mc -triple m68k --mcpu=M68020 -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; Branch relocations are relaxed as part of object layout, so -show-encoding still +; shows them as 1-byte relocactions + +; CHECK-LABEL: : +TIGHT: + ; CHECK: bra $7f + ; INSTR: bra .LBB0_2 ; encoding: [0x60,A] + ; 
FIXUP: fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 + bra .LBB0_2 + .space 0x7F ; i8::MAX +.LBB0_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED: + ; CHECK: bra $82 + ; INSTR: bra .LBB1_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 + bra .LBB1_2 + .space 0x80 ; Greater than i8::MAX +.LBB1_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED_32: + ; CHECK: bra $ff + ; CHECK-NEXT: 00 00 + ; CHECK-NEXT: 80 04 + ; INSTR: bra .LBB2_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 + bra .LBB2_1 + .space 0x8000 ; Greater than i16::MAX. +.LBB2_1: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +ZERO: + ; CHECK: bra $2 + ; INSTR: bra .LBB3_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 + bra .LBB3_1 +.LBB3_1: + add.l #0, %d0 + rts + diff --git a/llvm/test/MC/M68k/Relaxations/PIC/bsr.s b/llvm/test/MC/M68k/Relaxations/PIC/bsr.s new file mode 100644 index 00000000000000..67adcd545450e9 --- /dev/null +++ b/llvm/test/MC/M68k/Relaxations/PIC/bsr.s @@ -0,0 +1,51 @@ +; RUN: llvm-mc -triple=m68k -motorola-integers -filetype=obj --position-independent < %s \ +; RUN: | llvm-objdump -d - | FileCheck %s + +; CHECK-LABEL: : +TIGHT: + ; CHECK: bsr.w $7a + bsr.w .LBB0_2 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 +.LBB0_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED: + ; CHECK: bsr.b $82 + bsr.b .LBB1_2 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 +.LBB1_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +ZERO: + ; CHECK: bsr.w $2 + bsr.w .LBB2_1 +.LBB2_1: + add.l #0, %d0 + rts diff --git a/llvm/test/MC/M68k/Relaxations/branch32.s 
b/llvm/test/MC/M68k/Relaxations/branch32.s new file mode 100644 index 00000000000000..ba5c371dada8de --- /dev/null +++ b/llvm/test/MC/M68k/Relaxations/branch32.s @@ -0,0 +1,50 @@ +; RUN: llvm-mc -triple=m68k --mcpu=M68020 -motorola-integers -filetype=obj < %s \ +; RUN: | llvm-objdump -d - | FileCheck %s +; RUN: llvm-mc -triple m68k --mcpu=M68020 -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; CHECK-LABEL: : +TIGHT: + ; CHECK: bra $7f + ; INSTR: bra .LBB0_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 + bra .LBB0_2 + .space 0x7F ; i8::MAX +.LBB0_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED: + ; CHECK: bra $82 + ; INSTR: bra .LBB1_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 + bra .LBB1_2 + .space 0x80 ; Greater than i8::MAX +.LBB1_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED_32: + ; CHECK: bra $ff + ; CHECK-NEXT: 00 00 + ; CHECK-NEXT: 80 04 + ; INSTR: bra .LBB2_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 + bra .LBB2_1 + .space 0x8000 ; Greater than i16::MAX. 
+.LBB2_1: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +ZERO: + ; CHECK: bra $2 + ; INSTR: bra .LBB3_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 + bra .LBB3_1 +.LBB3_1: + add.l #0, %d0 + rts + diff --git a/llvm/test/MC/M68k/Relocations/PIC/data-abs.s b/llvm/test/MC/M68k/Relocations/PIC/data-abs.s new file mode 100644 index 00000000000000..0df09c8cb5ccb5 --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/data-abs.s @@ -0,0 +1,26 @@ +; RUN: llvm-mc -triple m68k -filetype=obj --position-independent %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_32 dst 0x0 +; INSTR: move.l dst, %d0 +; FIXUP: fixup A - offset: 2, value: dst, kind: FK_Data_4 +move.l dst, %d0 + +; Relocating immediate values + +; RELOC: R_68K_8 str8 0x0 +; INSTR: move.b #str8, (4,%sp) +; FIXUP: fixup A - offset: 3, value: str8, kind: FK_Data_1 +move.b #str8, (4,%sp) + +; RELOC: R_68K_16 str16 0x0 +; INSTR: move.w #str16, (4,%sp) +; FIXUP: fixup A - offset: 2, value: str16, kind: FK_Data_2 +move.w #str16, (4,%sp) + +; RELOC: R_68K_32 str32 0x0 +; INSTR: move.l #str32, (4,%sp) +; FIXUP: fixup A - offset: 2, value: str32, kind: FK_Data_4 +move.l #str32, (4,%sp) diff --git a/llvm/test/MC/M68k/Relocations/PIC/data-gotoff.s b/llvm/test/MC/M68k/Relocations/PIC/data-gotoff.s new file mode 100644 index 00000000000000..ca1ce52de1ca75 --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/data-gotoff.s @@ -0,0 +1,19 @@ +; RUN: llvm-mc -triple m68k -filetype=obj --position-independent %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_GOTOFF8 dst1 0x0 +; INSTR: move.l (dst1@GOTOFF,%a5,%d0), %d0 +; FIXUP: fixup A - 
offset: 3, value: dst1@GOTOFF, kind: FK_Data_1 +move.l (dst1@GOTOFF,%a5,%d0), %d0 + +; RELOC: R_68K_GOTOFF16 dst2 0x0 +; INSTR: move.l (dst2@GOTOFF,%a5), %d0 +; FIXUP: fixup A - offset: 2, value: dst2@GOTOFF, kind: FK_Data_2 +move.l (dst2@GOTOFF,%a5), %d0 + +; RELOC: R_68K_GOTPCREL16 dst3 0x0 +; INSTR: lea (dst3@GOTPCREL,%pc), %a5 +; FIXUP: fixup A - offset: 2, value: dst3@GOTPCREL, kind: FK_PCRel_2 +lea (dst3@GOTPCREL,%pc), %a5 diff --git a/llvm/test/MC/M68k/Relocations/PIC/data-gotpcrel.s b/llvm/test/MC/M68k/Relocations/PIC/data-gotpcrel.s new file mode 100644 index 00000000000000..9f79dd94f8198a --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/data-gotpcrel.s @@ -0,0 +1,14 @@ +; RUN: llvm-mc -triple m68k -filetype=obj --position-independent %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_GOTPCREL8 dst1 0x1 +; INSTR: move.l (dst1@GOTPCREL,%pc,%d0), %a0 +; FIXUP: fixup A - offset: 3, value: dst1@GOTPCREL+1, kind: FK_PCRel_1 +move.l (dst1@GOTPCREL,%pc,%d0), %a0 + +; RELOC: R_68K_GOTPCREL16 dst2 0x0 +; INSTR: move.l (dst2@GOTPCREL,%pc), %a0 +; FIXUP: fixup A - offset: 2, value: dst2@GOTPCREL, kind: FK_PCRel_2 +move.l (dst2@GOTPCREL,%pc), %a0 diff --git a/llvm/test/MC/M68k/Relocations/PIC/data-pc-rel.s b/llvm/test/MC/M68k/Relocations/PIC/data-pc-rel.s new file mode 100644 index 00000000000000..ef57ca589e64fe --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/data-pc-rel.s @@ -0,0 +1,20 @@ +; RUN: llvm-mc -triple m68k -filetype=obj --position-independent %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_PC8 dst1 0x1 +; INSTR: move.l (dst1,%pc,%a0), %a0 +; FIXUP: fixup A - offset: 3, value: dst1+1, kind: 
FK_PCRel_1 +move.l (dst1,%pc,%a0), %a0 + +; RELOC: R_68K_PC16 dst2 0x0 +; INSTR: move.l (dst2,%pc), %a0 +; FIXUP: fixup A - offset: 2, value: dst2, kind: FK_PCRel_2 +move.l (dst2,%pc), %a0 + +; Shouldn't have any relocation +; RELOC-NOT: R_68K_PC +; INSTR: move.l (0,%pc), %a0 +; FIXUP-NOT: fixup +move.l (0,%pc), %a0 diff --git a/llvm/test/MC/M68k/Relocations/PIC/text-plt.s b/llvm/test/MC/M68k/Relocations/PIC/text-plt.s new file mode 100644 index 00000000000000..0ccd1758004a97 --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/text-plt.s @@ -0,0 +1,14 @@ +; RUN: llvm-mc -triple m68k --mcpu=M68020 --position-independent -filetype=obj %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k --mcpu=M68020 --position-independent -show-encoding %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_PLT16 target 0x0 +; INSTR: jsr (target@PLT,%pc) +; FIXUP: fixup A - offset: 2, value: target@PLT, kind: FK_PCRel_2 +jsr (target@PLT,%pc) + +; RELOC: R_68K_PLT32 __tls_get_addr 0x0 +; INSTR: bsr.l __tls_get_addr@PLT +; FIXUP: fixup A - offset: 2, value: __tls_get_addr@PLT, kind: FK_PCRel_4 +bsr.l __tls_get_addr@PLT diff --git a/llvm/test/MC/M68k/Relocations/text-plt.s b/llvm/test/MC/M68k/Relocations/text-plt.s index 9513519c33c670..7de04b8b2182a5 100644 --- a/llvm/test/MC/M68k/Relocations/text-plt.s +++ b/llvm/test/MC/M68k/Relocations/text-plt.s @@ -1,6 +1,6 @@ -; RUN: llvm-mc -triple m68k -filetype=obj %s -o - \ +; RUN: llvm-mc -triple m68k --mcpu=M68020 -filetype=obj %s -o - \ ; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s -; RUN: llvm-mc -triple m68k -show-encoding %s -o - \ +; RUN: llvm-mc -triple m68k --mcpu=M68020 -show-encoding %s -o - \ ; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s ; RELOC: R_68K_PLT16 target 0x0 diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll index 
21433477c1d7a3..2a99693523d3cf 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; REQUIRES: asserts -; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 -disable-output 2>&1 | FileCheck %s declare void @init_mem(ptr, i64); @@ -11,7 +11,7 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check' ; CHECK: Found an early exit loop with symbolic max backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32))) ; CHECK-NEXT: LV: We can vectorize this loop! -; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK-NOT: LV: Not vectorizing: entry: %p1 = alloca [1024 x i32] %p2 = alloca [1024 x i32] @@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1' ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 ; CHECK-NEXT: LV: We can vectorize this loop! -; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK-NOT: LV: Not vectorizing entry: %p1 = alloca [1024 x i8] %p2 = alloca [1024 x i8] @@ -141,7 +141,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit' ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 ; CHECK-NEXT: LV: We can vectorize this loop! 
-; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet. entry: %p1 = alloca [1024 x i8] call void @init_mem(ptr %p1, i64 1024) diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 52f82d007de4df..08a333fa865154 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S < %s -p loop-vectorize | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 | FileCheck %s declare void @init_mem(ptr, i64); @@ -11,21 +11,47 @@ define i64 @same_exit_block_phi_of_consts() { ; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> 
[[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, 
[[LOOP1]] ], [ 1, [[LOOP_INC]] ], [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[MIDDLE_SPLIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -62,19 +88,45 @@ define i64 @diff_exit_block_phi_of_consts() { ; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] 
], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: loop.early.exit: ; CHECK-NEXT: ret i64 0 ; CHECK: loop.end: @@ -119,22 +171,66 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) ; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023 +; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[END]] to i10 +; CHECK-NEXT: [[TMP20:%.*]] = zext i10 [[TMP19]] to i64 +; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP20]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 
[[TMP2]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD3]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_SPLIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP16]], label [[FOUND:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] +; CHECK-NEXT: 
br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]] ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]] +; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND]], label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32 ; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY1]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: found: ; CHECK-NEXT: ret i32 1 ; CHECK: exit: @@ -183,14 +279,33 @@ define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SVAL:%.*]] = load i32, ptr [[S]], align 4 ; CHECK-NEXT: [[COND:%.*]] = 
icmp eq i32 [[SVAL]], 0 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[COND]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 276 +; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_SPLIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP1]], label [[EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 266, [[MIDDLE_BLOCK]] ], [ -10, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT:%.*]] +; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT]] ; CHECK: for.inc: ; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IND_NEXT]], 266 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], 
!llvm.loop [[LOOP9:![0-9]+]] ; CHECK: early.exit: ; CHECK-NEXT: tail call void @abort() ; CHECK-NEXT: unreachable @@ -218,3 +333,15 @@ early.exit: for.end: ret i32 0 } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll new file mode 100644 index 00000000000000..368c766e6b3c70 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S -enable-early-exit-vectorization -debug %s 2>&1 | FileCheck %s + +; REQUIRES: asserts + +declare void @init(ptr) + +define i64 @multi_exiting_to_different_exits_live_in_exit_values() { +; CHECK: multi_exiting_to_different_exits_live_in_exit_values +; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 +; CHECK-NEXT: IR call void @init(ptr %src) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; 
CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<%3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0> from middle.split) +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + %src = alloca [128 x i32] + call void 
@init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e2, label %loop.header + +e1: + %p1 = phi i64 [ 0, %loop.header ] + ret i64 %p1 + +e2: + %p2 = phi i64 [ 1, %loop.latch ] + ret i64 %p2 +} + +define i64 @multi_exiting_to_same_exit_live_in_exit_values() { +; CHECK: multi_exiting_to_same_exit_live_in_exit_values +; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 +; CHECK-NEXT: IR call void @init(ptr %src) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<%3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT 
branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from middle.split) +; CHECK-NEXT: No successors +; CHECK-NEXT: } + +entry: + %src = alloca [128 x i32] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %exit, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %exit, label %loop.header + +exit: + %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] + ret i64 %p +} + +define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() { +; CHECK: multi_exiting_to_same_exit_live_in_exit_values_2 +; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 +; CHECK-NEXT: IR call void @init(ptr %src) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): 
vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<%3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from middle.split) +; CHECK-NEXT: No successors +; CHECK-NEXT: } + +entry: + %src = alloca [128 x i32] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr 
inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %exit, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %exit, label %loop.header + +exit: + %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] + ret i64 %p + +; uselistorder directives + uselistorder label %exit, { 1, 0 } +} diff --git a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll index cd91d07120f9ee..5b2a95f1b368c3 100644 --- a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S < %s -p loop-vectorize | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 | FileCheck %s declare void @init_mem(ptr, i64); diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll index 3cf2940e86808e..80a21cef4e38ad 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll @@ -474,8 +474,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] ; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] -; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> -; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32> +; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> ; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> 
[[V2]], <4 x float> [[TMP3]], <4 x i32> ; SSE-NEXT: ret <4 x float> [[V3]] ; @@ -489,7 +488,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] -; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> +; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2 ; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> ; AVX-NEXT: ret <4 x float> [[V3]] diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll index a08506840572c5..307fbf711cdc2d 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll @@ -474,7 +474,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] ; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] -; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP1]], <4 x i32> +; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> ; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32> ; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> ; SSE-NEXT: ret <4 x float> [[V3]] @@ -489,7 +489,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 
x i32> ; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] -; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP1]], <4 x i32> +; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> ; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2 ; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> ; AVX-NEXT: ret <4 x float> [[V3]] diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp index 594b52326871dd..913077eb0b06d5 100644 --- a/llvm/tools/dsymutil/dsymutil.cpp +++ b/llvm/tools/dsymutil/dsymutil.cpp @@ -64,12 +64,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -80,7 +81,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DsymutilOptTable : public opt::GenericOptTable { public: - DsymutilOptTable() : opt::GenericOptTable(InfoTable) {} + DsymutilOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // namespace diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp index d33459b194c9a3..9e3800f5bfbbc6 100644 --- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp +++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp @@ -51,12 +51,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef 
OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -67,7 +68,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class CGDataOptTable : public opt::GenericOptTable { public: - CGDataOptTable() : GenericOptTable(InfoTable) {} + CGDataOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-cvtres/llvm-cvtres.cpp b/llvm/tools/llvm-cvtres/llvm-cvtres.cpp index 0c10769a9488ea..8ef8d6e239cfe3 100644 --- a/llvm/tools/llvm-cvtres/llvm-cvtres.cpp +++ b/llvm/tools/llvm-cvtres/llvm-cvtres.cpp @@ -42,12 +42,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -58,7 +59,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class CvtResOptTable : public opt::GenericOptTable { public: - CvtResOptTable() : opt::GenericOptTable(InfoTable, true) {} + CvtResOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + true) {} }; } diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp index 41b379e8fd396b..1467093e78c0b4 100644 --- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp +++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp @@ -31,12 +31,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); 
+#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -47,7 +48,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class CxxfiltOptTable : public opt::GenericOptTable { public: - CxxfiltOptTable() : opt::GenericOptTable(InfoTable) { + CxxfiltOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp b/llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp index 77862737bccd2f..934833bf6fe426 100644 --- a/llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp +++ b/llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp @@ -37,12 +37,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -53,7 +54,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DebuginfodFindOptTable : public opt::GenericOptTable { public: - DebuginfodFindOptTable() : GenericOptTable(InfoTable) {} + DebuginfodFindOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp b/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp index 44d656148a4e2c..2859a36c80b0b3 100644 --- a/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp +++ b/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp @@ -36,12 +36,13 @@ 
enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -52,7 +53,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DebuginfodOptTable : public opt::GenericOptTable { public: - DebuginfodOptTable() : GenericOptTable(InfoTable) {} + DebuginfodOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp b/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp index 7b777b1845f8a4..0180abb834f9d3 100644 --- a/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp +++ b/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp @@ -38,12 +38,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -54,7 +55,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DwarfutilOptTable : public opt::GenericOptTable { public: - DwarfutilOptTable() : opt::GenericOptTable(InfoTable) {} + DwarfutilOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // namespace diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp index 60a89cb13c57a0..e34fcadfde5fc9 100644 --- 
a/llvm/tools/llvm-dwp/llvm-dwp.cpp +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -47,12 +47,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -63,7 +64,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DwpOptTable : public opt::GenericOptTable { public: - DwpOptTable() : GenericOptTable(InfoTable) {} + DwpOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index 4d441465c47fbf..42900159ce9667 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -64,12 +64,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE const opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -79,7 +80,8 @@ const opt::OptTable::Info InfoTable[] = { class GSYMUtilOptTable : public llvm::opt::GenericOptTable { public: - GSYMUtilOptTable() : GenericOptTable(InfoTable) { + GSYMUtilOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-ifs/llvm-ifs.cpp b/llvm/tools/llvm-ifs/llvm-ifs.cpp index b76ea8dec0c98c..e12016c51e906c 100644 --- a/llvm/tools/llvm-ifs/llvm-ifs.cpp +++ b/llvm/tools/llvm-ifs/llvm-ifs.cpp @@ -59,12 +59,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -74,7 +75,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class IFSOptTable : public opt::GenericOptTable { public: - IFSOptTable() : opt::GenericOptTable(InfoTable) { + IFSOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp index 3d3f3f0af4b7a7..94247118dc4eb2 100644 --- a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp +++ b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp @@ -48,12 +48,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -63,7 +64,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class LibtoolDarwinOptTable : public opt::GenericOptTable { public: - LibtoolDarwinOptTable() : GenericOptTable(InfoTable) {} + LibtoolDarwinOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp index 711a9185e155f6..3c0197e8b7bac7 100644 --- a/llvm/tools/llvm-lipo/llvm-lipo.cpp +++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp @@ -72,12 +72,13 @@ enum LipoID { }; namespace lipo { -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "LipoOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "LipoOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info LipoInfoTable[] = { @@ -89,7 +90,9 @@ static constexpr opt::OptTable::Info LipoInfoTable[] = { class LipoOptTable : public opt::GenericOptTable { public: - LipoOptTable() : opt::GenericOptTable(lipo::LipoInfoTable) {} + LipoOptTable() + : opt::GenericOptTable(lipo::OptionStrTable, lipo::OptionPrefixesTable, + lipo::LipoInfoTable) {} }; enum class LipoAction { diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp index db69109e2d1fab..1aa41096002eec 100644 --- a/llvm/tools/llvm-ml/llvm-ml.cpp +++ b/llvm/tools/llvm-ml/llvm-ml.cpp @@ -58,12 +58,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE 
+#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -73,7 +74,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class MLOptTable : public opt::GenericOptTable { public: - MLOptTable() : opt::GenericOptTable(InfoTable, /*IgnoreCase=*/false) {} + MLOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + /*IgnoreCase=*/false) {} }; } // namespace diff --git a/llvm/tools/llvm-mt/llvm-mt.cpp b/llvm/tools/llvm-mt/llvm-mt.cpp index 8b793b877642cb..3bd1bc786f86d3 100644 --- a/llvm/tools/llvm-mt/llvm-mt.cpp +++ b/llvm/tools/llvm-mt/llvm-mt.cpp @@ -40,12 +40,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -56,7 +57,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class CvtResOptTable : public opt::GenericOptTable { public: - CvtResOptTable() : opt::GenericOptTable(InfoTable, true) {} + CvtResOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + true) {} }; } // namespace diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp index d3e8d4c5ed987c..e7c3e36dd38d2f 100644 --- a/llvm/tools/llvm-nm/llvm-nm.cpp +++ b/llvm/tools/llvm-nm/llvm-nm.cpp @@ -65,12 +65,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef 
OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -80,7 +81,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class NmOptTable : public opt::GenericOptTable { public: - NmOptTable() : opt::GenericOptTable(InfoTable) { + NmOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index 104d802b1e1eeb..0925fc55317f7d 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -39,12 +39,13 @@ enum ObjcopyID { }; namespace objcopy_opt { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "ObjcopyOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "ObjcopyOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info ObjcopyInfoTable[] = { #define OPTION(...) 
\ @@ -56,7 +57,10 @@ static constexpr opt::OptTable::Info ObjcopyInfoTable[] = { class ObjcopyOptTable : public opt::GenericOptTable { public: - ObjcopyOptTable() : opt::GenericOptTable(objcopy_opt::ObjcopyInfoTable) { + ObjcopyOptTable() + : opt::GenericOptTable(objcopy_opt::OptionStrTable, + objcopy_opt::OptionPrefixesTable, + objcopy_opt::ObjcopyInfoTable) { setGroupedShortOptions(true); setDashDashParsing(true); } @@ -71,13 +75,13 @@ enum InstallNameToolID { }; namespace install_name_tool { +#define OPTTABLE_STR_TABLE_CODE +#include "InstallNameToolOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "InstallNameToolOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InstallNameToolInfoTable[] = { #define OPTION(...) \ @@ -90,7 +94,9 @@ static constexpr opt::OptTable::Info InstallNameToolInfoTable[] = { class InstallNameToolOptTable : public opt::GenericOptTable { public: InstallNameToolOptTable() - : GenericOptTable(install_name_tool::InstallNameToolInfoTable) {} + : GenericOptTable(install_name_tool::OptionStrTable, + install_name_tool::OptionPrefixesTable, + install_name_tool::InstallNameToolInfoTable) {} }; enum BitcodeStripID { @@ -102,13 +108,13 @@ enum BitcodeStripID { }; namespace bitcode_strip { +#define OPTTABLE_STR_TABLE_CODE +#include "BitcodeStripOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "BitcodeStripOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info BitcodeStripInfoTable[] = { #define OPTION(...) 
\ @@ -121,7 +127,9 @@ static constexpr opt::OptTable::Info BitcodeStripInfoTable[] = { class BitcodeStripOptTable : public opt::GenericOptTable { public: BitcodeStripOptTable() - : opt::GenericOptTable(bitcode_strip::BitcodeStripInfoTable) {} + : opt::GenericOptTable(bitcode_strip::OptionStrTable, + bitcode_strip::OptionPrefixesTable, + bitcode_strip::BitcodeStripInfoTable) {} }; enum StripID { @@ -132,12 +140,13 @@ enum StripID { }; namespace strip { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE +#include "StripOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE #include "StripOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info StripInfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(STRIP_, __VA_ARGS__), @@ -148,7 +157,9 @@ static constexpr opt::OptTable::Info StripInfoTable[] = { class StripOptTable : public opt::GenericOptTable { public: - StripOptTable() : GenericOptTable(strip::StripInfoTable) { + StripOptTable() + : GenericOptTable(strip::OptionStrTable, strip::OptionPrefixesTable, + strip::StripInfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 246d5cfa05818a..1e74cb80b21187 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -99,10 +99,11 @@ namespace { class CommonOptTable : public opt::GenericOptTable { public: - CommonOptTable(ArrayRef OptionInfos, const char *Usage, + CommonOptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, const char *Usage, const char *Description) - : opt::GenericOptTable(OptionInfos), Usage(Usage), - Description(Description) { + : opt::GenericOptTable(StrTable, PrefixesTable, OptionInfos), + Usage(Usage), 
Description(Description) { setGroupedShortOptions(true); } @@ -121,12 +122,13 @@ class CommonOptTable : public opt::GenericOptTable { // ObjdumpOptID is in ObjdumpOptID.h namespace objdump_opt { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "ObjdumpOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "ObjdumpOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info ObjdumpInfoTable[] = { #define OPTION(...) \ @@ -139,9 +141,10 @@ static constexpr opt::OptTable::Info ObjdumpInfoTable[] = { class ObjdumpOptTable : public CommonOptTable { public: ObjdumpOptTable() - : CommonOptTable(objdump_opt::ObjdumpInfoTable, - " [options] ", - "llvm object file dumper") {} + : CommonOptTable( + objdump_opt::OptionStrTable, objdump_opt::OptionPrefixesTable, + objdump_opt::ObjdumpInfoTable, " [options] ", + "llvm object file dumper") {} }; enum OtoolOptID { @@ -152,12 +155,13 @@ enum OtoolOptID { }; namespace otool { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE +#include "OtoolOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE #include "OtoolOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info OtoolInfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(OTOOL_, __VA_ARGS__), @@ -169,7 +173,8 @@ static constexpr opt::OptTable::Info OtoolInfoTable[] = { class OtoolOptTable : public CommonOptTable { public: OtoolOptTable() - : CommonOptTable(otool::OtoolInfoTable, " [option...] [file...]", + : CommonOptTable(otool::OptionStrTable, otool::OptionPrefixesTable, + otool::OtoolInfoTable, " [option...] 
[file...]", "Mach-O object file displaying tool") {} }; diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index f34d99e10f3163..6b9e2349899a44 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -21,6 +21,7 @@ #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/MemProfReader.h" +#include "llvm/ProfileData/MemProfYAML.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/ProfileData/SampleProfWriter.h" diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index 4bc9d90095575b..a77188c462afe0 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -57,12 +57,13 @@ enum ID { }; namespace rc_opt { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -73,7 +74,10 @@ static constexpr opt::OptTable::Info InfoTable[] = { class RcOptTable : public opt::GenericOptTable { public: - RcOptTable() : GenericOptTable(rc_opt::InfoTable, /* IgnoreCase = */ true) {} + RcOptTable() + : GenericOptTable(rc_opt::OptionStrTable, rc_opt::OptionPrefixesTable, + rc_opt::InfoTable, + /* IgnoreCase = */ true) {} }; enum Windres_ID { @@ -84,12 +88,13 @@ enum Windres_ID { }; namespace windres_opt { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE +#include "WindresOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE #include "WindresOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) \ @@ -102,7 +107,10 @@ static constexpr opt::OptTable::Info InfoTable[] = { class WindresOptTable : public opt::GenericOptTable { public: WindresOptTable() - : GenericOptTable(windres_opt::InfoTable, /* IgnoreCase = */ false) {} + : GenericOptTable(windres_opt::OptionStrTable, + windres_opt::OptionPrefixesTable, + windres_opt::InfoTable, + /* IgnoreCase = */ false) {} }; static ExitOnError ExitOnErr; diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index 3e76cda2dd4335..2f77e5d350553d 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -59,12 +59,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr 
opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -74,7 +75,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class ReadobjOptTable : public opt::GenericOptTable { public: - ReadobjOptTable() : opt::GenericOptTable(InfoTable) { + ReadobjOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp index 04282d3e4877c1..b5574ea41e332b 100644 --- a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp +++ b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp @@ -45,12 +45,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "TapiOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "TapiOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -60,7 +61,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class TAPIOptTable : public opt::GenericOptTable { public: - TAPIOptTable() : opt::GenericOptTable(InfoTable) { + TAPIOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-size/llvm-size.cpp b/llvm/tools/llvm-size/llvm-size.cpp index 4a1b0e879036cc..0d7bf248326702 100644 --- a/llvm/tools/llvm-size/llvm-size.cpp +++ b/llvm/tools/llvm-size/llvm-size.cpp @@ -45,12 +45,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -60,7 +61,10 @@ static constexpr opt::OptTable::Info InfoTable[] = { class SizeOptTable : public opt::GenericOptTable { public: - SizeOptTable() : GenericOptTable(InfoTable) { setGroupedShortOptions(true); } + SizeOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { + setGroupedShortOptions(true); + } }; enum OutputFormatTy { berkeley, sysv, darwin }; diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp index d4305096b60a0b..9979b93de84270 100644 --- a/llvm/tools/llvm-strings/llvm-strings.cpp +++ b/llvm/tools/llvm-strings/llvm-strings.cpp @@ -38,12 +38,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -54,7 +55,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class StringsOptTable : public opt::GenericOptTable { public: - StringsOptTable() : GenericOptTable(InfoTable) { + StringsOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); setDashDashParsing(true); } diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 3e41a85d646956..3ba7f59d5b8475 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -56,12 +56,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include 
"Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -72,7 +73,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class SymbolizerOptTable : public opt::GenericOptTable { public: - SymbolizerOptTable() : GenericOptTable(InfoTable) { + SymbolizerOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp index a091e37ff4026d..ca0b4247221968 100644 --- a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp +++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp @@ -33,12 +33,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -49,7 +50,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class TLICheckerOptTable : public opt::GenericOptTable { public: - TLICheckerOptTable() : GenericOptTable(InfoTable) {} + TLICheckerOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp index 39feff62391fe1..727b94b8477cc5 100644 --- a/llvm/tools/sancov/sancov.cpp +++ b/llvm/tools/sancov/sancov.cpp @@ -67,12 +67,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, 
\ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -82,7 +83,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class SancovOptTable : public opt::GenericOptTable { public: - SancovOptTable() : GenericOptTable(InfoTable) {} + SancovOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // namespace diff --git a/llvm/unittests/CodeGen/MachineInstrTest.cpp b/llvm/unittests/CodeGen/MachineInstrTest.cpp index c32c2ce859af5d..ab28963b39311a 100644 --- a/llvm/unittests/CodeGen/MachineInstrTest.cpp +++ b/llvm/unittests/CodeGen/MachineInstrTest.cpp @@ -584,7 +584,7 @@ TEST(MachineInstrTest, SpliceOperands) { // test tied operands MCRegisterClass MRC{ 0, 0, 0, 0, 0, 0, 0, 0, /*Allocatable=*/true, /*BaseClass=*/true}; - TargetRegisterClass RC{&MRC, 0, 0, {}, 0, 0, 0, 0, 0, 0, 0}; + TargetRegisterClass RC{&MRC, 0, 0, {}, 0, 0, 0, 0, 0, 0, 0, 0}; // MachineRegisterInfo will be very upset if these registers aren't // allocatable. 
assert(RC.isAllocatable() && "unusable TargetRegisterClass"); diff --git a/llvm/unittests/Option/OptionMarshallingTest.cpp b/llvm/unittests/Option/OptionMarshallingTest.cpp index 2ec422f1a09843..08c3b019689f8c 100644 --- a/llvm/unittests/Option/OptionMarshallingTest.cpp +++ b/llvm/unittests/Option/OptionMarshallingTest.cpp @@ -9,29 +9,37 @@ #include "llvm/ADT/StringRef.h" #include "gtest/gtest.h" +#define OPTTABLE_STR_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_STR_TABLE_CODE + struct OptionWithMarshallingInfo { - llvm::StringLiteral PrefixedName; + int PrefixedNameOffset; const char *KeyPath; const char *ImpliedCheck; const char *ImpliedValue; + + llvm::StringRef getPrefixedName() const { + return &OptionStrTable[PrefixedNameOffset]; + } }; static const OptionWithMarshallingInfo MarshallingTable[] = { #define OPTION_WITH_MARSHALLING( \ - PREFIX_TYPE, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ + PREFIX_TYPE, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ - {PREFIXED_NAME, #KEYPATH, #IMPLIED_CHECK, #IMPLIED_VALUE}, + {PREFIXED_NAME_OFFSET, #KEYPATH, #IMPLIED_CHECK, #IMPLIED_VALUE}, #include "Opts.inc" #undef OPTION_WITH_MARSHALLING }; TEST(OptionMarshalling, EmittedOrderSameAsDefinitionOrder) { - ASSERT_EQ(MarshallingTable[0].PrefixedName, "-marshalled-flag-d"); - ASSERT_EQ(MarshallingTable[1].PrefixedName, "-marshalled-flag-c"); - ASSERT_EQ(MarshallingTable[2].PrefixedName, "-marshalled-flag-b"); - ASSERT_EQ(MarshallingTable[3].PrefixedName, "-marshalled-flag-a"); + ASSERT_EQ(MarshallingTable[0].getPrefixedName(), "-marshalled-flag-d"); + ASSERT_EQ(MarshallingTable[1].getPrefixedName(), "-marshalled-flag-c"); + 
ASSERT_EQ(MarshallingTable[2].getPrefixedName(), "-marshalled-flag-b"); + ASSERT_EQ(MarshallingTable[3].getPrefixedName(), "-marshalled-flag-a"); } TEST(OptionMarshalling, EmittedSpecifiedKeyPath) { diff --git a/llvm/unittests/Option/OptionParsingTest.cpp b/llvm/unittests/Option/OptionParsingTest.cpp index cd8743e49d4fde..3da015e343eb97 100644 --- a/llvm/unittests/Option/OptionParsingTest.cpp +++ b/llvm/unittests/Option/OptionParsingTest.cpp @@ -20,6 +20,10 @@ using namespace llvm::opt; #pragma clang diagnostic ignored "-Wdeprecated-declarations" #endif +#define OPTTABLE_STR_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_STR_TABLE_CODE + enum ID { OPT_INVALID = 0, // This is not an option ID. #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), @@ -28,20 +32,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE -static constexpr const StringLiteral PrefixTable_init[] = -#define PREFIX_UNION(VALUES) VALUES +#define OPTTABLE_PREFIXES_UNION_CODE #include "Opts.inc" -#undef PREFIX_UNION - ; -static constexpr const ArrayRef - PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1); +#undef OPTTABLE_PREFIXES_UNION_CODE enum OptionFlags { OptFlag1 = (1 << 4), @@ -64,13 +61,15 @@ namespace { class TestOptTable : public GenericOptTable { public: TestOptTable(bool IgnoreCase = false) - : GenericOptTable(InfoTable, IgnoreCase) {} + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + IgnoreCase) {} }; class TestPrecomputedOptTable : public PrecomputedOptTable { public: TestPrecomputedOptTable(bool IgnoreCase = false) - : PrecomputedOptTable(InfoTable, PrefixTable, IgnoreCase) {} + : PrecomputedOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + OptionPrefixesUnion, IgnoreCase) {} }; } diff --git 
a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 5886d3e2bcf842..456b093362b50f 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -16,6 +16,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/ProfileData/MemProfData.inc" #include "llvm/ProfileData/MemProfReader.h" +#include "llvm/ProfileData/MemProfYAML.h" #include "llvm/Support/raw_ostream.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -161,9 +162,8 @@ TEST(MemProf, FillsValue) { /*KeepName=*/true); llvm::DenseMap Records; - for (const auto &Pair : Reader) { + for (const auto &Pair : Reader) Records.insert({Pair.first, Pair.second}); - } // Mock program pseudocode and expected memprof record contents. // @@ -396,9 +396,8 @@ TEST(MemProf, SymbolizationFilter) { RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM); llvm::SmallVector Records; - for (const auto &KeyRecordPair : Reader) { + for (const auto &KeyRecordPair : Reader) Records.push_back(KeyRecordPair.second); - } ASSERT_THAT(Records, SizeIs(1)); ASSERT_THAT(Records[0].AllocSites, SizeIs(1)); @@ -428,9 +427,8 @@ TEST(MemProf, BaseMemProfReader) { MemProfReader Reader(std::move(MemProfData)); llvm::SmallVector Records; - for (const auto &KeyRecordPair : Reader) { + for (const auto &KeyRecordPair : Reader) Records.push_back(KeyRecordPair.second); - } ASSERT_THAT(Records, SizeIs(1)); ASSERT_THAT(Records[0].AllocSites, SizeIs(1)); @@ -463,9 +461,8 @@ TEST(MemProf, BaseMemProfReaderWithCSIdMap) { MemProfReader Reader(std::move(MemProfData)); llvm::SmallVector Records; - for (const auto &KeyRecordPair : Reader) { + for (const auto &KeyRecordPair : Reader) Records.push_back(KeyRecordPair.second); - } ASSERT_THAT(Records, SizeIs(1)); ASSERT_THAT(Records[0].AllocSites, SizeIs(1)); @@ -748,7 +745,7 @@ TEST(MemProf, YAMLParser) { // Verify the entire contents of MemProfData.Records. 
ASSERT_THAT(MemProfData.Records, SizeIs(1)); - const auto &[GUID, Record] = *MemProfData.Records.begin(); + const auto &[GUID, Record] = MemProfData.Records.front(); EXPECT_EQ(GUID, 0xdeadbeef12345678ULL); ASSERT_THAT(Record.AllocSites, SizeIs(2)); EXPECT_EQ(Record.AllocSites[0].CSId, hashCallStack(CS1)); diff --git a/llvm/utils/TableGen/Basic/SDNodeProperties.h b/llvm/utils/TableGen/Basic/SDNodeProperties.h index 1fe4044edcea23..97813067341fc8 100644 --- a/llvm/utils/TableGen/Basic/SDNodeProperties.h +++ b/llvm/utils/TableGen/Basic/SDNodeProperties.h @@ -28,8 +28,6 @@ enum SDNP { SDNPSideEffect, SDNPMemOperand, SDNPVariadic, - SDNPWantRoot, - SDNPWantParent }; unsigned parseSDPatternOperatorProperties(const Record *R); diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.cpp b/llvm/utils/TableGen/Common/CodeGenTarget.cpp index 4e75db689a0b57..f01b8a962bfbb1 100644 --- a/llvm/utils/TableGen/Common/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/Common/CodeGenTarget.cpp @@ -424,14 +424,13 @@ ComplexPattern::ComplexPattern(const Record *R) { Properties |= 1 << SDNPMemOperand; } else if (Prop->getName() == "SDNPVariadic") { Properties |= 1 << SDNPVariadic; - } else if (Prop->getName() == "SDNPWantRoot") { - Properties |= 1 << SDNPWantRoot; - } else if (Prop->getName() == "SDNPWantParent") { - Properties |= 1 << SDNPWantParent; } else { PrintFatalError(R->getLoc(), "Unsupported SD Node property '" + Prop->getName() + "' on ComplexPattern '" + R->getName() + "'!"); } } + + WantsRoot = R->getValueAsBit("WantsRoot"); + WantsParent = R->getValueAsBit("WantsParent"); } diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.h b/llvm/utils/TableGen/Common/CodeGenTarget.h index 8bcb2f677a00b0..c3c7f7e362dbff 100644 --- a/llvm/utils/TableGen/Common/CodeGenTarget.h +++ b/llvm/utils/TableGen/Common/CodeGenTarget.h @@ -243,6 +243,8 @@ class ComplexPattern { std::vector RootNodes; unsigned Properties; // Node properties unsigned Complexity; + bool WantsRoot; + bool WantsParent; public: 
ComplexPattern(const Record *R); @@ -253,6 +255,8 @@ class ComplexPattern { const ArrayRef getRootNodes() const { return RootNodes; } bool hasProperty(enum SDNP Prop) const { return Properties & (1 << Prop); } unsigned getComplexity() const { return Complexity; } + bool wantsRoot() const { return WantsRoot; } + bool wantsParent() const { return WantsParent; } }; } // namespace llvm diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp index f2385bdf1dde41..f056306b2f8571 100644 --- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -1189,12 +1189,12 @@ void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) { OS << "("; // If the complex pattern wants the root of the match, pass it in as the // first argument. - if (P.hasProperty(SDNPWantRoot)) + if (P.wantsRoot()) OS << "Root, "; // If the complex pattern wants the parent of the operand being matched, // pass it in as the next argument. 
- if (P.hasProperty(SDNPWantParent)) + if (P.wantsParent()) OS << "Parent, "; OS << "N"; diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp index 48f5818938ae86..eca828cad5f4d2 100644 --- a/llvm/utils/TableGen/OptionParserEmitter.cpp +++ b/llvm/utils/TableGen/OptionParserEmitter.cpp @@ -9,9 +9,11 @@ #include "Common/OptEmitter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/StringToOffsetTable.h" #include "llvm/TableGen/TableGenBackend.h" #include #include @@ -26,6 +28,15 @@ static std::string getOptionName(const Record &R) { return std::string(R.getValueAsString("EnumName")); } +static raw_ostream &writeStrTableOffset(raw_ostream &OS, + const StringToOffsetTable &Table, + llvm::StringRef Str) { + OS << Table.GetStringOffset(Str) << " /* "; + OS.write_escaped(Str); + OS << " */"; + return OS; +} + static raw_ostream &writeCstring(raw_ostream &OS, llvm::StringRef Str) { OS << '"'; OS.write_escaped(Str); @@ -260,59 +271,84 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { // Generate prefix groups. 
typedef SmallVector, 2> PrefixKeyT; - typedef std::map PrefixesT; + typedef std::map PrefixesT; PrefixesT Prefixes; - Prefixes.insert(std::pair(PrefixKeyT(), "prefix_0")); - unsigned CurPrefix = 0; + Prefixes.insert({PrefixKeyT(), 0}); for (const Record &R : llvm::make_pointee_range(Opts)) { std::vector RPrefixes = R.getValueAsListOfStrings("Prefixes"); PrefixKeyT PrefixKey(RPrefixes.begin(), RPrefixes.end()); - unsigned NewPrefix = CurPrefix + 1; - std::string Prefix = (Twine("prefix_") + Twine(NewPrefix)).str(); - if (Prefixes.insert(std::pair(PrefixKey, Prefix)).second) - CurPrefix = NewPrefix; + Prefixes.insert({PrefixKey, 0}); } DenseSet PrefixesUnionSet; - for (const auto &Prefix : Prefixes) - PrefixesUnionSet.insert(Prefix.first.begin(), Prefix.first.end()); + for (const auto &[Prefix, _] : Prefixes) + PrefixesUnionSet.insert(Prefix.begin(), Prefix.end()); SmallVector PrefixesUnion(PrefixesUnionSet.begin(), PrefixesUnionSet.end()); array_pod_sort(PrefixesUnion.begin(), PrefixesUnion.end()); + llvm::StringToOffsetTable Table; + // Make sure the empty string is the zero-th one in the table. This both makes + // it easy to check for empty strings (zero offset == empty) and makes + // initialization cheaper for empty strings. + Table.GetOrAddStringOffset(""); + // We can add all the prefixes via the union. + for (const auto &Prefix : PrefixesUnion) + Table.GetOrAddStringOffset(Prefix); + for (const Record &R : llvm::make_pointee_range(Groups)) + Table.GetOrAddStringOffset(R.getValueAsString("Name")); + for (const Record &R : llvm::make_pointee_range(Opts)) + Table.GetOrAddStringOffset(getOptionPrefixedName(R)); + + // Dump string table. + OS << "/////////\n"; + OS << "// String table\n\n"; + OS << "#ifdef OPTTABLE_STR_TABLE_CODE\n"; + Table.EmitStringLiteralDef(OS, "static constexpr char OptionStrTable[]", + /*Indent=*/""); + OS << "#endif // OPTTABLE_STR_TABLE_CODE\n\n"; + // Dump prefixes. 
OS << "/////////\n"; OS << "// Prefixes\n\n"; - OS << "#ifdef PREFIX\n"; - OS << "#define COMMA ,\n"; - for (const auto &Prefix : Prefixes) { - OS << "PREFIX("; - - // Prefix name. - OS << Prefix.second; - - // Prefix values. - OS << ", {"; - for (const auto &PrefixKey : Prefix.first) - OS << "llvm::StringLiteral(\"" << PrefixKey << "\") COMMA "; - // Append an empty element to avoid ending up with an empty array. - OS << "llvm::StringLiteral(\"\")})\n"; + OS << "#ifdef OPTTABLE_PREFIXES_TABLE_CODE\n"; + OS << "static constexpr unsigned OptionPrefixesTable[] = {\n"; + { + // Ensure the first prefix set is always empty. + assert(!Prefixes.empty() && + "We should always emit an empty set of prefixes"); + assert(Prefixes.begin()->first.empty() && + "First prefix set should always be empty"); + llvm::ListSeparator Sep(",\n"); + unsigned CurIndex = 0; + for (auto &[Prefix, PrefixIndex] : Prefixes) { + // First emit the number of prefix strings in this list of prefixes. + OS << Sep << " " << Prefix.size() << " /* prefixes */"; + PrefixIndex = CurIndex; + assert((CurIndex == 0 || !Prefix.empty()) && + "Only first prefix set should be empty!"); + for (const auto &PrefixKey : Prefix) + OS << ", " << *Table.GetStringOffset(PrefixKey) << " /* '" << PrefixKey + << "' */"; + CurIndex += Prefix.size() + 1; + } } - OS << "#undef COMMA\n"; - OS << "#endif // PREFIX\n\n"; + OS << "\n};\n"; + OS << "#endif // OPTTABLE_PREFIXES_TABLE_CODE\n\n"; - // Dump prefix unions. + // Dump prefixes union. 
OS << "/////////\n"; OS << "// Prefix Union\n\n"; - OS << "#ifdef PREFIX_UNION\n"; - OS << "#define COMMA ,\n"; - OS << "PREFIX_UNION({\n"; - for (const auto &Prefix : PrefixesUnion) { - OS << "llvm::StringLiteral(\"" << Prefix << "\") COMMA "; + OS << "#ifdef OPTTABLE_PREFIXES_UNION_CODE\n"; + OS << "static constexpr unsigned OptionPrefixesUnion[] = {\n"; + { + llvm::ListSeparator Sep(", "); + for (auto Prefix : PrefixesUnion) + OS << Sep << " " << *Table.GetStringOffset(Prefix) << " /* '" << Prefix + << "' */"; } - OS << "llvm::StringLiteral(\"\")})\n"; - OS << "#undef COMMA\n"; - OS << "#endif // PREFIX_UNION\n\n"; + OS << "\n};\n"; + OS << "#endif // OPTTABLE_PREFIXES_UNION_CODE\n\n"; // Dump groups. OS << "/////////\n"; @@ -337,11 +373,12 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { // Start a single option entry. OS << "OPTION("; - // The option prefix; - OS << "llvm::ArrayRef()"; + // A zero prefix offset corresponds to an empty set of prefixes. + OS << "0 /* no prefixes */"; - // The option string. - OS << ", \"" << R.getValueAsString("Name") << '"'; + // The option string offset. + OS << ", "; + writeStrTableOffset(OS, Table, R.getValueAsString("Name")); // The option identifier name. OS << ", " << getOptionName(R); @@ -387,7 +424,7 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { OS << Prefixes[PrefixKeyT(RPrefixes.begin(), RPrefixes.end())] << ", "; // The option prefixed name. - writeCstring(OS, getOptionPrefixedName(R)); + writeStrTableOffset(OS, Table, getOptionPrefixedName(R)); // The option identifier name. 
OS << ", " << getOptionName(R); diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index a6f87119aca5ba..bfcd52da1c39cb 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -1286,9 +1286,6 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) { } OS << "};\n"; - OS << "\nstatic const TargetRegisterClass *const " - << "NullRegClasses[] = { nullptr };\n\n"; - // Emit register class bit mask tables. The first bit mask emitted for a // register class, RC, is the set of sub-classes, including RC itself. // @@ -1340,19 +1337,18 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) { SuperRegIdxSeqs.emit(OS, printSubRegIndex); OS << "};\n\n"; - // Emit NULL terminated super-class lists. + // Emit super-class lists. for (const auto &RC : RegisterClasses) { ArrayRef Supers = RC.getSuperClasses(); - // Skip classes without supers. We can reuse NullRegClasses. + // Skip classes without supers. if (Supers.empty()) continue; - OS << "static const TargetRegisterClass *const " << RC.getName() - << "Superclasses[] = {\n"; + OS << "static unsigned const " << RC.getName() << "Superclasses[] = {\n"; for (const auto *Super : Supers) - OS << " &" << Super->getQualifiedName() << "RegClass,\n"; - OS << " nullptr\n};\n\n"; + OS << " " << Super->getQualifiedIdName() << ",\n"; + OS << "};\n\n"; } // Emit methods. @@ -1406,9 +1402,10 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) { << (RC.CoveredBySubRegs ? 
"true" : "false") << ", /* CoveredBySubRegs */\n "; if (RC.getSuperClasses().empty()) - OS << "NullRegClasses,\n "; + OS << "nullptr, "; else - OS << RC.getName() << "Superclasses,\n "; + OS << RC.getName() << "Superclasses, "; + OS << RC.getSuperClasses().size() << ",\n "; if (RC.AltOrderSelect.empty()) OS << "nullptr\n"; else diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py index 76b2a3e26be28a..19264bca6ce8f6 100755 --- a/llvm/utils/git/code-format-helper.py +++ b/llvm/utils/git/code-format-helper.py @@ -10,6 +10,8 @@ import argparse import os +import re +import shlex import subprocess import sys from typing import List, Optional @@ -312,7 +314,112 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str return None -ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper()) +class UndefGetFormatHelper(FormatHelper): + name = "undef deprecator" + friendly_name = "undef deprecator" + + @property + def instructions(self) -> str: + return " ".join(shlex.quote(c) for c in self.cmd) + + def filter_changed_files(self, changed_files: List[str]) -> List[str]: + filtered_files = [] + for path in changed_files: + _, ext = os.path.splitext(path) + if ext in (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm", ".ll"): + filtered_files.append(path) + return filtered_files + + def has_tool(self) -> bool: + return True + + def pr_comment_text_for_diff(self, diff: str) -> str: + return f""" +:warning: {self.name} found issues in your code. :warning: + +
+ +You can test this locally with the following command: + + +``````````bash +{self.instructions} +`````````` + +
+ +{diff} +""" + + def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]: + files = self.filter_changed_files(changed_files) + + # Use git to find files that have had a change in the number of undefs + regex = "([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)" + cmd = ["git", "diff", "-U0", "--pickaxe-regex", "-S", regex] + + if args.start_rev and args.end_rev: + cmd.append(args.start_rev) + cmd.append(args.end_rev) + + cmd += files + self.cmd = cmd + + if args.verbose: + print(f"Running: {self.instructions}") + + proc = subprocess.run( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" + ) + sys.stdout.write(proc.stderr) + stdout = proc.stdout + + files = [] + # Split the diff so we have one array entry per file. + # Each file is prefixed like: + # diff --git a/file b/file + for file in re.split("^diff --git ", stdout, 0, re.MULTILINE): + # search for additions of undef + if re.search("^[+].*" + regex, file, re.MULTILINE): + files.append(re.match("a/([^ ]+)", file.splitlines()[0])[1]) + + if not files: + return None + + files = "\n".join(" - " + f for f in files) + report = f""" +The following files introduce new uses of undef: +{files} + +[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead. + +In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead. + +For example, this is considered a bad practice: +```llvm +define void @fn() {{ + ... + br i1 undef, ... +}} +``` + +Please use the following instead: +```llvm +define void @fn(i1 %cond) {{ + ... + br i1 %cond, ... 
+}} +``` + +Please refer to the [Undefined Behavior Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information. +""" + if args.verbose: + print(f"error: {self.name} failed") + print(report) + return report + + +ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper(), UndefGetFormatHelper()) def hook_main(): diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn index 0b72d01f2279bd..21e348da90b905 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn @@ -45,6 +45,7 @@ static_library("modernize") { "UseEmplaceCheck.cpp", "UseEqualsDefaultCheck.cpp", "UseEqualsDeleteCheck.cpp", + "UseIntegerSignComparisonCheck.cpp", "UseNodiscardCheck.cpp", "UseNoexceptCheck.cpp", "UseNullptrCheck.cpp", diff --git a/mlir/include/mlir-c/Dialect/EmitC.h b/mlir/include/mlir-c/Dialect/EmitC.h new file mode 100644 index 00000000000000..82e698344bf1e7 --- /dev/null +++ b/mlir/include/mlir-c/Dialect/EmitC.h @@ -0,0 +1,26 @@ +//===-- mlir-c/Dialect/EmitC.h - C API for EmitC dialect ----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_C_DIALECT_EmitC_H +#define MLIR_C_DIALECT_EmitC_H + +#include "mlir-c/IR.h" +#include "mlir-c/Support.h" + +#ifdef __cplusplus +extern "C" { +#endif + +MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(EmitC, emitc); + +#ifdef __cplusplus +} +#endif + +#endif // MLIR_C_DIALECT_EmitC_H diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 296a3c305e5bf4..14880a1a66ba57 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -2029,6 +2029,107 @@ def NVVM_CpAsyncBulkTensorPrefetchOp : }]; } +// List of modes supported for TMA Store and Reduction Ops +def TMAStoreModeTile : I32EnumAttrCase<"TILE", 0, "tile">; +def TMAStoreModeIm2Col : I32EnumAttrCase<"IM2COL", 1, "im2col">; + +def TMAStoreMode : I32EnumAttr<"TMAStoreMode", "NVVM TMA Store Mode", + [TMAStoreModeTile, TMAStoreModeIm2Col]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} +def TMAStoreModeAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + +// List of Reduction Ops supported with TMA Store +def TMAReduxKindAdd : I32EnumAttrCase<"ADD", 0, "add">; +def TMAReduxKindMin : I32EnumAttrCase<"MIN", 1, "min">; +def TMAReduxKindMax : I32EnumAttrCase<"MAX", 2, "max">; +def TMAReduxKindInc : I32EnumAttrCase<"INC", 3, "inc">; +def TMAReduxKindDec : I32EnumAttrCase<"DEC", 4, "dec">; +def TMAReduxKindAnd : I32EnumAttrCase<"AND", 5, "and">; +def TMAReduxKindOr : I32EnumAttrCase<"OR", 6, "or">; +def TMAReduxKindXor : I32EnumAttrCase<"XOR", 7, "xor">; + +def TMAReduxKind : I32EnumAttr<"TMAReduxKind", "NVVM TMA redux kind", + [TMAReduxKindAdd, TMAReduxKindMax, TMAReduxKindMin, + TMAReduxKindInc, TMAReduxKindDec, TMAReduxKindAnd, + TMAReduxKindOr, TMAReduxKindXor]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} 
+def TMAReduxKindAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + +def NVVM_CpAsyncBulkTensorReduceOp : + NVVM_Op<"cp.async.bulk.tensor.reduce", [AttrSizedOperandSegments]> { + let arguments = (ins + LLVM_AnyPointer:$tmaDescriptor, + LLVM_PointerShared:$srcMem, + TMAReduxKindAttr:$redKind, + DefaultValuedAttr:$mode, + Variadic:$coordinates, + Optional:$l2CacheHint); + + let description = [{ + Initiates an asynchronous reduction operation of tensor data in + global memory with tensor data in shared memory. + + The `mode` attribute indicates whether the copy mode is tile or im2col. + The `redOp` attribute specifies the reduction operations applied. + The supported reduction operations are: + {add, min, max, inc, dec, and, or, xor} + + The `l2CacheHint` operand is optional, and it is used to specify cache + eviction policy that may be used during the memory access. + + [For more information, see PTX ISA] + (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-reduce-async-bulk-tensor) + }]; + + let assemblyFormat = [{ + $tmaDescriptor `,` + $srcMem `,` + `box` `[`$coordinates `]` + (`l2_cache_hint` `=` $l2CacheHint^ )? 
+ attr-dict `:` type($tmaDescriptor) `,` type($srcMem) + }]; + + let extraClassDeclaration = [{ + static llvm::Intrinsic::ID getIntrinsicID(int tensorDims, + NVVM::TMAReduxKind kind, + bool isIm2Col); + }]; + + let hasVerifier = 1; + + string llvmBuilder = [{ + // Arguments to the intrinsic: + // shared_mem_ptr, tmaDesc, tensorDims + // cache_hint(if applicable) and flag(boolean) + llvm::SmallVector translatedOperands; + translatedOperands.push_back($srcMem); + translatedOperands.push_back($tmaDescriptor); + + for (auto v : op.getCoordinates()) + translatedOperands.push_back(moduleTranslation.lookupValue(v)); + + llvm::LLVMContext &ctx = moduleTranslation.getLLVMContext(); + auto *i64Undef = llvm::UndefValue::get(llvm::IntegerType::get(ctx, 64)); + + bool isCacheHint = op.getL2CacheHint() ? true : false; + translatedOperands.push_back(isCacheHint ? $l2CacheHint : i64Undef); + translatedOperands.push_back(builder.getInt1(isCacheHint)); + + auto intId = NVVM::CpAsyncBulkTensorReduceOp::getIntrinsicID( + op.getCoordinates().size(), $redKind, + (op.getMode() == NVVM::TMAStoreMode::IM2COL)); + createIntrinsicCall(builder, intId, translatedOperands); + }]; +} + //===----------------------------------------------------------------------===// // NVVM Wgmma Ops //===----------------------------------------------------------------------===// diff --git a/mlir/lib/CAPI/Dialect/CMakeLists.txt b/mlir/lib/CAPI/Dialect/CMakeLists.txt index 4e141b60ff8cc9..5ad4bafedf6c48 100644 --- a/mlir/lib/CAPI/Dialect/CMakeLists.txt +++ b/mlir/lib/CAPI/Dialect/CMakeLists.txt @@ -40,6 +40,15 @@ add_mlir_upstream_c_api_library(MLIRCAPIControlFlow MLIRControlFlowDialect ) +add_mlir_upstream_c_api_library(MLIRCAPIEmitC + EmitC.cpp + + PARTIAL_SOURCES_INTENDED + LINK_LIBS PUBLIC + MLIRCAPIIR + MLIREmitCDialect +) + add_mlir_upstream_c_api_library(MLIRCAPIMath Math.cpp diff --git a/mlir/lib/CAPI/Dialect/EmitC.cpp b/mlir/lib/CAPI/Dialect/EmitC.cpp new file mode 100644 index 
00000000000000..3dcb7038a57981 --- /dev/null +++ b/mlir/lib/CAPI/Dialect/EmitC.cpp @@ -0,0 +1,13 @@ +//===- EmitC.cpp - C Interface for EmitC dialect --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir-c/Dialect/EmitC.h" +#include "mlir/CAPI/Registration.h" +#include "mlir/Dialect/EmitC/IR/EmitC.h" + +MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(EmitC, emitc, mlir::emitc::EmitCDialect) diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index ca04af0b060b4f..8b09c0f386d6b6 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -75,21 +75,26 @@ ParseResult VoteBallotOp::parse(OpAsmParser &parser, OperationState &result) { void VoteBallotOp::print(OpAsmPrinter &p) { printNVVMIntrinsicOp(p, *this); } -// This verifier is shared across: -// CpAsyncBulkTensorGlobalToSharedClusterOp (TMA Load) and -// CpAsyncBulkTensorPrefetchOp (TMA Prefetch) Ops. +// This verifier is shared among the following Ops: +// CpAsyncBulkTensorGlobalToSharedClusterOp (TMA Load) +// CpAsyncBulkTensorPrefetchOp (TMA Prefetch) +// CpAsyncBulkTensorReduceOp (TMA Store-Reduce) static LogicalResult CpAsyncBulkTensorCommonVerifier(size_t tensorDims, + bool isIm2Col, size_t numIm2ColOffsets, Location loc) { if (tensorDims < 1 || tensorDims > 5) return emitError(loc, "expects coordinates between 1 to 5 dimension"); - if (numIm2ColOffsets) { + // For Im2Col mode, there are two constraints: + if (isIm2Col) { + // 1. Tensor must always be at least 3-d. if (tensorDims < 3) return emitError( loc, "to use im2col mode, the tensor has to be at least 3-dimensional"); - if (tensorDims != (numIm2ColOffsets + 2)) + // 2. 
When there are Im2ColOffsets, they must be (Dims - 2) in number. + if (numIm2ColOffsets && (tensorDims != (numIm2ColOffsets + 2))) return emitError( loc, "im2col offsets must be 2 less than number of coordinates"); } @@ -97,8 +102,10 @@ static LogicalResult CpAsyncBulkTensorCommonVerifier(size_t tensorDims, } LogicalResult CpAsyncBulkTensorGlobalToSharedClusterOp::verify() { - return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), - getIm2colOffsets().size(), getLoc()); + size_t numIm2ColOffsets = getIm2colOffsets().size(); + bool isIm2Col = numIm2ColOffsets > 0; + return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), isIm2Col, + numIm2ColOffsets, getLoc()); } LogicalResult CpAsyncBulkTensorSharedCTAToGlobalOp::verify() { @@ -119,8 +126,16 @@ LogicalResult CpAsyncOp::verify() { } LogicalResult CpAsyncBulkTensorPrefetchOp::verify() { - return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), - getIm2colOffsets().size(), getLoc()); + size_t numIm2ColOffsets = getIm2colOffsets().size(); + bool isIm2Col = numIm2ColOffsets > 0; + return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), isIm2Col, + numIm2ColOffsets, getLoc()); +} + +LogicalResult CpAsyncBulkTensorReduceOp::verify() { + bool isIm2Col = (getMode() == TMAStoreMode::IM2COL); + return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), isIm2Col, 0, + getLoc()); } // Given the element type of an operand and whether or not it is an accumulator, @@ -1094,6 +1109,55 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims, } } +#define CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, mode) \ + llvm::Intrinsic::nvvm_cp_async_bulk_tensor_##op##_##mode##_##dim##d + +#define CP_ASYNC_BULK_TENSOR_REDUCE(op, dim, is_im2col) \ + is_im2col ? 
CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, im2col) \ + : CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, tile) + +#define GET_CP_ASYNC_BULK_TENSOR_ID(op, dims, is_im2col) \ + [&]() -> auto { \ + switch (dims) { \ + case 1: \ + return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 1, tile); \ + case 2: \ + return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 2, tile); \ + case 3: \ + return CP_ASYNC_BULK_TENSOR_REDUCE(op, 3, is_im2col); \ + case 4: \ + return CP_ASYNC_BULK_TENSOR_REDUCE(op, 4, is_im2col); \ + case 5: \ + return CP_ASYNC_BULK_TENSOR_REDUCE(op, 5, is_im2col); \ + default: \ + llvm_unreachable("Invalid TensorDim in CpAsyncBulkTensorReduceOp."); \ + } \ + }() + +llvm::Intrinsic::ID CpAsyncBulkTensorReduceOp::getIntrinsicID( + int tensorDims, NVVM::TMAReduxKind kind, bool isIm2Col) { + using RedTy = NVVM::TMAReduxKind; + switch (kind) { + case RedTy::ADD: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_add, tensorDims, isIm2Col); + case RedTy::MIN: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_min, tensorDims, isIm2Col); + case RedTy::MAX: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_max, tensorDims, isIm2Col); + case RedTy::INC: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_inc, tensorDims, isIm2Col); + case RedTy::DEC: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_dec, tensorDims, isIm2Col); + case RedTy::AND: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_and, tensorDims, isIm2Col); + case RedTy::OR: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_or, tensorDims, isIm2Col); + case RedTy::XOR: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_xor, tensorDims, isIm2Col); + } + llvm_unreachable("Invalid Reduction Op for CpAsyncBulkTensorReduceOp"); +} + //===----------------------------------------------------------------------===// // NVVMDialect initialization, type parsing, and registration. 
//===----------------------------------------------------------------------===// diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index e1b870b53ad25c..10866c11bdb71b 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -352,6 +352,14 @@ declare_mlir_python_sources( dialects/quant.py _mlir_libs/_mlir/dialects/quant.pyi) +declare_mlir_dialect_python_bindings( + ADD_TO_PARENT MLIRPythonSources.Dialects + ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" + TD_FILE dialects/EmitC.td + SOURCES + dialects/emitc.py + DIALECT_NAME emitc) + declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" diff --git a/mlir/python/mlir/dialects/EmitC.td b/mlir/python/mlir/dialects/EmitC.td new file mode 100644 index 00000000000000..ff0a56d1550148 --- /dev/null +++ b/mlir/python/mlir/dialects/EmitC.td @@ -0,0 +1,14 @@ +//===-- EmitC.td - Entry point for EmitC bind --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef PYTHON_BINDINGS_EMITC +#define PYTHON_BINDINGS_EMITC + +include "mlir/Dialect/EmitC/IR/EmitC.td" + +#endif diff --git a/mlir/python/mlir/dialects/emitc.py b/mlir/python/mlir/dialects/emitc.py new file mode 100644 index 00000000000000..99c3286e576f1e --- /dev/null +++ b/mlir/python/mlir/dialects/emitc.py @@ -0,0 +1,5 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from ._emitc_ops_gen import * diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir index 25435cf51a6156..cfa5b5d569ca9d 100644 --- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir @@ -31,83 +31,6 @@ func.func @vectorize_1d_tensor_extract(%arg0: tensor<3xf32>, %arg1: tensor<4x3xi // ----- -#map = affine_map<() -> ()> -func.func @extract_scalar_from_0d_into_0d(%src: tensor, %init: tensor) -> tensor { - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = [] - } outs(%init : tensor) { - ^bb0(%in: f32): - %1 = tensor.extract %src[] : tensor - linalg.yield %1 : f32 - } -> tensor - - return %res : tensor -} - -// CHECK-LABEL: func.func @extract_scalar_from_0d_into_0d( -// CHECK-SAME: %[[SRC:.*]]: tensor, -// CHECK-SAME: %[[INIT:.*]]: tensor) -> tensor { -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector -// CHECK: vector.transfer_write %[[READ]], %[[INIT]][] : vector, tensor - -// ----- - -#map = affine_map<(n) -> (n)> -func.func @extract_scalar_from_0d_into_1d(%src: tensor, %init: tensor<1xf32>) -> tensor<1xf32> { - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = ["parallel"] - } outs(%init : tensor<1xf32>) { - ^bb0(%in: f32): - %1 = tensor.extract %src[] : tensor - linalg.yield %1 : f32 - } -> tensor<1xf32> - - return %res : tensor<1xf32> -} -// CHECK-LABEL: func.func @extract_scalar_from_0d_into_1d( -// CHECK-SAME: %[[SRC:.*]]: tensor, -// CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> { -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector -// CHECK: %[[READ_BCAST:.*]] = vector.broadcast 
%[[READ]] : vector to vector<1xf32> -// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32> - -// ----- - -#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { - %c0 = arith.constant 1 : index - %c1 = arith.constant 2 : index - - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = ["parallel", "parallel", "parallel"] - } outs(%init : tensor<1x1x3xf32>) { - ^bb0(%arg4: f32): - %1 = tensor.extract %src[%c0, %c1] : tensor<3x3xf32> - linalg.yield %1 : f32 - } -> tensor<1x1x3xf32> - - return %res : tensor<1x1x3xf32> -} - -// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_scalar_broadcast( -// CHECK-SAME: %[[SRC:.*]]: tensor<3x3xf32>, -// CHECK-SAME: %[[INIT:.*]]: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector -// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> -// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> - -// ----- - #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func.func @vectorize_nd_tensor_extract_transfer_read_basic( %arg0: tensor<3x3x3xf32>, @@ -144,37 +67,6 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic( // CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[IDX1]], %[[IDX2]], %[[IDX3]]], %[[CST]] {in_bounds = [true, true, true]} : tensor<3x3x3xf32>, vector<1x1x3xf32> // CHECK: vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, 
true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> -// Same as example above, but reading into a column tensor. - -// TODO: Currently this fails to vectorise when the indices are non-constant. - -func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( - %input: tensor<3x3x3xf32>, - %output: tensor<3x1x1xf32>) -> tensor<3x1x1xf32> { - - %c0 = arith.constant 0 : index - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = ["parallel", "parallel", "parallel"] - } outs(%output : tensor<3x1x1xf32>) { - ^bb0(%out: f32): - %5 = tensor.extract %input[%c0, %c0, %c0] : tensor<3x3x3xf32> - linalg.yield %5 : f32 - } -> tensor<3x1x1xf32> - - return %res : tensor<3x1x1xf32> -} - -// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( -// CHECK-SAME: %[[INPUT:.*]]: tensor<3x3x3xf32>, -// CHECK-SAME: %[[OUTPUT:.*]]: tensor<3x1x1xf32>) -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector -// CHECK: %[[BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<3x1x1xf32> -// CHECK: %[[RES:.*]] = vector.transfer_write %[[BCAST]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32> -// CHECK: return %[[RES]] : tensor<3x1x1xf32> - // ----- func.func @vectorize_nd_tensor_extract_transfer_read_complex(%6: tensor<45x80x16xf32>, %arg0: index, %arg2: index, %arg1: index, %arg4: index, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> { @@ -620,26 +512,6 @@ func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1: // ----- -#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -func.func @vectorize_0d_tensor_extract(%arg0: tensor, %arg2: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { - %2 = linalg.generic { - indexing_maps = [#map1], - iterator_types = ["parallel", 
"parallel", "parallel"] - } outs(%arg2 : tensor<1x1x3xf32>) { - ^bb0(%arg4: f32): - %7 = tensor.extract %arg0[] : tensor - linalg.yield %7 : f32 - } -> tensor<1x1x3xf32> - return %2 : tensor<1x1x3xf32> -} - -// CHECK-LABEL: func.func @vectorize_0d_tensor_extract( -// CHECK-SAME: %[[ARG_0:.*]]: tensor -// CHECK: %[[EXTRACT:.*]] = vector.transfer_read %[[ARG_0]][], %{{.+}} : tensor -// CHECK: vector.broadcast %[[EXTRACT]] : vector to vector<1x1x3xf32> - -// ----- - #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0 + d1 + d2)> func.func @vectorize_reverse_like_tensor_extract(%arg0: tensor<1x2x3xf32>, %arg1: tensor<1x1x3xf32>, %arg2: index) -> tensor<1x1x3xf32> { @@ -674,17 +546,118 @@ func.func @vectorize_reverse_like_tensor_extract(%arg0: tensor<1x2x3xf32>, %arg1 // CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]] [%[[T3]]], %[[MASK]], %[[PASSTHRU]] // CHECK: vector.transfer_write %[[GATHER]] +//===----------------------------------------------------------------------===// +// Scalar load + broadcast +//===----------------------------------------------------------------------===// + +// ----- + +#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { + %c0 = arith.constant 1 : index + %c1 = arith.constant 2 : index + + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<1x1x3xf32>) { + ^bb0(%arg4: f32): + %1 = tensor.extract %src[%c0, %c1] : tensor<3x3xf32> + linalg.yield %1 : f32 + } -> tensor<1x1x3xf32> + + return %res : tensor<1x1x3xf32> +} + +// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_scalar_broadcast( +// CHECK-SAME: %[[SRC:.*]]: tensor<3x3xf32>, +// CHECK-SAME: %[[INIT:.*]]: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: 
%[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector +// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> +// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> + // ----- -func.func @vectorize_scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> { +#map = affine_map<() -> ()> +func.func @extract_scalar_from_0d_into_0d(%src: tensor, %init: tensor) -> tensor { + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = [] + } outs(%init : tensor) { + ^bb0(%in: f32): + %1 = tensor.extract %src[] : tensor + linalg.yield %1 : f32 + } -> tensor + + return %res : tensor +} + +// CHECK-LABEL: func.func @extract_scalar_from_0d_into_0d( +// CHECK-SAME: %[[SRC:.*]]: tensor, +// CHECK-SAME: %[[INIT:.*]]: tensor) -> tensor { +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector +// CHECK: vector.transfer_write %[[READ]], %[[INIT]][] : vector, tensor + +// ----- + +#map = affine_map<(n) -> (n)> +func.func @extract_scalar_from_0d_into_1d(%src: tensor, %init: tensor<1xf32>) -> tensor<1xf32> { + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = ["parallel"] + } outs(%init : tensor<1xf32>) { + ^bb0(%in: f32): + %1 = tensor.extract %src[] : tensor + linalg.yield %1 : f32 + } -> tensor<1xf32> + + return %res : tensor<1xf32> +} +// CHECK-LABEL: func.func @extract_scalar_from_0d_into_1d( +// CHECK-SAME: %[[SRC:.*]]: tensor, +// CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[PAD:.*]] = arith.constant 
0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector +// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1xf32> +// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32> + +// ----- + +#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @vectorize_0d_tensor_extract(%src: tensor, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { + %res = linalg.generic { + indexing_maps = [#map1], + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<1x1x3xf32>) { + ^bb0(%arg4: f32): + %1 = tensor.extract %src[] : tensor + linalg.yield %1 : f32 + } -> tensor<1x1x3xf32> + return %res : tensor<1x1x3xf32> +} + +// CHECK-LABEL: func.func @vectorize_0d_tensor_extract( +// CHECK-SAME: %[[SRC:.*]]: tensor +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %{{.+}} : tensor +// CHECK: vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> + +// ----- + +func.func @scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> { %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index %src = arith.constant dense<[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> %res = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>], - iterator_types = ["parallel", "parallel", "parallel"]} - outs(%init : tensor<1x1x4xi32>) { + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<1x1x4xi32>) { ^bb0(%out: i32): %idx = linalg.index 0 : index @@ -695,13 +668,45 @@ func.func @vectorize_scalar_read_with_broadcast_from_column_tensor(%init: tensor return %res : tensor<1x1x4xi32> } -// CHECK-LABEL: func.func @vectorize_scalar_read_with_broadcast_from_column_tensor( +// CHECK-LABEL: func.func @scalar_read_with_broadcast_from_column_tensor // CHECK-SAME: %[[INIT:.*]]: tensor<1x1x4xi32>) -> 
tensor<1x1x4xi32> { -// CHECK: %[[PAD:.*]] = arith.constant 0 : i32 -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[SRC:.*]] = arith.constant dense<{{\[\[}}0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> -// CHECK: %[[IDX_VEC:.*]] = arith.constant dense<0> : vector<1xindex> +// CHECK-DAG: %[[PAD:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[SRC:.*]] = arith.constant dense<{{\[\[}}0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> +// CHECK-DAG: %[[IDX_VEC:.*]] = arith.constant dense<0> : vector<1xindex> // CHECK: %[[IDX_ELT:.*]] = vector.extract %[[IDX_VEC]][0] : index from vector<1xindex> // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{\[}}%[[IDX_ELT]], %[[C0]]], %[[PAD]] : tensor<15x1xi32>, vector // CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x4xi32> // CHECK: %[[RES:.*]] = vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x4xi32>, tensor<1x1x4xi32> + +// ----- + +// TODO: Currently this fails to vectorise when the indices are non-constant. 
+ +#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( + %src: tensor<3x3x3xf32>, + %init: tensor<3x1x1xf32>) -> tensor<3x1x1xf32> { + + %c0 = arith.constant 0 : index + + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<3x1x1xf32>) { + ^bb0(%out: f32): + %1 = tensor.extract %src[%c0, %c0, %c0] : tensor<3x3x3xf32> + linalg.yield %1 : f32 + } -> tensor<3x1x1xf32> + + return %res : tensor<3x1x1xf32> +} + +// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( +// CHECK-SAME: %[[SRC:.*]]: tensor<3x3x3xf32>, +// CHECK-SAME: %[[INIT:.*]]: tensor<3x1x1xf32>) +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector +// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<3x1x1xf32> +// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32> diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir new file mode 100644 index 00000000000000..3809bc0bce8974 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir @@ -0,0 +1,313 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file --verify-diagnostics %s | FileCheck %s + +// CHECK-LABEL: define void @tma_store_reduce_1d( +llvm.func @tma_store_reduce_1d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.1d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.1d(ptr addrspace(3) 
%[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] 
l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, 
!llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +// CHECK-LABEL: define void @tma_store_reduce_2d( +llvm.func @tma_store_reduce_2d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.2d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce 
%tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void 
@llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +// CHECK-LABEL: define void @tma_store_reduce_3d_tile( +llvm.func @tma_store_reduce_3d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.3d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 
%[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + 
nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, 
%src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// CHECK-LABEL: define void @tma_store_reduce_3d_im2col( +llvm.func @tma_store_reduce_3d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.3d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], 
i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: 
call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : 
!llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +// CHECK-LABEL: define void @tma_store_reduce_4d_tile( +llvm.func @tma_store_reduce_4d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.4d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i32 %[[D3:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 
%[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 
%[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, 
box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// CHECK-LABEL: define void @tma_store_reduce_4d_im2col( +llvm.func @tma_store_reduce_4d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.4d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i32 %[[D3:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : 
!llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], 
i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + 
nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +// CHECK-LABEL: define void @tma_store_reduce_5d_tile( +llvm.func @tma_store_reduce_5d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %d4 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.5d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i32 %[[D3:.*]], i32 %[[D4:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 
%[[D4]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.5d(ptr 
addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = 
#nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// CHECK-LABEL: define void @tma_store_reduce_5d_im2col( +llvm.func @tma_store_reduce_5d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %d4 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.5d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i32 %[[D3:.*]], i32 %[[D4:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, 
%d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], 
i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = 
#nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir index 58282adf4dda85..194011d1621a1c 100644 --- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir @@ -55,3 +55,19 @@ llvm.func @tma_prefetch_5d_im2col(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, % nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] : !llvm.ptr llvm.return } + +// ----- + +llvm.func @tma_reduce_0d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %ch : i64) { + // expected-error @below {{expects coordinates between 1 to 5 dimension}} + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[] {redKind = #nvvm.tma_redux_kind}: !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +llvm.func @tma_reduce_2d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %ch : i64) { + // expected-error @below {{to use im2col mode, the tensor has to be at least 3-dimensional}} + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + llvm.return +} diff --git a/mlir/test/python/dialects/emitc_dialect.py b/mlir/test/python/dialects/emitc_dialect.py new file mode 100644 index 00000000000000..0c42c2d4084f19 --- /dev/null +++ b/mlir/test/python/dialects/emitc_dialect.py @@ -0,0 +1,31 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +import mlir.dialects.emitc as emitc + + +def run(f): + print("\nTEST:", f.__name__) + with Context() 
as ctx, Location.unknown(): + module = Module.create() + with InsertionPoint(module.body): + f(ctx) + print(module) + + +# CHECK-LABEL: TEST: testConstantOp +@run +def testConstantOp(ctx): + i32 = IntegerType.get_signless(32) + a = emitc.ConstantOp(result=i32, value=IntegerAttr.get(i32, 42)) + # CHECK: %{{.*}} = "emitc.constant"() <{value = 42 : i32}> : () -> i32 + + +# CHECK-LABEL: TEST: testAddOp +@run +def testAddOp(ctx): + i32 = IntegerType.get_signless(32) + lhs = emitc.ConstantOp(result=i32, value=IntegerAttr.get(i32, 0)) + rhs = emitc.ConstantOp(result=i32, value=IntegerAttr.get(i32, 0)) + a = emitc.AddOp(i32, lhs, rhs) + # CHECK: %{{.*}} = emitc.add %{{.*}}, %{{.*}} : (i32, i32) -> i32 diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 544becfa30b40f..b5be59051cdfa7 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -529,6 +529,24 @@ mlir_c_api_cc_library( ], ) +mlir_c_api_cc_library( + name = "CAPIEmitC", + srcs = [ + "lib/CAPI/Dialect/EmitC.cpp", + ], + hdrs = ["include/mlir-c/Dialect/EmitC.h"], + capi_deps = [ + ":CAPIIR", + ], + header_deps = [ + ":EmitCPassIncGen", + ], + includes = ["include"], + deps = [ + ":EmitCDialect", + ], +) + mlir_c_api_cc_library( name = "CAPILinalg", srcs = [ diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel index 254cab0db4a5d6..3d7a6164a4fa54 100644 --- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel @@ -539,6 +539,36 @@ filegroup( ], ) +##---------------------------------------------------------------------------## +# EmitC dialect. 
+##---------------------------------------------------------------------------## + +gentbl_filegroup( + name = "EmitCPyGen", + tbl_outs = [ + ( + [ + "-gen-python-op-bindings", + "-bind-dialect=emitc", + ], + "mlir/dialects/_emitc_ops_gen.py", + ), + ], + tblgen = "//mlir:mlir-tblgen", + td_file = "mlir/dialects/EmitC.td", + deps = [ + "//mlir:EmitCTdFiles", + ], +) + +filegroup( + name = "EmitCPyFiles", + srcs = [ + "mlir/dialects/emitc.py", + ":EmitCPyGen", + ], +) + ##---------------------------------------------------------------------------## # Index dialect. ##---------------------------------------------------------------------------##