diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h index d1acba0f26c78f..bd22c536c56fca 100644 --- a/bolt/include/bolt/Core/DIEBuilder.h +++ b/bolt/include/bolt/Core/DIEBuilder.h @@ -162,7 +162,7 @@ class DIEBuilder { /// Clone an attribute in reference format. void cloneDieOffsetReferenceAttribute( - DIE &Die, const DWARFUnit &U, const DWARFDie &InputDIE, + DIE &Die, DWARFUnit &U, const DWARFDie &InputDIE, const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec, uint64_t Ref); /// Clone an attribute in block format. diff --git a/bolt/include/bolt/Core/DebugNames.h b/bolt/include/bolt/Core/DebugNames.h index 0e61a0e4f9d9f0..cc4e13a481b2d6 100644 --- a/bolt/include/bolt/Core/DebugNames.h +++ b/bolt/include/bolt/Core/DebugNames.h @@ -72,8 +72,8 @@ class DWARF5AcceleratorTable { return std::move(FullTableBuffer); } /// Adds a DIE that is referenced across CUs. - void addCrossCUDie(const DIE *Die) { - CrossCUDies.insert({Die->getOffset(), Die}); + void addCrossCUDie(DWARFUnit *Unit, const DIE *Die) { + CrossCUDies.insert({Die->getOffset(), {Unit, Die}}); } /// Returns true if the DIE can generate an entry for a cross cu reference. /// This only checks TAGs of a DIE because when this is invoked DIE might not @@ -145,7 +145,7 @@ class DWARF5AcceleratorTable { llvm::DenseMap CUOffsetsToPatch; // Contains a map of Entry ID to Entry relative offset. llvm::DenseMap EntryRelativeOffsets; - llvm::DenseMap CrossCUDies; + llvm::DenseMap> CrossCUDies; /// Adds Unit to either CUList, LocalTUList or ForeignTUList. /// Input Unit being processed, and DWO ID if Unit is being processed comes /// from a DWO section. @@ -191,6 +191,29 @@ class DWARF5AcceleratorTable { void emitData(); /// Emit augmentation string. void emitAugmentationString() const; + /// Creates a new entry for a given DIE. 
+ std::optional + addEntry(DWARFUnit &DU, const DIE &CurrDie, + const std::optional &DWOID, + const std::optional &Parent, + const std::optional &Name, + const uint32_t NumberParentsInChain); + /// Returns UnitID for a given DWARFUnit. + uint32_t getUnitID(const DWARFUnit &Unit, + const std::optional &DWOID, bool &IsTU); + std::optional getName(DWARFUnit &DU, + const std::optional &DWOID, + const std::string &NameToUse, + DIEValue ValName); + /// Processes a DIE with references to other DIEs for DW_AT_name and + /// DW_AT_linkage_name resolution. + /// If DW_AT_name exists method creates a new entry for this DIE and returns + /// it. + std::optional processReferencedDie( + DWARFUnit &Unit, const DIE &Die, const std::optional &DWOID, + const std::optional &Parent, + const std::string &NameToUse, const uint32_t NumberParentsInChain, + const dwarf::Attribute &Attr); }; } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp index 414912ea1c2076..80ad583e079d49 100644 --- a/bolt/lib/Core/DIEBuilder.cpp +++ b/bolt/lib/Core/DIEBuilder.cpp @@ -622,7 +622,7 @@ DWARFDie DIEBuilder::resolveDIEReference( } void DIEBuilder::cloneDieOffsetReferenceAttribute( - DIE &Die, const DWARFUnit &U, const DWARFDie &InputDIE, + DIE &Die, DWARFUnit &U, const DWARFDie &InputDIE, const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec, uint64_t Ref) { DIE *NewRefDie = nullptr; DWARFUnit *RefUnit = nullptr; @@ -654,7 +654,7 @@ void DIEBuilder::cloneDieOffsetReferenceAttribute( // Adding referenced DIE to DebugNames to be used when entries are created // that contain cross cu references. if (DebugNamesTable.canGenerateEntryWithCrossCUReference(U, Die, AttrSpec)) - DebugNamesTable.addCrossCUDie(DieInfo.Die); + DebugNamesTable.addCrossCUDie(&U, DieInfo.Die); // no matter forward reference or backward reference, we are supposed // to calculate them in `finish` due to the possible modification of // the DIE. 
diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp index 280c7c505eeda1..f33ca83023a0b0 100644 --- a/bolt/lib/Core/DebugNames.cpp +++ b/bolt/lib/Core/DebugNames.cpp @@ -222,134 +222,113 @@ static uint64_t getEntryID(const BOLTDWARF5AccelTableData &Entry) { return reinterpret_cast(&Entry); } -std::optional -DWARF5AcceleratorTable::addAccelTableEntry( - DWARFUnit &Unit, const DIE &Die, const std::optional &DWOID, - const uint32_t NumberParentsInChain, - std::optional &Parent) { - if (Unit.getVersion() < 5 || !NeedToCreate) - return std::nullopt; - std::string NameToUse = ""; - - auto getUnitID = [&](const DWARFUnit &Unit, bool &IsTU, - uint32_t &DieTag) -> uint32_t { - IsTU = Unit.isTypeUnit(); - DieTag = Die.getTag(); - if (IsTU) { - if (DWOID) { - const uint64_t TUHash = cast(&Unit)->getTypeHash(); - auto Iter = TUHashToIndexMap.find(TUHash); - assert(Iter != TUHashToIndexMap.end() && - "Could not find TU hash in map"); - return Iter->second; - } - return LocalTUList.size() - 1; +uint32_t DWARF5AcceleratorTable::getUnitID(const DWARFUnit &Unit, + const std::optional &DWOID, + bool &IsTU) { + IsTU = Unit.isTypeUnit(); + if (IsTU) { + if (DWOID) { + const uint64_t TUHash = cast(&Unit)->getTypeHash(); + auto Iter = TUHashToIndexMap.find(TUHash); + assert(Iter != TUHashToIndexMap.end() && "Could not find TU hash in map"); + return Iter->second; } - return CUList.size() - 1; - }; + return LocalTUList.size() - 1; + } + return CUList.size() - 1; +} - if (!canProcess(Unit, Die, NameToUse, false)) +std::optional DWARF5AcceleratorTable::getName( + DWARFUnit &Unit, const std::optional &DWOID, + const std::string &NameToUse, DIEValue ValName) { + if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) && + NameToUse.empty()) return std::nullopt; - - // Addes a Unit to either CU, LocalTU or ForeignTU list the first time we - // encounter it. - // Invoking it here so that we don't add Units that don't have any entries. 
- if (&Unit != CurrentUnit) { - CurrentUnit = &Unit; - addUnit(Unit, DWOID); + std::string Name = ""; + uint64_t NameIndexOffset = 0; + if (NameToUse.empty()) { + NameIndexOffset = ValName.getDIEInteger().getValue(); + if (ValName.getForm() != dwarf::DW_FORM_strp) + NameIndexOffset = getNameOffset(BC, Unit, NameIndexOffset); + // Counts on strings end with '\0'. + Name = std::string(&StrSection.data()[NameIndexOffset]); + } else { + Name = NameToUse; } - - auto getName = [&](DIEValue ValName) -> std::optional { - if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) && - NameToUse.empty()) - return std::nullopt; - std::string Name = ""; - uint64_t NameIndexOffset = 0; - if (NameToUse.empty()) { - NameIndexOffset = ValName.getDIEInteger().getValue(); - if (ValName.getForm() != dwarf::DW_FORM_strp) - NameIndexOffset = getNameOffset(BC, Unit, NameIndexOffset); - // Counts on strings end with '\0'. - Name = std::string(&StrSection.data()[NameIndexOffset]); - } else { - Name = NameToUse; - } - auto &It = Entries[Name]; - if (It.Values.empty()) { - if (DWOID && NameToUse.empty()) { - // For DWO Unit the offset is in the .debug_str.dwo section. - // Need to find offset for the name in the .debug_str section. - llvm::hash_code Hash = llvm::hash_value(llvm::StringRef(Name)); - auto ItCache = StrCacheToOffsetMap.find(Hash); - if (ItCache == StrCacheToOffsetMap.end()) - NameIndexOffset = MainBinaryStrWriter.addString(Name); - else - NameIndexOffset = ItCache->second; - } - if (!NameToUse.empty()) + auto &It = Entries[Name]; + if (It.Values.empty()) { + if (DWOID && NameToUse.empty()) { + // For DWO Unit the offset is in the .debug_str.dwo section. + // Need to find offset for the name in the .debug_str section. 
+ llvm::hash_code Hash = llvm::hash_value(llvm::StringRef(Name)); + auto ItCache = StrCacheToOffsetMap.find(Hash); + if (ItCache == StrCacheToOffsetMap.end()) NameIndexOffset = MainBinaryStrWriter.addString(Name); - It.StrOffset = NameIndexOffset; - // This the same hash function used in DWARF5AccelTableData. - It.HashValue = caseFoldingDjbHash(Name); + else + NameIndexOffset = ItCache->second; } - return Name; - }; + if (!NameToUse.empty()) + NameIndexOffset = MainBinaryStrWriter.addString(Name); + It.StrOffset = NameIndexOffset; + // This is the same hash function used in DWARF5AccelTableData. + It.HashValue = caseFoldingDjbHash(Name); + } + return Name; +} - auto addEntry = - [&](DIEValue ValName) -> std::optional { - std::optional Name = getName(ValName); - if (!Name) - return std::nullopt; +std::optional DWARF5AcceleratorTable::addEntry( + DWARFUnit &DU, const DIE &CurrDie, const std::optional &DWOID, + const std::optional &Parent, + const std::optional &Name, + const uint32_t NumberParentsInChain) { + if (!Name) + return std::nullopt; - auto &It = Entries[*Name]; - bool IsTU = false; - uint32_t DieTag = 0; - uint32_t UnitID = getUnitID(Unit, IsTU, DieTag); - std::optional SecondIndex = std::nullopt; - if (IsTU && DWOID) { - auto Iter = CUOffsetsToPatch.find(*DWOID); - if (Iter == CUOffsetsToPatch.end()) - BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find " - "DWO ID in CU offsets for second Unit Index " - << *Name << ". For DIE at offset: " - << Twine::utohexstr(CurrentUnitOffset + Die.getOffset()) - << ".\n"; - SecondIndex = Iter->second; - } - std::optional ParentOffset = - (Parent ? std::optional(getEntryID(**Parent)) : std::nullopt); - // This will be populated later in writeEntry. - // This way only parent entries get tracked. - // Keeping memory footprint down. 
- if (ParentOffset) - EntryRelativeOffsets.insert({*ParentOffset, 0}); - bool IsParentRoot = false; - // If there is no parent and no valid Entries in parent chain this is a root - // to be marked with a flag. - if (!Parent && !NumberParentsInChain) - IsParentRoot = true; - It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData( - Die.getOffset(), ParentOffset, DieTag, UnitID, IsParentRoot, IsTU, - SecondIndex)); - return It.Values.back(); - }; + auto &It = Entries[*Name]; + bool IsTU = false; + uint32_t DieTag = CurrDie.getTag(); + uint32_t UnitID = getUnitID(DU, DWOID, IsTU); + std::optional SecondIndex = std::nullopt; + if (IsTU && DWOID) { + auto Iter = CUOffsetsToPatch.find(*DWOID); + if (Iter == CUOffsetsToPatch.end()) + BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find " + "DWO ID in CU offsets for second Unit Index " + << *Name << ". For DIE at offset: " + << Twine::utohexstr(CurrentUnitOffset + CurrDie.getOffset()) + << ".\n"; + SecondIndex = Iter->second; + } + std::optional ParentOffset = + (Parent ? std::optional(getEntryID(**Parent)) : std::nullopt); + // This will be only populated in writeEntry, in order to keep only the parent + // entries, and keep the footprint down. + if (ParentOffset) + EntryRelativeOffsets.insert({*ParentOffset, 0}); + bool IsParentRoot = false; + // If there is no parent and no valid Entries in parent chain this is a root + // to be marked with a flag. + if (!Parent && !NumberParentsInChain) + IsParentRoot = true; + It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData( + CurrDie.getOffset(), ParentOffset, DieTag, UnitID, IsParentRoot, IsTU, + SecondIndex)); + return It.Values.back(); +} - // Minor optimization not to add entry twice for DW_TAG_namespace if it has no - // DW_AT_name. 
- if (!(Die.getTag() == dwarf::DW_TAG_namespace && - !Die.findAttribute(dwarf::Attribute::DW_AT_name))) - addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name)); - // For the purposes of determining whether a debugging information entry has a - // particular attribute (such as DW_AT_name), if debugging information entry A - // has a DW_AT_specification or DW_AT_abstract_origin attribute pointing to - // another debugging information entry B, any attributes of B are considered - // to be part of A. - auto processReferencedDie = [&](const dwarf::Attribute &Attr) - -> std::optional { - const DIEValue Value = Die.findAttribute(Attr); +std::optional +DWARF5AcceleratorTable::processReferencedDie( + DWARFUnit &Unit, const DIE &Die, const std::optional &DWOID, + const std::optional &Parent, + const std::string &NameToUse, const uint32_t NumberParentsInChain, + const dwarf::Attribute &Attr) { + DIEValue Value = Die.findAttribute(Attr); + if (!Value) + return std::nullopt; + auto getReferenceDie = [&](const DIEValue &Value, const DIE *RefDieUsed) + -> std::optional> { if (!Value) return std::nullopt; - const DIE *EntryDie = nullptr; if (Value.getForm() == dwarf::DW_FORM_ref_addr) { auto Iter = CrossCUDies.find(Value.getDIEInteger().getValue()); if (Iter == CrossCUDies.end()) { @@ -359,24 +338,97 @@ DWARF5AcceleratorTable::addAccelTableEntry( << ".\n"; return std::nullopt; } - EntryDie = Iter->second; - } else { - const DIEEntry &DIEENtry = Value.getDIEEntry(); - EntryDie = &DIEENtry.getEntry(); + return Iter->second; } - - addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_linkage_name)); - return addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_name)); + const DIEEntry &DIEENtry = Value.getDIEEntry(); + return {{&Unit, &DIEENtry.getEntry()}}; }; - if (std::optional Entry = - processReferencedDie(dwarf::Attribute::DW_AT_abstract_origin)) + DIEValue AttrValLinkageName; + DIEValue AttrValName = Die.findAttribute(dwarf::Attribute::DW_AT_name); + 
DWARFUnit *RefUnit = &Unit; + const DIE *RefDieUsed = &Die; + // It is possible to have DW_TAG_subprogram only with DW_AT_linkage_name that + // DW_AT_abstract_origin/DW_AT_specification point to. + while (!AttrValName) { + std::optional> RefDUDie = + getReferenceDie(Value, RefDieUsed); + if (!RefDUDie) + break; + RefUnit = RefDUDie->first; + const DIE &RefDie = *RefDUDie->second; + RefDieUsed = &RefDie; + if (!AttrValLinkageName) + AttrValLinkageName = + RefDie.findAttribute(dwarf::Attribute::DW_AT_linkage_name); + AttrValName = RefDie.findAttribute(dwarf::Attribute::DW_AT_name); + Value = RefDie.findAttribute(dwarf::Attribute::DW_AT_abstract_origin); + if (!Value) + Value = RefDie.findAttribute(dwarf::Attribute::DW_AT_specification); + } + addEntry(Unit, Die, DWOID, Parent, + getName(*RefUnit, DWOID, NameToUse, AttrValLinkageName), + NumberParentsInChain); + return addEntry(Unit, Die, DWOID, Parent, + getName(*RefUnit, DWOID, NameToUse, AttrValName), + NumberParentsInChain); +} + +std::optional +DWARF5AcceleratorTable::addAccelTableEntry( + DWARFUnit &Unit, const DIE &Die, const std::optional &DWOID, + const uint32_t NumberParentsInChain, + std::optional &Parent) { + if (Unit.getVersion() < 5 || !NeedToCreate) + return std::nullopt; + std::string NameToUse = ""; + + if (!canProcess(Unit, Die, NameToUse, false)) + return std::nullopt; + + // Adds a Unit to either CU, LocalTU or ForeignTU list the first time we + // encounter it. + // Invoking it here so that we don't add Units that don't have any entries. + if (&Unit != CurrentUnit) { + CurrentUnit = &Unit; + addUnit(Unit, DWOID); + } + + // Minor optimization not to add entry twice for DW_TAG_namespace if it has no + // DW_AT_name. 
+  std::optional LinkageEntry = std::nullopt;
+  DIEValue NameVal = Die.findAttribute(dwarf::Attribute::DW_AT_name);
+  DIEValue LinkageNameVal =
+      Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name);
+  if (!(Die.getTag() == dwarf::DW_TAG_namespace && !NameVal))
+    LinkageEntry = addEntry(Unit, Die, DWOID, Parent,
+                            getName(Unit, DWOID, NameToUse, LinkageNameVal),
+                            NumberParentsInChain);
+
+  std::optional NameEntry =
+      addEntry(Unit, Die, DWOID, Parent,
+               getName(Unit, DWOID, NameToUse, NameVal), NumberParentsInChain);
+  if (NameEntry)
+    return NameEntry;
+
+  // The DIE doesn't have DW_AT_name or DW_AT_linkage_name, so we need to see if
+  // we can follow other attributes to find them. For the purposes of
+  // determining whether a debug information entry has a particular
+  // attribute (such as DW_AT_name), if debug information entry A has a
+  // DW_AT_specification or DW_AT_abstract_origin attribute pointing to another
+  // debug information entry B, any attributes of B are considered to be
+  // part of A.
+  if (std::optional Entry = processReferencedDie(
+          Unit, Die, DWOID, Parent, NameToUse, NumberParentsInChain,
+          dwarf::Attribute::DW_AT_abstract_origin))
     return *Entry;
-  if (std::optional Entry =
-          processReferencedDie(dwarf::Attribute::DW_AT_specification))
+  if (std::optional Entry = processReferencedDie(
+          Unit, Die, DWOID, Parent, NameToUse, NumberParentsInChain,
+          dwarf::Attribute::DW_AT_specification))
     return *Entry;
+  // This point can be hit by DW_TAG_variable that has no DW_AT_name.
+  return std::nullopt;
 }
 
 /// Algorithm from llvm implementation.
diff --git a/bolt/test/X86/dwarf5-debug-names-abstract-origin-linkage-name-only.s b/bolt/test/X86/dwarf5-debug-names-abstract-origin-linkage-name-only.s new file mode 100644 index 00000000000000..8c9817ce91edb3 --- /dev/null +++ b/bolt/test/X86/dwarf5-debug-names-abstract-origin-linkage-name-only.s @@ -0,0 +1,568 @@ +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %tmain.o +# RUN: %clang %cflags -gdwarf-5 %tmain.o -o %tmain.exe +# RUN: llvm-bolt %tmain.exe -o %tmain.exe.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-names %tmain.exe.bolt > %tlog.txt +# RUN: cat %tlog.txt | FileCheck -check-prefix=BOLT %s + +## Tests that bolt can correctly generate debug_names when there is an DW_TAG_inlined_subroutine +## with DW_AT_abstract_origin that points to DW_TAG_subprogram that only has DW_AT_linkage_name. + +# BOLT: Name Index @ 0x0 { +# BOLT-NEXT: Header { +# BOLT-NEXT: Length: 0xA2 +# BOLT-NEXT: Format: DWARF32 +# BOLT-NEXT: Version: 5 +# BOLT-NEXT: CU count: 1 +# BOLT-NEXT: Local TU count: 0 +# BOLT-NEXT: Foreign TU count: 0 +# BOLT-NEXT: Bucket count: 4 +# BOLT-NEXT: Name count: 4 +# BOLT-NEXT: Abbreviations table size: 0x19 +# BOLT-NEXT: Augmentation: 'BOLT' +# BOLT-NEXT: } +# BOLT-NEXT: Compilation Unit offsets [ +# BOLT-NEXT: CU[0]: 0x00000000 +# BOLT-NEXT: ] +# BOLT-NEXT: Abbreviations [ +# BOLT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_ref4 +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 0 
[ +# BOLT-NEXT: Name 1 { +# BOLT-NEXT: Hash: 0xB888030 +# BOLT-NEXT: String: {{.+}} "int" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: 0x1 +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x0000004a +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 1 [ +# BOLT-NEXT: EMPTY +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 2 [ +# BOLT-NEXT: Name 2 { +# BOLT-NEXT: Hash: 0x7C9A7F6A +# BOLT-NEXT: String: {{.+}} "main" +# BOLT-NEXT: Entry @ [[REF1:0x[0-9a-f]*]] { +# BOLT-NEXT: Abbrev: [[ABBREV2]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x0000004e +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 3 { +# BOLT-NEXT: Hash: 0xB5063CFE +# BOLT-NEXT: String: {{.+}} "_Z3fooi" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV2]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000024 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ 0x96 { +# BOLT-NEXT: Abbrev: [[ABBREV3]] +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: 0x0000007e +# BOLT-NEXT: DW_IDX_parent: Entry @ [[REF1]] +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 3 [ +# BOLT-NEXT: Name 4 { +# BOLT-NEXT: Hash: 0x7C952063 +# BOLT-NEXT: String: {{.+}} "char" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x0000009f +# BOLT-NEXT: DW_IDX_parent: + +## int foo(int i) { +## return i ++; +## } +## int main(int argc, char* argv[]) { +## int i = 0; +## [[clang::always_inline]] i = foo(argc); +## return i; +## } +## Test was manually modified so that DW_TAG_subprogram only had DW_AT_linkage_name. 
+ + .text + .file "main.cpp" + .globl _Z3fooi + .p2align 4, 0x90 + .type _Z3fooi,@function +_Z3fooi: +.Lfunc_begin0: + .file 0 "/abstractChain" "main.cpp" md5 0x2e29d55fc1320801a8057a4c50643ea1 + .loc 0 1 0 + .loc 0 2 12 prologue_end + .loc 0 2 3 epilogue_begin is_stmt 0 + retq +.Lfunc_end0: + .size _Z3fooi, .Lfunc_end0-_Z3fooi + + .globl main + .p2align 4, 0x90 + .type main,@function +main: +.Lfunc_begin1: + .loc 0 4 0 is_stmt 1 +.Ltmp2: + .loc 0 5 7 prologue_end + .loc 0 6 36 + movl -12(%rbp), %eax +.Ltmp3: + .loc 0 2 12 +.Ltmp4: + .loc 0 6 30 + .loc 0 7 10 + .loc 0 7 3 epilogue_begin is_stmt 0 + retq +.Ltmp5: +.Lfunc_end1: + .size main, .Lfunc_end1-main + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + 
.byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + #.byte 3 # DW_AT_name + #.byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 32 # DW_AT_inline + .byte 33 # DW_FORM_implicit_const + .byte 1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # 
DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 1 # DW_CHILDREN_yes + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x98 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0x15 DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 56 # DW_AT_abstract_origin + .byte 3 # Abbrev [3] 0x2f:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 64 # DW_AT_abstract_origin Manually Modified + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x38:0x12 DW_TAG_subprogram + .byte 3 # DW_AT_linkage_name + #.byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 74 # DW_AT_type + # DW_AT_external + # DW_AT_inline + .byte 5 # Abbrev [5] 0x41:0x8 DW_TAG_formal_parameter + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 74 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x4a:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 7 # Abbrev [7] 0x4e:0x47 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 73 # DW_AT_type Manually Modified + # DW_AT_external + .byte 8 # Abbrev [8] 0x5d:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 73 # DW_AT_type Manually Modified + .byte 8 # Abbrev [8] 0x68:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 104 + .byte 9 # DW_AT_name + .byte 0 # 
DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 148 # DW_AT_type Manually Modified + .byte 9 # Abbrev [9] 0x73:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 100 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 73 # DW_AT_type Manually Modified + .byte 10 # Abbrev [10] 0x7e:0x16 DW_TAG_inlined_subroutine + .long 56 # DW_AT_abstract_origin + .byte 2 # DW_AT_low_pc + .long .Ltmp4-.Ltmp3 # DW_AT_high_pc + .byte 0 # DW_AT_call_file + .byte 6 # DW_AT_call_line + .byte 32 # DW_AT_call_column + .byte 3 # Abbrev [3] 0x8b:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 64 # DW_AT_abstract_origin Manually Modified + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 11 # Abbrev [11] 0x95:0x5 DW_TAG_pointer_type + .long 153 # DW_AT_type Manually Modified + .byte 11 # Abbrev [11] 0x9a:0x5 DW_TAG_pointer_type + .long 158 # DW_AT_type Manually Modified + .byte 6 # Abbrev [6] 0x9f:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 48 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.0.0git" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=24 +.Linfo_string2: + .asciz "/abstractChain" # string offset=33 +.Linfo_string3: + .asciz "foo" # string offset=85 +.Linfo_string4: + .asciz "_Z3fooi" # string offset=89 +.Linfo_string5: + .asciz "int" # string offset=97 +.Linfo_string6: + .asciz "i" # string offset=101 +.Linfo_string7: + .asciz "main" # string offset=103 +.Linfo_string8: + .asciz "argc" # string offset=108 +.Linfo_string9: + .asciz "argv" # string offset=113 +.Linfo_string10: + .asciz "char" # string offset=118 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + 
.long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string4 + .long .Linfo_string3 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Ltmp3 +.Ldebug_addr_end0: + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: unit length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 5 # Header: bucket count + .long 5 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 8 # Header: augmentation string size + .ascii "LLVM0700" # Header: augmentation string + .long .Lcu_begin0 # Compilation unit 0 + .long 0 # Bucket 0 + .long 1 # Bucket 1 + .long 0 # Bucket 2 + .long 3 # Bucket 3 + .long 4 # Bucket 4 + .long 2090499946 # Hash in Bucket 1 + .long -1257882370 # Hash in Bucket 1 + .long 193495088 # Hash in Bucket 3 + .long 193491849 # Hash in Bucket 4 + .long 2090147939 # Hash in Bucket 4 + .long .Linfo_string7 # String in Bucket 1: main + .long .Linfo_string4 # String in Bucket 1: _Z3fooi + .long .Linfo_string5 # String in Bucket 3: int + .long .Linfo_string3 # String in Bucket 4: foo + .long .Linfo_string10 # String in Bucket 4: char + .long .Lnames3-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames1-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames2-.Lnames_entries0 # Offset in Bucket 3 + .long .Lnames0-.Lnames_entries0 # Offset in Bucket 4 + .long .Lnames4-.Lnames_entries0 # Offset in Bucket 4 +.Lnames_abbrev_start0: + .byte 1 # Abbrev 
code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 2 # Abbrev code + .byte 29 # DW_TAG_inlined_subroutine + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 19 # DW_FORM_ref4 + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 3 # Abbrev code + .byte 36 # DW_TAG_base_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames3: +.L2: + .byte 1 # Abbreviation code + .long 78 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: main +.Lnames1: +.L0: + .byte 1 # Abbreviation code + .long 35 # DW_IDX_die_offset +.L3: # DW_IDX_parent + .byte 2 # Abbreviation code + .long 126 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent + .byte 0 # End of list: _Z3fooi +.Lnames2: +.L1: + .byte 3 # Abbreviation code + .long 74 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: int +.Lnames0: + .byte 1 # Abbreviation code + .long 35 # DW_IDX_die_offset + .byte 2 # DW_IDX_parent + # Abbreviation code + .long 126 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent + .byte 0 # End of list: foo +.Lnames4: +.L4: + .byte 3 # Abbreviation code + .long 159 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: char + .p2align 2, 0x0 +.Lnames_end0: + .ident "clang version 20.0.0git" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/dwarf5-debug-names-abstract-origin-specification.s b/bolt/test/X86/dwarf5-debug-names-abstract-origin-specification.s new file mode 100644 index 00000000000000..2075640d6761cd --- /dev/null +++ 
b/bolt/test/X86/dwarf5-debug-names-abstract-origin-specification.s @@ -0,0 +1,829 @@ +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %tmain.o +# RUN: %clang %cflags -gdwarf-5 %tmain.o -o %tmain.exe +# RUN: llvm-bolt %tmain.exe -o %tmain.exe.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-names %tmain.exe.bolt > %tlog.txt +# RUN: cat %tlog.txt | FileCheck -check-prefix=BOLT %s + +## This test checks that BOLT correctly generates .debug_names section when there is transitive +## DW_AT_name/DW_AT_linkage_name resolution. + +# BOLT: Abbreviations [ +# BOLT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_class_type +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_ref4 +# BOLT-NEXT: } +# BOLT-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] { +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +# BOLT-NEXT: DW_IDX_parent: DW_FORM_flag_present +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 0 [ +# BOLT-NEXT: Name 1 { +# BOLT-NEXT: Hash: 0xD72418AA +# BOLT-NEXT: String: {{.+}} "_ZL3fooi" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x000000ba +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 1 [ +# BOLT-NEXT: Name 2 { +# BOLT-NEXT: Hash: 0x10614A06 +# BOLT-NEXT: String: {{.+}} "State" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV2]] +# BOLT-NEXT: Tag: DW_TAG_class_type +# BOLT-NEXT: 
DW_IDX_die_offset: 0x0000002b +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ [[REF1:0x[0-9a-f]*]] { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000089 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV3]] +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: 0x000000a3 +# BOLT-NEXT: DW_IDX_parent: Entry @ [[REF1]] +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 2 [ +# BOLT-NEXT: EMPTY +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 3 [ +# BOLT-NEXT: Name 3 { +# BOLT-NEXT: Hash: 0xB888030 +# BOLT-NEXT: String: {{.+}} "int" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV4]] +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x00000085 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 4 { +# BOLT-NEXT: Hash: 0x7C9A7F6A +# BOLT-NEXT: String: {{.+}} "main" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000042 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 4 [ +# BOLT-NEXT: EMPTY +# BOLT-NEXT: ] +# BOLT-NEXT: Bucket 5 [ +# BOLT-NEXT: Name 5 { +# BOLT-NEXT: Hash: 0xB887389 +# BOLT-NEXT: String: {{.+}} "foo" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x000000ba +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 6 { +# BOLT-NEXT: Hash: 0x7C952063 +# BOLT-NEXT: String: {{.+}} "char" +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV4]] +# BOLT-NEXT: Tag: DW_TAG_base_type +# BOLT-NEXT: DW_IDX_die_offset: 0x000000d9 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: } +# BOLT-NEXT: Name 7 { +# BOLT-NEXT: Hash: 0xFBBDC812 +# BOLT-NEXT: String: {{.+}} "_ZN5StateC2Ev" +# 
BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV1]] +# BOLT-NEXT: Tag: DW_TAG_subprogram +# BOLT-NEXT: DW_IDX_die_offset: 0x00000089 +# BOLT-NEXT: DW_IDX_parent: +# BOLT-NEXT: } +# BOLT-NEXT: Entry @ {{.+}} { +# BOLT-NEXT: Abbrev: [[ABBREV3]] +# BOLT-NEXT: Tag: DW_TAG_inlined_subroutine +# BOLT-NEXT: DW_IDX_die_offset: 0x000000a3 +# BOLT-NEXT: DW_IDX_parent: Entry @ [[REF1]] + +## static int foo(int i) { +## return i ++; +## } +## class State { +## public: +## State() {[[clang::always_inline]] foo(3);} +## }; +## +## int main(int argc, char* argv[]) { +## State S; +## return 0; +## } + +## Test manually modified to redirect DW_TAG_inlined_subroutine to DW_TAG_subprogram with DW_AT_specification. + + .text + .file "main.cpp" + .file 0 "abstractChainTwo" "main.cpp" md5 0x17ad726b6a1fd49ee59559a1302da539 + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .loc 0 9 0 # main.cpp:9:0 +.Ltmp0: + .loc 0 10 9 prologue_end # main.cpp:10:9 + callq _ZN5StateC2Ev + .loc 0 11 3 # main.cpp:11:3 + .loc 0 11 3 epilogue_begin is_stmt 0 # main.cpp:11:3 + retq +.Ltmp1: +.Lfunc_end0: + .size main, .Lfunc_end0-main + # -- End function + .section .text._ZN5StateC2Ev,"axG",@progbits,_ZN5StateC2Ev,comdat + .weak _ZN5StateC2Ev # -- Begin function _ZN5StateC2Ev + .p2align 4, 0x90 + .type _ZN5StateC2Ev,@function +_ZN5StateC2Ev: # @_ZN5StateC2Ev +.Lfunc_begin1: + .loc 0 6 0 is_stmt 1 # main.cpp:6:0 + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -16(%rbp) + movl $3, -4(%rbp) +.Ltmp2: + .loc 0 2 12 prologue_end # main.cpp:2:12 + movl -4(%rbp), %eax + addl $1, %eax + movl %eax, -4(%rbp) +.Ltmp3: + .loc 0 6 44 epilogue_begin # main.cpp:6:44 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp4: +.Lfunc_end1: + .size _ZN5StateC2Ev, .Lfunc_end1-_ZN5StateC2Ev + .cfi_endproc + # -- End function + .text + .p2align 4, 0x90 # -- Begin 
function _ZL3fooi + .type _ZL3fooi,@function +_ZL3fooi: # @_ZL3fooi +.Lfunc_begin2: + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp5: + .loc 0 2 12 prologue_end # main.cpp:2:12 + movl -4(%rbp), %eax + movl %eax, %ecx + addl $1, %ecx + movl %ecx, -4(%rbp) + .loc 0 2 3 epilogue_begin is_stmt 0 # main.cpp:2:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp6: +.Lfunc_end2: + .size _ZL3fooi, .Lfunc_end2-_ZL3fooi + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 2 # DW_TAG_class_type + .byte 1 # DW_CHILDREN_yes + .byte 54 # DW_AT_calling_convention + .byte 11 # DW_FORM_data1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + 
.byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 50 # DW_AT_accessibility + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 52 # DW_AT_artificial + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # 
EOM(2) + .byte 9 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 32 # DW_AT_inline + .byte 33 # DW_FORM_implicit_const + .byte 1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 12 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 100 # DW_AT_object_pointer + .byte 19 # DW_FORM_ref4 + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 71 # DW_AT_specification + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 13 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 52 # DW_AT_artificial + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 14 # Abbreviation Code + .byte 29 # 
DW_TAG_inlined_subroutine + .byte 1 # DW_CHILDREN_yes + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 15 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 16 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0xd7 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .byte 0 # DW_AT_ranges + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] 0x2b:0x12 DW_TAG_class_type + .byte 5 # DW_AT_calling_convention + .byte 3 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .byte 3 # Abbrev [3] 0x31:0xb DW_TAG_subprogram + .byte 3 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 6 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 1 # DW_AT_accessibility + # DW_ACCESS_public + .byte 4 # Abbrev [4] 0x36:0x5 DW_TAG_formal_parameter + .long 61 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x3d:0x5 DW_TAG_pointer_type + .long 43 # DW_AT_type + .byte 6 # Abbrev [6] 0x42:0x31 DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 133 # DW_AT_type + # DW_AT_external + .byte 7 # Abbrev [7] 0x51:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 10 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 133 # DW_AT_type + .byte 7 # Abbrev [7] 0x5c:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 11 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 207 # DW_AT_type + .byte 8 # Abbrev [8] 0x67:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 111 + .byte 13 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 10 # DW_AT_decl_line + .long 43 # DW_AT_type + .byte 
0 # End Of Children Mark + .byte 9 # Abbrev [9] 0x73:0x12 DW_TAG_subprogram + .byte 4 # DW_AT_linkage_name + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 133 # DW_AT_type + # DW_AT_inline + .byte 10 # Abbrev [10] 0x7c:0x8 DW_TAG_formal_parameter + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 133 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 11 # Abbrev [11] 0x85:0x4 DW_TAG_base_type + .byte 6 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 12 # Abbrev [12] 0x89:0x31 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 154 # DW_AT_object_pointer + .byte 9 # DW_AT_linkage_name + .long 49 # DW_AT_specification + .byte 13 # Abbrev [13] 0x9a:0x9 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 14 # DW_AT_name + .long 221 # DW_AT_type + # DW_AT_artificial + .byte 14 # Abbrev [14] 0xa3:0x16 DW_TAG_inlined_subroutine + .long 137 # DW_AT_abstract_origin Manually Modified + .byte 2 # DW_AT_low_pc + .long .Ltmp3-.Ltmp2 # DW_AT_high_pc + .byte 0 # DW_AT_call_file + .byte 6 # DW_AT_call_line + .byte 37 # DW_AT_call_column + .byte 15 # Abbrev [15] 0xb0:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 124 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 16 # Abbrev [16] 0xba:0x15 DW_TAG_subprogram + .byte 3 # DW_AT_low_pc + .long .Lfunc_end2-.Lfunc_begin2 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 115 # DW_AT_abstract_origin + .byte 15 # Abbrev [15] 0xc6:0x8 DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .long 124 # DW_AT_abstract_origin + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0xcf:0x5 DW_TAG_pointer_type + .long 212 # DW_AT_type + .byte 5 # Abbrev [5] 0xd4:0x5 DW_TAG_pointer_type + .long 217 # 
DW_AT_type + .byte 11 # Abbrev [11] 0xd9:0x4 DW_TAG_base_type + .byte 12 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 5 # Abbrev [5] 0xdd:0x5 DW_TAG_pointer_type + .long 43 # DW_AT_type + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 1 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 1 # DW_RLE_base_addressx + .byte 0 # base address index + .byte 4 # DW_RLE_offset_pair + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end0-.Lfunc_begin0 # ending offset + .byte 4 # DW_RLE_offset_pair + .uleb128 .Lfunc_begin2-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end2-.Lfunc_begin0 # ending offset + .byte 3 # DW_RLE_startx_length + .byte 1 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_str_offsets,"",@progbits + .long 64 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.0.0git" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=24 +.Linfo_string2: + .asciz "abstractChainTwo" # string offset=33 +.Linfo_string3: + .asciz "State" # string offset=88 +.Linfo_string4: + .asciz "main" # string offset=94 +.Linfo_string5: + .asciz "_ZL3fooi" # string offset=99 +.Linfo_string6: + .asciz "foo" # string offset=108 +.Linfo_string7: + .asciz "int" # string offset=112 +.Linfo_string8: + .asciz "i" # string offset=116 +.Linfo_string9: + .asciz "_ZN5StateC2Ev" # string offset=118 +.Linfo_string10: + .asciz "argc" # string offset=132 +.Linfo_string11: + .asciz "argv" # string offset=137 +.Linfo_string12: + .asciz "char" # 
string offset=142 +.Linfo_string13: + .asciz "S" # string offset=147 +.Linfo_string14: + .asciz "this" # string offset=149 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string4 + .long .Linfo_string9 + .long .Linfo_string10 + .long .Linfo_string11 + .long .Linfo_string12 + .long .Linfo_string13 + .long .Linfo_string14 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Ltmp2 + .quad .Lfunc_begin2 +.Ldebug_addr_end0: + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: unit length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 7 # Header: bucket count + .long 7 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 8 # Header: augmentation string size + .ascii "LLVM0700" # Header: augmentation string + .long .Lcu_begin0 # Compilation unit 0 + .long 1 # Bucket 0 + .long 2 # Bucket 1 + .long 0 # Bucket 2 + .long 3 # Bucket 3 + .long 0 # Bucket 4 + .long 5 # Bucket 5 + .long 0 # Bucket 6 + .long -685500246 # Hash in Bucket 0 + .long 274811398 # Hash in Bucket 1 + .long 193495088 # Hash in Bucket 3 + .long 2090499946 # Hash in Bucket 3 + .long 193491849 # Hash in Bucket 5 + .long 2090147939 # Hash in Bucket 5 + .long -71448558 # Hash in Bucket 5 + .long .Linfo_string5 # String in Bucket 0: _ZL3fooi + .long .Linfo_string3 # String in Bucket 1: State + .long .Linfo_string7 # String in Bucket 
3: int + .long .Linfo_string4 # String in Bucket 3: main + .long .Linfo_string6 # String in Bucket 5: foo + .long .Linfo_string12 # String in Bucket 5: char + .long .Linfo_string9 # String in Bucket 5: _ZN5StateC2Ev + .long .Lnames5-.Lnames_entries0 # Offset in Bucket 0 + .long .Lnames0-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames2-.Lnames_entries0 # Offset in Bucket 3 + .long .Lnames1-.Lnames_entries0 # Offset in Bucket 3 + .long .Lnames4-.Lnames_entries0 # Offset in Bucket 5 + .long .Lnames6-.Lnames_entries0 # Offset in Bucket 5 + .long .Lnames3-.Lnames_entries0 # Offset in Bucket 5 +.Lnames_abbrev_start0: + .byte 1 # Abbrev code + .byte 29 # DW_TAG_inlined_subroutine + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 19 # DW_FORM_ref4 + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 2 # Abbrev code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 3 # Abbrev code + .byte 2 # DW_TAG_class_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 4 # Abbrev code + .byte 36 # DW_TAG_base_type + .byte 3 # DW_IDX_die_offset + .byte 19 # DW_FORM_ref4 + .byte 4 # DW_IDX_parent + .byte 25 # DW_FORM_flag_present + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames5: +.L1: + .byte 1 # Abbreviation code + .long 163 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent +.L0: + .byte 2 # Abbreviation code + .long 186 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: _ZL3fooi +.Lnames0: +.L5: + .byte 3 # Abbreviation code + .long 43 # DW_IDX_die_offset +.L2: # DW_IDX_parent + .byte 2 # Abbreviation code + .long 137 # DW_IDX_die_offset + 
.byte 0 # DW_IDX_parent + # End of list: State +.Lnames2: +.L4: + .byte 4 # Abbreviation code + .long 133 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: int +.Lnames1: +.L6: + .byte 2 # Abbreviation code + .long 66 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: main +.Lnames4: + .byte 1 # Abbreviation code + .long 163 # DW_IDX_die_offset + .long .L2-.Lnames_entries0 # DW_IDX_parent + .byte 2 # Abbreviation code + .long 186 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: foo +.Lnames6: +.L3: + .byte 4 # Abbreviation code + .long 217 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: char +.Lnames3: + .byte 2 # Abbreviation code + .long 137 # DW_IDX_die_offset + .byte 0 # DW_IDX_parent + # End of list: _ZN5StateC2Ev + .p2align 2, 0x0 +.Lnames_end0: + .ident "clang version 20.0.0git" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp index f2ff27d85fb004..55ca4809f058ab 100644 --- a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp @@ -12,20 +12,43 @@ #include "../utils/OptionsUtils.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include using namespace clang::ast_matchers; +using clang::ast_matchers::internal::Matcher; namespace clang::tidy::bugprone { namespace { -AST_MATCHER_P(QualType, hasCleanType, ast_matchers::internal::Matcher, - InnerMatcher) { +AST_MATCHER_P(QualType, hasCleanType, Matcher, InnerMatcher) { return InnerMatcher.matches( Node.getNonReferenceType().getUnqualifiedType().getCanonicalType(), Finder, Builder); } +constexpr std::array NameList{ + "::std::make_unique", + "::std::make_shared", +}; + +Matcher constructFrom(Matcher TypeMatcher, + 
Matcher ArgumentMatcher) { + return expr( + anyOf( + // construct optional + cxxConstructExpr(argumentCountIs(1U), hasType(TypeMatcher), + hasArgument(0U, ArgumentMatcher)), + // known template methods in std + callExpr(argumentCountIs(1), + callee(functionDecl( + matchers::matchesAnyListedName(NameList), + hasTemplateArgument(0, refersToType(TypeMatcher)))), + hasArgument(0, ArgumentMatcher))), + unless(anyOf(hasAncestor(typeLoc()), + hasAncestor(expr(matchers::hasUnevaluatedContext()))))); +} + } // namespace OptionalValueConversionCheck::OptionalValueConversionCheck( @@ -67,12 +90,9 @@ void OptionalValueConversionCheck::registerMatchers(MatchFinder *Finder) { callExpr(argumentCountIs(1), callee(functionDecl(hasName("::std::move"))), hasArgument(0, ignoringImpCasts(OptionalDereferenceMatcher))); Finder->addMatcher( - cxxConstructExpr( - argumentCountIs(1U), hasType(BindOptionalType), - hasArgument(0U, ignoringImpCasts(anyOf(OptionalDereferenceMatcher, - StdMoveCallMatcher))), - unless(anyOf(hasAncestor(typeLoc()), - hasAncestor(expr(matchers::hasUnevaluatedContext()))))) + expr(constructFrom(BindOptionalType, + ignoringImpCasts(anyOf(OptionalDereferenceMatcher, + StdMoveCallMatcher)))) .bind("expr"), this); } diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt index c919d49b42873a..bab1167fb15ff2 100644 --- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt @@ -36,6 +36,7 @@ add_clang_library(clangTidyModernizeModule STATIC UseEmplaceCheck.cpp UseEqualsDefaultCheck.cpp UseEqualsDeleteCheck.cpp + UseIntegerSignComparisonCheck.cpp UseNodiscardCheck.cpp UseNoexceptCheck.cpp UseNullptrCheck.cpp diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp index 18607593320635..fc46c72982fdce 100644 --- 
a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp @@ -37,6 +37,7 @@ #include "UseEmplaceCheck.h" #include "UseEqualsDefaultCheck.h" #include "UseEqualsDeleteCheck.h" +#include "UseIntegerSignComparisonCheck.h" #include "UseNodiscardCheck.h" #include "UseNoexceptCheck.h" #include "UseNullptrCheck.h" @@ -76,6 +77,8 @@ class ModernizeModule : public ClangTidyModule { CheckFactories.registerCheck("modernize-pass-by-value"); CheckFactories.registerCheck( "modernize-use-designated-initializers"); + CheckFactories.registerCheck( + "modernize-use-integer-sign-comparison"); CheckFactories.registerCheck("modernize-use-ranges"); CheckFactories.registerCheck( "modernize-use-starts-ends-with"); diff --git a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp new file mode 100644 index 00000000000000..8f807bc0a96d56 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp @@ -0,0 +1,171 @@ +//===--- UseIntegerSignComparisonCheck.cpp - clang-tidy -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UseIntegerSignComparisonCheck.h" +#include "clang/AST/Expr.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Lex/Lexer.h" + +using namespace clang::ast_matchers; +using namespace clang::ast_matchers::internal; + +namespace clang::tidy::modernize { + +/// Find if the passed type is the actual "char" type, +/// not applicable to explicit "signed char" or "unsigned char" types. 
+static bool isActualCharType(const clang::QualType &Ty) { + using namespace clang; + const Type *DesugaredType = Ty->getUnqualifiedDesugaredType(); + if (const auto *BT = llvm::dyn_cast(DesugaredType)) + return (BT->getKind() == BuiltinType::Char_U || + BT->getKind() == BuiltinType::Char_S); + return false; +} + +namespace { +AST_MATCHER(clang::QualType, isActualChar) { + return clang::tidy::modernize::isActualCharType(Node); +} +} // namespace + +static BindableMatcher +intCastExpression(bool IsSigned, + const std::string &CastBindName = std::string()) { + // std::cmp_{} functions trigger a compile-time error if either LHS or RHS + // is a non-integer type, char, enum or bool + // (unsigned char/ signed char are Ok and can be used). + auto IntTypeExpr = expr(hasType(hasCanonicalType(qualType( + isInteger(), IsSigned ? isSignedInteger() : isUnsignedInteger(), + unless(isActualChar()), unless(booleanType()), unless(enumType()))))); + + const auto ImplicitCastExpr = + CastBindName.empty() ? 
implicitCastExpr(hasSourceExpression(IntTypeExpr)) + : implicitCastExpr(hasSourceExpression(IntTypeExpr)) + .bind(CastBindName); + + const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr)); + const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr)); + const auto FunctionalCastExpr = cxxFunctionalCastExpr(has(ImplicitCastExpr)); + + return expr(anyOf(ImplicitCastExpr, CStyleCastExpr, StaticCastExpr, + FunctionalCastExpr)); +} + +static StringRef parseOpCode(BinaryOperator::Opcode Code) { + switch (Code) { + case BO_LT: + return "cmp_less"; + case BO_GT: + return "cmp_greater"; + case BO_LE: + return "cmp_less_equal"; + case BO_GE: + return "cmp_greater_equal"; + case BO_EQ: + return "cmp_equal"; + case BO_NE: + return "cmp_not_equal"; + default: + return ""; + } +} + +UseIntegerSignComparisonCheck::UseIntegerSignComparisonCheck( + StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + IncludeInserter(Options.getLocalOrGlobal("IncludeStyle", + utils::IncludeSorter::IS_LLVM), + areDiagsSelfContained()) {} + +void UseIntegerSignComparisonCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "IncludeStyle", IncludeInserter.getStyle()); +} + +void UseIntegerSignComparisonCheck::registerMatchers(MatchFinder *Finder) { + const auto SignedIntCastExpr = intCastExpression(true, "sIntCastExpression"); + const auto UnSignedIntCastExpr = intCastExpression(false); + + // Flag all operators "==", "<=", ">=", "<", ">", "!=" + // that are used between signed/unsigned + const auto CompareOperator = + binaryOperator(hasAnyOperatorName("==", "<=", ">=", "<", ">", "!="), + hasOperands(SignedIntCastExpr, UnSignedIntCastExpr), + unless(isInTemplateInstantiation())) + .bind("intComparison"); + + Finder->addMatcher(CompareOperator, this); +} + +void UseIntegerSignComparisonCheck::registerPPCallbacks( + const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) { + 
IncludeInserter.registerPreprocessor(PP); +} + +void UseIntegerSignComparisonCheck::check( + const MatchFinder::MatchResult &Result) { + const auto *SignedCastExpression = + Result.Nodes.getNodeAs("sIntCastExpression"); + assert(SignedCastExpression); + + // Ignore the match if we know that the signed int value is not negative. + Expr::EvalResult EVResult; + if (!SignedCastExpression->isValueDependent() && + SignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult, + *Result.Context)) { + const llvm::APSInt SValue = EVResult.Val.getInt(); + if (SValue.isNonNegative()) + return; + } + + const auto *BinaryOp = + Result.Nodes.getNodeAs("intComparison"); + if (BinaryOp == nullptr) + return; + + const BinaryOperator::Opcode OpCode = BinaryOp->getOpcode(); + + const Expr *LHS = BinaryOp->getLHS()->IgnoreImpCasts(); + const Expr *RHS = BinaryOp->getRHS()->IgnoreImpCasts(); + if (LHS == nullptr || RHS == nullptr) + return; + const Expr *SubExprLHS = nullptr; + const Expr *SubExprRHS = nullptr; + SourceRange R1 = SourceRange(LHS->getBeginLoc()); + SourceRange R2 = SourceRange(BinaryOp->getOperatorLoc()); + SourceRange R3 = SourceRange(Lexer::getLocForEndOfToken( + RHS->getEndLoc(), 0, *Result.SourceManager, getLangOpts())); + if (const auto *LHSCast = llvm::dyn_cast(LHS)) { + SubExprLHS = LHSCast->getSubExpr(); + R1 = SourceRange(LHS->getBeginLoc(), + SubExprLHS->getBeginLoc().getLocWithOffset(-1)); + R2.setBegin(Lexer::getLocForEndOfToken( + SubExprLHS->getEndLoc(), 0, *Result.SourceManager, getLangOpts())); + } + if (const auto *RHSCast = llvm::dyn_cast(RHS)) { + SubExprRHS = RHSCast->getSubExpr(); + R2.setEnd(SubExprRHS->getBeginLoc().getLocWithOffset(-1)); + } + DiagnosticBuilder Diag = + diag(BinaryOp->getBeginLoc(), + "comparison between 'signed' and 'unsigned' integers"); + const std::string CmpNamespace = ("std::" + parseOpCode(OpCode)).str(); + const std::string CmpHeader = ""; + // Prefer modernize-use-integer-sign-comparison when C++20 is available! 
+ Diag << FixItHint::CreateReplacement( + CharSourceRange(R1, SubExprLHS != nullptr), + llvm::Twine(CmpNamespace + "(").str()); + Diag << FixItHint::CreateReplacement(R2, ","); + Diag << FixItHint::CreateReplacement(CharSourceRange::getCharRange(R3), ")"); + + // If there is no include for cmp_{*} functions, we'll add it. + Diag << IncludeInserter.createIncludeInsertion( + Result.SourceManager->getFileID(BinaryOp->getBeginLoc()), CmpHeader); +} + +} // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h new file mode 100644 index 00000000000000..a1074829d6eca5 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.h @@ -0,0 +1,42 @@ +//===--- UseIntegerSignComparisonCheck.h - clang-tidy -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEINTEGERSIGNCOMPARISONCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEINTEGERSIGNCOMPARISONCHECK_H + +#include "../ClangTidyCheck.h" +#include "../utils/IncludeInserter.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +namespace clang::tidy::modernize { + +/// Replace comparisons between signed and unsigned integers with their safe +/// C++20 ``std::cmp_*`` alternative, if available. 
+/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/modernize/use-integer-sign-comparison.html +class UseIntegerSignComparisonCheck : public ClangTidyCheck { +public: + UseIntegerSignComparisonCheck(StringRef Name, ClangTidyContext *Context); + + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, + Preprocessor *ModuleExpanderPP) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus20; + } + +private: + utils::IncludeInserter IncludeInserter; +}; + +} // namespace clang::tidy::modernize + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USEINTEGERSIGNCOMPARISONCHECK_H diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index fddfffe7523d95..207e4c3e6722c8 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -458,20 +458,6 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { PrevAlias[Self] = T; NextAlias[T] = Self; }; - // Also grab prefixes for each option, these are not fully exposed. 
- llvm::ArrayRef Prefixes[DriverID::LastOption]; - -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); -#define OPTION(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ - METAVAR, VALUES) \ - Prefixes[DriverID::OPT_##ID] = PREFIX; -#include "clang/Driver/Options.inc" -#undef OPTION -#undef PREFIX struct { DriverID ID; @@ -498,7 +484,9 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { llvm::SmallVector Rules; // Iterate over each alias, to add rules for parsing it. for (unsigned A = ID; A != DriverID::OPT_INVALID; A = NextAlias[A]) { - if (!Prefixes[A].size()) // option groups. + llvm::SmallVector Prefixes; + DriverTable.appendOptionPrefixes(A, Prefixes); + if (Prefixes.empty()) // option groups. continue; auto Opt = DriverTable.getOption(A); // Exclude - and -foo pseudo-options. @@ -507,7 +495,7 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { auto Modes = getModes(Opt); std::pair ArgCount = getArgCount(Opt); // Iterate over each spelling of the alias, e.g. -foo vs --foo. - for (StringRef Prefix : Prefixes[A]) { + for (StringRef Prefix : Prefixes) { llvm::SmallString<64> Buf(Prefix); Buf.append(Opt.getName()); llvm::StringRef Spelling = Result->try_emplace(Buf).first->getKey(); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index b2b66dca6ccf85..6803842106791b 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -136,10 +136,16 @@ New checks Gives warnings for tagged unions, where the number of tags is different from the number of data members inside the union. +- New :doc:`modernize-use-integer-sign-comparison + ` check. + + Replace comparisons between signed and unsigned integers with their safe + C++20 ``std::cmp_*`` alternative, if available. 
+ - New :doc:`portability-template-virtual-member-function ` check. - Finds cases when an uninstantiated virtual member function in a template class + Finds cases when an uninstantiated virtual member function in a template class causes cross-compiler incompatibility. New check aliases @@ -176,6 +182,10 @@ Changes in existing checks ` check by fixing a crash when determining if an ``enable_if[_t]`` was found. +- Improved :doc:`bugprone-optional-value-conversion + ` to support detecting + conversion directly by ``std::make_unique`` and ``std::make_shared``. + - Improved :doc:`bugprone-posix-return ` check to support integer literals as LHS and posix call as RHS of comparison. diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index d731b13fc0df44..41f8f958e9e181 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -301,6 +301,7 @@ Clang-Tidy Checks :doc:`modernize-use-emplace `, "Yes" :doc:`modernize-use-equals-default `, "Yes" :doc:`modernize-use-equals-delete `, "Yes" + :doc:`modernize-use-integer-sign-comparison `, "Yes" :doc:`modernize-use-nodiscard `, "Yes" :doc:`modernize-use-noexcept `, "Yes" :doc:`modernize-use-nullptr `, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-integer-sign-comparison.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-integer-sign-comparison.rst new file mode 100644 index 00000000000000..7e2c13b782694f --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-integer-sign-comparison.rst @@ -0,0 +1,36 @@ +.. title:: clang-tidy - modernize-use-integer-sign-comparison + +modernize-use-integer-sign-comparison +===================================== + +Replace comparisons between signed and unsigned integers with their safe +C++20 ``std::cmp_*`` alternative, if available. 
+ +The check provides a replacement only for C++20 or later, otherwise +it highlights the problem and expects the user to fix it manually. + +Examples of fixes created by the check: + +.. code-block:: c++ + + unsigned int func(int a, unsigned int b) { + return a == b; + } + +becomes + +.. code-block:: c++ + + #include + + unsigned int func(int a, unsigned int b) { + return std::cmp_equal(a, b); + } + +Options +------- + +.. option:: IncludeStyle + + A string specifying which include-style is used, `llvm` or `google`. + Default is `llvm`. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp new file mode 100644 index 00000000000000..768ab1ce014cec --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp @@ -0,0 +1,53 @@ +// RUN: %check_clang_tidy -std=c++17-or-later %s bugprone-optional-value-conversion %t + +namespace std { +template struct optional { + constexpr optional() noexcept; + constexpr optional(T &&) noexcept; + constexpr optional(const T &) noexcept; + template constexpr optional(U &&) noexcept; + const T &operator*() const; + T *operator->(); + const T *operator->() const; + T &operator*(); + const T &value() const; + T &value(); + const T &get() const; + T &get(); + T value_or(T) const; +}; + +template T &&move(T &x) { return static_cast(x); } + +template class default_delete {}; + +template > +class unique_ptr {}; + +template +class shared_ptr {}; + +template unique_ptr make_unique(Args &&...args); +template shared_ptr make_shared(Args &&...args); + +} // namespace std + +struct A { + explicit A (int); +}; +std::optional opt; + +void invalid() { + std::make_unique>(opt.value()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: conversion from 'std::optional' into 'int' and back into 'std::optional', remove potentially error-prone 
optional dereference [bugprone-optional-value-conversion] + using A = std::optional; + std::make_unique(opt.value()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: conversion from 'std::optional' into 'int' and back into 'std::optional', remove potentially error-prone optional dereference [bugprone-optional-value-conversion] + std::make_shared>(opt.value()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: conversion from 'std::optional' into 'int' and back into 'std::optional', remove potentially error-prone optional dereference [bugprone-optional-value-conversion] +} + +void valid() { + std::make_unique(opt.value()); + std::make_shared(opt.value()); +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp new file mode 100644 index 00000000000000..99f00444c2d3f3 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp @@ -0,0 +1,122 @@ +// CHECK-FIXES: #include +// RUN: %check_clang_tidy -std=c++20 %s modernize-use-integer-sign-comparison %t + +// The code that triggers the check +#define MAX_MACRO(a, b) (a < b) ? 
b : a + +unsigned int FuncParameters(int bla) { + unsigned int result = 0; + if (result == bla) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_equal(result , bla)) + + return 1; +} + +template +void TemplateFuncParameter(T val) { + unsigned long uL = 0; + if (val >= uL) + return; +// CHECK-MESSAGES-NOT: warning: +} + +template +int TemplateFuncParameters(T1 val1, T2 val2) { + if (val1 >= val2) + return 0; +// CHECK-MESSAGES-NOT: warning: + return 1; +} + +int AllComparisons() { + unsigned int uVar = 42; + unsigned short uArray[7] = {0, 1, 2, 3, 9, 7, 9}; + + int sVar = -42; + short sArray[7] = {-1, -2, -8, -94, -5, -4, -6}; + + enum INT_TEST { + VAL1 = 0, + VAL2 = -1 + }; + + char ch = 'a'; + unsigned char uCh = 'a'; + signed char sCh = 'a'; + bool bln = false; + + if (bln == sVar) + return 0; +// CHECK-MESSAGES-NOT: warning: + + if (ch > uCh) + return 0; +// CHECK-MESSAGES-NOT: warning: + + if (sVar <= INT_TEST::VAL2) + return 0; +// CHECK-MESSAGES-NOT: warning: + + if (uCh < sCh) + return -1; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less(uCh , sCh)) + + if ((int)uVar < sVar) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less(uVar, sVar)) + + (uVar != sVar) ? uVar = sVar + : sVar = uVar; +// CHECK-MESSAGES: :[[@LINE-2]]:6: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: (std::cmp_not_equal(uVar , sVar)) ? 
uVar = sVar + + while (uArray[0] <= sArray[0]) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:12: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: while (std::cmp_less_equal(uArray[0] , sArray[0])) + + if (uArray[1] > sArray[1]) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_greater(uArray[1] , sArray[1])) + + MAX_MACRO(uVar, sArray[0]); +// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] + + if (static_cast(uArray[2]) < static_cast(sArray[2])) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less(uArray[2],sArray[2])) + + if ((unsigned int)uArray[3] < (int)sArray[3]) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less(uArray[3],sArray[3])) + + if ((unsigned int)(uArray[4]) < (int)(sArray[4])) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_less((uArray[4]),(sArray[4]))) + + if (uArray[5] > sArray[5]) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_greater(uArray[5] , sArray[5])) + + #define VALUE sArray[6] + if (uArray[6] > VALUE) + return 0; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison] +// CHECK-FIXES: if (std::cmp_greater(uArray[6] , VALUE)) + + + FuncParameters(uVar); + 
TemplateFuncParameter(sVar); + TemplateFuncParameters(uVar, sVar); + + return 0; +} diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 8c39ef3d5a9fa6..88d93a79d00f8f 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -747,9 +747,9 @@ class DeclaratorDecl : public ValueDecl { /// ignoring outer template declarations. SourceLocation InnerLocStart; - bool hasExtInfo() const { return DeclInfo.is(); } - ExtInfo *getExtInfo() { return DeclInfo.get(); } - const ExtInfo *getExtInfo() const { return DeclInfo.get(); } + bool hasExtInfo() const { return isa(DeclInfo); } + ExtInfo *getExtInfo() { return cast(DeclInfo); } + const ExtInfo *getExtInfo() const { return cast(DeclInfo); } protected: DeclaratorDecl(Kind DK, DeclContext *DC, SourceLocation L, @@ -762,9 +762,8 @@ class DeclaratorDecl : public ValueDecl { friend class ASTDeclWriter; TypeSourceInfo *getTypeSourceInfo() const { - return hasExtInfo() - ? getExtInfo()->TInfo - : DeclInfo.get(); + return hasExtInfo() ? getExtInfo()->TInfo + : cast(DeclInfo); } void setTypeSourceInfo(TypeSourceInfo *TI) { @@ -3587,10 +3586,10 @@ class TagDecl : public TypeDecl, /// otherwise, it is a null (TypedefNameDecl) pointer. llvm::PointerUnion TypedefNameDeclOrQualifier; - bool hasExtInfo() const { return TypedefNameDeclOrQualifier.is(); } - ExtInfo *getExtInfo() { return TypedefNameDeclOrQualifier.get(); } + bool hasExtInfo() const { return isa(TypedefNameDeclOrQualifier); } + ExtInfo *getExtInfo() { return cast(TypedefNameDeclOrQualifier); } const ExtInfo *getExtInfo() const { - return TypedefNameDeclOrQualifier.get(); + return cast(TypedefNameDeclOrQualifier); } protected: @@ -3793,7 +3792,7 @@ class TagDecl : public TypeDecl, TypedefNameDecl *getTypedefNameForAnonDecl() const { return hasExtInfo() ? 
nullptr - : TypedefNameDeclOrQualifier.get(); + : cast(TypedefNameDeclOrQualifier); } void setTypedefNameForAnonDecl(TypedefNameDecl *TDD); @@ -4011,7 +4010,7 @@ class EnumDecl : public TagDecl { return QualType(); if (const Type *T = IntegerType.dyn_cast()) return QualType(T, 0); - return IntegerType.get()->getType().getUnqualifiedType(); + return cast(IntegerType)->getType().getUnqualifiedType(); } /// Set the underlying integer type. diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index a3447d19909752..82932e098c86f0 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -271,16 +271,12 @@ class alignas(8) Decl { /// // LexicalDC == global namespace llvm::PointerUnion DeclCtx; - bool isInSemaDC() const { return DeclCtx.is(); } - bool isOutOfSemaDC() const { return DeclCtx.is(); } + bool isInSemaDC() const { return isa(DeclCtx); } + bool isOutOfSemaDC() const { return isa(DeclCtx); } - MultipleDC *getMultipleDC() const { - return DeclCtx.get(); - } + MultipleDC *getMultipleDC() const { return cast(DeclCtx); } - DeclContext *getSemanticDC() const { - return DeclCtx.get(); - } + DeclContext *getSemanticDC() const { return cast(DeclCtx); } /// Loc - The location of this decl. SourceLocation Loc; @@ -1340,7 +1336,7 @@ class DeclListNode { assert(Ptr && "dereferencing end() iterator"); if (DeclListNode *CurNode = Ptr.dyn_cast()) return CurNode->D; - return Ptr.get(); + return cast(Ptr); } void operator->() const { } // Unsupported. bool operator==(const iterator &X) const { return Ptr == X.Ptr; } diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index e389b5cd6df5b9..c232556edeff70 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -2388,19 +2388,19 @@ class CXXCtorInitializer final { /// Determine whether this initializer is initializing a base class. 
bool isBaseInitializer() const { - return Initializee.is() && !IsDelegating; + return isa(Initializee) && !IsDelegating; } /// Determine whether this initializer is initializing a non-static /// data member. - bool isMemberInitializer() const { return Initializee.is(); } + bool isMemberInitializer() const { return isa(Initializee); } bool isAnyMemberInitializer() const { return isMemberInitializer() || isIndirectMemberInitializer(); } bool isIndirectMemberInitializer() const { - return Initializee.is(); + return isa(Initializee); } /// Determine whether this initializer is an implicit initializer @@ -2416,7 +2416,7 @@ class CXXCtorInitializer final { /// Determine whether this initializer is creating a delegating /// constructor. bool isDelegatingInitializer() const { - return Initializee.is() && IsDelegating; + return isa(Initializee) && IsDelegating; } /// Determine whether this initializer is a pack expansion. @@ -2457,21 +2457,21 @@ class CXXCtorInitializer final { /// non-static data member being initialized. Otherwise, returns null. 
FieldDecl *getMember() const { if (isMemberInitializer()) - return Initializee.get(); + return cast(Initializee); return nullptr; } FieldDecl *getAnyMember() const { if (isMemberInitializer()) - return Initializee.get(); + return cast(Initializee); if (isIndirectMemberInitializer()) - return Initializee.get()->getAnonField(); + return cast(Initializee)->getAnonField(); return nullptr; } IndirectFieldDecl *getIndirectMember() const { if (isIndirectMemberInitializer()) - return Initializee.get(); + return cast(Initializee); return nullptr; } diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index dd92d40b804232..44ccf8932a1830 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1965,7 +1965,7 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization; - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } /// Retrieve the set of template arguments that should be used @@ -2013,7 +2013,7 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const { if (auto *Info = ExplicitInfo.dyn_cast()) return Info->TemplateArgsAsWritten; - return ExplicitInfo.get(); + return cast(ExplicitInfo); } /// Set the template argument list as written in the sources. 
@@ -2734,7 +2734,7 @@ class VarTemplateSpecializationDecl : public VarDecl, SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization; - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } /// Retrieve the set of template arguments that should be used @@ -2782,7 +2782,7 @@ class VarTemplateSpecializationDecl : public VarDecl, const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const { if (auto *Info = ExplicitInfo.dyn_cast()) return Info->TemplateArgsAsWritten; - return ExplicitInfo.get(); + return cast(ExplicitInfo); } /// Set the template argument list as written in the sources. @@ -3309,7 +3309,7 @@ inline NamedDecl *getAsNamedDecl(TemplateParameter P) { return PD; if (auto *PD = P.dyn_cast()) return PD; - return P.get(); + return cast(P); } inline TemplateDecl *getAsTypeTemplateDecl(Decl *D) { diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 1a24b8857674ca..4cec89c979f775 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -878,7 +878,7 @@ class CXXTypeidExpr : public Expr { /// object. This is not a strong guarantee. bool isMostDerived(const ASTContext &Context) const; - bool isTypeOperand() const { return Operand.is(); } + bool isTypeOperand() const { return isa(Operand); } /// Retrieves the type operand of this typeid() expression after /// various required adjustments (removing reference types, cv-qualifiers). @@ -887,11 +887,11 @@ class CXXTypeidExpr : public Expr { /// Retrieve source information for the type operand. 
TypeSourceInfo *getTypeOperandSourceInfo() const { assert(isTypeOperand() && "Cannot call getTypeOperand for typeid(expr)"); - return Operand.get(); + return cast(Operand); } Expr *getExprOperand() const { assert(!isTypeOperand() && "Cannot call getExprOperand for typeid(type)"); - return static_cast(Operand.get()); + return static_cast(cast(Operand)); } SourceLocation getBeginLoc() const LLVM_READONLY { return Range.getBegin(); } @@ -1093,7 +1093,7 @@ class CXXUuidofExpr : public Expr { Operand = (TypeSourceInfo*)nullptr; } - bool isTypeOperand() const { return Operand.is(); } + bool isTypeOperand() const { return isa(Operand); } /// Retrieves the type operand of this __uuidof() expression after /// various required adjustments (removing reference types, cv-qualifiers). @@ -1102,11 +1102,11 @@ class CXXUuidofExpr : public Expr { /// Retrieve source information for the type operand. TypeSourceInfo *getTypeOperandSourceInfo() const { assert(isTypeOperand() && "Cannot call getTypeOperand for __uuidof(expr)"); - return Operand.get(); + return cast(Operand); } Expr *getExprOperand() const { assert(!isTypeOperand() && "Cannot call getExprOperand for __uuidof(type)"); - return static_cast(Operand.get()); + return static_cast(cast(Operand)); } MSGuidDecl *getGuidDecl() const { return Guid; } @@ -4750,24 +4750,24 @@ class MaterializeTemporaryExpr : public Expr { /// be materialized into a glvalue. Expr *getSubExpr() const { return cast( - State.is() - ? State.get() - : State.get()->getTemporaryExpr()); + isa(State) + ? cast(State) + : cast(State)->getTemporaryExpr()); } /// Retrieve the storage duration for the materialized temporary. StorageDuration getStorageDuration() const { - return State.is() ? SD_FullExpression - : State.get() + return isa(State) ? SD_FullExpression + : cast(State) ->getStorageDuration(); } /// Get the storage for the constant value of a materialized temporary /// of static storage duration. 
APValue *getOrCreateValue(bool MayCreate) const { - assert(State.is() && + assert(isa(State) && "the temporary has not been lifetime extended"); - return State.get()->getOrCreateValue( + return cast(State)->getOrCreateValue( MayCreate); } @@ -4782,8 +4782,8 @@ class MaterializeTemporaryExpr : public Expr { /// Get the declaration which triggered the lifetime-extension of this /// temporary, if any. ValueDecl *getExtendingDecl() { - return State.is() ? nullptr - : State.get() + return isa(State) ? nullptr + : cast(State) ->getExtendingDecl(); } const ValueDecl *getExtendingDecl() const { @@ -4793,8 +4793,8 @@ class MaterializeTemporaryExpr : public Expr { void setExtendingDecl(ValueDecl *ExtendedBy, unsigned ManglingNumber); unsigned getManglingNumber() const { - return State.is() ? 0 - : State.get() + return isa(State) ? 0 + : cast(State) ->getManglingNumber(); } @@ -4820,17 +4820,17 @@ class MaterializeTemporaryExpr : public Expr { // Iterators child_range children() { - return State.is() + return isa(State) ? child_range(State.getAddrOfPtr1(), State.getAddrOfPtr1() + 1) - : State.get()->childrenExpr(); + : cast(State)->childrenExpr(); } const_child_range children() const { - return State.is() + return isa(State) ? 
const_child_range(State.getAddrOfPtr1(), State.getAddrOfPtr1() + 1) : const_cast( - State.get()) + cast(State)) ->childrenExpr(); } }; diff --git a/clang/include/clang/AST/ExprConcepts.h b/clang/include/clang/AST/ExprConcepts.h index f3e32ce3961981..86c4155b6a853e 100644 --- a/clang/include/clang/AST/ExprConcepts.h +++ b/clang/include/clang/AST/ExprConcepts.h @@ -261,13 +261,13 @@ class TypeRequirement : public Requirement { assert(Status == SS_SubstitutionFailure && "Attempted to get substitution diagnostic when there has been no " "substitution failure."); - return Value.get(); + return cast(Value); } TypeSourceInfo *getType() const { assert(!isSubstitutionFailure() && "Attempted to get type when there has been a substitution failure."); - return Value.get(); + return cast(Value); } static bool classof(const Requirement *R) { @@ -409,14 +409,14 @@ class ExprRequirement : public Requirement { assert(isExprSubstitutionFailure() && "Attempted to get expression substitution diagnostic when there has " "been no expression substitution failure"); - return Value.get(); + return cast(Value); } Expr *getExpr() const { assert(!isExprSubstitutionFailure() && "ExprRequirement has no expression because there has been a " "substitution failure."); - return Value.get(); + return cast(Value); } static bool classof(const Requirement *R) { diff --git a/clang/include/clang/AST/ExprObjC.h b/clang/include/clang/AST/ExprObjC.h index f833916c91aa54..1fccc260695820 100644 --- a/clang/include/clang/AST/ExprObjC.h +++ b/clang/include/clang/AST/ExprObjC.h @@ -752,28 +752,24 @@ class ObjCPropertyRefExpr : public Expr { setMethodRefFlag(MethodRef_Setter, val); } - const Expr *getBase() const { - return cast(Receiver.get()); - } - Expr *getBase() { - return cast(Receiver.get()); - } + const Expr *getBase() const { return cast(cast(Receiver)); } + Expr *getBase() { return cast(cast(Receiver)); } SourceLocation getLocation() const { return IdLoc; } SourceLocation getReceiverLocation() const { 
return ReceiverLoc; } QualType getSuperReceiverType() const { - return QualType(Receiver.get(), 0); + return QualType(cast(Receiver), 0); } ObjCInterfaceDecl *getClassReceiver() const { - return Receiver.get(); + return cast(Receiver); } - bool isObjectReceiver() const { return Receiver.is(); } - bool isSuperReceiver() const { return Receiver.is(); } - bool isClassReceiver() const { return Receiver.is(); } + bool isObjectReceiver() const { return isa(Receiver); } + bool isSuperReceiver() const { return isa(Receiver); } + bool isClassReceiver() const { return isa(Receiver); } /// Determine the type of the base, regardless of the kind of receiver. QualType getReceiverType(const ASTContext &ctx) const; @@ -787,7 +783,7 @@ class ObjCPropertyRefExpr : public Expr { // Iterators child_range children() { - if (Receiver.is()) { + if (isa(Receiver)) { Stmt **begin = reinterpret_cast(&Receiver); // hack! return child_range(begin, begin+1); } diff --git a/clang/include/clang/AST/Redeclarable.h b/clang/include/clang/AST/Redeclarable.h index 8d320a9ced2792..bba789375cb6ed 100644 --- a/clang/include/clang/AST/Redeclarable.h +++ b/clang/include/clang/AST/Redeclarable.h @@ -113,25 +113,24 @@ class Redeclarable { DeclLink(PreviousTag, decl_type *D) : Link(NotKnownLatest(Previous(D))) {} bool isFirst() const { - return Link.is() || + return isa(Link) || // FIXME: 'template' is required on the next line due to an // apparent clang bug. - Link.get().template is(); + cast(Link).template is(); } decl_type *getPrevious(const decl_type *D) const { - if (Link.is()) { - NotKnownLatest NKL = Link.get(); - if (NKL.is()) - return static_cast(NKL.get()); + if (NotKnownLatest NKL = dyn_cast(Link)) { + if (auto *Prev = dyn_cast(NKL)) + return static_cast(Prev); // Allocate the generational 'most recent' cache now, if needed. 
Link = KnownLatest(*reinterpret_cast( - NKL.get()), + cast(NKL)), const_cast(D)); } - return static_cast(Link.get().get(D)); + return static_cast(cast(Link).get(D)); } void setPrevious(decl_type *D) { @@ -141,25 +140,24 @@ class Redeclarable { void setLatest(decl_type *D) { assert(isFirst() && "decl became canonical unexpectedly"); - if (Link.is()) { - NotKnownLatest NKL = Link.get(); + if (NotKnownLatest NKL = dyn_cast(Link)) { Link = KnownLatest(*reinterpret_cast( - NKL.get()), + cast(NKL)), D); } else { - auto Latest = Link.get(); + auto Latest = cast(Link); Latest.set(D); Link = Latest; } } - void markIncomplete() { Link.get().markIncomplete(); } + void markIncomplete() { cast(Link).markIncomplete(); } Decl *getLatestNotUpdated() const { assert(isFirst() && "expected a canonical decl"); - if (Link.is()) + if (isa(Link)) return nullptr; - return Link.get().getNotUpdated(); + return cast(Link).getNotUpdated(); } }; diff --git a/clang/include/clang/AST/TemplateBase.h b/clang/include/clang/AST/TemplateBase.h index a8f0263d5505ac..9d0ee24a4f5e35 100644 --- a/clang/include/clang/AST/TemplateBase.h +++ b/clang/include/clang/AST/TemplateBase.h @@ -484,7 +484,7 @@ struct TemplateArgumentLocInfo { Pointer; TemplateTemplateArgLocInfo *getTemplate() const { - return Pointer.get(); + return cast(Pointer); } public: @@ -499,10 +499,10 @@ struct TemplateArgumentLocInfo { SourceLocation EllipsisLoc); TypeSourceInfo *getAsTypeSourceInfo() const { - return Pointer.get(); + return cast(Pointer); } - Expr *getAsExpr() const { return Pointer.get(); } + Expr *getAsExpr() const { return cast(Pointer); } NestedNameSpecifierLoc getTemplateQualifierLoc() const { const auto *Template = getTemplate(); diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 30dc5c55d411ab..b9f40faf0b18e6 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2468,4 +2468,11 @@ let SVETargetGuard = "sve2,fp8", SMETargetGuard = 
"sme2,fp8" in { // 8-bit floating-point convert to BFloat16/Float16 (top) def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>; def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>; + + // BFloat16/Float16 convert, narrow and interleave to 8-bit floating-point + def SVFCVTN : SInst<"svcvtn_mf8[_{d}_x2]_fpm", "~2>", "bh", MergeNone, "aarch64_sve_fp8_cvtn", [VerifyRuntimeMode, SetsFPMR]>; + + // Single-precision convert, narrow and interleave to 8-bit floating-point (top and bottom) + def SVFCVTNB : SInst<"svcvtnb_mf8[_f32_x2]_fpm", "~2>", "f", MergeNone, "aarch64_sve_fp8_cvtnb", [VerifyRuntimeMode, SetsFPMR]>; + def SVFCVTNT : SInst<"svcvtnt_mf8[_f32_x2]_fpm", "~~2>", "f", MergeNone, "aarch64_sve_fp8_cvtnt", [VerifyRuntimeMode, SetsFPMR]>; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 4fe17ec01906e9..57175da32b31cd 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1876,7 +1876,7 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC, } // Check for overlapping memory regions. 
- if (!Move && SrcPtr.block() == DestPtr.block()) { + if (!Move && Pointer::pointToSameBlock(SrcPtr, DestPtr)) { unsigned SrcIndex = SrcPtr.getIndex() * SrcPtr.elemSize(); unsigned DstIndex = DestPtr.getIndex() * DestPtr.elemSize(); unsigned N = Size.getZExtValue(); diff --git a/clang/lib/Driver/DriverOptions.cpp b/clang/lib/Driver/DriverOptions.cpp index 053e7f1c6404fe..cde1f8989935b0 100644 --- a/clang/lib/Driver/DriverOptions.cpp +++ b/clang/lib/Driver/DriverOptions.cpp @@ -14,24 +14,21 @@ using namespace clang::driver; using namespace clang::driver::options; using namespace llvm::opt; +#define OPTTABLE_STR_TABLE_CODE +#include "clang/Driver/Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + #define OPTTABLE_VALUES_CODE #include "clang/Driver/Options.inc" #undef OPTTABLE_VALUES_CODE -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "clang/Driver/Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE -static constexpr const llvm::StringLiteral PrefixTable_init[] = -#define PREFIX_UNION(VALUES) VALUES +#define OPTTABLE_PREFIXES_UNION_CODE #include "clang/Driver/Options.inc" -#undef PREFIX_UNION - ; -static constexpr const llvm::ArrayRef - PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1); +#undef OPTTABLE_PREFIXES_UNION_CODE static constexpr OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -43,7 +40,9 @@ namespace { class DriverOptTable : public PrecomputedOptTable { public: - DriverOptTable() : PrecomputedOptTable(InfoTable, PrefixTable) {} + DriverOptTable() + : PrecomputedOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + OptionPrefixesUnion) {} }; } diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 1abfe8fd92807e..e77857930996b2 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -418,8 +418,14 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, Add & NotAllowedWithExecuteOnly & ~DiagnosedKinds) { if (DiagnoseErrors) { std::string Desc = describeSanitizeArg(Arg, KindsToDiagnose); - D.Diag(diag::err_drv_argument_not_allowed_with) - << Desc << Triple.str(); + llvm::opt::Arg *A = Args.getLastArgNoClaim( + options::OPT_mexecute_only, options::OPT_mno_execute_only); + if (A && A->getOption().matches(options::OPT_mexecute_only)) + D.Diag(diag::err_drv_argument_not_allowed_with) + << Desc << A->getAsString(Args); + else + D.Diag(diag::err_drv_unsupported_opt_for_target) + << Desc << Triple.str(); } DiagnosedKinds |= KindsToDiagnose; } diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 03dbdc27975b42..3c78b12b0741e0 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -494,6 +494,39 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, else A.renderAsInput(Args, CmdArgs); } + if (const Arg *A = Args.getLastArg(options::OPT_fveclib)) { + const llvm::Triple &Triple = TC.getTriple(); + StringRef V = A->getValue(); + if (V == "ArmPL" && (Triple.isOSLinux() || Triple.isOSDarwin())) { + // To support -fveclib=ArmPL we need to link against libamath. Some of the + // libamath functions depend on libm, at the same time, libamath exports + // its own implementation of some of the libm functions. 
These are faster + // and potentially less accurate implementations, hence we need to be + // careful what is being linked in. Since here we are interested only in + // the subset of libamath functions that is covered by the veclib + // mappings, we need to prioritize libm functions by putting -lm before + // -lamath (and then -lm again, to fulfill libamath requirements). + // + // Therefore we need to do the following: + // + // 1. On Linux, link only when actually needed. + // + // 2. Prefer libm functions over libamath. + // + // 3. Link against libm to resolve libamath dependencies. + // + if (Triple.isOSLinux()) { + CmdArgs.push_back(Args.MakeArgString("--push-state")); + CmdArgs.push_back(Args.MakeArgString("--as-needed")); + } + CmdArgs.push_back(Args.MakeArgString("-lm")); + CmdArgs.push_back(Args.MakeArgString("-lamath")); + CmdArgs.push_back(Args.MakeArgString("-lm")); + if (Triple.isOSLinux()) + CmdArgs.push_back(Args.MakeArgString("--pop-state")); + addArchSpecificRPath(TC, Args, CmdArgs); + } + } } void tools::addLinkerCompressDebugSectionsOption( diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 80799d1e715f07..752c2e2751ab67 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -84,6 +84,12 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, else if (TC.getTriple().isWindowsArm64EC()) CmdArgs.push_back("-machine:arm64ec"); + if (const Arg *A = Args.getLastArg(options::OPT_fveclib)) { + StringRef V = A->getValue(); + if (V == "ArmPL") + CmdArgs.push_back(Args.MakeArgString("--dependent-lib=amath")); + } + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) && !C.getDriver().IsCLMode() && !C.getDriver().IsFlangMode()) { CmdArgs.push_back("-defaultlib:libcmt"); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 98136b7a455d9c..23906d5c06d380 100644 --- 
a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -277,6 +277,14 @@ CowCompilerInvocation::getMutPreprocessorOutputOpts() { using ArgumentConsumer = CompilerInvocation::ArgumentConsumer; +#define OPTTABLE_STR_TABLE_CODE +#include "clang/Driver/Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + +static llvm::StringRef lookupStrInTable(unsigned Offset) { + return &OptionStrTable[Offset]; +} + #define SIMPLE_ENUM_VALUE_TABLE #include "clang/Driver/Options.inc" #undef SIMPLE_ENUM_VALUE_TABLE @@ -303,6 +311,11 @@ static std::optional normalizeSimpleNegativeFlag(OptSpecifier Opt, /// denormalizeSimpleFlags never looks at it. Avoid bloating compile-time with /// unnecessary template instantiations and just ignore it with a variadic /// argument. +static void denormalizeSimpleFlag(ArgumentConsumer Consumer, + unsigned SpellingOffset, Option::OptionClass, + unsigned, /*T*/...) { + Consumer(lookupStrInTable(SpellingOffset)); +} static void denormalizeSimpleFlag(ArgumentConsumer Consumer, const Twine &Spelling, Option::OptionClass, unsigned, /*T*/...) 
{ @@ -343,10 +356,10 @@ static auto makeBooleanOptionNormalizer(bool Value, bool OtherValue, } static auto makeBooleanOptionDenormalizer(bool Value) { - return [Value](ArgumentConsumer Consumer, const Twine &Spelling, + return [Value](ArgumentConsumer Consumer, unsigned SpellingOffset, Option::OptionClass, unsigned, bool KeyPath) { if (KeyPath == Value) - Consumer(Spelling); + Consumer(lookupStrInTable(SpellingOffset)); }; } @@ -371,6 +384,14 @@ static void denormalizeStringImpl(ArgumentConsumer Consumer, } } +template +static void +denormalizeString(ArgumentConsumer Consumer, unsigned SpellingOffset, + Option::OptionClass OptClass, unsigned TableIndex, T Value) { + denormalizeStringImpl(Consumer, lookupStrInTable(SpellingOffset), OptClass, + TableIndex, Twine(Value)); +} + template static void denormalizeString(ArgumentConsumer Consumer, const Twine &Spelling, Option::OptionClass OptClass, unsigned TableIndex, @@ -417,14 +438,14 @@ static std::optional normalizeSimpleEnum(OptSpecifier Opt, } static void denormalizeSimpleEnumImpl(ArgumentConsumer Consumer, - const Twine &Spelling, + unsigned SpellingOffset, Option::OptionClass OptClass, unsigned TableIndex, unsigned Value) { assert(TableIndex < SimpleEnumValueTablesSize); const SimpleEnumValueTable &Table = SimpleEnumValueTables[TableIndex]; if (auto MaybeEnumVal = findValueTableByValue(Table, Value)) { - denormalizeString(Consumer, Spelling, OptClass, TableIndex, - MaybeEnumVal->Name); + denormalizeString(Consumer, lookupStrInTable(SpellingOffset), OptClass, + TableIndex, MaybeEnumVal->Name); } else { llvm_unreachable("The simple enum value was not correctly defined in " "the tablegen option description"); @@ -433,11 +454,11 @@ static void denormalizeSimpleEnumImpl(ArgumentConsumer Consumer, template static void denormalizeSimpleEnum(ArgumentConsumer Consumer, - const Twine &Spelling, + unsigned SpellingOffset, Option::OptionClass OptClass, unsigned TableIndex, T Value) { - return 
denormalizeSimpleEnumImpl(Consumer, Spelling, OptClass, TableIndex, - static_cast(Value)); + return denormalizeSimpleEnumImpl(Consumer, SpellingOffset, OptClass, + TableIndex, static_cast(Value)); } static std::optional normalizeString(OptSpecifier Opt, @@ -473,7 +494,7 @@ normalizeStringVector(OptSpecifier Opt, int, const ArgList &Args, } static void denormalizeStringVector(ArgumentConsumer Consumer, - const Twine &Spelling, + unsigned SpellingOffset, Option::OptionClass OptClass, unsigned TableIndex, const std::vector &Values) { @@ -487,15 +508,16 @@ static void denormalizeStringVector(ArgumentConsumer Consumer, CommaJoinedValue.append(Value); } } - denormalizeString(Consumer, Spelling, Option::OptionClass::JoinedClass, - TableIndex, CommaJoinedValue); + denormalizeString(Consumer, SpellingOffset, + Option::OptionClass::JoinedClass, TableIndex, + CommaJoinedValue); break; } case Option::JoinedClass: case Option::SeparateClass: case Option::JoinedOrSeparateClass: for (const std::string &Value : Values) - denormalizeString(Consumer, Spelling, OptClass, TableIndex, Value); + denormalizeString(Consumer, SpellingOffset, OptClass, TableIndex, Value); break; default: llvm_unreachable("Cannot denormalize an option with option class " @@ -532,10 +554,11 @@ static T extractMaskValue(T KeyPath) { } #define PARSE_OPTION_WITH_MARSHALLING( \ - ARGS, DIAGS, PREFIX_TYPE, SPELLING, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ - SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ - IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ + ARGS, DIAGS, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ + IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \ + TABLE_INDEX) \ if ((VISIBILITY) & 
options::CC1Option) { \ KEYPATH = MERGER(KEYPATH, DEFAULT_VALUE); \ if (IMPLIED_CHECK) \ @@ -549,8 +572,8 @@ static T extractMaskValue(T KeyPath) { // Capture the extracted value as a lambda argument to avoid potential issues // with lifetime extension of the reference. #define GENERATE_OPTION_WITH_MARSHALLING( \ - CONSUMER, PREFIX_TYPE, SPELLING, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ + CONSUMER, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ if ((VISIBILITY) & options::CC1Option) { \ @@ -559,8 +582,8 @@ static T extractMaskValue(T KeyPath) { (Extracted != \ static_cast((IMPLIED_CHECK) ? (IMPLIED_VALUE) \ : (DEFAULT_VALUE)))) \ - DENORMALIZER(CONSUMER, SPELLING, Option::KIND##Class, TABLE_INDEX, \ - Extracted); \ + DENORMALIZER(CONSUMER, SPELLING_OFFSET, Option::KIND##Class, \ + TABLE_INDEX, Extracted); \ }(EXTRACTOR(KEYPATH)); \ } diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index b64c72904b19a4..bc59de3c1a0ada 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -86,10 +86,14 @@ OpenACCClauseKind getOpenACCClauseKind(Token Tok) { if (Tok.is(tok::kw_if)) return OpenACCClauseKind::If; - // 'private' is also a keyword, make sure we pare it correctly. + // 'private' is also a keyword, make sure we parse it correctly. if (Tok.is(tok::kw_private)) return OpenACCClauseKind::Private; + // 'delete' is a keyword, make sure we parse it correctly. 
+ if (Tok.is(tok::kw_delete)) + return OpenACCClauseKind::Delete; + if (!Tok.is(tok::identifier)) return OpenACCClauseKind::Invalid; diff --git a/clang/test/CodeGen/AArch64/cpu-supports.c b/clang/test/CodeGen/AArch64/cpu-supports.c index 76fcea0be31581..406201781d4803 100644 --- a/clang/test/CodeGen/AArch64/cpu-supports.c +++ b/clang/test/CodeGen/AArch64/cpu-supports.c @@ -18,8 +18,8 @@ // CHECK-NEXT: br label [[RETURN:%.*]] // CHECK: if.end: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17867063951360 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17867063951360 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17936857268992 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17936857268992 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[IF_THEN1:%.*]], label [[IF_END2:%.*]] // CHECK: if.then1: @@ -27,8 +27,8 @@ // CHECK-NEXT: br label [[RETURN]] // CHECK: if.end2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 171136785840078848 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 171136785840078848 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 171141184020873984 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 171141184020873984 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[IF_THEN3:%.*]], label [[IF_END4:%.*]] // CHECK: if.then3: diff --git a/clang/test/CodeGen/AArch64/fmv-dependencies.c b/clang/test/CodeGen/AArch64/fmv-dependencies.c index f74b7aa32c7dca..3a524b89496e04 100644 --- a/clang/test/CodeGen/AArch64/fmv-dependencies.c +++ b/clang/test/CodeGen/AArch64/fmv-dependencies.c @@ -183,10 +183,10 @@ int caller() { // CHECK: attributes #[[sha2]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sha2,+v8a" // CHECK: attributes #[[sha3]] = { {{.*}} 
"target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sha2,+sha3,+v8a" // CHECK: attributes #[[sm4]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+sm4,+v8a" -// CHECK: attributes #[[sme]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+v8a" -// CHECK: attributes #[[sme_f64f64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme-f64f64,+v8a" -// CHECK: attributes #[[sme_i16i64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme-i16i64,+v8a" -// CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+neon,+outline-atomics,+sme,+sme2,+v8a" +// CHECK: attributes #[[sme]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+v8a" +// CHECK: attributes #[[sme_f64f64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme-f64f64,+v8a" +// CHECK: attributes #[[sme_i16i64]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme-i16i64,+v8a" +// CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sme,+sme2,+v8a" // CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a" // CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a" // CHECK: attributes #[[sve2]] = { {{.*}} "target-features"="+fmv,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+v8a" diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c new file mode 100644 index 00000000000000..ed5b0ce02af4bd --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c @@ -0,0 +1,101 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#ifdef __ARM_FEATURE_SME +#include +#else +#include +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local @test_svcvtn_f8_bf16( +// CHECK-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail 
call @llvm.aarch64.sve.fp8.cvtn.nxv8bf16( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_svcvtn_f8_bf1614svbfloat16x2_tm( +// CHECK-CXX-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtn.nxv8bf16( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svcvtn_f8_bf16(svbfloat16x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtn_mf8,_bf16_x2,_fpm)(zn_zm, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtn_f8_f16( +// CHECK-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtn.nxv8f16( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z18test_svcvtn_f8_f1613svfloat16x2_tm( +// CHECK-CXX-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtn.nxv8f16( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svcvtn_f8_f16(svfloat16x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtn_mf8,_f16_x2,_fpm)(zn_zm, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtnb_f8_f32( +// CHECK-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.fp8.cvtnb.nxv4f32( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_svcvtnb_f8_f3213svfloat32x2_tm( +// CHECK-CXX-SAME: [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtnb.nxv4f32( [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svcvtnb_f8_f32(svfloat32x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtnb_mf8,_f32_x2,_fpm)(zn_zm, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvtnt_f8_f32( +// CHECK-SAME: [[ZD:%.*]], [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtnt.nxv4f32( [[ZD]], [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_svcvtnt_f8_f32u13__SVMfloat8_t13svfloat32x2_tm( +// CHECK-CXX-SAME: [[ZD:%.*]], [[ZN_ZM_COERCE0:%.*]], [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvtnt.nxv4f32( [[ZD]], [[ZN_ZM_COERCE0]], [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret [[TMP0]] +// +svmfloat8_t test_svcvtnt_f8_f32(svmfloat8_t zd, svfloat32x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtnt_mf8,_f32_x2,_fpm)(zd, zn_zm, fpm); +} diff --git a/clang/test/CodeGen/AArch64/mixed-target-attributes.c b/clang/test/CodeGen/AArch64/mixed-target-attributes.c index bb6fb7eb8862a2..1ccb0c6177c8c5 100644 --- a/clang/test/CodeGen/AArch64/mixed-target-attributes.c +++ 
b/clang/test/CodeGen/AArch64/mixed-target-attributes.c @@ -66,24 +66,24 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @explicit_default._Mjscvt // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: // CHECK-NEXT: ret ptr @explicit_default._Mrdm // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 784 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 784 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: @@ -140,24 +140,24 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void 
@__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @implicit_default._Mjscvt // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: // CHECK-NEXT: ret ptr @implicit_default._Mrdm // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 16 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 16 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 784 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 784 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: @@ -207,16 +207,16 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576 +// 
CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @default_def_with_version_decls._Mjscvt // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 784 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 784 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c index 961279424754d5..6b7acbbd4fc597 100644 --- a/clang/test/CodeGen/attr-target-clones-aarch64.c +++ b/clang/test/CodeGen/attr-target-clones-aarch64.c @@ -64,16 +64,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 32896 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 32896 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @ftc._MaesMlse // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 68719476736 -// CHECK-NEXT: [[TMP6:%.*]] = 
icmp eq i64 [[TMP5]], 68719476736 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: @@ -100,16 +100,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186049280 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186049280 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @ftc_def._MmemtagMsha2 // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4096 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4864 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4864 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: @@ -129,8 +129,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 
[[TMP0]], 4864 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4864 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -157,8 +157,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1040 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1040 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1808 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1808 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -310,16 +310,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 549757911040 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 549757911040 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 619551195904 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 619551195904 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @ftc_inline2._MfcmaMsve2-bitperm // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65536 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65536 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65792 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65792 // CHECK-NEXT: 
[[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: @@ -360,8 +360,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014673387388928 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014673387388928 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014743180706560 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014743180706560 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -376,8 +376,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: ret ptr @ftc_inline1._MpredresMrcpc // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 513 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 513 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 769 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 769 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: @@ -411,8 +411,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817919488 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817919488 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817985280 +// CHECK-NEXT: 
[[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817985280 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -521,16 +521,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 32896 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 32896 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc._MaesMlse // CHECK-MTE-BTI: resolver_else: // CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 68719476736 -// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 68719476736 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352 // CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK-MTE-BTI: resolver_return1: @@ -557,16 +557,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512 -// CHECK-MTE-BTI-NEXT: 
[[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186049280 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186049280 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_def._MmemtagMsha2 // CHECK-MTE-BTI: resolver_else: // CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096 -// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4096 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4864 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 4864 // CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK-MTE-BTI: resolver_return1: @@ -586,8 +586,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4864 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4864 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: @@ -614,8 +614,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = 
load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1040 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1040 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1808 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1808 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: @@ -767,16 +767,16 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 549757911040 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 549757911040 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 619551195904 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 619551195904 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline2._MfcmaMsve2-bitperm // CHECK-MTE-BTI: resolver_else: // CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65536 -// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65536 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65792 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 65792 // CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK-MTE-BTI: resolver_return1: @@ -817,8 +817,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", 
"default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014673387388928 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014673387388928 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 18014743180706560 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 18014743180706560 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: @@ -833,8 +833,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline1._MpredresMrcpc // CHECK-MTE-BTI: resolver_else2: // CHECK-MTE-BTI-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 513 -// CHECK-MTE-BTI-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 513 +// CHECK-MTE-BTI-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 769 +// CHECK-MTE-BTI-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 769 // CHECK-MTE-BTI-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK-MTE-BTI: resolver_return3: @@ -868,8 +868,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817919488 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817919488 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817985280 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 
[[TMP1]], 70369817985280 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index 4194ce26870504..428e7937d8d39e 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -460,24 +460,24 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 11 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 11 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 66315 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 66315 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @fmv._MflagmMfp16fmlMrng // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927940 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 72057594037927940 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72061992218723078 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 72061992218723078 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: // CHECK-NEXT: ret ptr @fmv._Mflagm2Msme-i16i64 // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 9007199254741008 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 9007199254741008 
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 9007199254741776 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 9007199254741776 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: @@ -492,32 +492,32 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: ret ptr @fmv._McrcMls64 // CHECK: resolver_else6: // CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17592186044424 -// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17592186044424 +// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17592186110728 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17592186110728 // CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]] // CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]] // CHECK: resolver_return7: // CHECK-NEXT: ret ptr @fmv._Mfp16fmlMmemtag // CHECK: resolver_else8: // CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 33024 -// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 33024 +// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 33536 +// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 33536 // CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]] // CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]] // CHECK: resolver_return9: // CHECK-NEXT: ret ptr @fmv._MaesMfp // CHECK: resolver_else10: // CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 4224 -// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 4224 +// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 4992 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 4992 // CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]] // 
CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]] // CHECK: resolver_return11: // CHECK-NEXT: ret ptr @fmv._MlseMsha2 // CHECK: resolver_else12: // CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 144115188075855872 -// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 144115188075855872 +// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 144119586256651008 +// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 144119586256651008 // CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]] // CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]] // CHECK: resolver_return13: @@ -538,8 +538,8 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254741504 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254741504 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254741760 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254741760 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -560,16 +560,16 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 66048 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 66048 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 66304 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 66304 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // 
CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @fmv_two._Mfp16Msimd // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 512 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 512 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 768 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 768 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: @@ -765,128 +765,128 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4398048673856 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4398048673856 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4398182892352 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4398182892352 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @fmv_inline._MfcmaMfp16MrdmMsme // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864708720641179648 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864708720641179648 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864708720653762560 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864708720653762560 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: // CHECK-NEXT: ret ptr 
@fmv_inline._MmemtagMmopsMrcpc3 // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 893353197568 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 893353197568 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 894427038464 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 894427038464 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: // CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-aesMsve2-bitperm // CHECK: resolver_else4: // CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 34359775232 -// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 34359775232 +// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 35433583360 +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 35433583360 // CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]] // CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]] // CHECK: resolver_return5: // CHECK-NEXT: ret ptr @fmv_inline._MaesMf64mmMsha2 // CHECK: resolver_else6: // CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17246986240 -// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17246986240 +// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 18320798464 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 18320798464 // CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]] // CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]] // CHECK: resolver_return7: // CHECK-NEXT: ret ptr @fmv_inline._Mf32mmMi8mmMsha3 // CHECK: resolver_else8: // CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19791209299968 -// 
CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 19791209299968 +// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19861002584864 +// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 19861002584864 // CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]] // CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]] // CHECK: resolver_return9: // CHECK-NEXT: ret ptr @fmv_inline._MmemtagMsve2-sm4 // CHECK: resolver_else10: // CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 1374389534720 -// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 1374389534720 +// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 1444182864640 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 1444182864640 // CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]] // CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]] // CHECK: resolver_return11: // CHECK-NEXT: ret ptr @fmv_inline._Msve2-aesMsve2-sha3 // CHECK: resolver_else12: // CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 1207959552 -// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 1207959552 +// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 1208025856 +// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 1208025856 // CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]] // CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]] // CHECK: resolver_return13: // CHECK-NEXT: ret ptr @fmv_inline._Mbf16Msve // CHECK: resolver_else14: // CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 134348800 -// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 134348800 +// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 134349568 +// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 134349568 // CHECK-NEXT: 
[[TMP35:%.*]] = and i1 true, [[TMP34]] // CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]] // CHECK: resolver_return15: // CHECK-NEXT: ret ptr @fmv_inline._Mbf16Mdit // CHECK: resolver_else16: // CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 20971520 -// CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[TMP37]], 20971520 +// CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 20971776 +// CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[TMP37]], 20971776 // CHECK-NEXT: [[TMP39:%.*]] = and i1 true, [[TMP38]] // CHECK-NEXT: br i1 [[TMP39]], label [[RESOLVER_RETURN17:%.*]], label [[RESOLVER_ELSE18:%.*]] // CHECK: resolver_return17: // CHECK-NEXT: ret ptr @fmv_inline._MfrinttsMrcpc // CHECK: resolver_else18: // CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP41:%.*]] = and i64 [[TMP40]], 8650752 -// CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP41]], 8650752 +// CHECK-NEXT: [[TMP41:%.*]] = and i64 [[TMP40]], 12845056 +// CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[TMP41]], 12845056 // CHECK-NEXT: [[TMP43:%.*]] = and i1 true, [[TMP42]] // CHECK-NEXT: br i1 [[TMP43]], label [[RESOLVER_RETURN19:%.*]], label [[RESOLVER_ELSE20:%.*]] // CHECK: resolver_return19: // CHECK-NEXT: ret ptr @fmv_inline._MdpbMrcpc2 // CHECK: resolver_else20: // CHECK-NEXT: [[TMP44:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP45:%.*]] = and i64 [[TMP44]], 1572864 -// CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[TMP45]], 1572864 +// CHECK-NEXT: [[TMP45:%.*]] = and i64 [[TMP44]], 1835264 +// CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[TMP45]], 1835264 // CHECK-NEXT: [[TMP47:%.*]] = and i1 true, [[TMP46]] // CHECK-NEXT: br i1 [[TMP47]], label [[RESOLVER_RETURN21:%.*]], label [[RESOLVER_ELSE22:%.*]] // CHECK: resolver_return21: // CHECK-NEXT: ret ptr @fmv_inline._Mdpb2Mjscvt // CHECK: resolver_else22: // CHECK-NEXT: [[TMP48:%.*]] = 
load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP49:%.*]] = and i64 [[TMP48]], 520 -// CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[TMP49]], 520 +// CHECK-NEXT: [[TMP49:%.*]] = and i64 [[TMP48]], 66312 +// CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[TMP49]], 66312 // CHECK-NEXT: [[TMP51:%.*]] = and i1 true, [[TMP50]] // CHECK-NEXT: br i1 [[TMP51]], label [[RESOLVER_RETURN23:%.*]], label [[RESOLVER_ELSE24:%.*]] // CHECK: resolver_return23: // CHECK-NEXT: ret ptr @fmv_inline._Mfp16fmlMsimd // CHECK: resolver_else24: // CHECK-NEXT: [[TMP52:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP53:%.*]] = and i64 [[TMP52]], 32784 -// CHECK-NEXT: [[TMP54:%.*]] = icmp eq i64 [[TMP53]], 32784 +// CHECK-NEXT: [[TMP53:%.*]] = and i64 [[TMP52]], 33552 +// CHECK-NEXT: [[TMP54:%.*]] = icmp eq i64 [[TMP53]], 33552 // CHECK-NEXT: [[TMP55:%.*]] = and i1 true, [[TMP54]] // CHECK-NEXT: br i1 [[TMP55]], label [[RESOLVER_RETURN25:%.*]], label [[RESOLVER_ELSE26:%.*]] // CHECK: resolver_return25: // CHECK-NEXT: ret ptr @fmv_inline._MaesMdotprod // CHECK: resolver_else26: // CHECK-NEXT: [[TMP56:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP57:%.*]] = and i64 [[TMP56]], 192 -// CHECK-NEXT: [[TMP58:%.*]] = icmp eq i64 [[TMP57]], 192 +// CHECK-NEXT: [[TMP57:%.*]] = and i64 [[TMP56]], 960 +// CHECK-NEXT: [[TMP58:%.*]] = icmp eq i64 [[TMP57]], 960 // CHECK-NEXT: [[TMP59:%.*]] = and i1 true, [[TMP58]] // CHECK-NEXT: br i1 [[TMP59]], label [[RESOLVER_RETURN27:%.*]], label [[RESOLVER_ELSE28:%.*]] // CHECK: resolver_return27: // CHECK-NEXT: ret ptr @fmv_inline._MlseMrdm // CHECK: resolver_else28: // CHECK-NEXT: [[TMP60:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP61:%.*]] = and i64 [[TMP60]], 288 -// CHECK-NEXT: [[TMP62:%.*]] = icmp eq i64 [[TMP61]], 288 +// CHECK-NEXT: [[TMP61:%.*]] = and i64 [[TMP60]], 800 +// CHECK-NEXT: [[TMP62:%.*]] = icmp eq i64 [[TMP61]], 800 // CHECK-NEXT: [[TMP63:%.*]] = and 
i1 true, [[TMP62]] // CHECK-NEXT: br i1 [[TMP63]], label [[RESOLVER_RETURN29:%.*]], label [[RESOLVER_ELSE30:%.*]] // CHECK: resolver_return29: @@ -899,8 +899,8 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073741824 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073741824 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073807616 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -913,8 +913,8 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 65536 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 65536 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 65792 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 65792 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -941,16 +941,16 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048576 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048576 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1048832 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1048832 // 
CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: // CHECK-NEXT: ret ptr @default_def_with_version_decls._Mjscvt // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 64 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 64 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 832 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 832 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: diff --git a/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp index 0cdc3b32521ff3..a2cc9f30f026af 100644 --- a/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp +++ b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp @@ -57,8 +57,8 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254806528 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254806528 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254806784 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254806784 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -77,8 +77,8 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254806528 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 
9007199254806528 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 9007199254806784 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 9007199254806784 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -173,16 +173,16 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36591746972385280 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36591746972385280 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36596145153180416 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36596145153180416 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: // CHECK-NEXT: ret ptr @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs // CHECK: [[RESOLVER_ELSE]]: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777216 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777216 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777472 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777472 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] // CHECK: [[RESOLVER_RETURN1]]: @@ -222,16 +222,16 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36591746972385280 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36591746972385280 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36596145153180416 
+// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36596145153180416 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: // CHECK-NEXT: ret ptr @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs // CHECK: [[RESOLVER_ELSE]]: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777216 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777216 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16777472 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16777472 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] // CHECK: [[RESOLVER_RETURN1]]: diff --git a/clang/test/CodeGenCXX/attr-target-version.cpp b/clang/test/CodeGenCXX/attr-target-version.cpp index 0fd9bc33df8094..b6ba07ed29504e 100644 --- a/clang/test/CodeGenCXX/attr-target-version.cpp +++ b/clang/test/CodeGenCXX/attr-target-version.cpp @@ -235,8 +235,8 @@ int bar() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36028797153181696 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36028797153181696 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36033195199759104 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36033195199759104 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -249,8 +249,8 @@ int bar() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 
134217760 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 134217760 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 134218528 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 134218528 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -271,8 +271,8 @@ int bar() { // CHECK-NEXT: ret ptr @_ZN7MyClass3gooEi._Mcrc // CHECK: [[RESOLVER_ELSE]]: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 16 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 16 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 784 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 784 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] // CHECK: [[RESOLVER_RETURN1]]: @@ -285,8 +285,8 @@ int bar() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073741824 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073741824 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073807616 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -299,8 +299,8 @@ int bar() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 65536 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 65536 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 65792 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 
65792 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: diff --git a/clang/test/CodeGenCXX/fmv-namespace.cpp b/clang/test/CodeGenCXX/fmv-namespace.cpp index d61f6dc9a7071b..1a76ee03565245 100644 --- a/clang/test/CodeGenCXX/fmv-namespace.cpp +++ b/clang/test/CodeGenCXX/fmv-namespace.cpp @@ -76,8 +76,8 @@ __attribute((target_version("mops"))) int bar() { return 1; } // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073741824 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073741824 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1073807616 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1073807616 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 15f190165a7d73..bb692b2aeea1d3 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -989,19 +989,25 @@ // RUN: not %clang --target=x86_64-linux-gnu -fsanitize=undefined,function -mcmodel=large %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-CODE-MODEL // CHECK-UBSAN-FUNCTION-CODE-MODEL: error: invalid argument '-fsanitize=function' only allowed with '-mcmodel=small' -// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION -// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=undefined -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION -// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI -// RUN: not %clang 
--target=x86_64-sie-ps5 -fsanitize=function -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI --check-prefix=CHECK-UBSAN-FUNCTION +// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-TARGET +// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=undefined -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-TARGET +// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI-TARGET +// RUN: not %clang --target=x86_64-sie-ps5 -fsanitize=function -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI-TARGET --check-prefix=CHECK-UBSAN-FUNCTION-TARGET // RUN: %clang --target=x86_64-sie-ps5 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-UNDEFINED // CHECK-UBSAN-UNDEFINED: "-fsanitize={{((alignment|array-bounds|bool|builtin|enum|float-cast-overflow|integer-divide-by-zero|nonnull-attribute|null|pointer-overflow|return|returns-nonnull-attribute|shift-base|shift-exponent|signed-integer-overflow|unreachable|vla-bound),?){17}"}} -// RUN: not %clang --target=armv6t2-eabi -mexecute-only -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION -// RUN: not %clang --target=armv6t2-eabi -mexecute-only -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI +// RUN: not %clang --target=armv6t2-eabi -mexecute-only -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-MEXECUTE-ONLY +// RUN: not %clang --target=armv6t2-eabi -mpure-code -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-MPURE-CODE +// RUN: not %clang --target=armv6t2-eabi -mexecute-only -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI-MEXECUTE-ONLY +// RUN: not %clang --target=armv6t2-eabi -mpure-code -fsanitize=kcfi %s -### 2>&1 | FileCheck %s 
--check-prefix=CHECK-UBSAN-KCFI-MPURE-CODE // RUN: %clang --target=armv6t2-eabi -mexecute-only -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-UNDEFINED-VPTR -// CHECK-UBSAN-KCFI-DAG: error: invalid argument '-fsanitize=kcfi' not allowed with {{('x86_64-sie-ps5'|'armv6t2-unknown-unknown-eabi')}} -// CHECK-UBSAN-FUNCTION-DAG: error: invalid argument '-fsanitize=function' not allowed with {{('x86_64-sie-ps5'|'armv6t2-unknown-unknown-eabi')}} +// CHECK-UBSAN-KCFI-TARGET-DAG: error: unsupported option '-fsanitize=kcfi' for target 'x86_64-sie-ps5' +// CHECK-UBSAN-KCFI-MEXECUTE-ONLY-DAG: error: invalid argument '-fsanitize=kcfi' not allowed with '-mexecute-only' +// CHECK-UBSAN-KCFI-MPURE-CODE-DAG: error: invalid argument '-fsanitize=kcfi' not allowed with '-mpure-code' +// CHECK-UBSAN-FUNCTION-TARGET-DAG: error: unsupported option '-fsanitize=function' for target 'x86_64-sie-ps5' +// CHECK-UBSAN-FUNCTION-MEXECUTE-ONLY-DAG: error: invalid argument '-fsanitize=function' not allowed with '-mexecute-only' +// CHECK-UBSAN-FUNCTION-MPURE-CODE-DAG: error: invalid argument '-fsanitize=function' not allowed with '-mpure-code' // CHECK-UBSAN-UNDEFINED-VPTR: "-fsanitize={{((alignment|array-bounds|bool|builtin|enum|float-cast-overflow|integer-divide-by-zero|nonnull-attribute|null|pointer-overflow|return|returns-nonnull-attribute|shift-base|shift-exponent|signed-integer-overflow|unreachable|vla-bound|vptr),?){18}"}} // * Test BareMetal toolchain sanitizer support * diff --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c index 09a12c2327137c..7d0985c4dd4f48 100644 --- a/clang/test/Driver/fveclib.c +++ b/clang/test/Driver/fveclib.c @@ -112,3 +112,20 @@ /* Verify no warning when math-errno is re-enabled for a different veclib (that does not imply -fno-math-errno). 
*/ // RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL -fmath-errno -fveclib=LIBMVEC %s 2>&1 | FileCheck --check-prefix=CHECK-REPEAT-VECLIB %s // CHECK-REPEAT-VECLIB-NOT: math errno enabled + +/// Verify that vectorized routines library is being linked in. +// RUN: %clang -### --target=aarch64-pc-windows-msvc -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-MSVC %s +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-LINUX %s +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL %s -lamath 2>&1 | FileCheck --check-prefix=CHECK-LINKING-AMATH-BEFORE-ARMPL-LINUX %s +// RUN: %clang -### --target=arm64-apple-darwin -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-DARWIN %s +// RUN: %clang -### --target=arm64-apple-darwin -fveclib=ArmPL %s -lamath 2>&1 | FileCheck --check-prefix=CHECK-LINKING-AMATH-BEFORE-ARMPL-DARWIN %s +// CHECK-LINKING-ARMPL-LINUX: "--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +// CHECK-LINKING-ARMPL-DARWIN: "-lm" "-lamath" "-lm" +// CHECK-LINKING-ARMPL-MSVC: "--dependent-lib=amath" +// CHECK-LINKING-AMATH-BEFORE-ARMPL-LINUX: "-lamath" {{.*}}"--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +// CHECK-LINKING-AMATH-BEFORE-ARMPL-DARWIN: "-lamath" {{.*}}"-lm" "-lamath" "-lm" + +/// Verify that the RPATH is being set when needed. 
+// RUN: %clang -### --target=aarch64-linux-gnu -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_arch_subdir -frtlib-add-rpath -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-RPATH-ARMPL %s +// CHECK-RPATH-ARMPL: "--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +// CHECK-RPATH-ARMPL-SAME: "-rpath" diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c index 656b31444a9eed..3741ed099cf5c2 100644 --- a/clang/test/ParserOpenACC/parse-clauses.c +++ b/clang/test/ParserOpenACC/parse-clauses.c @@ -528,22 +528,30 @@ void VarListClauses() { #pragma acc serial firstprivate(s.array[s.value : 5], s.value), self for(int i = 0; i < 5;++i) {} - // expected-error@+2{{expected ','}} - // expected-warning@+1{{OpenACC clause 'delete' not yet implemented, clause ignored}} -#pragma acc serial delete(s.array[s.value] s.array[s.value :5] ), self + // expected-warning@+4{{OpenACC construct 'exit data' not yet implemented}} + // expected-error@+3{{expected ','}} + // expected-warning@+2{{OpenACC clause 'delete' not yet implemented, clause ignored}} + // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} +#pragma acc exit data delete(s.array[s.value] s.array[s.value :5] ) async for(int i = 0; i < 5;++i) {} - // expected-warning@+1{{OpenACC clause 'delete' not yet implemented, clause ignored}} -#pragma acc serial delete(s.array[s.value : 5], s.value), self + // expected-warning@+3{{OpenACC construct 'exit data' not yet implemented}} + // expected-warning@+2{{OpenACC clause 'delete' not yet implemented, clause ignored}} + // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} +#pragma acc exit data delete(s.array[s.value : 5], s.value),async for(int i = 0; i < 5;++i) {} - // expected-error@+2{{expected ','}} - // expected-warning@+1{{OpenACC clause 'use_device' not yet implemented, clause ignored}} -#pragma acc serial use_device(s.array[s.value] 
s.array[s.value :5] ), self + // expected-warning@+4{{OpenACC construct 'exit data' not yet implemented}} + // expected-error@+3{{expected ','}} + // expected-warning@+2{{OpenACC clause 'use_device' not yet implemented, clause ignored}} + // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} +#pragma acc exit data use_device(s.array[s.value] s.array[s.value :5] ),async for(int i = 0; i < 5;++i) {} - // expected-warning@+1{{OpenACC clause 'use_device' not yet implemented, clause ignored}} -#pragma acc serial use_device(s.array[s.value : 5], s.value), self + // expected-warning@+3{{OpenACC construct 'exit data' not yet implemented}} + // expected-warning@+2{{OpenACC clause 'use_device' not yet implemented, clause ignored}} + // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} +#pragma acc exit data use_device(s.array[s.value : 5], s.value), async for(int i = 0; i < 5;++i) {} // expected-error@+2{{expected ','}} diff --git a/clang/test/ParserOpenACC/parse-clauses.cpp b/clang/test/ParserOpenACC/parse-clauses.cpp index dc985826a4efe9..4dc966ea9879f9 100644 --- a/clang/test/ParserOpenACC/parse-clauses.cpp +++ b/clang/test/ParserOpenACC/parse-clauses.cpp @@ -34,6 +34,11 @@ void templ() { #pragma acc parallel async for(;;){} + + // expected-warning@+2{{OpenACC construct 'exit data' not yet implemented}} + // expected-warning@+1{{OpenACC clause 'delete' not yet implemented, clause ignored}} +#pragma acc exit data delete(I) + ; } struct S { diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c index 2e94c2314f1824..e47efccf480433 100644 --- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c +++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c @@ -1,6 +1,6 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -emit-llvm -o - %s +// RUN: %clang_cc1 -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -verify -emit-llvm -o - %s #include @@ -21,4 +21,13 @@ void test_features(svmfloat8_t zn, fpm_t fpm) { // expected-error@-1 {{'svcvtlt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} svcvtlt2_f16_mf8_fpm(zn, fpm); // expected-error@-1 {{'svcvtlt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + + svcvtn_mf8_bf16_x2_fpm(svcreate2(svundef_bf16(), svundef_bf16()), fpm); + // expected-error@-1 {{'svcvtn_mf8_bf16_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtn_mf8_f16_x2_fpm(svcreate2(svundef_f16(), svundef_f16()), fpm); + // expected-error@-1 {{'svcvtn_mf8_f16_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtnb_mf8_f32_x2_fpm(svcreate2(svundef_f32(), svundef_f32()), fpm); + // expected-error@-1 {{'svcvtnb_mf8_f32_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtnt_mf8_f32_x2_fpm(zn, svcreate2(svundef_f32(), svundef_f32()), fpm); + // expected-error@-1 {{'svcvtnt_mf8_f32_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} } diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 3fa79636de5d75..8a2c3463189fa9 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -31,41 +31,21 @@ namespace drv = clang::driver::options; namespace clang { namespace installapi { -/// Create prefix string literals used in InstallAPIOpts.td. 
-#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "InstallAPIOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE -static constexpr const llvm::StringLiteral PrefixTable_init[] = -#define PREFIX_UNION(VALUES) VALUES +#define OPTTABLE_PREFIXES_TABLE_CODE #include "InstallAPIOpts.inc" -#undef PREFIX_UNION - ; -static constexpr const ArrayRef - PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1); +#undef OPTTABLE_PREFIXES_TABLE_CODE + +#define OPTTABLE_PREFIXES_UNION_CODE +#include "InstallAPIOpts.inc" +#undef OPTTABLE_PREFIXES_UNION_CODE /// Create table mapping all options defined in InstallAPIOpts.td. static constexpr OptTable::Info InfoTable[] = { -#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ - VALUES) \ - {PREFIX, \ - NAME, \ - HELPTEXT, \ - HELPTEXTSFORVARIANTS, \ - METAVAR, \ - OPT_##ID, \ - Option::KIND##Class, \ - PARAM, \ - FLAGS, \ - VISIBILITY, \ - OPT_##GROUP, \ - OPT_##ALIAS, \ - ALIASARGS, \ - VALUES}, +#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), #include "InstallAPIOpts.inc" #undef OPTION }; @@ -75,7 +55,9 @@ namespace { /// \brief Create OptTable class for parsing actual command line arguments. class DriverOptTable : public opt::PrecomputedOptTable { public: - DriverOptTable() : PrecomputedOptTable(InfoTable, PrefixTable) {} + DriverOptTable() + : PrecomputedOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + OptionPrefixesUnion) {} }; } // end anonymous namespace. 
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index ebafd7eb7774ec..fae32a3503c185 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -174,12 +174,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "LinkerWrapperOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "LinkerWrapperOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -189,7 +190,8 @@ static constexpr OptTable::Info InfoTable[] = { class WrapperOptTable : public opt::GenericOptTable { public: - WrapperOptTable() : opt::GenericOptTable(InfoTable) {} + WrapperOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; const OptTable &getOptTable() { diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp index bc191afdca739d..faf73a7c2f1938 100644 --- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp +++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp @@ -109,12 +109,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "NVLinkOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "NVLinkOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -124,7 +125,8 @@ static constexpr OptTable::Info InfoTable[] = { class WrapperOptTable : public opt::GenericOptTable { public: - WrapperOptTable() : opt::GenericOptTable(InfoTable) {} + WrapperOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; const OptTable &getOptTable() { diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 58b56dcfd3bece..bd36181fca3f31 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -50,12 +50,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE const llvm::opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -65,7 +66,8 @@ const llvm::opt::OptTable::Info InfoTable[] = { class ScanDepsOptTable : public llvm::opt::GenericOptTable { public: - ScanDepsOptTable() : GenericOptTable(InfoTable) { + ScanDepsOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 076458a275d986..2bcb3757d49d08 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -88,12 +88,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "SYCLLinkOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "SYCLLinkOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -103,7 +104,8 @@ static constexpr OptTable::Info InfoTable[] = { class LinkerOptTable : public opt::GenericOptTable { public: - LinkerOptTable() : opt::GenericOptTable(InfoTable) {} + LinkerOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; const OptTable &getOptTable() { diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 70dc7d860d8f6a..b32b42423f6a90 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -573,8 +573,8 @@ set(aarch64_SOURCES ) if (COMPILER_RT_HAS_AARCH64_SME) - if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD)) - list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-init.c aarch64/sme-libc-routines.c) + if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG) + list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-assert.c aarch64/sme-libc-routines.c) message(STATUS "AArch64 SME ABI routines enabled") set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin") else() @@ -842,6 +842,8 @@ else () if(COMPILER_RT_DISABLE_AARCH64_FMV) list(APPEND BUILTIN_DEFS DISABLE_AARCH64_FMV) + elseif(COMPILER_RT_BAREMETAL_BUILD) + list(APPEND BUILTIN_DEFS ENABLE_BAREMETAL_AARCH64_FMV) endif() append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS) diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c new file mode 100644 index 00000000000000..4333353f8d2d1b --- /dev/null +++ b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c @@ -0,0 +1,10 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// We rely on the FMV __aarch64_cpu_features mechanism to determine +// which features are set at runtime. + +#include "../cpu_model/AArch64CPUFeatures.inc" +_Static_assert(FEAT_SVE == 30, "sme-abi.S assumes FEAT_SVE = 30"); +_Static_assert(FEAT_SME == 42, "sme-abi.S assumes FEAT_SME = 42"); diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c b/compiler-rt/lib/builtins/aarch64/sme-abi-init.c deleted file mode 100644 index d3cd8278a5d214..00000000000000 --- a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c +++ /dev/null @@ -1,50 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -__attribute__((visibility("hidden"), nocommon)) -_Bool __aarch64_has_sme_and_tpidr2_el0; - -// We have multiple ways to check that the function has SME, depending on our -// target. -// * For Linux/Glibc we can use getauxval(). -// * For Android we can use getauxval(). -// * For newlib we can use __aarch64_sme_accessible(). 
- -#if defined(__linux__) - -#if defined(__ANDROID__) -#include -#elif __has_include() -#include -#else -#define getauxval(x) 0 -#endif -#include "../cpu_model/aarch64/hwcap.inc" - -static _Bool has_sme(void) { return getauxval(AT_HWCAP2) & HWCAP2_SME; } - -#else // defined(__linux__) - -#if defined(COMPILER_RT_SHARED_LIB) -__attribute__((weak)) -#endif -extern _Bool __aarch64_sme_accessible(void); - -static _Bool has_sme(void) { -#if defined(COMPILER_RT_SHARED_LIB) - if (!__aarch64_sme_accessible) - return 0; -#endif - return __aarch64_sme_accessible(); -} - -#endif // defined(__linux__) - -#if __GNUC__ >= 9 -#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" -#endif -__attribute__((constructor(90))) -static void init_aarch64_has_sme(void) { - __aarch64_has_sme_and_tpidr2_el0 = has_sme(); -} diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S index 623a95dd4dae5f..a6bb921bd9e6b9 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -9,18 +9,15 @@ #include "../assembly.h" .set FEAT_SVE_BIT, 30 +.set FEAT_SME_BIT, 42 .set SVCR_PSTATE_SM_BIT, 0 #if !defined(__APPLE__) -#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) -#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features) #define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features) #else // MachO requires @page/@pageoff directives because the global is defined // in a different file. Otherwise this file may fail to build. 
-#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page -#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page #define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff #endif @@ -63,9 +60,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) mov x0, xzr mov x1, xzr - adrp x16, TPIDR2_SYMBOL - ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET] - cbz w16, 1f + adrp x16, CPU_FEATS_SYMBOL + ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET] + tbz x16, #FEAT_SME_BIT, 1f 0: orr x0, x0, #0xC000000000000000 mrs x16, SVCR @@ -116,9 +113,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) BTI_C // If the current thread does not have access to TPIDR2_EL0, the subroutine // does nothing. - adrp x14, TPIDR2_SYMBOL - ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET] - cbz w14, 1f + adrp x14, CPU_FEATS_SYMBOL + ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET] + tbz x14, #FEAT_SME_BIT, 1f // If TPIDR2_EL0 is null, the subroutine does nothing. mrs x16, TPIDR2_EL0 @@ -157,9 +154,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) BTI_C // If the current thread does not have access to SME, the subroutine does // nothing. - adrp x14, TPIDR2_SYMBOL - ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET] - cbz w14, 0f + adrp x14, CPU_FEATS_SYMBOL + ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET] + tbz x14, #FEAT_SME_BIT, 0f // Otherwise, the subroutine behaves as if it did the following: // * Call __arm_tpidr2_save. 
@@ -191,11 +188,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg) BTI_C adrp x17, CPU_FEATS_SYMBOL - ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET] + ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET] tbnz w17, #FEAT_SVE_BIT, 1f - adrp x17, TPIDR2_SYMBOL - ldrb w17, [x17, TPIDR2_SYMBOL_OFFSET] - cbz x17, 2f + tbz x17, #FEAT_SME_BIT, 2f 0: mrs x17, SVCR tbz x17, #SVCR_PSTATE_SM_BIT, 2f diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c index 74e5e01b66c547..4082fd62ea11a2 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64.c +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c @@ -80,6 +80,8 @@ struct { #include "aarch64/fmv/getauxval.inc" #elif defined(_WIN32) #include "aarch64/fmv/windows.inc" +#elif defined(ENABLE_BAREMETAL_AARCH64_FMV) +#include "aarch64/fmv/baremetal.inc" #else #include "aarch64/fmv/unimplemented.inc" #endif diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc new file mode 100644 index 00000000000000..f188e84808e019 --- /dev/null +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc @@ -0,0 +1,31 @@ +// For baremetal platforms, we don't really initialise '__aarch64_cpu_features', +// with exception of FEAT_SME that we can get from '__aarch64_sme_accessible'. + +#if defined(COMPILER_RT_SHARED_LIB) +__attribute__((weak)) +#endif +extern _Bool +__aarch64_sme_accessible(void); + +static _Bool has_sme(void) { +#if defined(COMPILER_RT_SHARED_LIB) + if (!__aarch64_sme_accessible) + return 0; +#endif + return __aarch64_sme_accessible(); +} + +void __init_cpu_features_resolver(unsigned long hwcap, + const __ifunc_arg_t *arg) {} + +void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { + // CPU features already initialized. 
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) + return; + + unsigned long long feat = 0; + if (has_sme()) + feat |= 1ULL << FEAT_SME; + + __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED); +} diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index b0fb263b62f156..4afc74933a33bc 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -634,6 +634,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0xD284: // 84 D2 : test dl,dl return 2; + case 0xEC83: // 83 EC XX : sub esp, XX case 0xC1F6: // F6 C1 XX : test cl, XX return 3; @@ -644,8 +645,6 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { switch (0x00FFFFFF & *(u32*)address) { case 0xF8E483: // 83 E4 F8 : and esp, 0xFFFFFFF8 - case 0x64EC83: // 83 EC 64 : sub esp, 64h - return 3; case 0x24A48D: // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX] return 7; } @@ -873,7 +872,6 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0x5D8B: // 8B 5D XX : mov ebx, dword ptr [ebp + XX] case 0x7D8B: // 8B 7D XX : mov edi, dword ptr [ebp + XX] case 0x758B: // 8B 75 XX : mov esi, dword ptr [ebp + XX] - case 0xEC83: // 83 EC XX : sub esp, XX case 0x75FF: // FF 75 XX : push dword ptr [ebp + XX] return 3; case 0xC1F7: // F7 C1 XX YY ZZ WW : test ecx, WWZZYYXX diff --git a/compiler-rt/lib/interception/tests/interception_win_test.cpp b/compiler-rt/lib/interception/tests/interception_win_test.cpp index 761c07d4288f75..6e01209ac3a7e4 100644 --- a/compiler-rt/lib/interception/tests/interception_win_test.cpp +++ b/compiler-rt/lib/interception/tests/interception_win_test.cpp @@ -852,6 +852,7 @@ const struct InstructionSizeData { { 2, {0x8B, 0xC1}, 0, "8B C1 : mov eax, ecx"}, { 2, {0x8B, 0xEC}, 0, "8B EC : mov ebp, esp"}, { 2, {0x8B, 0xFF}, 0, "8B FF : 
mov edi, edi"}, + { 3, {0x83, 0xEC, 0x72}, 0, "83 EC XX : sub esp, XX"}, { 3, {0xc2, 0x71, 0x72}, 0, "C2 XX XX : ret XX (needed for registering weak functions)"}, { 5, {0x68, 0x71, 0x72, 0x73, 0x74}, 0, "68 XX XX XX XX : push imm32"}, { 5, {0xb8, 0x71, 0x72, 0x73, 0x74}, 0, "b8 XX XX XX XX : mov eax, XX XX XX XX"}, diff --git a/flang/include/flang/Common/Fortran-consts.h b/flang/include/flang/Common/Fortran-consts.h index eedcdae335c400..cf7884e7454c0c 100644 --- a/flang/include/flang/Common/Fortran-consts.h +++ b/flang/include/flang/Common/Fortran-consts.h @@ -9,7 +9,7 @@ #ifndef FORTRAN_COMMON_FORTRAN_CONSTS_H_ #define FORTRAN_COMMON_FORTRAN_CONSTS_H_ -#include "flang/Common/enum-class.h" +#include "enum-set.h" #include namespace Fortran::common { diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index c6f35a07d81ea5..13825eb7ba41e3 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -546,6 +546,7 @@ class ParseTreeDumper { NODE(parser, OmpEndCriticalDirective) NODE(parser, OmpEndLoopDirective) NODE(parser, OmpEndSectionsDirective) + NODE(parser, OmpFailClause) NODE(parser, OmpFromClause) NODE(OmpFromClause, Modifier) NODE(parser, OmpExpectation) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 8160b095f06dd9..2b4cb21017fa0d 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -269,6 +269,7 @@ struct OpenACCRoutineConstruct; struct OpenMPConstruct; struct OpenMPDeclarativeConstruct; struct OmpEndLoopDirective; +struct OmpMemoryOrderClause; struct CUFKernelDoConstruct; // Cooked character stream locations @@ -3914,6 +3915,14 @@ struct OmpDeviceTypeClause { WRAPPER_CLASS_BOILERPLATE(OmpDeviceTypeClause, DeviceTypeDescription); }; +// OMP 5.2 15.8.3 extended-atomic, fail-clause -> +// FAIL(memory-order) +struct OmpFailClause { + 
WRAPPER_CLASS_BOILERPLATE( + OmpFailClause, common::Indirection); + CharBlock source; +}; + // Ref: [4.5:107-109], [5.0:176-180], [5.1:205-210], [5.2:167-168] // // from-clause -> @@ -4317,11 +4326,12 @@ struct OmpMemoryOrderClause { }; // 2.17.7 Atomic construct -// atomic-clause -> memory-order-clause | HINT(hint-expression) +// atomic-clause -> memory-order-clause | HINT(hint-expression) | +// FAIL(memory-order) struct OmpAtomicClause { UNION_CLASS_BOILERPLATE(OmpAtomicClause); CharBlock source; - std::variant u; + std::variant u; }; // atomic-clause-list -> [atomic-clause, [atomic-clause], ...] diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index a1c0e8f417bcd1..99835c515463b9 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -38,7 +38,7 @@ DataSharingProcessor::DataSharingProcessor( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, lower::pft::Evaluation &eval, bool shouldCollectPreDeterminedSymbols, bool useDelayedPrivatization, - lower::SymMap *symTable) + lower::SymMap &symTable) : converter(converter), semaCtx(semaCtx), firOpBuilder(converter.getFirOpBuilder()), clauses(clauses), eval(eval), shouldCollectPreDeterminedSymbols(shouldCollectPreDeterminedSymbols), @@ -93,7 +93,7 @@ void DataSharingProcessor::insertDeallocs() { fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym); mlir::omp::PrivateClauseOp privatizer = symToPrivatizer.at(sym); - lower::SymMapScope scope(*symTable); + lower::SymMapScope scope(symTable); mlir::OpBuilder::InsertionGuard guard(firOpBuilder); mlir::Region &deallocRegion = privatizer.getDeallocRegion(); @@ -102,8 +102,8 @@ void DataSharingProcessor::insertDeallocs() { &deallocRegion, /*insertPt=*/{}, symType, symLoc); firOpBuilder.setInsertionPointToEnd(deallocEntryBlock); - symTable->addSymbol(*sym, - fir::substBase(symExV, 
deallocRegion.getArgument(0))); + symTable.addSymbol(*sym, + fir::substBase(symExV, deallocRegion.getArgument(0))); converter.createHostAssociateVarCloneDealloc(*sym); firOpBuilder.create(hsb.getAddr().getLoc()); @@ -474,7 +474,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate : mlir::omp::DataSharingClauseType::Private); fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym); - lower::SymMapScope outerScope(*symTable); + lower::SymMapScope outerScope(symTable); // Populate the `alloc` region. { @@ -491,10 +491,10 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext())) .first; - symTable->addSymbol(*sym, localExV); - lower::SymMapScope innerScope(*symTable); + symTable.addSymbol(*sym, localExV); + lower::SymMapScope innerScope(symTable); cloneSymbol(sym); - mlir::Value cloneAddr = symTable->shallowLookupSymbol(*sym).getAddr(); + mlir::Value cloneAddr = symTable.shallowLookupSymbol(*sym).getAddr(); mlir::Type cloneType = cloneAddr.getType(); // A `convert` op is required for variables that are storage associated @@ -522,25 +522,24 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, auto addSymbol = [&](unsigned argIdx, bool force = false) { symExV.match( [&](const fir::MutableBoxValue &box) { - symTable->addSymbol( + symTable.addSymbol( *sym, fir::substBase(box, copyRegion.getArgument(argIdx)), force); }, [&](const auto &box) { - symTable->addSymbol(*sym, copyRegion.getArgument(argIdx), force); + symTable.addSymbol(*sym, copyRegion.getArgument(argIdx), force); }); }; addSymbol(0, true); - lower::SymMapScope innerScope(*symTable); + lower::SymMapScope innerScope(symTable); addSymbol(1); auto ip = firOpBuilder.saveInsertionPoint(); copyFirstPrivateSymbol(sym, &ip); firOpBuilder.create( - hsb.getAddr().getLoc(), - symTable->shallowLookupSymbol(*sym).getAddr()); + 
hsb.getAddr().getLoc(), symTable.shallowLookupSymbol(*sym).getAddr()); } return result; diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index ff186483e04a83..2f5c69cc264cea 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -86,7 +86,7 @@ class DataSharingProcessor { lower::pft::Evaluation &eval; bool shouldCollectPreDeterminedSymbols; bool useDelayedPrivatization; - lower::SymMap *symTable; + lower::SymMap &symTable; OMPConstructSymbolVisitor visitor; bool needBarrier(); @@ -122,8 +122,7 @@ class DataSharingProcessor { const List &clauses, lower::pft::Evaluation &eval, bool shouldCollectPreDeterminedSymbols, - bool useDelayedPrivatization = false, - lower::SymMap *symTable = nullptr); + bool useDelayedPrivatization, lower::SymMap &symTable); // Privatisation is split into two steps. // Step1 performs cloning of all privatisation clauses and copying for diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2ef4d184a6321f..c167d347b43159 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -718,7 +718,8 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, std::optional tempDsp; if (privatize && !info.dsp) { tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval, - Fortran::lower::omp::isLastItemInQueue(item, queue)); + Fortran::lower::omp::isLastItemInQueue(item, queue), + /*useDelayedPrivatization=*/false, info.symTable); tempDsp->processStep1(); } @@ -1423,7 +1424,7 @@ static void genLoopOp(lower::AbstractConverter &converter, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(&loopClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; @@ -1544,7 +1545,8 @@ 
genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, // Insert privatizations before SECTIONS lower::SymMapScope scope(symTable); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, - lower::omp::isLastItemInQueue(item, queue)); + lower::omp::isLastItemInQueue(item, queue), + /*useDelayedPrivatization=*/false, symTable); dsp.processStep1(); List nonDsaClauses; @@ -1695,7 +1697,7 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/ lower::omp::isLastItemInQueue(item, queue), - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(&clauseOps); // 5.8.1 Implicit Data-Mapping Attribute Rules @@ -1896,7 +1898,7 @@ genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, lower::omp::isLastItemInQueue(item, queue), - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(&clauseOps); EntryBlockArgs taskArgs; @@ -2011,7 +2013,7 @@ static void genStandaloneDistribute(lower::AbstractConverter &converter, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - enableDelayedPrivatizationStaging, &symTable); + enableDelayedPrivatizationStaging, symTable); dsp.processStep1(&distributeClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2045,7 +2047,7 @@ static void genStandaloneDo(lower::AbstractConverter &converter, DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - enableDelayedPrivatizationStaging, &symTable); + enableDelayedPrivatizationStaging, symTable); dsp.processStep1(&wsloopClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2084,7 +2086,7 @@ static void 
genStandaloneParallel(lower::AbstractConverter &converter, if (enableDelayedPrivatization) { dsp.emplace(converter, semaCtx, item->clauses, eval, lower::omp::isLastItemInQueue(item, queue), - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp->processStep1(¶llelClauseOps); } @@ -2113,7 +2115,7 @@ static void genStandaloneSimd(lower::AbstractConverter &converter, // TODO: Support delayed privatization. DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); + /*useDelayedPrivatization=*/false, symTable); dsp.processStep1(); mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2167,7 +2169,7 @@ static void genCompositeDistributeParallelDo( DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(¶llelClauseOps); EntryBlockArgs parallelArgs; @@ -2235,7 +2237,7 @@ static void genCompositeDistributeParallelDoSimd( DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/true, &symTable); + /*useDelayedPrivatization=*/true, symTable); dsp.processStep1(¶llelClauseOps); EntryBlockArgs parallelArgs; @@ -2323,7 +2325,7 @@ static void genCompositeDistributeSimd(lower::AbstractConverter &converter, // TODO: Support delayed privatization. DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); + /*useDelayedPrivatization=*/false, symTable); dsp.processStep1(); // Pass the innermost leaf construct's clauses because that's where COLLAPSE @@ -2380,7 +2382,7 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, // TODO: Support delayed privatization. 
DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); + /*useDelayedPrivatization=*/false, symTable); dsp.processStep1(); // Pass the innermost leaf construct's clauses because that's where COLLAPSE diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp index b61f9767ccc2b8..f446e59b0a7230 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp @@ -13,6 +13,7 @@ #include "flang/Optimizer/Builder/Complex.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Builder/IntrinsicCall.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" @@ -331,6 +332,108 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { } }; +class CShiftAsElementalConversion + : public mlir::OpRewritePattern { +public: + using mlir::OpRewritePattern::OpRewritePattern; + + explicit CShiftAsElementalConversion(mlir::MLIRContext *ctx) + : OpRewritePattern(ctx) { + setHasBoundedRewriteRecursion(); + } + + llvm::LogicalResult + matchAndRewrite(hlfir::CShiftOp cshift, + mlir::PatternRewriter &rewriter) const override { + using Fortran::common::maxRank; + + mlir::Location loc = cshift.getLoc(); + fir::FirOpBuilder builder{rewriter, cshift.getOperation()}; + hlfir::ExprType expr = mlir::dyn_cast(cshift.getType()); + assert(expr && + "expected an expression type for the result of hlfir.cshift"); + mlir::Type elementType = expr.getElementType(); + hlfir::Entity array = hlfir::Entity{cshift.getArray()}; + mlir::Value arrayShape = hlfir::genShape(loc, builder, array); + llvm::SmallVector arrayExtents = + hlfir::getExplicitExtentsFromShape(arrayShape, 
builder); + unsigned arrayRank = expr.getRank(); + llvm::SmallVector typeParams; + hlfir::genLengthParameters(loc, builder, array, typeParams); + hlfir::Entity shift = hlfir::Entity{cshift.getShift()}; + // The new index computation involves MODULO, which is not implemented + // for IndexType, so use I64 instead. + mlir::Type calcType = builder.getI64Type(); + + mlir::Value one = builder.createIntegerConstant(loc, calcType, 1); + mlir::Value shiftVal; + if (shift.isScalar()) { + shiftVal = hlfir::loadTrivialScalar(loc, builder, shift); + shiftVal = builder.createConvert(loc, calcType, shiftVal); + } + + int64_t dimVal = 1; + if (arrayRank == 1) { + // When it is a 1D CSHIFT, we may assume that the DIM argument + // (whether it is present or absent) is equal to 1, otherwise, + // the program is illegal. + assert(shiftVal && "SHIFT must be scalar"); + } else { + if (mlir::Value dim = cshift.getDim()) + dimVal = fir::getIntIfConstant(dim).value_or(0); + assert(dimVal > 0 && dimVal <= arrayRank && + "DIM must be present and a positive constant not exceeding " + "the array's rank"); + } + + auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder, + mlir::ValueRange inputIndices) -> hlfir::Entity { + llvm::SmallVector indices{inputIndices}; + if (!shift.isScalar()) { + // When the array is not a vector, section + // (s(1), s(2), ..., s(dim-1), :, s(dim+1), ..., s(n) + // of the result has a value equal to: + // CSHIFT(ARRAY(s(1), s(2), ..., s(dim-1), :, s(dim+1), ..., s(n)), + // SH, 1), + // where SH is either SHIFT (if scalar) or + // SHIFT(s(1), s(2), ..., s(dim-1), s(dim+1), ..., s(n)). 
+ llvm::SmallVector shiftIndices{indices}; + shiftIndices.erase(shiftIndices.begin() + dimVal - 1); + hlfir::Entity shiftElement = + hlfir::getElementAt(loc, builder, shift, shiftIndices); + shiftVal = hlfir::loadTrivialScalar(loc, builder, shiftElement); + shiftVal = builder.createConvert(loc, calcType, shiftVal); + } + + // Element i of the result (1-based) is element + // 'MODULO(i + SH - 1, SIZE(ARRAY)) + 1' (1-based) of the original + // ARRAY (or its section, when ARRAY is not a vector). + mlir::Value index = + builder.createConvert(loc, calcType, inputIndices[dimVal - 1]); + mlir::Value extent = arrayExtents[dimVal - 1]; + mlir::Value newIndex = + builder.create(loc, index, shiftVal); + newIndex = builder.create(loc, newIndex, one); + newIndex = fir::IntrinsicLibrary{builder, loc}.genModulo( + calcType, {newIndex, builder.createConvert(loc, calcType, extent)}); + newIndex = builder.create(loc, newIndex, one); + newIndex = builder.createConvert(loc, builder.getIndexType(), newIndex); + + indices[dimVal - 1] = newIndex; + hlfir::Entity element = hlfir::getElementAt(loc, builder, array, indices); + return hlfir::loadTrivialScalar(loc, builder, element); + }; + + hlfir::ElementalOp elementalOp = hlfir::genElementalOp( + loc, builder, elementType, arrayShape, typeParams, genKernel, + /*isUnordered=*/true, + array.isPolymorphic() ? 
static_cast(array) : nullptr, + cshift.getResult().getType()); + rewriter.replaceOp(cshift, elementalOp); + return mlir::success(); + } +}; + class SimplifyHLFIRIntrinsics : public hlfir::impl::SimplifyHLFIRIntrinsicsBase { public: @@ -339,6 +442,7 @@ class SimplifyHLFIRIntrinsics mlir::RewritePatternSet patterns(context); patterns.insert(context); patterns.insert(context); + patterns.insert(context); mlir::ConversionTarget target(*context); // don't transform transpose of polymorphic arrays (not currently supported // by hlfir.elemental) @@ -375,6 +479,24 @@ class SimplifyHLFIRIntrinsics } return true; }); + target.addDynamicallyLegalOp([](hlfir::CShiftOp cshift) { + unsigned resultRank = hlfir::Entity{cshift}.getRank(); + if (resultRank == 1) + return false; + + mlir::Value dim = cshift.getDim(); + if (!dim) + return false; + + // If DIM is present, then it must be constant to please + // the conversion. In addition, ignore cases with + // illegal DIM values. + if (auto dimVal = fir::getIntIfConstant(dim)) + if (*dimVal > 0 && *dimVal <= resultRank) + return false; + + return true; + }); target.markUnknownOpDynamicallyLegal( [](mlir::Operation *) { return true; }); if (mlir::failed(mlir::applyFullConversion(getOperation(), target, diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp index 72b894d1cec757..20d47fda32380d 100644 --- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp +++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp @@ -11,6 +11,7 @@ #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/allocatable.h" @@ -27,6 +28,8 @@ namespace fir { namespace { +static constexpr llvm::StringRef builtinPrefix = "_QM__fortran_builtins"; + static void 
processAddrOfOp(fir::AddrOfOp addrOfOp, mlir::SymbolTable &symbolTable, llvm::DenseSet &candidates) { @@ -35,22 +38,46 @@ static void processAddrOfOp(fir::AddrOfOp addrOfOp, // TO DO: limit candidates to non-scalars. Scalars appear to have been // folded in already. if (globalOp.getConstant()) { + // Limit recursion to builtin global for now. + if (globalOp.getSymName().starts_with(builtinPrefix)) { + globalOp.walk([&](fir::AddrOfOp op) { + processAddrOfOp(op, symbolTable, candidates); + }); + } candidates.insert(globalOp); } } } +static void processEmboxOp(fir::EmboxOp emboxOp, mlir::SymbolTable &symbolTable, + llvm::DenseSet &candidates) { + if (auto recTy = mlir::dyn_cast( + fir::unwrapRefType(emboxOp.getMemref().getType()))) + // Only look at builtin record type. + if (recTy.getName().starts_with(builtinPrefix)) + if (auto globalOp = symbolTable.lookup( + fir::NameUniquer::getTypeDescriptorName(recTy.getName()))) { + if (!candidates.contains(globalOp)) { + globalOp.walk([&](fir::AddrOfOp op) { + processAddrOfOp(op, symbolTable, candidates); + }); + candidates.insert(globalOp); + } + } +} + static void prepareImplicitDeviceGlobals(mlir::func::FuncOp funcOp, mlir::SymbolTable &symbolTable, llvm::DenseSet &candidates) { - auto cudaProcAttr{ funcOp->getAttrOfType(cuf::getProcAttrName())}; if (cudaProcAttr && cudaProcAttr.getValue() != cuf::ProcAttribute::Host) { - funcOp.walk([&](fir::AddrOfOp addrOfOp) { - processAddrOfOp(addrOfOp, symbolTable, candidates); + funcOp.walk([&](fir::AddrOfOp op) { + processAddrOfOp(op, symbolTable, candidates); }); + funcOp.walk( + [&](fir::EmboxOp op) { processEmboxOp(op, symbolTable, candidates); }); } } diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 86d475c1a15422..f8fda92d5ac2bb 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -739,6 +739,9 @@ TYPE_PARSER(sourced(construct( TYPE_PARSER(sourced(construct(verbatim("CANCEL"_tok), Parser{}, 
maybe("IF" >> parenthesized(scalarLogicalExpr))))) +TYPE_PARSER(sourced(construct( + parenthesized(indirect(Parser{}))))) + // 2.17.7 Atomic construct/2.17.8 Flush construct [OpenMP 5.0] // memory-order-clause -> // seq_cst @@ -767,6 +770,7 @@ TYPE_PARSER(construct( // atomic-clause -> memory-order-clause | HINT(hint-expression) TYPE_PARSER(sourced(construct( construct(Parser{}) || + construct("FAIL" >> Parser{}) || construct("HINT" >> sourced(construct( construct(parenthesized(constantExpr)))))))) diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4782cc1f2d7d7d..a10be3f1c797de 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2702,10 +2702,16 @@ class UnparseVisitor { Put("\n"); EndOpenMP(); } + void Unparse(const OmpFailClause &x) { + Word("FAIL("); + Walk(x.v); + Put(")"); + } void Unparse(const OmpMemoryOrderClause &x) { Walk(x.v); } void Unparse(const OmpAtomicClause &x) { common::visit(common::visitors{ [&](const OmpMemoryOrderClause &y) { Walk(y); }, + [&](const OmpFailClause &y) { Walk(y); }, [&](const OmpClause &z) { Walk(z); }, }, x.u); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 3c9c5a02a338a6..1e78cf359a213e 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2530,18 +2530,27 @@ void OmpStructureChecker::CheckAtomicCaptureConstruct( void OmpStructureChecker::CheckAtomicMemoryOrderClause( const parser::OmpAtomicClauseList *leftHandClauseList, const parser::OmpAtomicClauseList *rightHandClauseList) { - int numMemoryOrderClause = 0; + int numMemoryOrderClause{0}; + int numFailClause{0}; auto checkForValidMemoryOrderClause = [&](const parser::OmpAtomicClauseList *clauseList) { for (const auto &clause : clauseList->v) { - if (std::get_if(&clause.u)) { - numMemoryOrderClause++; - if (numMemoryOrderClause > 1) { + if (std::get_if(&clause.u)) { + numFailClause++; + if 
(numFailClause > 1) { context_.Say(clause.source, - "More than one memory order clause not allowed on " - "OpenMP Atomic construct"_err_en_US); + "More than one FAIL clause not allowed on OpenMP ATOMIC construct"_err_en_US); return; } + } else { + if (std::get_if(&clause.u)) { + numMemoryOrderClause++; + if (numMemoryOrderClause > 1) { + context_.Say(clause.source, + "More than one memory order clause not allowed on OpenMP ATOMIC construct"_err_en_US); + return; + } + } } } }; @@ -2816,8 +2825,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause &x) { // Following clauses do not have a separate node in parse-tree.h. CHECK_SIMPLE_CLAUSE(Absent, OMPC_absent) -CHECK_SIMPLE_CLAUSE(AcqRel, OMPC_acq_rel) -CHECK_SIMPLE_CLAUSE(Acquire, OMPC_acquire) CHECK_SIMPLE_CLAUSE(Affinity, OMPC_affinity) CHECK_SIMPLE_CLAUSE(Capture, OMPC_capture) CHECK_SIMPLE_CLAUSE(Contains, OMPC_contains) @@ -2853,9 +2860,6 @@ CHECK_SIMPLE_CLAUSE(Nogroup, OMPC_nogroup) CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch) CHECK_SIMPLE_CLAUSE(Partial, OMPC_partial) CHECK_SIMPLE_CLAUSE(ProcBind, OMPC_proc_bind) -CHECK_SIMPLE_CLAUSE(Release, OMPC_release) -CHECK_SIMPLE_CLAUSE(Relaxed, OMPC_relaxed) -CHECK_SIMPLE_CLAUSE(SeqCst, OMPC_seq_cst) CHECK_SIMPLE_CLAUSE(Simd, OMPC_simd) CHECK_SIMPLE_CLAUSE(Sizes, OMPC_sizes) CHECK_SIMPLE_CLAUSE(Permutation, OMPC_permutation) @@ -2883,7 +2887,6 @@ CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare) CHECK_SIMPLE_CLAUSE(CancellationConstructType, OMPC_cancellation_construct_type) CHECK_SIMPLE_CLAUSE(OmpxAttribute, OMPC_ompx_attribute) CHECK_SIMPLE_CLAUSE(OmpxBare, OMPC_ompx_bare) -CHECK_SIMPLE_CLAUSE(Fail, OMPC_fail) CHECK_SIMPLE_CLAUSE(Weak, OMPC_weak) CHECK_REQ_SCALAR_INT_CLAUSE(NumTeams, OMPC_num_teams) @@ -2896,6 +2899,53 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen) +void OmpStructureChecker::Enter(const 
parser::OmpClause::AcqRel &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_acq_rel); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::Acquire &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_acquire); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::Release &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_release); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::Relaxed &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_relaxed); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::SeqCst &) { + if (!isFailClause) + CheckAllowedClause(llvm::omp::Clause::OMPC_seq_cst); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::Fail &) { + assert(!isFailClause && "Unexpected FAIL clause inside a FAIL clause?"); + isFailClause = true; + CheckAllowedClause(llvm::omp::Clause::OMPC_fail); +} + +void OmpStructureChecker::Leave(const parser::OmpClause::Fail &) { + assert(isFailClause && "Expected to be inside a FAIL clause here"); + isFailClause = false; +} + +void OmpStructureChecker::Enter(const parser::OmpFailClause &) { + assert(!isFailClause && "Unexpected FAIL clause inside a FAIL clause?"); + isFailClause = true; + CheckAllowedClause(llvm::omp::Clause::OMPC_fail); +} + +void OmpStructureChecker::Leave(const parser::OmpFailClause &) { + assert(isFailClause && "Expected to be inside a FAIL clause here"); + isFailClause = false; +} + // Restrictions specific to each clause are implemented apart from the // generalized restrictions. 
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 89af46d9171ad3..e28e5f6d7b0d5a 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -142,6 +142,10 @@ class OmpStructureChecker #define GEN_FLANG_CLAUSE_CHECK_ENTER #include "llvm/Frontend/OpenMP/OMP.inc" + void Leave(const parser::OmpClause::Fail &); + void Enter(const parser::OmpFailClause &); + void Leave(const parser::OmpFailClause &); + private: bool CheckAllowedClause(llvmOmpClause clause); bool IsVariableListItem(const Symbol &sym); @@ -276,6 +280,7 @@ class OmpStructureChecker using LoopConstruct = std::variant; std::vector loopStack_; + bool isFailClause{false}; }; /// Find a duplicate entry in the range, and return an iterator to it. diff --git a/flang/test/Driver/fveclib.f90 b/flang/test/Driver/fveclib.f90 index 14c59b0616f828..490ce974724a6d 100644 --- a/flang/test/Driver/fveclib.f90 +++ b/flang/test/Driver/fveclib.f90 @@ -30,3 +30,20 @@ ! TODO: if we add support for -nostdlib or -nodefaultlibs we need to test that ! these prevent "-framework Accelerate" being added on Darwin + +! RUN: %flang -### --target=aarch64-pc-windows-msvc -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-MSVC %s +! RUN: %flang -### --target=aarch64-linux-gnu -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-LINUX %s +! RUN: %flang -### --target=aarch64-linux-gnu -fveclib=ArmPL %s -lamath 2>&1 | FileCheck --check-prefix=CHECK-LINKING-AMATH-BEFORE-ARMPL-LINUX %s +! RUN: %flang -### --target=arm64-apple-darwin -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-LINKING-ARMPL-DARWIN %s +! RUN: %flang -### --target=arm64-apple-darwin -fveclib=ArmPL %s -lamath 2>&1 | FileCheck --check-prefix=CHECK-LINKING-AMATH-BEFORE-ARMPL-DARWIN %s +! CHECK-LINKING-ARMPL-LINUX: "--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +! CHECK-LINKING-ARMPL-DARWIN: "-lm" "-lamath" "-lm" +! 
CHECK-LINKING-ARMPL-MSVC: "--dependent-lib=amath" +! CHECK-LINKING-AMATH-BEFORE-ARMPL-LINUX: "-lamath" {{.*}}"--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +! CHECK-LINKING-AMATH-BEFORE-ARMPL-DARWIN: "-lamath" {{.*}}"-lm" "-lamath" "-lm" + +! RUN: %flang -### --target=aarch64-linux-gnu -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_arch_subdir -frtlib-add-rpath -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-RPATH-ARMPL %s +! CHECK-RPATH-ARMPL: "--push-state" "--as-needed" "-lm" "-lamath" "-lm" "--pop-state" +! We need to see "-rpath" at least twice, one for veclib, one for the Fortran runtime +! CHECK-RPATH-ARMPL-SAME: "-rpath" +! CHECK-RPATH-ARMPL-SAME: "-rpath" diff --git a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 index ec5ed06824e227..ef32722488fe2a 100644 --- a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 +++ b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 @@ -191,3 +191,120 @@ // Test that global used in device function are flagged with the correct // CHECK: fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50> // CHECK: gpu.module @cuda_device_mod // CHECK: fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50> + +// ----- + +func.func @_QMmtestsPtestany() attributes {cuf.proc_attr = #cuf.cuda_proc} { + %1135 = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> {bindc_name = "x", uniq_name = "_QMmtestsFtestanyEx"} + %1136 = fir.declare %1135 {uniq_name = "_QMmtestsFtestanyEx"} : (!fir.ref}>>) -> !fir.ref}>> + %1138 = fir.embox %1136 : (!fir.ref}>>) -> !fir.box}>> + return +} +fir.type_info @_QM__fortran_builtinsT__builtin_c_devptr noinit nodestroy nofinal : !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> +fir.global 
linkonce_odr @_QM__fortran_builtinsE.dt.__builtin_c_devptr constant : !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> { + %c1_i8 = arith.constant 1 : i8 + %c0_i8 = arith.constant 0 : i8 + %c0_i32 = arith.constant 0 : i32 + %c1 = arith.constant 1 : index + %c8_i64 = arith.constant 8 : i64 + %c18 = arith.constant 18 : index + %c0 = arith.constant 0 : index + %0 = fir.undefined 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %1 = fir.zero_bits !fir.ptr,name:!fir.box>>}>>> + %2 = fir.shape %c0 : (index) -> !fir.shape<1> + %3 = fir.embox %1(%2) : (!fir.ptr,name:!fir.box>>}>>>, !fir.shape<1>) -> !fir.box,name:!fir.box>>}>>>> + %4 = fir.insert_value %0, %3, ["binding", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box,name:!fir.box>>}>>>>) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %5 = fir.address_of(@_QM__fortran_builtinsE.n.__builtin_c_devptr) : !fir.ref> + %6 = fir.declare %5 typeparams %c18 {fortran_attrs = #fir.var_attrs, uniq_name = "_QM__fortran_builtinsE.n.__builtin_c_devptr"} : (!fir.ref>, index) -> !fir.ref> + %7 = fir.embox %6 : (!fir.ref>) -> !fir.box> + %8 = fir.rebox %7 : (!fir.box>) -> !fir.box>> + %9 = fir.insert_value %4, %8, ["name", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box>>) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %10 = fir.insert_value %9, %c8_i64, ["sizeinbytes", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i64) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %11 = fir.zero_bits 
!fir.ptr,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>> + %12 = fir.embox %11 : (!fir.ptr,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>) -> 
!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>> + %13 = fir.insert_value %10, %12, ["uninstantiated", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %14 = fir.zero_bits !fir.ptr> + %15 = fir.embox %14(%2) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> + %16 = fir.insert_value %13, %15, ["kindparameter", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box>>) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %17 = fir.zero_bits !fir.ptr> + %18 = fir.embox %17(%2) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> + %19 = fir.insert_value %16, %18, ["lenparameterkind", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box>>) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %20 = fir.address_of(@_QM__fortran_builtinsE.c.__builtin_c_devptr) : !fir.ref>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>> + %21 = fir.shape_shift %c0, %c1 : (index, index) -> !fir.shapeshift<1> + %22 = fir.declare %20(%21) {fortran_attrs = #fir.var_attrs, uniq_name = 
"_QM__fortran_builtinsE.c.__builtin_c_devptr"} : (!fir.ref>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.shapeshift<1>) -> !fir.ref>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>> + %23 = fir.embox %22(%21) : 
(!fir.ref>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.shapeshift<1>) -> !fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>> + %24 = fir.shift %c0 : (index) -> !fir.shift<1> + %25 = fir.rebox %23(%24) : 
(!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.shift<1>) -> !fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>> + %26 = fir.insert_value %19, %25, ["component", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, 
!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %27 = fir.zero_bits !fir.ptr>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>> + %28 = fir.embox %27(%2) : 
(!fir.ptr>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>, !fir.shape<1>) -> !fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>> + %29 = fir.insert_value %26, %28, ["procptr", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %30 = fir.zero_bits 
!fir.ptr,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>> + %31 = fir.embox %30(%2) : (!fir.ptr,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>, !fir.shape<1>) -> !fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>> + %32 = fir.insert_value %29, %31, ["special", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, !fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %33 = fir.insert_value %32, %c0_i32, ["specialbitset", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i32) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %34 = fir.insert_value %33, %c0_i8, ["hasparent", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i8) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %35 = fir.insert_value %34, %c1_i8, ["noinitializationneeded", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i8) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %36 = fir.insert_value %35, %c1_i8, ["nodestructionneeded", !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : 
(!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i8) -> !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + %37 = fir.insert_value %36, %c1_i8, ["nofinalizationneeded", 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>] : (!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>, i8) -> 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> + fir.has_value %37 : !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}> +} +fir.global linkonce_odr @_QM__fortran_builtinsE.n.__builtin_c_devptr constant : !fir.char<1,18> { + %0 = fir.string_lit "__builtin_c_devptr"(18) : !fir.char<1,18> + 
fir.has_value %0 : !fir.char<1,18> +} +fir.global linkonce_odr @_QM__fortran_builtinsE.c.__builtin_c_devptr constant : !fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>> { + %c0 = arith.constant 0 : index + %c0_i64 = arith.constant 0 : i64 + %c0_i8 = arith.constant 0 : i8 + %c5_i8 = arith.constant 5 : i8 + %c1_i8 = arith.constant 1 : i8 + %c4 = arith.constant 4 : index + %0 = fir.undefined 
!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>> + %1 = fir.undefined !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %2 = fir.address_of(@_QM__fortran_builtinsE.n.cptr) : !fir.ref> + %3 = fir.declare %2 typeparams %c4 {fortran_attrs = #fir.var_attrs, uniq_name = 
"_QM__fortran_builtinsE.n.cptr"} : (!fir.ref>, index) -> !fir.ref> + %4 = fir.embox %3 : (!fir.ref>) -> !fir.box> + %5 = fir.rebox %4 : (!fir.box>) -> !fir.box>> + %6 = fir.insert_value %1, %5, ["name", !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : 
(!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.box>>) -> !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %7 = fir.insert_value %6, %c1_i8, ["genre", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i8) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %8 = fir.insert_value %7, %c5_i8, ["category", !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : 
(!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i8) -> !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %9 = fir.insert_value %8, %c0_i8, ["kind", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i8) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %10 = fir.insert_value %9, %c0_i8, ["rank", !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : 
(!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i8) -> !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %11 = fir.insert_value %10, %c0_i64, ["offset", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, i64) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %12 = fir.undefined !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}> + %13 = fir.insert_value %12, %c1_i8, ["genre", !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>] : (!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>, i8) -> !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}> + %14 = fir.insert_value %13, %c0_i64, ["value", !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>] : (!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>, i64) -> !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}> + %15 = fir.insert_value %11, %14, ["characterlen", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %16 = fir.address_of(@_QM__fortran_builtinsE.dt.__builtin_c_ptr) : !fir.ref,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>> + %17 = fir.declare %16 {fortran_attrs = #fir.var_attrs, uniq_name = "_QM__fortran_builtinsE.dt.__builtin_c_ptr"} : 
(!fir.ref,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>) -> !fir.ref,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>> + %18 = fir.embox %17 : 
(!fir.ref,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>) -> !fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>> + %19 = fir.rebox %18 : 
(!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>) -> !fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>> + %20 = fir.insert_value %15, %19, ["derived", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, 
!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>) -> !fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %21 = fir.zero_bits !fir.ptr,value:i64}>>> + %22 = fir.shape %c0 : (index) -> !fir.shape<1> + %23 = fir.embox %21(%22) : (!fir.ptr,value:i64}>>>, !fir.shape<1>) -> !fir.box,value:i64}>>>> + %24 = fir.insert_value %20, %23, ["lenvalue", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.box,value:i64}>>>>) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %25 = fir.zero_bits !fir.ptr,value:i64}>>> + %26 = fir.shape %c0, %c0 : (index, index) -> !fir.shape<2> + %27 = fir.embox %25(%26) : (!fir.ptr,value:i64}>>>, !fir.shape<2>) -> !fir.box,value:i64}>>>> + %28 = fir.insert_value %24, %27, ["bounds", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.box,value:i64}>>>>) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %29 = fir.zero_bits !fir.ref + %30 = fir.convert %29 : (!fir.ref) -> i64 + %31 = fir.undefined !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + %32 = fir.insert_value %31, %30, ["__address", !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>] : (!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>, i64) -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + %33 = fir.insert_value %28, %32, ["initialization", 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>] : (!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> + %34 = fir.insert_value %0, %33, [0 : index] : (!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>, 
!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>) -> !fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>> + fir.has_value %34 : 
!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>> +} +fir.global linkonce_odr @_QM__fortran_builtinsE.n.cptr constant : !fir.char<1,4> { + %0 = fir.string_lit "cptr"(4) : !fir.char<1,4> + fir.has_value %0 : !fir.char<1,4> +} + +// CHECK-LABEL: func.func @_QMmtestsPtestany() +// CHECK: gpu.module @cuda_device_mod +// CHECK-DAG: fir.global linkonce_odr @_QM__fortran_builtinsE.n.cptr +// CHECK-DAG: fir.global linkonce_odr @_QM__fortran_builtinsE.c.__builtin_c_devptr +// CHECK-DAG: fir.global linkonce_odr @_QM__fortran_builtinsE.dt.__builtin_c_devptr +// CHECK-DAG: fir.global linkonce_odr @_QM__fortran_builtinsE.n.__builtin_c_devptr diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir new file mode 100644 index 00000000000000..acb89c0719aa08 --- /dev/null +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir @@ -0,0 +1,304 @@ +// Test hlfir.cshift simplification to hlfir.elemental: +// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s + +func.func @cshift_vector(%arg0: !fir.box>, %arg1: !fir.ref) { + %res = hlfir.cshift %arg0 %arg1 : 
(!fir.box>, !fir.ref) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_vector( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref) { +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i32) -> i64 +// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_9:.*]]: index): +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (index) -> i64 +// CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_10]], %[[VAL_7]] : i64 +// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_5]] : i64 +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = arith.remsi %[[VAL_12]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_15:.*]] = arith.xori %[[VAL_12]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_16:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_17:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_18:.*]] = arith.cmpi ne, %[[VAL_14]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_19:.*]] = arith.andi %[[VAL_18]], %[[VAL_17]] : i1 +// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_14]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_19]], %[[VAL_20]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_21]], %[[VAL_5]] : i64 +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i64) -> index +// CHECK: %[[VAL_24:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_24]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_26:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_25]]#0, %[[VAL_26]] : index 
+// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_23]], %[[VAL_27]] : index +// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_28]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref +// CHECK: hlfir.yield_element %[[VAL_30]] : i32 +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @cshift_2d_by_scalar(%arg0: !fir.box>, %arg1: !fir.ref) { + %dim = arith.constant 2 : i32 + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, !fir.ref, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_2d_by_scalar( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref) { +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_4]]#1, %[[VAL_6]]#1 : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64 +// CHECK: %[[VAL_11:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<2>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index): +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (index) -> i64 +// CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[VAL_10]] : i64 +// CHECK: %[[VAL_16:.*]] = arith.subi %[[VAL_15]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64 +// CHECK: %[[VAL_18:.*]] = arith.remsi %[[VAL_16]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_19:.*]] = arith.xori %[[VAL_16]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_20:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_21:.*]] = arith.cmpi slt, 
%[[VAL_19]], %[[VAL_20]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_20]] : i64 +// CHECK: %[[VAL_23:.*]] = arith.andi %[[VAL_22]], %[[VAL_21]] : i1 +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_18]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_23]], %[[VAL_24]], %[[VAL_18]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.addi %[[VAL_25]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i64) -> index +// CHECK: %[[VAL_28:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_29:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_28]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_30:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_30]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_32:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_33:.*]] = arith.subi %[[VAL_29]]#0, %[[VAL_32]] : index +// CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_12]], %[[VAL_33]] : index +// CHECK: %[[VAL_35:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_32]] : index +// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_27]], %[[VAL_35]] : index +// CHECK: %[[VAL_37:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]], %[[VAL_36]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_37]] : !fir.ref +// CHECK: hlfir.yield_element %[[VAL_38]] : i32 +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @cshift_2d_by_vector(%arg0: !fir.box>, %arg1: !fir.box>) { + %dim = arith.constant 2 : i32 + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, !fir.box>, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_2d_by_vector( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.box>) { +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]] = 
arith.constant 1 : index +// CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_4]]#1, %[[VAL_6]]#1 : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_9:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<2>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_10:.*]]: index, %[[VAL_11:.*]]: index): +// CHECK: %[[VAL_12:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_12]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_14]] : index +// CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_10]], %[[VAL_15]] : index +// CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_16]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64 +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_11]] : (index) -> i64 +// CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64 +// CHECK: %[[VAL_24:.*]] = arith.remsi %[[VAL_22]], %[[VAL_23]] : i64 +// CHECK: %[[VAL_25:.*]] = arith.xori %[[VAL_22]], %[[VAL_23]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_27:.*]] = arith.cmpi slt, %[[VAL_25]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_24]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_29:.*]] = arith.andi %[[VAL_28]], %[[VAL_27]] : i1 +// CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_24]], %[[VAL_23]] : i64 +// CHECK: %[[VAL_31:.*]] = arith.select %[[VAL_29]], %[[VAL_30]], %[[VAL_24]] : i64 +// CHECK: %[[VAL_32:.*]] = arith.addi %[[VAL_31]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_33:.*]] = fir.convert 
%[[VAL_32]] : (i64) -> index +// CHECK: %[[VAL_34:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_35:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_34]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_36:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_37:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_36]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_38:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_39:.*]] = arith.subi %[[VAL_35]]#0, %[[VAL_38]] : index +// CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_10]], %[[VAL_39]] : index +// CHECK: %[[VAL_41:.*]] = arith.subi %[[VAL_37]]#0, %[[VAL_38]] : index +// CHECK: %[[VAL_42:.*]] = arith.addi %[[VAL_33]], %[[VAL_41]] : index +// CHECK: %[[VAL_43:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_40]], %[[VAL_42]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_43]] : !fir.ref +// CHECK: hlfir.yield_element %[[VAL_44]] : i32 +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @cshift_vector_char(%arg0: !fir.box>>, %arg1: !fir.ref) { + %res = hlfir.cshift %arg0 %arg1 : (!fir.box>>, !fir.ref) -> !hlfir.expr> + return +} +// CHECK-LABEL: func.func @cshift_vector_char( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref) { +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = fir.box_elesize %[[VAL_0]] : (!fir.box>>) -> index +// CHECK: %[[VAL_6:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_7:.*]] = arith.divsi %[[VAL_5]], %[[VAL_6]] : index +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64 +// CHECK: %[[VAL_11:.*]] = hlfir.elemental %[[VAL_4]] typeparams %[[VAL_7]] unordered : (!fir.shape<1>, index) 
-> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_12:.*]]: index): +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[VAL_10]] : i64 +// CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64 +// CHECK: %[[VAL_17:.*]] = arith.remsi %[[VAL_15]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_18:.*]] = arith.xori %[[VAL_15]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_19:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_21:.*]] = arith.cmpi ne, %[[VAL_17]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.andi %[[VAL_21]], %[[VAL_20]] : i1 +// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_17]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_24:.*]] = arith.select %[[VAL_22]], %[[VAL_23]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_8]] : i64 +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i64) -> index +// CHECK: %[[VAL_27:.*]] = fir.box_elesize %[[VAL_0]] : (!fir.box>>) -> index +// CHECK: %[[VAL_28:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_29:.*]] = arith.divsi %[[VAL_27]], %[[VAL_28]] : index +// CHECK: %[[VAL_30:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_30]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_32:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_33:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_32]] : index +// CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_26]], %[[VAL_33]] : index +// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]]) typeparams %[[VAL_29]] : (!fir.box>>, index, index) -> !fir.boxchar<2> +// CHECK: hlfir.yield_element %[[VAL_35]] : !fir.boxchar<2> +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @cshift_vector_poly(%arg0: !fir.class>>, %arg1: i32) { + %res = hlfir.cshift %arg0 %arg1 : (!fir.class>>, i32) -> !hlfir.expr?> + 
return +} +// CHECK-LABEL: func.func @cshift_vector_poly( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.class>>, +// CHECK-SAME: %[[VAL_1:.*]]: i32) { +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.class>>, index) -> (index, index, index) +// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_4]] mold %[[VAL_0]] unordered : (!fir.shape<1>, !fir.class>>) -> !hlfir.expr?> { +// CHECK: ^bb0(%[[VAL_8:.*]]: index): +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (index) -> i64 +// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_10]], %[[VAL_5]] : i64 +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_3]]#1 : (index) -> i64 +// CHECK: %[[VAL_13:.*]] = arith.remsi %[[VAL_11]], %[[VAL_12]] : i64 +// CHECK: %[[VAL_14:.*]] = arith.xori %[[VAL_11]], %[[VAL_12]] : i64 +// CHECK: %[[VAL_15:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_16:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_13]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_18:.*]] = arith.andi %[[VAL_17]], %[[VAL_16]] : i1 +// CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_13]], %[[VAL_12]] : i64 +// CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_18]], %[[VAL_19]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_5]] : i64 +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_21]] : (i64) -> index +// CHECK: %[[VAL_23:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_24:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_23]] : (!fir.class>>, index) -> (index, index, index) +// CHECK: %[[VAL_25:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_26:.*]] = arith.subi %[[VAL_24]]#0, %[[VAL_25]] : index +// CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_22]], 
%[[VAL_26]] : index +// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_27]]) : (!fir.class>>, index) -> !fir.class> +// CHECK: hlfir.yield_element %[[VAL_28]] : !fir.class> +// CHECK: } +// CHECK: return +// CHECK: } + +// negative: non-constant dim argument +func.func @cshift_nonconst_dim(%arg0: !fir.box>, %arg1: i32, %dim : i32) { + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, i32, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_nonconst_dim( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: i32, +// CHECK-SAME: %[[VAL_2:.*]]: i32) { +// CHECK: %[[VAL_3:.*]] = hlfir.cshift %[[VAL_0]] %[[VAL_1]] dim %[[VAL_2]] : (!fir.box>, i32, i32) -> !hlfir.expr +// CHECK: return +// CHECK: } + +// negative: invalid constant dim argument +func.func @cshift_invalid_dim(%arg0: !fir.box>, %arg1: i32) { + %dim = arith.constant 3 : i32 + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, i32, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_invalid_dim( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: i32) { +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : i32 +// CHECK: %[[VAL_3:.*]] = hlfir.cshift %[[VAL_0]] %[[VAL_1]] dim %[[VAL_2]] : (!fir.box>, i32, i32) -> !hlfir.expr +// CHECK: return +// CHECK: } + +// When the input array is 1D, we may assume that DIM==1, +// otherwise the program is illegal, and we can do anything +// about it. 
+func.func @cshift_vector_assumed_dim_1(%arg0: !fir.box>, %arg1: i32) { + %dim = arith.constant 3 : i32 + %res = hlfir.cshift %arg0 %arg1 dim %dim : (!fir.box>, i32, i32) -> !hlfir.expr + return +} +// CHECK-LABEL: func.func @cshift_vector_assumed_dim_1( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: i32) { +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_6:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_9:.*]]: index): +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (index) -> i64 +// CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_10]], %[[VAL_7]] : i64 +// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_4]]#1 : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = arith.remsi %[[VAL_12]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_15:.*]] = arith.xori %[[VAL_12]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_16:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_17:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_18:.*]] = arith.cmpi ne, %[[VAL_14]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_19:.*]] = arith.andi %[[VAL_18]], %[[VAL_17]] : i1 +// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_14]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_19]], %[[VAL_20]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_21]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i64) -> index +// CHECK: %[[VAL_24:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_24]] : (!fir.box>, index) 
-> (index, index, index) +// CHECK: %[[VAL_26:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_25]]#0, %[[VAL_26]] : index +// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_23]], %[[VAL_27]] : index +// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_28]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_29]] : !fir.ref +// CHECK: hlfir.yield_element %[[VAL_30]] : i32 +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/flang/test/Lower/OpenMP/Todo/atomic-compare-fail.f90 b/flang/test/Lower/OpenMP/Todo/atomic-compare-fail.f90 new file mode 100644 index 00000000000000..b82bd13622764b --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/atomic-compare-fail.f90 @@ -0,0 +1,11 @@ +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s + +! CHECK: not yet implemented: OpenMP atomic compare +program p + integer :: x + logical :: r + !$omp atomic compare fail(relaxed) + if (x .eq. 0) then + x = 2 + end if +end program p diff --git a/flang/test/Parser/OpenMP/atomic-unparse.f90 b/flang/test/Parser/OpenMP/atomic-unparse.f90 index 64fa79fb1d1a2f..16dc7a1a92bf9e 100644 --- a/flang/test/Parser/OpenMP/atomic-unparse.f90 +++ b/flang/test/Parser/OpenMP/atomic-unparse.f90 @@ -165,6 +165,20 @@ program main i = j end if + +!$omp atomic compare fail(relaxed) + if (i .eq. k) then + i = j + end if +!$omp atomic fail(relaxed) compare + if (i .eq. k) then + i = j + end if +!$omp atomic fail(relaxed) compare acquire + if (i .eq. 
k) then + i = j + end if + !ATOMIC !$omp atomic i = j @@ -262,6 +276,9 @@ end program main !CHECK: !$OMP ATOMIC COMPARE ACQUIRE !CHECK: !$OMP ATOMIC RELAXED COMPARE !CHECK: !$OMP ATOMIC COMPARE RELAXED +!CHECK: !$OMP ATOMIC COMPARE FAIL(RELAXED) +!CHECK: !$OMP ATOMIC FAIL(RELAXED) COMPARE +!CHECK: !$OMP ATOMIC FAIL(RELAXED) COMPARE ACQUIRE !ATOMIC !CHECK: !$OMP ATOMIC @@ -270,3 +287,5 @@ end program main !CHECK: !$OMP ATOMIC ACQ_REL !CHECK: !$OMP ATOMIC ACQUIRE !CHECK: !$OMP ATOMIC RELAXED + + diff --git a/flang/test/Semantics/OpenMP/atomic-compare.f90 b/flang/test/Semantics/OpenMP/atomic-compare.f90 index 85644ad909107e..54492bf6a22a60 100644 --- a/flang/test/Semantics/OpenMP/atomic-compare.f90 +++ b/flang/test/Semantics/OpenMP/atomic-compare.f90 @@ -35,45 +35,58 @@ if (b .eq. a) b = c !$omp end atomic + !$omp atomic hint(1) acq_rel compare fail(release) + if (c .eq. a) a = b + !$omp end atomic + + !$omp atomic compare fail(release) + if (c .eq. a) a = b + !$omp end atomic + ! Check for error conditions: - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the COMPARE directive !$omp atomic seq_cst seq_cst compare if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the COMPARE directive !$omp atomic compare seq_cst seq_cst if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the COMPARE directive !$omp atomic seq_cst compare seq_cst if (b .eq. 
c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the COMPARE directive !$omp atomic acquire acquire compare if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the COMPARE directive !$omp atomic compare acquire acquire if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the COMPARE directive !$omp atomic acquire compare acquire if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the COMPARE directive !$omp atomic relaxed relaxed compare if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the COMPARE directive !$omp atomic compare relaxed relaxed if (b .eq. c) b = a - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the COMPARE directive !$omp atomic relaxed compare relaxed if (b .eq. c) b = a + !ERROR: More than one FAIL clause not allowed on OpenMP ATOMIC construct + !$omp atomic fail(release) compare fail(release) + if (c .eq. 
a) a = b + !$omp end atomic + !$omp end parallel end diff --git a/flang/test/Semantics/OpenMP/atomic01.f90 b/flang/test/Semantics/OpenMP/atomic01.f90 index 538db316f6e7f5..173effe86b69c0 100644 --- a/flang/test/Semantics/OpenMP/atomic01.f90 +++ b/flang/test/Semantics/OpenMP/atomic01.f90 @@ -14,193 +14,193 @@ ! At most one memory-order-clause may appear on the construct. !READ - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the READ directive !$omp atomic seq_cst seq_cst read i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the READ directive !$omp atomic read seq_cst seq_cst i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the READ directive !$omp atomic seq_cst read seq_cst i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the READ directive !$omp atomic acquire acquire read i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the READ directive !$omp atomic read acquire acquire i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the READ directive !$omp atomic 
acquire read acquire i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the READ directive !$omp atomic relaxed relaxed read i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the READ directive !$omp atomic read relaxed relaxed i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the READ directive !$omp atomic relaxed read relaxed i = j !UPDATE - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive !$omp atomic seq_cst seq_cst update !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive !$omp atomic update seq_cst seq_cst !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive !$omp atomic seq_cst update seq_cst !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order 
clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the UPDATE directive !$omp atomic release release update !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the UPDATE directive !$omp atomic update release release !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the UPDATE directive !$omp atomic release update release !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the UPDATE directive !$omp atomic relaxed relaxed update !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the UPDATE directive !$omp atomic update relaxed relaxed !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the UPDATE directive !$omp atomic relaxed update relaxed !ERROR: Invalid or missing operator in atomic update statement i = j !CAPTURE - !ERROR: More than one memory order clause not allowed on 
OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive !$omp atomic seq_cst seq_cst capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive !$omp atomic capture seq_cst seq_cst i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive !$omp atomic seq_cst capture seq_cst i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the CAPTURE directive !$omp atomic release release capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the CAPTURE directive !$omp atomic capture release release i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the CAPTURE directive !$omp atomic release capture release i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the CAPTURE directive 
!$omp atomic relaxed relaxed capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the CAPTURE directive !$omp atomic capture relaxed relaxed i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the CAPTURE directive !$omp atomic relaxed capture relaxed i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive !$omp atomic acq_rel acq_rel capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive !$omp atomic capture acq_rel acq_rel i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive !$omp atomic acq_rel capture acq_rel i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive !$omp atomic acquire acquire capture i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory 
order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive !$omp atomic capture acquire acquire i = j j = k !$omp end atomic - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive !$omp atomic acquire capture acquire i = j @@ -208,57 +208,57 @@ !$omp end atomic !WRITE - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the WRITE directive !$omp atomic seq_cst seq_cst write i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the WRITE directive !$omp atomic write seq_cst seq_cst i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the WRITE directive !$omp atomic seq_cst write seq_cst i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the WRITE directive !$omp atomic release release write i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the WRITE directive !$omp atomic write release release i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than 
one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the WRITE directive !$omp atomic release write release i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the WRITE directive !$omp atomic relaxed relaxed write i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the WRITE directive !$omp atomic write relaxed relaxed i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the WRITE directive !$omp atomic relaxed write relaxed i = j !No atomic-clause - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELAXED clause can appear on the ATOMIC directive !$omp atomic relaxed relaxed !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one SEQ_CST clause can appear on the ATOMIC directive !$omp atomic seq_cst seq_cst !ERROR: Invalid or missing operator in atomic update statement i = j - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !ERROR: At most one RELEASE clause can appear on the ATOMIC directive !$omp atomic release release !ERROR: Invalid or missing operator in atomic 
update statement diff --git a/flang/test/Semantics/OpenMP/atomic05.f90 b/flang/test/Semantics/OpenMP/atomic05.f90 index f37aabcfce06ec..266268a2124409 100644 --- a/flang/test/Semantics/OpenMP/atomic05.f90 +++ b/flang/test/Semantics/OpenMP/atomic05.f90 @@ -8,20 +8,20 @@ program OmpAtomic use omp_lib integer :: g, x - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic relaxed, seq_cst x = x + 1 - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic read seq_cst, relaxed x = g - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic write relaxed, release x = 2 * 4 - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic update release, seq_cst !ERROR: Invalid or missing operator in atomic update statement x = 10 - !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !ERROR: More than one memory order clause not allowed on OpenMP ATOMIC construct !$omp atomic capture release, seq_cst x = g g = x * 10 diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt index f98af5744685fc..05f15a0e4e5cbb 100644 --- a/libc/config/linux/aarch64/headers.txt +++ b/libc/config/linux/aarch64/headers.txt @@ -1,5 +1,6 @@ set(TARGET_PUBLIC_HEADERS libc.include.assert + libc.include.complex libc.include.ctype libc.include.dlfcn libc.include.elf diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt index 6576db1f852691..9aabac5dea33cb 100644 --- a/libc/config/linux/arm/headers.txt +++ 
b/libc/config/linux/arm/headers.txt @@ -1,4 +1,5 @@ set(TARGET_PUBLIC_HEADERS + libc.include.complex libc.include.ctype libc.include.errno libc.include.fenv diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt index 41c343f71998b9..b38659e0b8daf7 100644 --- a/libc/config/linux/riscv/headers.txt +++ b/libc/config/linux/riscv/headers.txt @@ -1,5 +1,6 @@ set(TARGET_PUBLIC_HEADERS libc.include.assert + libc.include.complex libc.include.ctype libc.include.dirent libc.include.dlfcn diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt index e0c04b381492d0..8750100302ea7e 100644 --- a/libc/config/linux/x86_64/headers.txt +++ b/libc/config/linux/x86_64/headers.txt @@ -1,5 +1,6 @@ set(TARGET_PUBLIC_HEADERS libc.include.assert + libc.include.complex libc.include.ctype libc.include.dirent libc.include.dlfcn diff --git a/libc/hdrgen/yaml/complex.yaml b/libc/hdrgen/yaml/complex.yaml new file mode 100644 index 00000000000000..be0d3c9ae59b47 --- /dev/null +++ b/libc/hdrgen/yaml/complex.yaml @@ -0,0 +1,105 @@ +header: complex.h +macros: [] +types: + - type_name: cfloat16 + - type_name: cfloat128 + - type_name: float128 +enums: [] +objects: [] +functions: + - name: cimag + standards: + - stdc + return_type: double + arguments: + - type: _Complex double + - name: cimagf + standards: + - stdc + return_type: float + arguments: + - type: _Complex float + - name: cimagl + standards: + - stdc + return_type: long double + arguments: + - type: _Complex long double + - name: cimagf16 + standards: + - stdc + return_type: _Float16 + arguments: + - type: cfloat16 + guard: LIBC_TYPES_HAS_CFLOAT16 + - name: cimagf128 + standards: + - stdc + return_type: float128 + arguments: + - type: cfloat128 + guard: LIBC_TYPES_HAS_CFLOAT128 + - name: creal + standards: + - stdc + return_type: double + arguments: + - type: _Complex double + - name: crealf + standards: + - stdc + return_type: float + arguments: + - type: _Complex float + - 
name: creall + standards: + - stdc + return_type: long double + arguments: + - type: _Complex long double + - name: crealf16 + standards: + - stdc + return_type: _Float16 + arguments: + - type: cfloat16 + guard: LIBC_TYPES_HAS_CFLOAT16 + - name: crealf128 + standards: + - stdc + return_type: float128 + arguments: + - type: cfloat128 + guard: LIBC_TYPES_HAS_CFLOAT128 + - name: conj + standards: + - stdc + return_type: _Complex double + arguments: + - type: _Complex double + - name: conjf + standards: + - stdc + return_type: _Complex float + arguments: + - type: _Complex float + - name: conjl + standards: + - stdc + return_type: _Complex long double + arguments: + - type: _Complex long double + - name: conjf16 + standards: + - stdc + return_type: cfloat16 + arguments: + - type: cfloat16 + guard: LIBC_TYPES_HAS_CFLOAT16 + - name: conjf128 + standards: + - stdc + return_type: cfloat128 + arguments: + - type: cfloat128 + guard: LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/test/src/strings/CMakeLists.txt b/libc/test/src/strings/CMakeLists.txt index 963a1d6d6d60c0..10f96b8531f68f 100644 --- a/libc/test/src/strings/CMakeLists.txt +++ b/libc/test/src/strings/CMakeLists.txt @@ -20,7 +20,7 @@ add_libc_test( index_test.cpp DEPENDS libc.src.strings.index - libc.test.src.strchr_test_support + libc.test.src.string.strchr_test_support ) add_libc_test( @@ -31,7 +31,7 @@ add_libc_test( rindex_test.cpp DEPENDS libc.src.strings.rindex - libc.test.src.strchr_test_support + libc.test.src.string.strchr_test_support ) add_libc_test( diff --git a/libc/test/src/strings/index_test.cpp b/libc/test/src/strings/index_test.cpp index 88953205009d76..fc4cd2b31c55d1 100644 --- a/libc/test/src/strings/index_test.cpp +++ b/libc/test/src/strings/index_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "StrchrTest.h" +#include "test/src/string/StrchrTest.h" -#include "src/string/index.h" +#include "src/strings/index.h" #include 
"test/UnitTest/Test.h" STRCHR_TEST(Index, LIBC_NAMESPACE::index) diff --git a/libc/test/src/strings/rindex_test.cpp b/libc/test/src/strings/rindex_test.cpp index 10513919cffa2d..d3b756fe5f6e52 100644 --- a/libc/test/src/strings/rindex_test.cpp +++ b/libc/test/src/strings/rindex_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "StrchrTest.h" +#include "test/src/string/StrchrTest.h" -#include "src/string/rindex.h" +#include "src/strings/rindex.h" #include "test/UnitTest/Test.h" STRRCHR_TEST(Rindex, LIBC_NAMESPACE::rindex) diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator index 2488bcfc155a8d..5075de537b6ac2 100644 --- a/libcxx/include/experimental/iterator +++ b/libcxx/include/experimental/iterator @@ -52,23 +52,25 @@ namespace std { */ -#include <__config> -#include <__memory/addressof.h> -#include <__ostream/basic_ostream.h> -#include <__string/char_traits.h> -#include <__type_traits/decay.h> -#include <__utility/forward.h> -#include <__utility/move.h> -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +#if 0 +#else // 0 +# include <__config> +# include <__memory/addressof.h> +# include <__ostream/basic_ostream.h> +# include <__string/char_traits.h> +# include <__type_traits/decay.h> +# include <__utility/forward.h> +# include <__utility/move.h> +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_PUSH_MACROS -#include <__undef_macros> +# include <__undef_macros> -#if _LIBCPP_STD_VER >= 14 +# if _LIBCPP_STD_VER >= 14 _LIBCPP_BEGIN_NAMESPACE_LFTS @@ -117,14 +119,15 @@ make_ostream_joiner(basic_ostream<_CharT, _Traits>& __os, _Delim&& __d) { _LIBCPP_END_NAMESPACE_LFTS -#endif // _LIBCPP_STD_VER >= 14 +# endif // _LIBCPP_STD_VER >= 14 _LIBCPP_POP_MACROS -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -# 
include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include +# endif +#endif // 0 #endif // _LIBCPP_EXPERIMENTAL_ITERATOR diff --git a/libcxx/include/experimental/memory b/libcxx/include/experimental/memory index 48e42a0a88a649..cb02ecb8e2bee7 100644 --- a/libcxx/include/experimental/memory +++ b/libcxx/include/experimental/memory @@ -49,27 +49,29 @@ public: } */ -#include <__config> -#include <__cstddef/nullptr_t.h> -#include <__cstddef/size_t.h> -#include <__functional/hash.h> -#include <__functional/operations.h> -#include <__type_traits/add_lvalue_reference.h> -#include <__type_traits/add_pointer.h> -#include <__type_traits/common_type.h> -#include <__type_traits/enable_if.h> -#include <__type_traits/is_convertible.h> -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -#ifdef _LIBCPP_ENABLE_EXPERIMENTAL +#if 0 +#else // 0 +# include <__config> +# include <__cstddef/nullptr_t.h> +# include <__cstddef/size_t.h> +# include <__functional/hash.h> +# include <__functional/operations.h> +# include <__type_traits/add_lvalue_reference.h> +# include <__type_traits/add_pointer.h> +# include <__type_traits/common_type.h> +# include <__type_traits/enable_if.h> +# include <__type_traits/is_convertible.h> +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +# ifdef _LIBCPP_ENABLE_EXPERIMENTAL _LIBCPP_BEGIN_NAMESPACE_LFTS_V2 -# if _LIBCPP_STD_VER >= 17 +# if _LIBCPP_STD_VER >= 17 template class observer_ptr { @@ -172,7 +174,7 @@ _LIBCPP_HIDE_FROM_ABI bool operator>=(observer_ptr<_W1> __a, observer_ptr<_W2> _ return !(__a < __b); } -# endif // _LIBCPP_STD_VER >= 17 +# endif // _LIBCPP_STD_VER >= 17 _LIBCPP_END_NAMESPACE_LFTS_V2 @@ -180,22 +182,23 @@ _LIBCPP_BEGIN_NAMESPACE_STD // hash -# if _LIBCPP_STD_VER >= 17 +# if _LIBCPP_STD_VER >= 17 template struct hash> { _LIBCPP_HIDE_FROM_ABI size_t operator()(const 
experimental::observer_ptr<_Tp>& __ptr) const noexcept { return hash<_Tp*>()(__ptr.get()); } }; -# endif // _LIBCPP_STD_VER >= 17 +# endif // _LIBCPP_STD_VER >= 17 _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP_ENABLE_EXPERIMENTAL +# endif // _LIBCPP_ENABLE_EXPERIMENTAL -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# endif +#endif // 0 #endif /* _LIBCPP_EXPERIMENTAL_MEMORY */ diff --git a/libcxx/include/experimental/propagate_const b/libcxx/include/experimental/propagate_const index 8466d4e9c7ef3c..4d06e9f844acfa 100644 --- a/libcxx/include/experimental/propagate_const +++ b/libcxx/include/experimental/propagate_const @@ -107,39 +107,41 @@ */ -#include <__config> -#include <__cstddef/nullptr_t.h> -#include <__cstddef/size_t.h> -#include <__functional/operations.h> -#include <__fwd/functional.h> -#include <__type_traits/conditional.h> -#include <__type_traits/decay.h> -#include <__type_traits/enable_if.h> -#include <__type_traits/is_array.h> -#include <__type_traits/is_constructible.h> -#include <__type_traits/is_convertible.h> -#include <__type_traits/is_function.h> -#include <__type_traits/is_pointer.h> -#include <__type_traits/is_reference.h> -#include <__type_traits/is_same.h> -#include <__type_traits/is_swappable.h> -#include <__type_traits/remove_cv.h> -#include <__type_traits/remove_pointer.h> -#include <__type_traits/remove_reference.h> -#include <__utility/declval.h> -#include <__utility/forward.h> -#include <__utility/move.h> -#include <__utility/swap.h> -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +#if 0 +#else // 0 +# include <__config> +# include <__cstddef/nullptr_t.h> +# include <__cstddef/size_t.h> +# include <__functional/operations.h> +# include <__fwd/functional.h> +# include <__type_traits/conditional.h> +# include <__type_traits/decay.h> 
+# include <__type_traits/enable_if.h> +# include <__type_traits/is_array.h> +# include <__type_traits/is_constructible.h> +# include <__type_traits/is_convertible.h> +# include <__type_traits/is_function.h> +# include <__type_traits/is_pointer.h> +# include <__type_traits/is_reference.h> +# include <__type_traits/is_same.h> +# include <__type_traits/is_swappable.h> +# include <__type_traits/remove_cv.h> +# include <__type_traits/remove_pointer.h> +# include <__type_traits/remove_reference.h> +# include <__utility/declval.h> +# include <__utility/forward.h> +# include <__utility/move.h> +# include <__utility/swap.h> +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_PUSH_MACROS -#include <__undef_macros> +# include <__undef_macros> -#if _LIBCPP_STD_VER >= 14 +# if _LIBCPP_STD_VER >= 14 _LIBCPP_BEGIN_NAMESPACE_LFTS_V2 @@ -481,13 +483,14 @@ struct greater_equal> { _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP_STD_VER >= 14 +# endif // _LIBCPP_STD_VER >= 14 _LIBCPP_POP_MACROS -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# endif +#endif // 0 #endif // _LIBCPP_EXPERIMENTAL_PROPAGATE_CONST diff --git a/libcxx/include/experimental/simd b/libcxx/include/experimental/simd index 1a868513d160b6..374ab57cdbca55 100644 --- a/libcxx/include/experimental/simd +++ b/libcxx/include/experimental/simd @@ -75,18 +75,21 @@ inline namespace parallelism_v2 { # pragma GCC system_header #endif -#include <__config> -#include -#include -#include -#include -#include -#include -#include -#include - -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -#endif +#if 0 +#else // 0 +# include <__config> +# include +# include +# include +# include +# include +# include +# include +# include + +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && 
_LIBCPP_STD_VER <= 20 +# include +# endif +#endif // 0 #endif /* _LIBCPP_EXPERIMENTAL_SIMD */ diff --git a/libcxx/include/experimental/type_traits b/libcxx/include/experimental/type_traits index 6980fc3c51e465..0da95b79f8ab77 100644 --- a/libcxx/include/experimental/type_traits +++ b/libcxx/include/experimental/type_traits @@ -68,16 +68,18 @@ inline namespace fundamentals_v1 { */ -#include <__config> +#if 0 +#else // 0 +# include <__config> -#if _LIBCPP_STD_VER >= 14 +# if _LIBCPP_STD_VER >= 14 -# include -# include +# include +# include -# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -# endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_LFTS @@ -148,10 +150,11 @@ constexpr bool is_detected_convertible_v = is_detected_convertible<_To, _Op, _Ar _LIBCPP_END_NAMESPACE_LFTS -# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# endif -#endif /* _LIBCPP_STD_VER >= 14 */ +# endif /* _LIBCPP_STD_VER >= 14 */ +#endif // 0 #endif /* _LIBCPP_EXPERIMENTAL_TYPE_TRAITS */ diff --git a/libcxx/include/experimental/utility b/libcxx/include/experimental/utility index 00151b967e496d..1cb9c6a3617a8e 100644 --- a/libcxx/include/experimental/utility +++ b/libcxx/include/experimental/utility @@ -30,12 +30,14 @@ inline namespace fundamentals_v1 { */ -#include <__config> -#include +#if 0 +#else // 0 +# include <__config> +# include -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_LFTS @@ -43,8 +45,9 @@ struct _LIBCPP_TEMPLATE_VIS erased_type {}; _LIBCPP_END_NAMESPACE_LFTS -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) 
&& _LIBCPP_STD_VER <= 20 +# include +# endif +#endif // 0 #endif /* _LIBCPP_EXPERIMENTAL_UTILITY */ diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index 7b5b31c4081788..c9e35479b88c89 100644 --- a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -201,23 +201,25 @@ template */ -#include <__config> -#include <__hash_table> -#include -#include -#include - -#if defined(__DEPRECATED) && __DEPRECATED -# if defined(_LIBCPP_WARNING) +#if 0 +#else // 0 +# include <__config> +# include <__hash_table> +# include +# include +# include + +# if defined(__DEPRECATED) && __DEPRECATED +# if defined(_LIBCPP_WARNING) _LIBCPP_WARNING("Use of the header is deprecated. Migrate to ") -# else -# warning Use of the header is deprecated. Migrate to +# else +# warning Use of the header is deprecated. Migrate to +# endif # endif -#endif -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif namespace __gnu_cxx { @@ -312,17 +314,17 @@ public: _LIBCPP_HIDE_FROM_ABI explicit __hash_map_node_destructor(allocator_type& __na) : __na_(__na), __first_constructed(false), __second_constructed(false) {} -#ifndef _LIBCPP_CXX03_LANG +# ifndef _LIBCPP_CXX03_LANG _LIBCPP_HIDE_FROM_ABI __hash_map_node_destructor(std::__hash_node_destructor&& __x) : __na_(__x.__na_), __first_constructed(__x.__value_constructed), __second_constructed(__x.__value_constructed) { __x.__value_constructed = false; } -#else // _LIBCPP_CXX03_LANG +# else // _LIBCPP_CXX03_LANG _LIBCPP_HIDE_FROM_ABI __hash_map_node_destructor(const std::__hash_node_destructor& __x) : __na_(__x.__na_), __first_constructed(__x.__value_constructed), __second_constructed(__x.__value_constructed) { const_cast(__x.__value_constructed) = false; } -#endif // _LIBCPP_CXX03_LANG +# endif // _LIBCPP_CXX03_LANG _LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) { if (__second_constructed) @@ -863,10 
+865,11 @@ inline _LIBCPP_HIDE_FROM_ABI bool operator!=(const hash_multimap<_Key, _Tp, _Has } // namespace __gnu_cxx -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include +# endif +#endif // 0 #endif // _LIBCPP_HASH_MAP diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set index 1ab259b59979f3..073fcf2fd9911b 100644 --- a/libcxx/include/ext/hash_set +++ b/libcxx/include/ext/hash_set @@ -192,23 +192,25 @@ template */ -#include <__config> -#include <__hash_table> -#include -#include -#include - -#if defined(__DEPRECATED) && __DEPRECATED -# if defined(_LIBCPP_WARNING) +#if 0 +#else // 0 +# include <__config> +# include <__hash_table> +# include +# include +# include + +# if defined(__DEPRECATED) && __DEPRECATED +# if defined(_LIBCPP_WARNING) _LIBCPP_WARNING("Use of the header is deprecated. Migrate to ") -# else -# warning Use of the header is deprecated. Migrate to +# else +# warning Use of the header is deprecated. 
Migrate to +# endif # endif -#endif -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif namespace __gnu_cxx { @@ -575,10 +577,11 @@ inline _LIBCPP_HIDE_FROM_ABI bool operator!=(const hash_multiset<_Value, _Hash, } // namespace __gnu_cxx -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -# include -# include -#endif +# if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include +# endif +#endif // 0 #endif // _LIBCPP_HASH_SET diff --git a/libcxx/include/flat_map b/libcxx/include/flat_map index b99eebfa9fc538..1f2c204e2ef215 100644 --- a/libcxx/include/flat_map +++ b/libcxx/include/flat_map @@ -38,11 +38,15 @@ namespace std { */ #if 0 -#else // 0 -# include <__assert> // all public C++ headers provide the assertion handler +#else // 0 # include <__config> -# include <__flat_map/flat_map.h> -# include <__flat_map/sorted_unique.h> + +# if _LIBCPP_STD_VER >= 23 +# include <__flat_map/flat_map.h> +# include <__flat_map/sorted_unique.h> +# endif + +// for feature-test macros # include // standard required includes diff --git a/libcxx/test/libcxx/feature_test_macro/version_header.sh.py b/libcxx/test/libcxx/feature_test_macro/version_header.sh.py index 24d3981f81aebb..1e53d5fd928305 100644 --- a/libcxx/test/libcxx/feature_test_macro/version_header.sh.py +++ b/libcxx/test/libcxx/feature_test_macro/version_header.sh.py @@ -30,8 +30,8 @@ def test(output, expected): // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP_VERSION -#define _LIBCPP_VERSION +#ifndef _LIBCPP_VERSIONH +#define _LIBCPP_VERSIONH #include <__config> @@ -69,6 +69,6 @@ def test(output, expected): // define __cpp_lib_missing_FTM_in_older_standard 2026L #endif // _LIBCPP_STD_VER >= 26 -#endif // _LIBCPP_VERSION +#endif // _LIBCPP_VERSIONH """, ) diff --git 
a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 72fccfd3649325..801f6cbf9060f8 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -674,35 +674,13 @@ filesystem typeinfo filesystem utility filesystem variant filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 72fccfd3649325..801f6cbf9060f8 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -674,35 +674,13 @@ filesystem typeinfo filesystem utility filesystem variant filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index fd36dace19c76c..9146a7079ee1e9 
100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -692,35 +692,13 @@ filesystem typeinfo filesystem utility filesystem variant filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index eaec25f81e5821..a3518f7f62ecb9 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -701,35 +701,13 @@ filesystem utility filesystem variant filesystem vector filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index 89c28e49d6c9de..6de95139279471 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -697,35 +697,13 @@ 
filesystem utility filesystem variant filesystem vector filesystem version -flat_map atomic -flat_map cctype -flat_map climits flat_map cmath flat_map compare -flat_map concepts flat_map cstddef flat_map cstdint -flat_map cstdio -flat_map cstdlib -flat_map cstring -flat_map ctime -flat_map cwchar -flat_map cwctype -flat_map exception flat_map initializer_list -flat_map iosfwd -flat_map iterator flat_map limits -flat_map memory -flat_map new -flat_map optional -flat_map ratio -flat_map stdexcept -flat_map tuple flat_map type_traits -flat_map typeinfo -flat_map utility -flat_map variant flat_map version format algorithm format array diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 53fd44291b216a..0b5c0f107134ba 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -2230,8 +2230,8 @@ def version_header(self) -> str: // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP_VERSION -#define _LIBCPP_VERSION +#ifndef _LIBCPP_VERSIONH +#define _LIBCPP_VERSIONH #include <__config> @@ -2241,7 +2241,7 @@ def version_header(self) -> str: {feature_test_macros} -#endif // _LIBCPP_VERSION +#endif // _LIBCPP_VERSIONH """ return template.format( feature_test_macros=generate_version_header_implementation( diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index da0e8b286cddc1..158d61ff4fcb25 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -34,6 +34,11 @@ MACRO_ENSURE_OUT_OF_SOURCE_BUILD( build directory and run 'cmake /path/to/${PROJECT_NAME} [options]' there." 
) +if (MSVC) + message(FATAL_ERROR "Libc++abi can't be built for MSVC targets, and doing so is pointless anyway because such " + "targets must use the MS C++ ABI, and libc++abi provides the Itanium C++ ABI.") +endif() + #=============================================================================== # Setup CMake Options #=============================================================================== diff --git a/libcxxabi/src/private_typeinfo.cpp b/libcxxabi/src/private_typeinfo.cpp index 2f631041f74c94..01a1d2603b18d0 100644 --- a/libcxxabi/src/private_typeinfo.cpp +++ b/libcxxabi/src/private_typeinfo.cpp @@ -591,10 +591,9 @@ __base_class_type_info::has_unambiguous_public_base(__dynamic_cast_info* info, // .. and reset the pointer. adjustedPtr = nullptr; } - __base_type->has_unambiguous_public_base( - info, - static_cast(adjustedPtr) + offset_to_base, - (__offset_flags & __public_mask) ? path_below : not_public_path); + __base_type->has_unambiguous_public_base( + info, reinterpret_cast(reinterpret_cast(adjustedPtr) + offset_to_base), + (__offset_flags & __public_mask) ? 
path_below : not_public_path); } void diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index ea06dc8a67b949..3c8499fd334649 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -24,6 +24,11 @@ set(LIBUNWIND_LIBCXX_PATH "${CMAKE_CURRENT_LIST_DIR}/../libcxx" CACHE PATH include(GNUInstallDirs) include(CheckSymbolExists) +if (MSVC) + message(FATAL_ERROR "Libunwind doesn't build for MSVC targets, and that is almost certainly not what you want to do " + "anyway since libunwind is tied to the Itanium C++ ABI, and MSVC targets must use the MS C++ ABI.") +endif() + #=============================================================================== # Setup CMake Options #=============================================================================== diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index c1b3272a1f49ea..1148be09fb10cc 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -845,13 +845,14 @@ MemoryBufferRef LinkerDriver::convertResToCOFF(ArrayRef mbs, // Create OptTable +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + // Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr llvm::opt::OptTable::Info infoTable[] = { @@ -860,7 +861,8 @@ static constexpr llvm::opt::OptTable::Info infoTable[] = { #undef OPTION }; -COFFOptTable::COFFOptTable() : GenericOptTable(infoTable, true) {} +COFFOptTable::COFFOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, infoTable, true) {} // Set color diagnostics according to --color-diagnostics={auto,always,never} // or --no-color-diagnostics flags. 
diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index 4c88723f090d08..6d027c529c19e9 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -33,13 +33,14 @@ using namespace lld::elf; // Create OptTable +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + // Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr opt::OptTable::Info optInfo[] = { @@ -48,7 +49,8 @@ static constexpr opt::OptTable::Info optInfo[] = { #undef OPTION }; -ELFOptTable::ELFOptTable() : GenericOptTable(optInfo) {} +ELFOptTable::ELFOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, optInfo) {} // Set color diagnostics according to --color-diagnostics={auto,always,never} // or --no-color-diagnostics flags. diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 2084fcfd4d651a..c44773d0b7dabe 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1709,7 +1709,6 @@ static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { } static void createBitcodeSymbol(Ctx &ctx, Symbol *&sym, - const std::vector &keptComdats, const lto::InputFile::Symbol &objSym, BitcodeFile &f) { uint8_t binding = objSym.isWeak() ? 
STB_WEAK : STB_GLOBAL; @@ -1726,8 +1725,7 @@ static void createBitcodeSymbol(Ctx &ctx, Symbol *&sym, sym = ctx.symtab->insert(objSym.getName()); } - int c = objSym.getComdatIndex(); - if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) { + if (objSym.isUndefined()) { Undefined newSym(&f, StringRef(), binding, visibility, type); sym->resolve(ctx, newSym); sym->referenced = true; @@ -1766,10 +1764,10 @@ void BitcodeFile::parse() { // ObjFile::initializeSymbols. for (auto [i, irSym] : llvm::enumerate(obj->symbols())) if (!irSym.isUndefined()) - createBitcodeSymbol(ctx, symbols[i], keptComdats, irSym, *this); + createBitcodeSymbol(ctx, symbols[i], irSym, *this); for (auto [i, irSym] : llvm::enumerate(obj->symbols())) if (irSym.isUndefined()) - createBitcodeSymbol(ctx, symbols[i], keptComdats, irSym, *this); + createBitcodeSymbol(ctx, symbols[i], irSym, *this); for (auto l : obj->getDependentLibraries()) addDependentLibrary(ctx, l, this); diff --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp index 308c5eaf8c3178..69d023c23b3c79 100644 --- a/lld/MachO/DriverUtils.cpp +++ b/lld/MachO/DriverUtils.cpp @@ -34,13 +34,14 @@ using namespace llvm::sys; using namespace lld; using namespace lld::macho; +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + // Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr OptTable::Info optInfo[] = { @@ -65,7 +66,8 @@ static constexpr OptTable::Info optInfo[] = { #undef OPTION }; -MachOOptTable::MachOOptTable() : GenericOptTable(optInfo) {} +MachOOptTable::MachOOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, optInfo) {} // 
Set color diagnostics according to --color-diagnostics={auto,always,never} // or --no-color-diagnostics flags. diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index b4371b5b2240d2..a77d86b443a6c3 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -58,13 +58,13 @@ enum { #undef OPTION }; -// Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr opt::OptTable::Info infoTable[] = { @@ -92,7 +92,9 @@ static constexpr opt::OptTable::Info infoTable[] = { namespace { class MinGWOptTable : public opt::GenericOptTable { public: - MinGWOptTable() : opt::GenericOptTable(infoTable, false) {} + MinGWOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, infoTable, + false) {} opt::InputArgList parse(ArrayRef argv); }; } // namespace diff --git a/lld/test/ELF/lto/internalize-exportdyn.ll b/lld/test/ELF/lto/internalize-exportdyn.ll index 7c670a548025f1..25e319269bbdb6 100644 --- a/lld/test/ELF/lto/internalize-exportdyn.ll +++ b/lld/test/ELF/lto/internalize-exportdyn.ll @@ -56,16 +56,19 @@ define linkonce_odr void @baz() { @use_baz = global ptr @baz +;; Test comdat symbols that are prevailing in this module and non-prevailing in the other module. define void @ext_and_ext() local_unnamed_addr comdat { call void @foo(i64 1) ret void } +;; linkonce_odr in this module and external in the other module. define linkonce_odr void @lo_and_ext() local_unnamed_addr comdat { call void @foo(i64 1) ret void } +;; linkonce_odr in this module and weak_odr in the other module. 
define linkonce_odr void @lo_and_wo() local_unnamed_addr comdat { ret void } @@ -92,7 +95,7 @@ define weak_odr void @wo_and_lo() local_unnamed_addr comdat { ; CHECK-NEXT: call void @foo(i64 1) ; CHECK: define internal void @lo_and_ext() comdat ; CHECK-NEXT: call void @foo(i64 1) -; CHECK: define internal void @lo_and_wo() comdat +; CHECK: define weak_odr dso_local void @lo_and_wo() comdat ; CHECK: define weak_odr dso_local void @wo_and_lo() comdat ; DSO: @c = weak_odr constant i32 1 @@ -110,7 +113,7 @@ define weak_odr void @wo_and_lo() local_unnamed_addr comdat { ; DSO: define weak_odr void @baz() ; DSO: define void @ext_and_ext() comdat ; DSO: define internal void @lo_and_ext() comdat -; DSO: define internal void @lo_and_wo() comdat +; DSO: define weak_odr void @lo_and_wo() comdat ; DSO: define weak_odr void @wo_and_lo() comdat ;--- lib.s diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 37a0156c728f6f..00b5c82d9c7777 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -139,13 +139,13 @@ bool link(ArrayRef args, llvm::raw_ostream &stdoutOS, return errorCount() == 0; } -// Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE // Create table mapping all options defined in Options.td static constexpr opt::OptTable::Info optInfo[] = { @@ -173,7 +173,8 @@ static constexpr opt::OptTable::Info optInfo[] = { namespace { class WasmOptTable : public opt::GenericOptTable { public: - WasmOptTable() : opt::GenericOptTable(optInfo) {} + WasmOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, optInfo) {} opt::InputArgList parse(ArrayRef argv); }; } // namespace diff --git 
a/lldb/source/DataFormatters/FormatterBytecode.cpp b/lldb/source/DataFormatters/FormatterBytecode.cpp index f344fbaff6f02a..e49c7506781875 100644 --- a/lldb/source/DataFormatters/FormatterBytecode.cpp +++ b/lldb/source/DataFormatters/FormatterBytecode.cpp @@ -379,7 +379,7 @@ llvm::Error Interpret(std::vector &control, BINOP_CHECKZERO(%); continue; case op_shl: -#define SHIFTOP(OP) \ +#define SHIFTOP(OP, LEFT) \ { \ TYPE_CHECK(Any, UInt); \ uint64_t y = data.Pop(); \ @@ -390,16 +390,18 @@ llvm::Error Interpret(std::vector &control, data.Push(x OP y); \ } else if (std::holds_alternative(data.back())) { \ int64_t x = data.Pop(); \ + if (x < 0 && LEFT) \ + return error("left shift of negative value"); \ if (y > 64) \ return error("shift out of bounds"); \ data.Push(x OP y); \ } else \ return error("unsupported data types"); \ } - SHIFTOP(<<); + SHIFTOP(<<, true); continue; case op_shr: - SHIFTOP(<<); + SHIFTOP(>>, false); continue; case op_and: BINOP(&); diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index 8baf3a8d60c373..2a36f95c94d0ce 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -54,6 +54,10 @@ using namespace lldb; using namespace lldb_private; +#define OPTTABLE_STR_TABLE_CODE +#include "clang/Driver/Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + static Status ExceptionMaskValidator(const char *string, void *unused) { Status error; llvm::StringRef str_ref(string); @@ -1078,8 +1082,8 @@ void PlatformDarwin::AddClangModuleCompilationOptionsForSDKType( // clang has no version-min clang flag for XROS. if (!version.empty() && sdk_type != XcodeSDK::Type::Linux && sdk_type != XcodeSDK::Type::XROS) { -#define OPTION(PREFIX, NAME, VAR, ...) \ - llvm::StringRef opt_##VAR = NAME; \ +#define OPTION(PREFIX_OFFSET, NAME_OFFSET, VAR, ...) 
\ + llvm::StringRef opt_##VAR = &OptionStrTable[NAME_OFFSET]; \ (void)opt_##VAR; #include "clang/Driver/Options.inc" #undef OPTION diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index afb1a1ff95c3a1..98c3643f75c97b 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -61,12 +61,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -76,7 +77,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class LLDBOptTable : public opt::GenericOptTable { public: - LLDBOptTable() : opt::GenericOptTable(InfoTable) {} + LLDBOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // namespace diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 3bfc578806021e..7e8f7b5f6df679 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -92,12 +92,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr llvm::opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -106,7 +107,9 @@ static constexpr llvm::opt::OptTable::Info InfoTable[] = { }; class LLDBDAPOptTable : public llvm::opt::GenericOptTable { public: - LLDBDAPOptTable() : llvm::opt::GenericOptTable(InfoTable, true) {} + LLDBDAPOptTable() + : llvm::opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, + InfoTable, true) {} }; typedef void (*RequestCallback)(const llvm::json::Object &command); diff --git a/lldb/tools/lldb-server/lldb-gdbserver.cpp b/lldb/tools/lldb-server/lldb-gdbserver.cpp index ed10c161b6b2f6..fec868b1fa9a18 100644 --- a/lldb/tools/lldb-server/lldb-gdbserver.cpp +++ b/lldb/tools/lldb-server/lldb-gdbserver.cpp @@ -291,12 +291,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "LLGSOptions.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "LLGSOptions.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -306,7 +307,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class LLGSOptTable : public opt::GenericOptTable { public: - LLGSOptTable() : opt::GenericOptTable(InfoTable) {} + LLGSOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} void PrintHelp(llvm::StringRef Name) { std::string Usage = diff --git a/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp b/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp index 15d9229de00332..7307db650c1629 100644 --- a/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp +++ b/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp @@ -147,9 +147,12 @@ TEST_F(FormatterBytecodeTest, ArithOps) { { DataStack data; unsigned char minus_one = 127; - ASSERT_TRUE( + ASSERT_FALSE( Interpret({op_lit_int, minus_one, op_lit_uint, 2, op_shl}, data)); - ASSERT_EQ(data.Pop(), -4); + unsigned char minus_two = 126; + ASSERT_TRUE( + Interpret({op_lit_int, minus_two, op_lit_uint, 1, op_shr}, data)); + ASSERT_EQ(data.Pop(), -1); } { DataStack data; diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index 8da9b150c13d9f..655bd996806299 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -126,11 +126,6 @@ llvm-dev@redking.me.uk (email), [RKSimon](https://github.com/RKSimon) (GitHub) \ Craig Topper \ craig.topper@sifive.com (email), [topperc](https://github.com/topperc) (GitHub) -#### FastISel - -Chad Rosier \ -mcrosier@codeaurora.org (email) - #### Instruction scheduling Matthias Braun \ @@ -387,7 +382,7 @@ tstellar@redhat.com (email), [tstellar](https://github.com/tstellar) (GitHub) #### MinGW support Martin Storsjö \ -martin@martin.st (email), [mstrorsjo](https://github.com/mstrorsjo) (GitHub) +martin@martin.st (email), [mstorsjo](https://github.com/mstorsjo) (GitHub) #### Windows support in object tools @@ -461,6 +456,7 @@ Justin Bogner (mail@justinbogner.com, [bogner](https://github.com/bogner)) -- Se Evan Cheng (evan.cheng@apple.com) -- 
Parts of code generator not covered by someone else \ Renato Golin (rengolin@systemcall.eu, [rengolin](https://github.com/rengolin)) -- ARM backend \ Anton Korobeynikov (anton@korobeynikov.info, [asl](https://github.com/asl)) -- ARM EABI \ +Chad Rosier (mcrosier@codeaurora.org) -- FastISel \ Hans Wennborg (hans@chromium.org, [zmodem](https://github.com/zmodem)) -- Release management \ Kostya Serebryany ([kcc](https://github.com/kcc)) -- Sanitizers \ Evgeniy Stepanov ([eugenis](https://github.com/eugenis)) -- Sanitizers diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst index 67b6ebb4b04d94..aaa599b787a76d 100644 --- a/llvm/docs/Security.rst +++ b/llvm/docs/Security.rst @@ -36,6 +36,7 @@ meet the criteria for inclusion below. The list is in the format `* ${full_name} (${affiliation}) [${github_username}]`. If a github username for an individual isn't available, the brackets will be empty. +* Abhay Kanhere (Apple) [@AbhayKanhere] * Ahmed Bougacha (Apple) [@ahmedbougacha] * Artur Pilipenko (Azul Systems Inc) [] * Boovaragavan Dasarathan (Nvidia) [@mrragava] diff --git a/llvm/docs/UndefinedBehavior.rst b/llvm/docs/UndefinedBehavior.rst index f68bbbd505330a..82ae20c74f330e 100644 --- a/llvm/docs/UndefinedBehavior.rst +++ b/llvm/docs/UndefinedBehavior.rst @@ -237,6 +237,53 @@ Poison values can be replaced with any value of type (undef, concrete values, or a ``freeze`` instruction). +Propagation of Poison Through Select +------------------------------------ +Most instructions return poison if any of their inputs is poison. +A notable exception is the ``select`` instruction, which is poison if and +only if the condition is poison or the selected value is poison. +This means that ``select`` acts as a barrier for poison propagation, which +impacts which optimizations can be performed. + +For example, consider the following function: + +.. 
code-block:: llvm + + define i1 @fn(i32 %x, i32 %y) { + %cmp1 = icmp ne i32 %x, 0 + %cmp2 = icmp ugt i32 %x, %y + %and = select i1 %cmp1, i1 %cmp2, i1 false + ret i1 %and + } + +It is not correct to optimize the ``select`` into an ``and`` because when +``%cmp1`` is false, the ``select`` is only poison if ``%x`` is poison, while +the ``and`` below is poison if either ``%x`` or ``%y`` are poison. + +.. code-block:: llvm + + define i1 @fn(i32 %x, i32 %y) { + %cmp1 = icmp ne i32 %x, 0 + %cmp2 = icmp ugt i32 %x, %y + %and = and i1 %cmp1, %cmp2 ;; poison if %x or %y are poison + ret i1 %and + } + +However, the optimization is possible if all operands of the values are used in +the condition (notice the flipped operands in the ``select``): + +.. code-block:: llvm + + define i1 @fn(i32 %x, i32 %y) { + %cmp1 = icmp ne i32 %x, 0 + %cmp2 = icmp ugt i32 %x, %y + %and = select i1 %cmp2, i1 %cmp1, i1 false + ; ok to replace with: + %and = and i1 %cmp1, %cmp2 + ret i1 %and + } + + The Freeze Instruction ====================== Both undef and poison values sometimes propagate too much down an expression diff --git a/llvm/docs/Vectorizers.rst b/llvm/docs/Vectorizers.rst index a4462e53edda09..f134a6df94a69a 100644 --- a/llvm/docs/Vectorizers.rst +++ b/llvm/docs/Vectorizers.rst @@ -399,6 +399,19 @@ small trip counts. .. image:: epilogue-vectorization-cfg.png +Early Exit Vectorization +^^^^^^^^^^^^^^^^^^^^^^^^ + +When vectorizing a loop with a single early exit, the loop blocks following the +early exit are predicated and the vector loop will always exit via the latch. +If the early exit has been taken, the vector loop's successor block +(``middle.split`` below) branches to the early exit block. Otherwise +``middle.block`` selects between the exit block from the latch or the scalar +remainder loop. + +.. 
image:: vplan-early-exit.png + + Performance ----------- diff --git a/llvm/docs/vplan-early-exit.dot b/llvm/docs/vplan-early-exit.dot new file mode 100644 index 00000000000000..63490b0cdb2e43 --- /dev/null +++ b/llvm/docs/vplan-early-exit.dot @@ -0,0 +1,41 @@ +digraph VPlan { +graph [labelloc=t, fontsize=30; label=""] +node [shape=rect, fontname=Courier, fontsize=30] +edge [fontname=Courier, fontsize=30] +compound=true + N1 [label = + "vector.ph" + ] + N1 -> N2 [ label="" lhead=cluster_N3] + subgraph cluster_N3 { + fontname=Courier + label="\ vector loop" + N2 [label = + "vector.body" + ] + } + N2 -> N4 [ label="" ltail=cluster_N3] + N4 [label = + "middle.split" + ] + N4 -> N5 [ label=""] + N4 -> N6 [ label=""] + N5 [label = + "early.exit" + ] + N6 [label = + "middle.block" + ] + N6 -> N9 [ label=""] + N6 -> N7 [ label=""] + N7 [label = + "scalar.ph" + ] + N7 -> N8 [ label=""] + N8 [label = + "loop.header" + ] + N9 [label = + "latch.exit" + ] +} diff --git a/llvm/docs/vplan-early-exit.png b/llvm/docs/vplan-early-exit.png new file mode 100644 index 00000000000000..3cd293bcdbcc82 Binary files /dev/null and b/llvm/docs/vplan-early-exit.png differ diff --git a/llvm/include/llvm/CodeGen/SDNodeProperties.td b/llvm/include/llvm/CodeGen/SDNodeProperties.td index 3cb304f47f4b9d..d32904283a11a9 100644 --- a/llvm/include/llvm/CodeGen/SDNodeProperties.td +++ b/llvm/include/llvm/CodeGen/SDNodeProperties.td @@ -29,5 +29,3 @@ def SDNPMayLoad : SDNodeProperty; // May read memory, sets 'mayLoad'. def SDNPSideEffect : SDNodeProperty; // Sets 'HasUnmodelledSideEffects'. def SDNPMemOperand : SDNodeProperty; // Touches memory, has assoc MemOperand def SDNPVariadic : SDNodeProperty; // Node has variable arguments. 
-def SDNPWantRoot : SDNodeProperty; // ComplexPattern gets the root of match -def SDNPWantParent : SDNodeProperty; // ComplexPattern gets the parent diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 292fa3c94969be..374f9f2e7f5696 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -41,12 +41,10 @@ class RegScavenger; class VirtRegMap; class LiveIntervals; class LiveInterval; - class TargetRegisterClass { public: using iterator = const MCPhysReg *; using const_iterator = const MCPhysReg *; - using sc_iterator = const TargetRegisterClass* const *; // Instance variables filled by tablegen, do not use! const MCRegisterClass *MC; @@ -67,7 +65,8 @@ class TargetRegisterClass { /// Whether a combination of subregisters can cover every register in the /// class. See also the CoveredBySubRegs description in Target.td. const bool CoveredBySubRegs; - const sc_iterator SuperClasses; + const unsigned *SuperClasses; + const uint16_t SuperClassesSize; ArrayRef (*OrderFunc)(const MachineFunction&); /// Return the register class ID number. @@ -175,18 +174,16 @@ class TargetRegisterClass { return SuperRegIndices; } - /// Returns a NULL-terminated list of super-classes. The + /// Returns a list of super-classes. The /// classes are ordered by ID which is also a topological ordering from large /// to small classes. The list does NOT include the current class. - sc_iterator getSuperClasses() const { - return SuperClasses; + ArrayRef superclasses() const { + return ArrayRef(SuperClasses, SuperClassesSize); } /// Return true if this TargetRegisterClass is a subset /// class of at least one other TargetRegisterClass. - bool isASubClass() const { - return SuperClasses[0] != nullptr; - } + bool isASubClass() const { return SuperClasses != nullptr; } /// Returns the preferred order for allocating registers from this register /// class in MF. 
The raw order comes directly from the .td file and may diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index 2eb80f49bedf12..be5e63633097e1 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -16,7 +16,7 @@ /* Indicate that this is LLVM compiled from the amd-gfx branch. */ #define LLVM_HAVE_BRANCH_AMD_GFX -#define LLVM_MAIN_REVISION 521082 +#define LLVM_MAIN_REVISION 521148 /* Define if LLVM_ENABLE_DUMP is enabled */ #cmakedefine LLVM_ENABLE_DUMP diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index bd7fb2361aaeb1..772f60343c6348 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -177,6 +177,7 @@ def OMPC_Exclusive : Clause<"exclusive"> { } def OMPC_Fail : Clause<"fail"> { let clangClass = "OMPFailClause"; + let flangClass = "OmpFailClause"; } def OMPC_Filter : Clause<"filter"> { let clangClass = "OMPFilterClause"; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b2f0aa2f7e4d90..0a1bd4c923b9b8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3872,6 +3872,20 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_fp8_cvtlt1 : SVE2_FP8_Cvt; def int_aarch64_sve_fp8_cvtlt2 : SVE2_FP8_Cvt; + // SVE Narrowing Conversions + class SVE2_FP8_Narrow_Cvt + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; + def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; + + def int_aarch64_sve_fp8_cvtnt + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + class SME2_FP8_CVT_X2_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, 
LLVMMatchType<0>], [llvm_nxv16i8_ty], diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index 8fabc78d81aedf..decb6cb5455ebc 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -53,10 +53,8 @@ class OptTable { public: /// Entry for a single option instance in the option data table. struct Info { - /// A null terminated array of prefix strings to apply to name while - /// matching. - ArrayRef Prefixes; - StringLiteral PrefixedName; + unsigned PrefixesOffset; + unsigned PrefixedNameOffset; const char *HelpText; // Help text for specific visibilities. A list of pairs, where each pair // is a list of visibilities and a specific help string for those @@ -80,15 +78,56 @@ class OptTable { const char *AliasArgs; const char *Values; - StringRef getName() const { - unsigned PrefixLength = Prefixes.empty() ? 0 : Prefixes[0].size(); - return PrefixedName.drop_front(PrefixLength); + bool hasNoPrefix() const { return PrefixesOffset == 0; } + + unsigned getNumPrefixes(ArrayRef PrefixesTable) const { + return PrefixesTable[PrefixesOffset]; + } + + ArrayRef + getPrefixOffsets(ArrayRef PrefixesTable) const { + return hasNoPrefix() ? ArrayRef() + : PrefixesTable.slice(PrefixesOffset + 1, + getNumPrefixes(PrefixesTable)); + } + + void appendPrefixes(const char *StrTable, ArrayRef PrefixesTable, + SmallVectorImpl &Prefixes) const { + for (unsigned PrefixOffset : getPrefixOffsets(PrefixesTable)) + Prefixes.push_back(&StrTable[PrefixOffset]); + } + + StringRef getPrefix(const char *StrTable, ArrayRef PrefixesTable, + unsigned PrefixIndex) const { + return &StrTable[getPrefixOffsets(PrefixesTable)[PrefixIndex]]; + } + + StringRef getPrefixedName(const char *StrTable) const { + return &StrTable[PrefixedNameOffset]; + } + + StringRef getName(const char *StrTable, + ArrayRef PrefixesTable) const { + unsigned PrefixLength = + hasNoPrefix() ? 
0 : getPrefix(StrTable, PrefixesTable, 0).size(); + return getPrefixedName(StrTable).drop_front(PrefixLength); } }; private: + // A unified string table for these options. Individual strings are stored as + // null terminated C-strings at offsets within this table. + const char *StrTable; + + // A table of different sets of prefixes. Each set starts with the number of + // prefixes in that set followed by that many offsets into the string table + // for each of the prefix strings. This is essentially a Pascal-string style + // encoding. + ArrayRef PrefixesTable; + /// The option information table. ArrayRef OptionInfos; + bool IgnoreCase; bool GroupedShortOptions = false; bool DashDashParsing = false; @@ -102,12 +141,12 @@ class OptTable { /// special option like 'input' or 'unknown', and is not an option group). unsigned FirstSearchableIndex = 0; - /// The union of the first element of all option prefixes. - SmallString<8> PrefixChars; - /// The union of all option prefixes. If an argument does not begin with /// one of these, it is an input. - virtual ArrayRef getPrefixesUnion() const = 0; + SmallVector PrefixesUnion; + + /// The union of the first element of all option prefixes. + SmallString<8> PrefixChars; private: const Info &getInfo(OptSpecifier Opt) const { @@ -122,7 +161,8 @@ class OptTable { protected: /// Initialize OptTable using Tablegen'ed OptionInfos. Child class must /// manually call \c buildPrefixChars once they are fully constructed. - OptTable(ArrayRef OptionInfos, bool IgnoreCase = false); + OptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, bool IgnoreCase = false); /// Build (or rebuild) the PrefixChars member. void buildPrefixChars(); @@ -130,6 +170,12 @@ class OptTable { public: virtual ~OptTable(); + /// Return the string table used for option names. + const char *getStrTable() const { return StrTable; } + + /// Return the prefixes table used for option names. 
+ ArrayRef getPrefixesTable() const { return PrefixesTable; } + /// Return the total number of option classes. unsigned getNumOptions() const { return OptionInfos.size(); } @@ -141,7 +187,25 @@ class OptTable { /// Lookup the name of the given option. StringRef getOptionName(OptSpecifier id) const { - return getInfo(id).getName(); + return getInfo(id).getName(StrTable, PrefixesTable); + } + + /// Lookup the prefix of the given option. + StringRef getOptionPrefix(OptSpecifier id) const { + const Info &I = getInfo(id); + return I.hasNoPrefix() ? StringRef() + : I.getPrefix(StrTable, PrefixesTable, 0); + } + + void appendOptionPrefixes(OptSpecifier id, + SmallVectorImpl &Prefixes) const { + const Info &I = getInfo(id); + I.appendPrefixes(StrTable, PrefixesTable, Prefixes); + } + + /// Lookup the prefixed name of the given option. + StringRef getOptionPrefixedName(OptSpecifier id) const { + return getInfo(id).getPrefixedName(StrTable); } /// Get the kind of the given option. @@ -353,28 +417,22 @@ class OptTable { /// Specialization of OptTable class GenericOptTable : public OptTable { - SmallVector PrefixesUnionBuffer; - protected: - GenericOptTable(ArrayRef OptionInfos, bool IgnoreCase = false); - ArrayRef getPrefixesUnion() const final { - return PrefixesUnionBuffer; - } + GenericOptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, bool IgnoreCase = false); }; class PrecomputedOptTable : public OptTable { - ArrayRef PrefixesUnion; - protected: - PrecomputedOptTable(ArrayRef OptionInfos, - ArrayRef PrefixesTable, + PrecomputedOptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, + ArrayRef PrefixesUnionOffsets, bool IgnoreCase = false) - : OptTable(OptionInfos, IgnoreCase), PrefixesUnion(PrefixesTable) { + : OptTable(StrTable, PrefixesTable, OptionInfos, IgnoreCase) { + for (unsigned PrefixOffset : PrefixesUnionOffsets) + PrefixesUnion.push_back(&StrTable[PrefixOffset]); buildPrefixChars(); } - ArrayRef 
getPrefixesUnion() const final { - return PrefixesUnion; - } }; } // end namespace opt @@ -382,31 +440,35 @@ class PrecomputedOptTable : public OptTable { } // end namespace llvm #define LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ - ID_PREFIX, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + ID_PREFIX, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) \ ID_PREFIX##ID -#define LLVM_MAKE_OPT_ID(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, \ - ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, \ - HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ - LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ - OPT_, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUE) +#define LLVM_MAKE_OPT_ID(PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, \ + GROUP, ALIAS, ALIASARGS, FLAGS, VISIBILITY, PARAM, \ + HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(OPT_, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, \ + ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ + VISIBILITY, PARAM, HELPTEXT, \ + HELPTEXTSFORVARIANTS, METAVAR, VALUE) #define LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ - ID_PREFIX, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + ID_PREFIX, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) \ llvm::opt::OptTable::Info { \ - PREFIX, PREFIXED_NAME, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ - ID_PREFIX##ID, llvm::opt::Option::KIND##Class, PARAM, FLAGS, \ + PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, ID_PREFIX##ID, llvm::opt::Option::KIND##Class, PARAM, FLAGS, \ VISIBILITY, 
ID_PREFIX##GROUP, ID_PREFIX##ALIAS, ALIASARGS, VALUES \ } -#define LLVM_CONSTRUCT_OPT_INFO(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, \ - ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, \ - HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ +#define LLVM_CONSTRUCT_OPT_INFO( \ + PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ - OPT_, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) + OPT_, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) #endif // LLVM_OPTION_OPTTABLE_H diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h index 5d16fbdb6b77c8..a0563da15c8edf 100644 --- a/llvm/include/llvm/Option/Option.h +++ b/llvm/include/llvm/Option/Option.h @@ -100,7 +100,8 @@ class Option { /// Get the name of this option without any prefix. StringRef getName() const { assert(Info && "Must have a valid info!"); - return Info->getName(); + assert(Owner && "Must have a valid owner!"); + return Owner->getOptionName(Info->ID); } const Option getGroup() const { @@ -127,15 +128,16 @@ class Option { /// Get the default prefix for this option. StringRef getPrefix() const { - return Info->Prefixes.empty() - ? StringRef() - : static_cast(Info->Prefixes[0]); + assert(Info && "Must have a valid info!"); + assert(Owner && "Must have a valid owner!"); + return Owner->getOptionPrefix(Info->ID); } /// Get the name of this option with the default prefix. 
- StringLiteral getPrefixedName() const { + StringRef getPrefixedName() const { assert(Info && "Must have a valid info!"); - return Info->PrefixedName; + assert(Owner && "Must have a valid owner!"); + return Owner->getOptionPrefixedName(Info->ID); } /// Get the help text for this option. diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 25e13f575296dd..f1010b312ee569 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -21,6 +21,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfCorrelator.h" #include "llvm/ProfileData/MemProf.h" +#include "llvm/ProfileData/MemProfYAML.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ace3c044d8c277..ef96b74c9d400c 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -11,9 +11,7 @@ #include "llvm/Support/BLAKE3.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" -#include "llvm/Support/Format.h" #include "llvm/Support/HashBuilder.h" -#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include @@ -492,23 +490,6 @@ struct MemProfRecord { } }; -// A "typedef" for GUID. See ScalarTraits for how a GUID is -// serialized and deserialized in YAML. -LLVM_YAML_STRONG_TYPEDEF(uint64_t, GUIDHex64) - -// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields -// within MemProfRecord at the same level as if the GUID were part of -// MemProfRecord. -struct GUIDMemProfRecordPair { - GUIDHex64 GUID; - MemProfRecord Record; -}; - -// The top-level data structure, only used with YAML for now. -struct AllMemProfData { - std::vector HeapProfileRecords; -}; - // Reads a memprof schema from a buffer. 
All entries in the buffer are // interpreted as uint64_t. The first entry in the buffer denotes the number of // ids in the schema. Subsequent entries are integers which map to memprof::Meta @@ -1169,131 +1150,6 @@ template class CallStackRadixTreeBuilder { } }; } // namespace memprof - -namespace yaml { -template <> struct ScalarTraits { - static void output(const memprof::GUIDHex64 &Val, void *, raw_ostream &Out) { - // Print GUID as a 16-digit hexadecimal number. - Out << format("0x%016" PRIx64, (uint64_t)Val); - } - static StringRef input(StringRef Scalar, void *, memprof::GUIDHex64 &Val) { - // Reject decimal GUIDs. - if (all_of(Scalar, [](char C) { return std::isdigit(C); })) - return "use a hexadecimal GUID or a function instead"; - - uint64_t Num; - if (Scalar.starts_with_insensitive("0x")) { - // Accept hexadecimal numbers starting with 0x or 0X. - if (Scalar.getAsInteger(0, Num)) - return "invalid hex64 number"; - Val = Num; - } else { - // Otherwise, treat the input as a string containing a function name. - Val = memprof::IndexedMemProfRecord::getGUID(Scalar); - } - return StringRef(); - } - static QuotingType mustQuote(StringRef) { return QuotingType::None; } -}; - -template <> struct MappingTraits { - static void mapping(IO &Io, memprof::Frame &F) { - Io.mapRequired("Function", F.Function); - Io.mapRequired("LineOffset", F.LineOffset); - Io.mapRequired("Column", F.Column); - Io.mapRequired("IsInlineFrame", F.IsInlineFrame); - - // Assert that the definition of Frame matches what we expect. The - // structured bindings below detect changes to the number of fields. - // static_assert checks the type of each field. 
- const auto &[Function, SymbolName, LineOffset, Column, IsInlineFrame] = F; - static_assert( - std::is_same_v, GlobalValue::GUID>); - static_assert(std::is_same_v, - std::unique_ptr>); - static_assert( - std::is_same_v, uint32_t>); - static_assert(std::is_same_v, uint32_t>); - static_assert( - std::is_same_v, bool>); - - // MSVC issues unused variable warnings despite the uses in static_assert - // above. - (void)Function; - (void)SymbolName; - (void)LineOffset; - (void)Column; - (void)IsInlineFrame; - } - - // Request the inline notation for brevity: - // { Function: 123, LineOffset: 11, Column: 10; IsInlineFrame: true } - static const bool flow = true; -}; - -template <> struct CustomMappingTraits { - static void inputOne(IO &Io, StringRef KeyStr, - memprof::PortableMemInfoBlock &MIB) { - // PortableMemInfoBlock keeps track of the set of fields that actually have - // values. We update the set here as we receive a key-value pair from the - // YAML document. - // - // We set MIB.Name via a temporary variable because ScalarTraits - // isn't available on macOS. 
-#define MIBEntryDef(NameTag, Name, Type) \ - if (KeyStr == #Name) { \ - uint64_t Value; \ - Io.mapRequired(KeyStr.str().c_str(), Value); \ - MIB.Name = static_cast(Value); \ - MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name)); \ - return; \ - } -#include "llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - Io.setError("Key is not a valid validation event"); - } - - static void output(IO &Io, memprof::PortableMemInfoBlock &MIB) { - auto Schema = MIB.getSchema(); -#define MIBEntryDef(NameTag, Name, Type) \ - if (Schema.test(llvm::to_underlying(memprof::Meta::Name))) { \ - uint64_t Value = MIB.Name; \ - Io.mapRequired(#Name, Value); \ - } -#include "llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - } -}; - -template <> struct MappingTraits { - static void mapping(IO &Io, memprof::AllocationInfo &AI) { - Io.mapRequired("Callstack", AI.CallStack); - Io.mapRequired("MemInfoBlock", AI.Info); - } -}; - -// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can -// treat the GUID and the fields within MemProfRecord at the same level as if -// the GUID were part of MemProfRecord. 
-template <> struct MappingTraits { - static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) { - Io.mapRequired("GUID", Pair.GUID); - Io.mapRequired("AllocSites", Pair.Record.AllocSites); - Io.mapRequired("CallSites", Pair.Record.CallSites); - } -}; - -template <> struct MappingTraits { - static void mapping(IO &Io, memprof::AllMemProfData &Data) { - Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords); - } -}; -} // namespace yaml } // namespace llvm -LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame) -LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector) -LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo) -LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair) - #endif // LLVM_PROFILEDATA_MEMPROF_H_ diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h new file mode 100644 index 00000000000000..4568385fc6f71f --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfYAML.h @@ -0,0 +1,154 @@ +#ifndef LLVM_PROFILEDATA_MEMPROFYAML_H_ +#define LLVM_PROFILEDATA_MEMPROFYAML_H_ + +#include "llvm/ProfileData/MemProf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/YAMLTraits.h" + +namespace llvm { +namespace memprof { +// A "typedef" for GUID. See ScalarTraits for how a GUID is +// serialized and deserialized in YAML. +LLVM_YAML_STRONG_TYPEDEF(uint64_t, GUIDHex64) + +// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields +// within MemProfRecord at the same level as if the GUID were part of +// MemProfRecord. +struct GUIDMemProfRecordPair { + GUIDHex64 GUID; + MemProfRecord Record; +}; + +// The top-level data structure, only used with YAML for now. +struct AllMemProfData { + std::vector HeapProfileRecords; +}; +} // namespace memprof + +namespace yaml { +template <> struct ScalarTraits { + static void output(const memprof::GUIDHex64 &Val, void *, raw_ostream &Out) { + // Print GUID as a 16-digit hexadecimal number. 
+ Out << format("0x%016" PRIx64, (uint64_t)Val); + } + static StringRef input(StringRef Scalar, void *, memprof::GUIDHex64 &Val) { + // Reject decimal GUIDs. + if (all_of(Scalar, [](char C) { return std::isdigit(C); })) + return "use a hexadecimal GUID or a function instead"; + + uint64_t Num; + if (Scalar.starts_with_insensitive("0x")) { + // Accept hexadecimal numbers starting with 0x or 0X. + if (Scalar.getAsInteger(0, Num)) + return "invalid hex64 number"; + Val = Num; + } else { + // Otherwise, treat the input as a string containing a function name. + Val = memprof::IndexedMemProfRecord::getGUID(Scalar); + } + return StringRef(); + } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +template <> struct MappingTraits { + static void mapping(IO &Io, memprof::Frame &F) { + Io.mapRequired("Function", F.Function); + Io.mapRequired("LineOffset", F.LineOffset); + Io.mapRequired("Column", F.Column); + Io.mapRequired("IsInlineFrame", F.IsInlineFrame); + + // Assert that the definition of Frame matches what we expect. The + // structured bindings below detect changes to the number of fields. + // static_assert checks the type of each field. + const auto &[Function, SymbolName, LineOffset, Column, IsInlineFrame] = F; + static_assert( + std::is_same_v, GlobalValue::GUID>); + static_assert(std::is_same_v, + std::unique_ptr>); + static_assert( + std::is_same_v, uint32_t>); + static_assert(std::is_same_v, uint32_t>); + static_assert( + std::is_same_v, bool>); + + // MSVC issues unused variable warnings despite the uses in static_assert + // above. 
+ (void)Function; + (void)SymbolName; + (void)LineOffset; + (void)Column; + (void)IsInlineFrame; + } + + // Request the inline notation for brevity: + // { Function: 123, LineOffset: 11, Column: 10; IsInlineFrame: true } + static const bool flow = true; +}; + +template <> struct CustomMappingTraits { + static void inputOne(IO &Io, StringRef KeyStr, + memprof::PortableMemInfoBlock &MIB) { + // PortableMemInfoBlock keeps track of the set of fields that actually have + // values. We update the set here as we receive a key-value pair from the + // YAML document. + // + // We set MIB.Name via a temporary variable because ScalarTraits + // isn't available on macOS. +#define MIBEntryDef(NameTag, Name, Type) \ + if (KeyStr == #Name) { \ + uint64_t Value; \ + Io.mapRequired(KeyStr.str().c_str(), Value); \ + MIB.Name = static_cast(Value); \ + MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name)); \ + return; \ + } +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + Io.setError("Key is not a valid validation event"); + } + + static void output(IO &Io, memprof::PortableMemInfoBlock &MIB) { + auto Schema = MIB.getSchema(); +#define MIBEntryDef(NameTag, Name, Type) \ + if (Schema.test(llvm::to_underlying(memprof::Meta::Name))) { \ + uint64_t Value = MIB.Name; \ + Io.mapRequired(#Name, Value); \ + } +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + } +}; + +template <> struct MappingTraits { + static void mapping(IO &Io, memprof::AllocationInfo &AI) { + Io.mapRequired("Callstack", AI.CallStack); + Io.mapRequired("MemInfoBlock", AI.Info); + } +}; + +// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can +// treat the GUID and the fields within MemProfRecord at the same level as if +// the GUID were part of MemProfRecord. 
+template <> struct MappingTraits { + static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) { + Io.mapRequired("GUID", Pair.GUID); + Io.mapRequired("AllocSites", Pair.Record.AllocSites); + Io.mapRequired("CallSites", Pair.Record.CallSites); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &Io, memprof::AllMemProfData &Data) { + Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords); + } +}; +} // namespace yaml +} // namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame) +LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector) +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair) + +#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_ diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 7bb6c3156c43e0..2c58eedce1de0b 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -2086,4 +2086,12 @@ class ComplexPattern RootNodes = roots; list Properties = props; int Complexity = complexity; + + // Set this to true if SelectFunc wants an additional argument + // that is the root of the matched pattern. + bit WantsRoot = false; + + // Set this to true if SelectFunc wants an additional argument + // that is the parent of the matched node. + bit WantsParent = false; } diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 1311329821828f..2dd8469becbb9e 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -272,9 +272,10 @@ bool isX18ReservedByDefault(const Triple &TT); unsigned getFMVPriority(ArrayRef Features); // For given feature names, return a bitmask corresponding to the entries of -// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks -// themselves, they are sequential (0, 1, 2, 3, ...). 
-uint64_t getCpuSupportsMask(ArrayRef FeatureStrs); +// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks themselves, +// they are sequential (0, 1, 2, 3, ...). The resulting bitmask is used at +// runtime to test whether a certain FMV feature is available on the host. +uint64_t getCpuSupportsMask(ArrayRef Features); void PrintSupportedExtensions(); diff --git a/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h b/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h index 7ea50a5584dde0..e1f8065f8011df 100644 --- a/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h +++ b/llvm/include/llvm/Transforms/Utils/ExtraPassManager.h @@ -55,29 +55,49 @@ template struct ShouldRunExtraPasses { /// request additional transformations on demand. An example is extra /// simplifications after loop-vectorization, if runtime checks have been added. template -struct ExtraFunctionPassManager : public FunctionPassManager { +class ExtraFunctionPassManager + : public PassInfoMixin> { + FunctionPassManager InnerFPM; + +public: + template void addPass(PassT &&Pass) { + InnerFPM.addPass(std::move(Pass)); + } + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) { auto PA = PreservedAnalyses::all(); if (AM.getCachedResult(F)) - PA.intersect(FunctionPassManager::run(F, AM)); + PA.intersect(InnerFPM.run(F, AM)); PA.abandon(); return PA; } + + static bool isRequired() { return true; } }; /// A pass manager to run a set of extra loop passes if the MarkerTy analysis is /// present. This allows passes to request additional transformations on demand. /// An example is doing additional runs of SimpleLoopUnswitch. 
template -struct ExtraLoopPassManager : public LoopPassManager { +class ExtraLoopPassManager + : public PassInfoMixin> { + LoopPassManager InnerLPM; + +public: + template void addPass(PassT &&Pass) { + InnerLPM.addPass(std::move(Pass)); + } + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { auto PA = PreservedAnalyses::all(); if (AM.getCachedResult(L)) - PA.intersect(LoopPassManager::run(L, AM, AR, U)); + PA.intersect(InnerLPM.run(L, AM, AR, U)); PA.abandon(); return PA; } + + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index dc7e484a40a452..fbe80eddbae07a 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -395,6 +395,11 @@ class LoopVectorizationLegality { /// Returns the uncountable early exiting block. 
BasicBlock *getUncountableEarlyExitingBlock() const { + if (!HasUncountableEarlyExit) { + assert(getUncountableExitingBlocks().empty() && + "Expected no uncountable exiting blocks"); + return nullptr; + } assert(getUncountableExitingBlocks().size() == 1 && "Expected only a single uncountable exiting block"); return getUncountableExitingBlocks()[0]; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index d1332de4582087..ebae27e37b4fcb 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -313,10 +313,9 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } if (Error) { const Function &Fn = MI->getMF()->getFunction(); - DiagnosticInfoInlineAsm DI(LocCookie, - "invalid operand in inline asm: '" + - Twine(AsmStr) + "'"); - Fn.getContext().diagnose(DI); + Fn.getContext().diagnose(DiagnosticInfoInlineAsm( + LocCookie, + "invalid operand in inline asm: '" + Twine(AsmStr) + "'")); } } break; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 605937503407a8..907c4577d93d39 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -138,6 +138,11 @@ static cl::opt EnableReduceLoadOpStoreWidth( "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence")); +static cl::opt ReduceLoadOpStoreWidthForceNarrowingProfitable( + "combiner-reduce-load-op-store-width-force-narrowing-profitable", + cl::Hidden, cl::init(false), + cl::desc("DAG combiner force override the narrowing profitable check when " + "reducing the width of load/op/store sequences")); static cl::opt EnableShrinkLoadReplaceStoreWithStore( "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), @@ -20351,19 +20356,38 @@ SDValue 
DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); // The narrowing should be profitable, the load/store operation should be // legal (or custom) and the store size should be equal to the NewVT width. - while (NewBW < BitWidth && (NewVT.getStoreSizeInBits() != NewBW || - !TLI.isOperationLegalOrCustom(Opc, NewVT) || - !TLI.isNarrowingProfitable(N, VT, NewVT))) { + while (NewBW < BitWidth && + (NewVT.getStoreSizeInBits() != NewBW || + !TLI.isOperationLegalOrCustom(Opc, NewVT) || + (!ReduceLoadOpStoreWidthForceNarrowingProfitable && + !TLI.isNarrowingProfitable(N, VT, NewVT)))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } if (NewBW >= BitWidth) return SDValue(); - // If the lsb changed does not start at the type bitwidth boundary, - // start at the previous one. - if (ShAmt % NewBW) - ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; + // TODO: For big-endian we probably want to align given the most significant + // bit being modified instead of adjusting ShAmt based on least significant + // bits. This to reduce the risk of failing on the alignment check below. If + // for example VT.getStoreSize()==5 and Imm is 0x0000ffff00, then we want to + // find NewBW=16, and we want to load/store with a PtrOff set to 2. But then + // ShAmt should be set to 8, which isn't a multiple of NewBW. But given + // that isNarrowingProfitable doesn't seem to be overridden for any in-tree + // big-endian target, then the support for big-endian here isn't covered by + // any in-tree lit tests, so it is unfortunately not highly optimized + // either. It should be possible to improve that by using + // ReduceLoadOpStoreWidthForceNarrowingProfitable. + + // If the lsb that is modified does not start at the type bitwidth boundary, + // align to start at the previous boundary. 
+ ShAmt = ShAmt - (ShAmt % NewBW); + + // Make sure we do not access memory outside the memory touched by the + // original load/store. + if (ShAmt + NewBW > VT.getStoreSizeInBits()) + return SDValue(); + APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, std::min(BitWidth, ShAmt + NewBW)); if ((Imm & Mask) == Imm) { diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp index f23f3ed9406bdc..ecf5c0e519caec 100644 --- a/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.cpp @@ -19,21 +19,17 @@ using namespace jitlink; #define DEBUG_TYPE "jitlink" -// Create prefix string literals used in Options.td -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "COFFOptions.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE -static constexpr const StringLiteral PrefixTable_init[] = -#define PREFIX_UNION(VALUES) VALUES +#define OPTTABLE_PREFIXES_TABLE_CODE #include "COFFOptions.inc" -#undef PREFIX_UNION - ; -static constexpr const ArrayRef - PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1); +#undef OPTTABLE_PREFIXES_TABLE_CODE + +#define OPTTABLE_PREFIXES_UNION_CODE +#include "COFFOptions.inc" +#undef OPTTABLE_PREFIXES_UNION_CODE // Create table mapping all options defined in COFFOptions.td using namespace llvm::opt; @@ -46,7 +42,9 @@ static constexpr opt::OptTable::Info infoTable[] = { class COFFOptTable : public opt::PrecomputedOptTable { public: - COFFOptTable() : PrecomputedOptTable(infoTable, PrefixTable, true) {} + COFFOptTable() + : PrecomputedOptTable(OptionStrTable, OptionPrefixesTable, infoTable, + OptionPrefixesUnion, true) {} }; static COFFOptTable optTable; diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 
9fdafed39b8b6c..87e6f1f12364c2 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -31,46 +31,55 @@ using namespace llvm; using namespace llvm::opt; -namespace llvm::opt { +namespace { +struct OptNameLess { + const char *StrTable; + ArrayRef PrefixesTable; + + explicit OptNameLess(const char *StrTable, ArrayRef PrefixesTable) + : StrTable(StrTable), PrefixesTable(PrefixesTable) {} + #ifndef NDEBUG -static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) { - if (&A == &B) - return false; - - if (int Cmp = StrCmpOptionName(A.getName(), B.getName())) - return Cmp < 0; - - // Note: we are converting ArrayRef to ArrayRef. - // In general, ArrayRef cannot be safely viewed as ArrayRef - // since sizeof(SubClass) may not be same as sizeof(Base). However in this - // case, sizeof(StringLiteral) is same as sizeof(StringRef), so this - // conversion is safe. - static_assert(sizeof(StringRef) == sizeof(StringLiteral)); - ArrayRef APrefixes(A.Prefixes.data(), A.Prefixes.size()); - ArrayRef BPrefixes(B.Prefixes.data(), B.Prefixes.size()); - - if (int Cmp = StrCmpOptionPrefixes(APrefixes, BPrefixes)) - return Cmp < 0; - - // Names are the same, check that classes are in order; exactly one - // should be joined, and it should succeed the other. 
- assert(((A.Kind == Option::JoinedClass) ^ (B.Kind == Option::JoinedClass)) && - "Unexpected classes for options with same name."); - return B.Kind == Option::JoinedClass; -} + inline bool operator()(const OptTable::Info &A, + const OptTable::Info &B) const { + if (&A == &B) + return false; + + if (int Cmp = StrCmpOptionName(A.getName(StrTable, PrefixesTable), + B.getName(StrTable, PrefixesTable))) + return Cmp < 0; + + SmallVector APrefixes, BPrefixes; + A.appendPrefixes(StrTable, PrefixesTable, APrefixes); + B.appendPrefixes(StrTable, PrefixesTable, BPrefixes); + + if (int Cmp = StrCmpOptionPrefixes(APrefixes, BPrefixes)) + return Cmp < 0; + + // Names are the same, check that classes are in order; exactly one + // should be joined, and it should succeed the other. + assert( + ((A.Kind == Option::JoinedClass) ^ (B.Kind == Option::JoinedClass)) && + "Unexpected classes for options with same name."); + return B.Kind == Option::JoinedClass; + } #endif -// Support lower_bound between info and an option name. -static inline bool operator<(const OptTable::Info &I, StringRef Name) { - // Do not fallback to case sensitive comparison. - return StrCmpOptionName(I.getName(), Name, false) < 0; -} -} // namespace llvm::opt + // Support lower_bound between info and an option name. + inline bool operator()(const OptTable::Info &I, StringRef Name) const { + // Do not fallback to case sensitive comparison. 
+ return StrCmpOptionName(I.getName(StrTable, PrefixesTable), Name, false) < + 0; + } +}; +} // namespace OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} -OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) - : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase) { +OptTable::OptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, bool IgnoreCase) + : StrTable(StrTable), PrefixesTable(PrefixesTable), + OptionInfos(OptionInfos), IgnoreCase(IgnoreCase) { // Explicitly zero initialize the error to work around a bug in array // value-initialization on MinGW with gcc 4.3.5. @@ -102,7 +111,7 @@ OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) // Check that options are in order. for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions(); i != e; ++i){ - if (!(getInfo(i) < getInfo(i + 1))) { + if (!(OptNameLess(StrTable, PrefixesTable)(getInfo(i), getInfo(i + 1)))) { getOption(i).dump(); getOption(i + 1).dump(); llvm_unreachable("Options are not in order!"); @@ -115,7 +124,7 @@ void OptTable::buildPrefixChars() { assert(PrefixChars.empty() && "rebuilding a non-empty prefix char"); // Build prefix chars. - for (const StringLiteral &Prefix : getPrefixesUnion()) { + for (StringRef Prefix : PrefixesUnion) { for (char C : Prefix) if (!is_contained(PrefixChars, C)) PrefixChars.push_back(C); @@ -132,7 +141,7 @@ const Option OptTable::getOption(OptSpecifier Opt) const { return Option(&getInfo(id), this); } -static bool isInput(const ArrayRef &Prefixes, StringRef Arg) { +static bool isInput(const ArrayRef &Prefixes, StringRef Arg) { if (Arg == "-") return true; for (const StringRef &Prefix : Prefixes) @@ -142,25 +151,32 @@ static bool isInput(const ArrayRef &Prefixes, StringRef Arg) { } /// \returns Matched size. 0 means no match. 
-static unsigned matchOption(const OptTable::Info *I, StringRef Str, +static unsigned matchOption(const char *StrTable, + ArrayRef PrefixesTable, + const OptTable::Info *I, StringRef Str, bool IgnoreCase) { - for (auto Prefix : I->Prefixes) { + StringRef Name = I->getName(StrTable, PrefixesTable); + for (unsigned PrefixOffset : I->getPrefixOffsets(PrefixesTable)) { + StringRef Prefix = &StrTable[PrefixOffset]; if (Str.starts_with(Prefix)) { StringRef Rest = Str.substr(Prefix.size()); - bool Matched = IgnoreCase ? Rest.starts_with_insensitive(I->getName()) - : Rest.starts_with(I->getName()); + bool Matched = IgnoreCase ? Rest.starts_with_insensitive(Name) + : Rest.starts_with(Name); if (Matched) - return Prefix.size() + StringRef(I->getName()).size(); + return Prefix.size() + Name.size(); } } return 0; } // Returns true if one of the Prefixes + In.Names matches Option -static bool optionMatches(const OptTable::Info &In, StringRef Option) { - for (auto Prefix : In.Prefixes) - if (Option.ends_with(In.getName())) - if (Option.slice(0, Option.size() - In.getName().size()) == Prefix) +static bool optionMatches(const char *StrTable, + ArrayRef PrefixesTable, + const OptTable::Info &In, StringRef Option) { + StringRef Name = In.getName(StrTable, PrefixesTable); + if (Option.consume_back(Name)) + for (unsigned PrefixOffset : In.getPrefixOffsets(PrefixesTable)) + if (Option == &StrTable[PrefixOffset]) return true; return false; } @@ -173,7 +189,7 @@ OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const { // Search all options and return possible values. 
for (size_t I = FirstSearchableIndex, E = OptionInfos.size(); I < E; I++) { const Info &In = OptionInfos[I]; - if (!In.Values || !optionMatches(In, Option)) + if (!In.Values || !optionMatches(StrTable, PrefixesTable, In, Option)) continue; SmallVector Candidates; @@ -194,15 +210,17 @@ OptTable::findByPrefix(StringRef Cur, Visibility VisibilityMask, std::vector Ret; for (size_t I = FirstSearchableIndex, E = OptionInfos.size(); I < E; I++) { const Info &In = OptionInfos[I]; - if (In.Prefixes.empty() || (!In.HelpText && !In.GroupID)) + if (In.hasNoPrefix() || (!In.HelpText && !In.GroupID)) continue; if (!(In.Visibility & VisibilityMask)) continue; if (In.Flags & DisableFlags) continue; - for (auto Prefix : In.Prefixes) { - std::string S = (Prefix + In.getName() + "\t").str(); + StringRef Name = In.getName(StrTable, PrefixesTable); + for (unsigned PrefixOffset : In.getPrefixOffsets(PrefixesTable)) { + StringRef Prefix = &StrTable[PrefixOffset]; + std::string S = (Twine(Prefix) + Name + "\t").str(); if (In.HelpText) S += In.HelpText; if (StringRef(S).starts_with(Cur) && S != std::string(Cur) + "\t") @@ -253,7 +271,7 @@ unsigned OptTable::internalFindNearest( for (const Info &CandidateInfo : ArrayRef(OptionInfos).drop_front(FirstSearchableIndex)) { - StringRef CandidateName = CandidateInfo.getName(); + StringRef CandidateName = CandidateInfo.getName(StrTable, PrefixesTable); // We can eliminate some option prefix/name pairs as candidates right away: // * Ignore option candidates with empty names, such as "--", or names @@ -267,7 +285,7 @@ unsigned OptTable::internalFindNearest( // * Ignore positional argument option candidates (which do not // have prefixes). - if (CandidateInfo.Prefixes.empty()) + if (CandidateInfo.hasNoPrefix()) continue; // Now check if the candidate ends with a character commonly used when @@ -286,7 +304,9 @@ unsigned OptTable::internalFindNearest( // Consider each possible prefix for each candidate to find the most // appropriate one. 
For example, if a user asks for "--helm", suggest // "--help" over "-help". - for (auto CandidatePrefix : CandidateInfo.Prefixes) { + for (unsigned CandidatePrefixOffset : + CandidateInfo.getPrefixOffsets(PrefixesTable)) { + StringRef CandidatePrefix = &StrTable[CandidatePrefixOffset]; // If Candidate and NormalizedName have more than 'BestDistance' // characters of difference, no need to compute the edit distance, it's // going to be greater than BestDistance. Don't bother computing Candidate @@ -332,19 +352,21 @@ std::unique_ptr OptTable::parseOneArgGrouped(InputArgList &Args, // itself. const char *CStr = Args.getArgString(Index); StringRef Str(CStr); - if (isInput(getPrefixesUnion(), Str)) + if (isInput(PrefixesUnion, Str)) return std::make_unique(getOption(InputOptionID), Str, Index++, CStr); const Info *End = OptionInfos.data() + OptionInfos.size(); StringRef Name = Str.ltrim(PrefixChars); const Info *Start = - std::lower_bound(OptionInfos.data() + FirstSearchableIndex, End, Name); + std::lower_bound(OptionInfos.data() + FirstSearchableIndex, End, Name, + OptNameLess(StrTable, PrefixesTable)); const Info *Fallback = nullptr; unsigned Prev = Index; // Search for the option which matches Str. for (; Start != End; ++Start) { - unsigned ArgSize = matchOption(Start, Str, IgnoreCase); + unsigned ArgSize = + matchOption(StrTable, PrefixesTable, Start, Str, IgnoreCase); if (!ArgSize) continue; @@ -417,7 +439,7 @@ std::unique_ptr OptTable::internalParseOneArg( // Anything that doesn't start with PrefixesUnion is an input, as is '-' // itself. - if (isInput(getPrefixesUnion(), Str)) + if (isInput(PrefixesUnion, Str)) return std::make_unique(getOption(InputOptionID), Str, Index++, Str.data()); @@ -426,7 +448,8 @@ std::unique_ptr OptTable::internalParseOneArg( StringRef Name = Str.ltrim(PrefixChars); // Search for the first next option which could be a prefix. 
- Start = std::lower_bound(Start, End, Name); + Start = + std::lower_bound(Start, End, Name, OptNameLess(StrTable, PrefixesTable)); // Options are stored in sorted order, with '\0' at the end of the // alphabet. Since the only options which can accept a string must @@ -440,7 +463,8 @@ std::unique_ptr OptTable::internalParseOneArg( unsigned ArgSize = 0; // Scan for first option which is a proper prefix. for (; Start != End; ++Start) - if ((ArgSize = matchOption(Start, Str, IgnoreCase))) + if ((ArgSize = + matchOption(StrTable, PrefixesTable, Start, Str, IgnoreCase))) break; if (Start == End) break; @@ -763,12 +787,15 @@ void OptTable::internalPrintHelp( OS.flush(); } -GenericOptTable::GenericOptTable(ArrayRef OptionInfos, bool IgnoreCase) - : OptTable(OptionInfos, IgnoreCase) { +GenericOptTable::GenericOptTable(const char *StrTable, + ArrayRef PrefixesTable, + ArrayRef OptionInfos, bool IgnoreCase) + : OptTable(StrTable, PrefixesTable, OptionInfos, IgnoreCase) { - std::set TmpPrefixesUnion; + std::set TmpPrefixesUnion; for (auto const &Info : OptionInfos.drop_front(FirstSearchableIndex)) - TmpPrefixesUnion.insert(Info.Prefixes.begin(), Info.Prefixes.end()); - PrefixesUnionBuffer.append(TmpPrefixesUnion.begin(), TmpPrefixesUnion.end()); + for (unsigned PrefixOffset : Info.getPrefixOffsets(PrefixesTable)) + TmpPrefixesUnion.insert(StringRef(&StrTable[PrefixOffset])); + PrefixesUnion.append(TmpPrefixesUnion.begin(), TmpPrefixesUnion.end()); buildPrefixChars(); } diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp index ecb3e84b1da8bd..738f75bb41e68c 100644 --- a/llvm/lib/Option/Option.cpp +++ b/llvm/lib/Option/Option.cpp @@ -57,10 +57,13 @@ void Option::print(raw_ostream &O, bool AddNewLine) const { #undef P } - if (!Info->Prefixes.empty()) { + if (!Info->hasNoPrefix()) { O << " Prefixes:["; - for (size_t I = 0, N = Info->Prefixes.size(); I != N; ++I) - O << '"' << Info->Prefixes[I] << (I == N - 1 ? 
"\"" : "\", "); + for (size_t I = 0, N = Info->getNumPrefixes(Owner->getPrefixesTable()); + I != N; ++I) + O << '"' + << Info->getPrefix(Owner->getStrTable(), Owner->getPrefixesTable(), I) + << (I == N - 1 ? "\"" : "\", "); O << ']'; } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 2ddebb07017c2a..825f2f7f9a494a 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -366,6 +366,8 @@ FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass(TM)) FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM)) FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass(TM)) FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM)) +FUNCTION_PASS("extra-vector-passes", + ExtraFunctionPassManager()) FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flatten-cfg", FlattenCFGPass()) FUNCTION_PASS("float2int", Float2IntPass()) @@ -651,6 +653,8 @@ LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch", LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass()) LOOP_PASS("dot-ddg", DDGDotPrinterPass()) LOOP_PASS("guard-widening", GuardWideningPass()) +LOOP_PASS("extra-simple-loop-unswitch-passes", + ExtraLoopPassManager()) LOOP_PASS("indvars", IndVarSimplifyPass()) LOOP_PASS("invalidate", InvalidateAllAnalysesPass()) LOOP_PASS("loop-bound-split", LoopBoundSplitPass()) diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index 19ae9a79ea519e..9dd43d34f2a0bb 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -32,6 +32,7 @@ #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/MemProfData.inc" #include "llvm/ProfileData/MemProfReader.h" +#include "llvm/ProfileData/MemProfYAML.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 
088de4328a198d..3a6bef81f4a0d8 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -160,7 +160,7 @@ def FeatureSVE : ExtensionWithMArch<"sve", "SVE", "FEAT_SVE", let ArchExtKindSpelling = "AEK_I8MM" in def FeatureMatMulInt8 : ExtensionWithMArch<"i8mm", "MatMulInt8", "FEAT_I8MM", - "Enable Matrix Multiply Int8 Extension">; + "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>; let ArchExtKindSpelling = "AEK_F32MM" in def FeatureMatMulFP32 : ExtensionWithMArch<"f32mm", "MatMulFP32", "FEAT_F32MM", @@ -207,7 +207,7 @@ def FeatureLSE2 : Extension<"lse2", "LSE2", "FEAT_LSE2", "Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules">; def FeatureFP16FML : ExtensionWithMArch<"fp16fml", "FP16FML", "FEAT_FHM", - "Enable FP16 FML instructions", [FeatureFullFP16]>; + "Enable FP16 FML instructions", [FeatureFullFP16, FeatureNEON]>; def FeatureDotProd : ExtensionWithMArch<"dotprod", "DotProd", "FEAT_DotProd", "Enable dot product support", [FeatureNEON]>; @@ -247,7 +247,8 @@ def FeatureAltFPCmp : Extension<"altnzcv", "AlternativeNZCV", "FEAT_FlagM2", def FeatureFRInt3264 : Extension<"fptoint", "FRInt3264", "FEAT_FRINTTS", "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to " - "an integer (in FP format) forcing it to fit into a 32- or 64-bit int">; + "an integer (in FP format) forcing it to fit into a 32- or 64-bit int", + [FeatureFPARMv8]>; def FeatureSB : ExtensionWithMArch<"sb", "SB", "FEAT_SB", "Enable Armv8.5-A Speculation Barrier">; @@ -279,7 +280,7 @@ def FeatureMTE : ExtensionWithMArch<"mte", "MTE", "FEAT_MTE, FEAT_MTE2", //===----------------------------------------------------------------------===// def FeatureBF16 : ExtensionWithMArch<"bf16", "BF16", "FEAT_BF16", - "Enable BFloat16 Extension">; + "Enable BFloat16 Extension", [FeatureNEON]>; def FeatureAMVS : Extension<"amvs", "AMVS", "FEAT_AMUv1p1", "Enable Armv8.6-A Activity Monitors Virtualization support", @@ -403,7 +404,7 
@@ def FeatureRME : Extension<"rme", "RME", "FEAT_RME", "Enable Realm Management Extension">; def FeatureSME : ExtensionWithMArch<"sme", "SME", "FEAT_SME", - "Enable Scalable Matrix Extension (SME)", [FeatureBF16]>; + "Enable Scalable Matrix Extension (SME)", [FeatureBF16, FeatureFullFP16]>; def FeatureSMEF64F64 : ExtensionWithMArch<"sme-f64f64", "SMEF64F64", "FEAT_SME_F64F64", "Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 424f18ba4d822f..cee609ed1e2f6f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15841,27 +15841,11 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, return getVectorBitwiseReduce(Opcode, HalfVec, VT, DL, DAG); } - // Results of setcc operations get widened to 128 bits for xor reduce if - // their input operands are 128 bits wide, otherwise vectors that are less - // than 64 bits get widened to neatly fit a 64 bit register, so e.g. - // <4 x i1> gets lowered to either <4 x i16> or <4 x i32>. Sign extending to + // Vectors that are less than 64 bits get widened to neatly fit a 64 bit + // register, so e.g. <4 x i1> gets lowered to <4 x i16>. Sign extending to // this element size leads to the best codegen, since e.g. setcc results // might need to be truncated otherwise. - unsigned ExtendedWidth = 64; - if (ScalarOpcode == ISD::XOR && Vec.getOpcode() == ISD::SETCC && - Vec.getOperand(0).getValueSizeInBits() >= 128) { - ExtendedWidth = 128; - } - EVT ExtendedVT = MVT::getIntegerVT(std::max(ExtendedWidth / NumElems, 8u)); - - // Negate the reduced vector value for reduce and operations that use - // fcmp. 
- if (ScalarOpcode == ISD::AND && NumElems < 16) { - Vec = DAG.getNode( - ISD::XOR, DL, VecVT, Vec, - DAG.getSplatVector( - VecVT, DL, DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32))); - } + EVT ExtendedVT = MVT::getIntegerVT(std::max(64u / NumElems, 8u)); // any_ext doesn't work with umin/umax, so only use it for uadd. unsigned ExtendOp = @@ -15870,36 +15854,10 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, ExtendOp, DL, VecVT.changeVectorElementType(ExtendedVT), Vec); switch (ScalarOpcode) { case ISD::AND: - if (NumElems < 16) { - // Check if all lanes of the negated bool vector value are zero by - // comparing against 0.0 with ordered and equal predicate. The only - // non-zero bit pattern that compares ordered and equal to 0.0 is -0.0, - // where only the sign bit is set. However the bool vector is - // sign-extended so that each bit in a lane is either zero or one, - // meaning that it is impossible to get the bit pattern of -0.0. - assert(Extended.getValueSizeInBits() == 64); - Extended = DAG.getBitcast(MVT::f64, Extended); - Result = - DAG.getSetCC(DL, MVT::i32, Extended, - DAG.getConstantFP(0.0, DL, MVT::f64), ISD::SETOEQ); - } else { - Result = DAG.getNode(ISD::VECREDUCE_UMIN, DL, ExtendedVT, Extended); - } + Result = DAG.getNode(ISD::VECREDUCE_UMIN, DL, ExtendedVT, Extended); break; case ISD::OR: - if (NumElems < 16) { - // Check if any lane of the bool vector is set by comparing against 0.0. - // NaN bit patterns are handled by using the 'unordered or not equal' - // predicate. Similarly to the reduce and case, -0.0 doesn't have to be - // handled here (see explanation above). 
- assert(Extended.getValueSizeInBits() == 64); - Extended = DAG.getBitcast(MVT::f64, Extended); - Result = - DAG.getSetCC(DL, MVT::i32, Extended, - DAG.getConstantFP(0.0, DL, MVT::f64), ISD::SETUNE); - } else { - Result = DAG.getNode(ISD::VECREDUCE_UMAX, DL, ExtendedVT, Extended); - } + Result = DAG.getNode(ISD::VECREDUCE_UMAX, DL, ExtendedVT, Extended); break; case ISD::XOR: Result = DAG.getNode(ISD::VECREDUCE_ADD, DL, ExtendedVT, Extended); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index da585dd3a21c88..1a5be28dce4a0c 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4379,10 +4379,11 @@ defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt", nxv8bf16, int_aar defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt2>; // FP8 downconvert -defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>; -defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>; -defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>; -defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>; +defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r, nxv8f16, int_aarch64_sve_fp8_cvtn>; +defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnb>; +defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r, nxv8bf16, int_aarch64_sve_fp8_cvtn>; + +defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single_top<0b11, "fcvtnt", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnt>; } // End HasSVE2orSME2, HasFP8 let Predicates = [HasSVE2orSME2, HasFAMINMAX] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 27995ca5bb701f..a67093b1a58c3d 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ 
b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -32,7 +32,8 @@ def tileslicerange2s4 : ComplexPattern", []>; def tileslicerange1s4 : ComplexPattern", []>; def tileslicerange0s4 : ComplexPattern", []>; -def am_sme_indexed_b4 :ComplexPattern", [], [SDNPWantRoot]>; +let WantsRoot = true in +def am_sme_indexed_b4 : ComplexPattern">; def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore, diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9ae66518dfb4ed..22b56f7f3e9a9e 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -9567,8 +9567,10 @@ multiclass sve_int_perm_bin_perm_128_zz opc, bit P, string asm, SDPatter } /// Addressing modes -def am_sve_indexed_s4 :ComplexPattern", [], [SDNPWantRoot]>; -def am_sve_indexed_s6 :ComplexPattern", [], [SDNPWantRoot]>; +let WantsRoot = true in { + def am_sve_indexed_s4 : ComplexPattern">; + def am_sve_indexed_s6 : ComplexPattern">; +} def am_sve_regreg_lsl0 : ComplexPattern", []>; def am_sve_regreg_lsl1 : ComplexPattern", []>; @@ -10794,10 +10796,45 @@ class sve2_fp8_down_cvt_single opc, string mnemonic, let Inst{5} = 0b0; let Inst{4-0} = Zd; let Uses = [FPMR, FPCR]; + + let mayLoad = 1; + let mayStore = 0; } -multiclass sve2_fp8_down_cvt_single opc, string mnemonic, RegisterOperand src> { +multiclass sve2_fp8_down_cvt_single opc, string mnemonic, RegisterOperand src, + ValueType ty, SDPatternOperator op> { def NAME : sve2_fp8_down_cvt_single; + + def : Pat<(nxv16i8 (op ty:$Zn1, ty:$Zn2)), + (!cast(NAME) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; +} + +class sve2_fp8_down_cvt_single_top opc, string mnemonic, RegisterOperand src_ty> + : I<(outs ZPR8:$Zd), (ins ZPR8:$_Zd, src_ty:$Zn), mnemonic, "\t$Zd, $Zn","", []>, Sched<[]> { + bits<5> Zd; + bits<4> Zn; + + let Inst{31-12} = 0b01100101000010100011; + let 
Inst{11-10} = opc; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = DestructiveOther; + let ElementSize = ZPR8.ElementSize; + + let Uses = [FPMR, FPCR]; + let mayLoad = 1; + let mayStore = 0; +} + +multiclass sve2_fp8_down_cvt_single_top opc, string mnemonic, RegisterOperand src_ty, + ValueType ty, SDPatternOperator op> { + def NAME : sve2_fp8_down_cvt_single_top; + + def : Pat<(nxv16i8 (op nxv16i8:$Zd, ty:$Zn1, ty:$Zn2)), + (!cast(NAME) $Zd, (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; } // FP8 Widening Multiply-Add Long - Indexed Group diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 06e852fe4752e0..21f9c50c352563 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -168,9 +168,18 @@ class AMDGPUInformationCache : public InformationCache { return ST.supportsGetDoorbellID(); } - std::pair getFlatWorkGroupSizes(const Function &F) { + std::optional> + getFlatWorkGroupSizeAttr(const Function &F) const { + auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size"); + if (!R) + return std::nullopt; + return std::make_pair(R->first, *(R->second)); + } + + std::pair + getDefaultFlatWorkGroupSize(const Function &F) const { const GCNSubtarget &ST = TM.getSubtarget(F); - return ST.getFlatWorkGroupSizes(F); + return ST.getDefaultFlatWorkGroupSize(F.getCallingConv()); } std::pair @@ -197,6 +206,19 @@ class AMDGPUInformationCache : public InformationCache { return ST.getWavesPerEU(F, FlatWorkGroupSize); } + std::optional> + getWavesPerEUAttr(const Function &F) { + auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", + /*OnlyFirstRequired=*/true); + if (!Val) + return std::nullopt; + if (!Val->second) { + const GCNSubtarget &ST = TM.getSubtarget(F); + Val->second = ST.getMaxWavesPerEU(); + } + return std::make_pair(Val->first, *(Val->second)); + 
} + std::pair getEffectiveWavesPerEU(const Function &F, std::pair WavesPerEU, @@ -812,17 +834,30 @@ struct AAAMDSizeRangeAttribute return Change; } - ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min, - unsigned Max) { - // Don't add the attribute if it's the implied default. - if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) + /// Clamp the assumed range to the default value ([Min, Max]) and emit the + /// attribute if it is not same as default. + ChangeStatus + emitAttributeIfNotDefaultAfterClamp(Attributor &A, + std::pair Default) { + auto [Min, Max] = Default; + unsigned Lower = getAssumed().getLower().getZExtValue(); + unsigned Upper = getAssumed().getUpper().getZExtValue(); + + // Clamp the range to the default value. + if (Lower < Min) + Lower = Min; + if (Upper > Max + 1) + Upper = Max + 1; + + // No manifest if the value is invalid or same as default after clamp. + if ((Lower == Min && Upper == Max + 1) || (Upper < Lower)) return ChangeStatus::UNCHANGED; Function *F = getAssociatedFunction(); LLVMContext &Ctx = F->getContext(); SmallString<10> Buffer; raw_svector_ostream OS(Buffer); - OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; + OS << Lower << ',' << Upper - 1; return A.manifestAttrs(getIRPosition(), {Attribute::get(Ctx, AttrName, OS.str())}, /*ForceReplace=*/true); @@ -846,13 +881,33 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); - unsigned MinGroupSize, MaxGroupSize; - std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); - intersectKnown( - ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); + bool HasAttr = false; + auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F); + auto MaxRange = 
InfoCache.getMaximumFlatWorkGroupRange(*F); + + if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) { + // We only consider an attribute that is not max range because the front + // end always emits the attribute, unfortunately, and sometimes it emits + // the max range. + if (*Attr != MaxRange) { + Range = *Attr; + HasAttr = true; + } + } + + // We don't want to directly clamp the state if it's the max range because + // that is basically the worst state. + if (Range == MaxRange) + return; + + auto [Min, Max] = Range; + ConstantRange CR(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState IRS(CR); + clampStateAndIndicateChange(this->getState(), IRS); + + if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv())) + indicateOptimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { @@ -866,9 +921,8 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { ChangeStatus manifest(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); - return emitAttributeIfNotDefault(A, Min, Max); + return emitAttributeIfNotDefaultAfterClamp( + A, InfoCache.getMaximumFlatWorkGroupRange(*F)); } /// See AbstractAttribute::getName() @@ -1044,29 +1098,47 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A) : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {} - bool isValidState() const override { - return !Assumed.isEmptySet() && IntegerRangeState::isValidState(); - } - void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); - if (const auto *AssumedGroupSize = A.getAAFor( - *this, IRPosition::function(*F), DepClassTy::REQUIRED); - AssumedGroupSize->isValidState()) { + auto TakeRange = [&](std::pair R) { + auto [Min, Max] = R; + ConstantRange Range(APInt(32, Min), 
APInt(32, Max + 1)); + IntegerRangeState RangeState(Range); + clampStateAndIndicateChange(this->getState(), RangeState); + indicateOptimisticFixpoint(); + }; - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getWavesPerEU( - *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(), - AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); + std::pair MaxWavesPerEURange{ + 1U, InfoCache.getMaxWavesPerEU(*F)}; - ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); - intersectKnown(Range); + // If the attribute exists, we will honor it if it is not the default. + if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) { + if (*Attr != MaxWavesPerEURange) { + TakeRange(*Attr); + return; + } } - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); + // Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the + // calculation of waves per EU involves flat work group size, we can't + // simply use an assumed flat work group size as a start point, because the + // update of flat work group size is in an inverse direction of waves per + // EU. However, we can still do something if it is an entry function. Since + // an entry function is a terminal node, and flat work group size either + // from attribute or default will be used anyway, we can take that value and + // calculate the waves per EU based on it. This result can't be updated by + // no means, but that could still allow us to propagate it. 
+ if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) { + std::pair FlatWorkGroupSize; + if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) + FlatWorkGroupSize = *Attr; + else + FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F); + TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange, + FlatWorkGroupSize)); + } } ChangeStatus updateImpl(Attributor &A) override { @@ -1115,8 +1187,8 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { ChangeStatus manifest(Attributor &A) override { Function *F = getAssociatedFunction(); auto &InfoCache = static_cast(A.getInfoCache()); - unsigned Max = InfoCache.getMaxWavesPerEU(*F); - return emitAttributeIfNotDefault(A, 1, Max); + return emitAttributeIfNotDefaultAfterClamp( + A, {1U, InfoCache.getMaxWavesPerEU(*F)}); } /// See AbstractAttribute::getName() diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index a288c58def5cbd..a351f451584f9d 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -9,8 +9,10 @@ def MUBUFAddr64 : ComplexPattern; def MUBUFOffset : ComplexPattern; -def MUBUFScratchOffen : ComplexPattern; -def MUBUFScratchOffset : ComplexPattern; +let WantsParent = true in { + def MUBUFScratchOffen : ComplexPattern; + def MUBUFScratchOffset : ComplexPattern; +} def BUFSOffset : ComplexPattern; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 9244b4a6649861..8fa708b74dde32 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -6,13 +6,15 @@ // //===----------------------------------------------------------------------===// -def FlatOffset : ComplexPattern; -def GlobalOffset : ComplexPattern; -def ScratchOffset : ComplexPattern; +let WantsRoot = true in { + def FlatOffset : ComplexPattern; + def GlobalOffset : ComplexPattern; + def ScratchOffset : ComplexPattern; -def GlobalSAddr : 
ComplexPattern; -def ScratchSAddr : ComplexPattern; -def ScratchSVAddr : ComplexPattern; + def GlobalSAddr : ComplexPattern; + def ScratchSAddr : ComplexPattern; + def ScratchSVAddr : ComplexPattern; +} //===----------------------------------------------------------------------===// // FLAT classes diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 8510f53c03654d..6b78c96c982b9e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1344,22 +1344,33 @@ std::pair getIntegerPairAttribute(const Function &F, StringRef Name, std::pair Default, bool OnlyFirstRequired) { + if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired)) + return {Attr->first, Attr->second ? *(Attr->second) : Default.second}; + return Default; +} + +std::optional>> +getIntegerPairAttribute(const Function &F, StringRef Name, + bool OnlyFirstRequired) { Attribute A = F.getFnAttribute(Name); if (!A.isStringAttribute()) - return Default; + return std::nullopt; LLVMContext &Ctx = F.getContext(); - std::pair Ints = Default; + std::pair> Ints; std::pair Strs = A.getValueAsString().split(','); if (Strs.first.trim().getAsInteger(0, Ints.first)) { Ctx.emitError("can't parse first integer attribute " + Name); - return Default; + return std::nullopt; } - if (Strs.second.trim().getAsInteger(0, Ints.second)) { + unsigned Second = 0; + if (Strs.second.trim().getAsInteger(0, Second)) { if (!OnlyFirstRequired || !Strs.second.trim().empty()) { Ctx.emitError("can't parse second integer attribute " + Name); - return Default; + return std::nullopt; } + } else { + Ints.second = Second; } return Ints; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 667d8600a589d9..21ed61a9426fcf 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -936,6 +936,19 @@ 
getIntegerPairAttribute(const Function &F, StringRef Name, std::pair Default, bool OnlyFirstRequired = false); +/// \returns A pair of integer values requested using \p F's \p Name attribute +/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired +/// is false). +/// +/// \returns \p std::nullopt if attribute is not present. +/// +/// \returns \p std::nullopt and emits error if one of the requested values +/// cannot be converted to integer, or \p OnlyFirstRequired is false and +/// "second" value is not present. +std::optional>> +getIntegerPairAttribute(const Function &F, StringRef Name, + bool OnlyFirstRequired = false); + /// \returns Generate a vector of integer values requested using \p F's \p Name /// attribute. /// diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a1f068f0e049bd..291bfc0610f85d 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -262,30 +262,31 @@ bool ARMBaseRegisterInfo::isInlineAsmReadOnlyReg(const MachineFunction &MF, const TargetRegisterClass * ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const { - const TargetRegisterClass *Super = RC; - TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); + unsigned SuperID = RC->getID(); + auto I = RC->superclasses().begin(); + auto E = RC->superclasses().end(); do { - switch (Super->getID()) { + switch (SuperID) { case ARM::GPRRegClassID: case ARM::SPRRegClassID: case ARM::DPRRegClassID: case ARM::GPRPairRegClassID: - return Super; + return getRegClass(SuperID); case ARM::QPRRegClassID: case ARM::QQPRRegClassID: case ARM::QQQQPRRegClassID: if (MF.getSubtarget().hasNEON()) - return Super; + return getRegClass(SuperID); break; case ARM::MQPRRegClassID: case ARM::MQQPRRegClassID: case ARM::MQQQQPRRegClassID: if (MF.getSubtarget().hasMVEIntegerOps()) - return Super; + return getRegClass(SuperID); 
break; } - Super = *I++; - } while (Super); + SuperID = (I != E) ? *I++ : ~0U; + } while (SuperID != ~0U); return RC; } diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 718cb964ab7c3b..1cb6589184b603 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -1225,25 +1225,25 @@ def PostIdxRegShiftedAsmOperand : AsmOperandClass { let ParserMethod = "parsePostIdxReg"; } def am2offset_reg : MemOperand, - ComplexPattern { + ComplexPattern { let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; // When using this for assembly, it's always as a post-index offset. let ParserMatchClass = PostIdxRegShiftedAsmOperand; let MIOperandInfo = (ops GPRnopc, i32imm); + let WantsRoot = true; } // FIXME: am2offset_imm should only need the immediate, not the GPR. Having // the GPR is purely vestigal at this point. def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; } def am2offset_imm : MemOperand, - ComplexPattern { + ComplexPattern { let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; let ParserMatchClass = AM2OffsetImmAsmOperand; let MIOperandInfo = (ops GPRnopc, i32imm); + let WantsRoot = true; } @@ -1275,13 +1275,12 @@ def AM3OffsetAsmOperand : AsmOperandClass { let Name = "AM3Offset"; let ParserMethod = "parseAM3Offset"; } -def am3offset : MemOperand, - ComplexPattern { +def am3offset : MemOperand, ComplexPattern { let EncoderMethod = "getAddrMode3OffsetOpValue"; let PrintMethod = "printAddrMode3OffsetOperand"; let ParserMatchClass = AM3OffsetAsmOperand; let MIOperandInfo = (ops GPR, i32imm); + let WantsRoot = true; } // ldstm_mode := {ia, ib, da, db} @@ -1328,40 +1327,39 @@ def addrmode5fp16 : AddrMode5FP16 { // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } -def addrmode6 : MemOperand, - ComplexPattern{ +def addrmode6 
: MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); let EncoderMethod = "getAddrMode6AddressOpValue"; let DecoderMethod = "DecodeAddrMode6Operand"; let ParserMatchClass = AddrMode6AsmOperand; + let WantsParent = true; } -def am6offset : MemOperand, - ComplexPattern { +def am6offset : MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6OffsetOperand"; let MIOperandInfo = (ops GPR); let EncoderMethod = "getAddrMode6OffsetOpValue"; let DecoderMethod = "DecodeGPRRegisterClass"; + let WantsRoot = true; } // Special version of addrmode6 to handle alignment encoding for VST1/VLD1 // (single element from one lane) for size 32. -def addrmode6oneL32 : MemOperand, - ComplexPattern{ +def addrmode6oneL32 : MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6OneLane32AddressOpValue"; + let WantsParent = true; } // Base class for addrmode6 with specific alignment restrictions. -class AddrMode6Align : MemOperand, - ComplexPattern{ +class AddrMode6Align : MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); let EncoderMethod = "getAddrMode6AddressOpValue"; let DecoderMethod = "DecodeAddrMode6Operand"; + let WantsParent = true; } // Special version of addrmode6 to handle no allowed alignment encoding for @@ -1432,22 +1430,23 @@ def addrmode6align64or128or256 : AddrMode6Align { // Special version of addrmode6 to handle alignment encoding for VLD-dup // instructions, specifically VLD4-dup. -def addrmode6dup : MemOperand, - ComplexPattern{ +def addrmode6dup : MemOperand, ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6DupAddressOpValue"; // FIXME: This is close, but not quite right. The alignment specifier is // different. 
let ParserMatchClass = AddrMode6AsmOperand; + let WantsParent = true; } // Base class for addrmode6dup with specific alignment restrictions. class AddrMode6DupAlign : MemOperand, - ComplexPattern{ + ComplexPattern { let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6DupAddressOpValue"; + let WantsParent = true; } // Special version of addrmode6 to handle no allowed alignment encoding for diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index bdd0d739a05684..6dd8a374a92af4 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -184,15 +184,16 @@ def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>; def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>; def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>; -class t2am_imm7_offset : MemOperand, - ComplexPattern", - [], [SDNPWantRoot]> { +class t2am_imm7_offset + : MemOperand, + ComplexPattern"> { // They are printed the same way as the imm8 version let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let ParserMatchClass = !cast("t2am_imm7shift"#shift#"OffsetAsmOperand"); let EncoderMethod = "getT2ScaledImmOpValue<7,"#shift#">"; let DecoderMethod = "DecodeT2Imm7<"#shift#">"; + let WantsRoot = true; } // Operands for gather/scatter loads of the form [Rbase, Qoffsets] diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index cc7fc743fe4f92..b69bc601a0cdce 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -287,8 +287,8 @@ def t_addrmode_sp : MemOperand, // Inspects parent to determine whether an or instruction can be implemented as // an add (i.e. whether we know overflow won't occur in the add). 
-def AddLikeOrOp : ComplexPattern; +let WantsParent = true in +def AddLikeOrOp : ComplexPattern; // Pattern to exclude immediates from matching def non_imm32 : PatLeaf<(i32 GPR), [{ return !isa(N); }]>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 99617e53d657a9..9ff056f9c06a16 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -267,11 +267,11 @@ def t2addrmode_imm8_pre : T2AddrMode_Imm8 { } def t2am_imm8_offset : MemOperand, - ComplexPattern { + ComplexPattern { let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let EncoderMethod = "getT2AddrModeImm8OffsetOpValue"; let DecoderMethod = "DecodeT2Imm8"; + let WantsRoot = true; } // t2addrmode_imm8s4 := reg +/- (imm8 << 2) diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index 61c17090e3fbe8..5474a42e58848d 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -232,7 +232,8 @@ def imm_port6 : Operand { } // Addressing mode pattern reg+imm6 -def addr : ComplexPattern; +let WantsRoot = true in +def addr : ComplexPattern; //===----------------------------------------------------------------------===// // AVR predicates for subtarget features diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp index d4d121e4380089..2731c523963e5d 100644 --- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -431,8 +431,9 @@ unsigned HexagonRegisterInfo::getHexagonSubRegIndex( return WSub[GenIdx]; } - if (const TargetRegisterClass *SuperRC = *RC.getSuperClasses()) - return getHexagonSubRegIndex(*SuperRC, GenIdx); + if (!RC.superclasses().empty()) + return getHexagonSubRegIndex(*getRegClass(*RC.superclasses().begin()), + GenIdx); llvm_unreachable("Invalid register class"); } diff --git a/llvm/lib/Target/M68k/M68kInstrControl.td 
b/llvm/lib/Target/M68k/M68kInstrControl.td index 6e116d7cfe4019..4e94b2ed3a0645 100644 --- a/llvm/lib/Target/M68k/M68kInstrControl.td +++ b/llvm/lib/Target/M68k/M68kInstrControl.td @@ -179,6 +179,8 @@ class MxBcc (descend 0b0110, !cast("MxCC"#cc).Value, disp_8), disp_16_32 ); + + let Predicates = !if(!eq(TARGET, MxBrTarget32), [AtLeastM68020], []); } foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge", @@ -190,6 +192,10 @@ foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge", def B#cc#"16" : MxBcc"))>; + + def B#cc#"32" + : MxBcc"))>; } foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge", @@ -215,6 +221,8 @@ class MxBra (descend 0b0110, 0b0000, disp_8), disp_16_32 ); + + let Predicates = !if(!eq(TARGET, MxBrTarget32), [AtLeastM68020], []); } def BRA8 : MxBra"))>; +def BRA32 : MxBra"), + (decoder "DecodeImm32"))>; + def : Pat<(br bb:$target), (BRA8 MxBrTarget8:$target)>; /// ------------------------------------------------- @@ -242,6 +254,7 @@ class MxBsr (descend 0b0110, 0b0001, disp_8), disp_16_32 ); + let Predicates = !if(!eq(TARGET, MxBrTarget32), [AtLeastM68020], []); } def BSR8 : MxBsr; // Less or Equal // NOTE Though this CP is not strictly necessarily it will simplify instruciton // definitions -def MxCP_ARI : ComplexPattern; +let WantsParent = true in { + def MxCP_ARI : ComplexPattern; -def MxCP_ARIPI : ComplexPattern; + def MxCP_ARIPI : ComplexPattern; -def MxCP_ARIPD : ComplexPattern; + def MxCP_ARIPD : ComplexPattern; -def MxCP_ARID : ComplexPattern; + def MxCP_ARID : ComplexPattern; -def MxCP_ARII : ComplexPattern; + def MxCP_ARII : ComplexPattern; -def MxCP_AL : ComplexPattern; + def MxCP_AL : ComplexPattern; -def MxCP_PCD : ComplexPattern; - -def MxCP_PCI : ComplexPattern; + def MxCP_PCD : ComplexPattern; + def MxCP_PCI : ComplexPattern; +} //===----------------------------------------------------------------------===// // Pattern Fragments diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp 
b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp index 2c52fe07bb1119..04777677cbf218 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp @@ -29,19 +29,28 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCValue.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "M68k-asm-backend" + namespace { class M68kAsmBackend : public MCAsmBackend { + bool Allows32BitBranch; public: - M68kAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::big) {} + M68kAsmBackend(const Target &T, const MCSubtargetInfo &STI) + : MCAsmBackend(llvm::endianness::big), + Allows32BitBranch(llvm::StringSwitch(STI.getCPU()) + .CasesLower("m68020", "m68030", "m68040", true) + .Default(false)) {} unsigned getNumFixupKinds() const override { return 0; } @@ -51,18 +60,34 @@ class M68kAsmBackend : public MCAsmBackend { const MCSubtargetInfo *STI) const override { unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); + if (Fixup.getOffset() + Size > Data.size()) { + LLVM_DEBUG(dbgs() << "Fixup.getOffset(): " << Fixup.getOffset() << '\n'); + LLVM_DEBUG(dbgs() << "Size: " << Size << '\n'); + LLVM_DEBUG(dbgs() << "Data.size(): " << Data.size() << '\n'); + assert(Fixup.getOffset() + Size <= Data.size() && + "Invalid fixup offset!"); + } // Check that uppper bits are either all zeros or all ones. // Specifically ignore overflow/underflow as long as the leakage is // limited to the lower bits. This is to remain compatible with // other assemblers. 
- assert(isIntN(Size * 8 + 1, Value) && - "Value does not fit in the Fixup field"); + if (!(isIntN(Size * 8 + 1, static_cast(Value)) || IsResolved)) { + LLVM_DEBUG(dbgs() << "Fixup.getOffset(): " << Fixup.getOffset() << '\n'); + LLVM_DEBUG(dbgs() << "Size: " << Size << '\n'); + LLVM_DEBUG(dbgs() << "Data.size(): " << Data.size() << '\n'); + LLVM_DEBUG(dbgs() << "Value: " << Value << '\n'); + LLVM_DEBUG(dbgs() << "Target: "); + LLVM_DEBUG(Target.print(dbgs())); + LLVM_DEBUG(dbgs() << '\n'); + assert(isIntN(Size * 8 + 1, static_cast(Value)) && + "Value does not fit in the Fixup field"); + } // Write in Big Endian for (unsigned i = 0; i != Size; ++i) - Data[Fixup.getOffset() + i] = uint8_t(Value >> ((Size - i - 1) * 8)); + Data[Fixup.getOffset() + i] = + uint8_t(static_cast(Value) >> ((Size - i - 1) * 8)); } bool mayNeedRelaxation(const MCInst &Inst, @@ -99,6 +124,8 @@ static unsigned getRelaxedOpcodeBranch(const MCInst &Inst) { switch (Op) { default: return Op; + + // 8 -> 16 case M68k::BRA8: return M68k::BRA16; case M68k::Bcc8: @@ -129,6 +156,38 @@ static unsigned getRelaxedOpcodeBranch(const MCInst &Inst) { return M68k::Ble16; case M68k::Bvs8: return M68k::Bvs16; + + // 16 -> 32 + case M68k::BRA16: + return M68k::BRA32; + case M68k::Bcc16: + return M68k::Bcc32; + case M68k::Bls16: + return M68k::Bls32; + case M68k::Blt16: + return M68k::Blt32; + case M68k::Beq16: + return M68k::Beq32; + case M68k::Bmi16: + return M68k::Bmi32; + case M68k::Bne16: + return M68k::Bne32; + case M68k::Bge16: + return M68k::Bge32; + case M68k::Bcs16: + return M68k::Bcs32; + case M68k::Bpl16: + return M68k::Bpl32; + case M68k::Bgt16: + return M68k::Bgt32; + case M68k::Bhi16: + return M68k::Bhi32; + case M68k::Bvc16: + return M68k::Bvc32; + case M68k::Ble16: + return M68k::Ble32; + case M68k::Bvs16: + return M68k::Bvs32; } } @@ -166,26 +225,35 @@ bool M68kAsmBackend::mayNeedRelaxation(const MCInst &Inst, } bool M68kAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value) 
const { - // TODO Newer CPU can use 32 bit offsets, so check for this when ready - if (!isInt<16>(Value)) { + uint64_t UnsignedValue) const { + int64_t Value = static_cast(UnsignedValue); + + if (!isInt<32>(Value) || (!Allows32BitBranch && !isInt<16>(Value))) llvm_unreachable("Cannot relax the instruction, value does not fit"); - } - // Relax if the value is too big for a (signed) i8. This means that byte-wide - // instructions have to matched by default - // + + // Relax if the value is too big for a (signed) i8 + // (or signed i16 if 32 bit branches can be used). This means + // that byte-wide instructions have to matched by default + unsigned KindLog2Size = getFixupKindLog2Size(Fixup.getKind()); + bool FixupFieldTooSmall = false; + if (!isInt<8>(Value) && KindLog2Size == 0) + FixupFieldTooSmall = true; + else if (!isInt<16>(Value) && KindLog2Size <= 1) + FixupFieldTooSmall = true; + // NOTE // A branch to the immediately following instruction automatically // uses the 16-bit displacement format because the 8-bit // displacement field contains $00 (zero offset). - return Value == 0 || !isInt<8>(Value); + bool ZeroDisplacementNeedsFixup = Value == 0 && KindLog2Size == 0; + + return ZeroDisplacementNeedsFixup || FixupFieldTooSmall; } // NOTE Can tblgen help at all here to verify there aren't other instructions // we can relax? void M68kAsmBackend::relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const { - // The only relaxations M68k does is from a 1byte pcrel to a 2byte PCRel. 
unsigned RelaxedOp = getRelaxedOpcode(Inst); if (RelaxedOp == Inst.getOpcode()) { @@ -218,8 +286,8 @@ namespace { class M68kELFAsmBackend : public M68kAsmBackend { public: uint8_t OSABI; - M68kELFAsmBackend(const Target &T, uint8_t OSABI) - : M68kAsmBackend(T), OSABI(OSABI) {} + M68kELFAsmBackend(const Target &T, const MCSubtargetInfo &STI, uint8_t OSABI) + : M68kAsmBackend(T, STI), OSABI(OSABI) {} std::unique_ptr createObjectTargetWriter() const override { @@ -235,5 +303,5 @@ MCAsmBackend *llvm::createM68kAsmBackend(const Target &T, const MCTargetOptions &Options) { const Triple &TheTriple = STI.getTargetTriple(); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - return new M68kELFAsmBackend(T, OSABI); + return new M68kELFAsmBackend(T, STI, OSABI); } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 66684dbff6b335..a7836ccc45f476 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1907,10 +1907,10 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>; // Data Movement (Load / Store, Move) //----------------------------------- -def ADDRri : ComplexPattern; -def ADDRri64 : ComplexPattern; +let WantsRoot = true in { + def ADDRri : ComplexPattern; + def ADDRri64 : ComplexPattern; +} def ADDRvar : ComplexPattern; def MEMri : Operand { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 0d3c07aa87ff25..be90a5c562c570 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -686,13 +686,15 @@ def addr : ComplexPattern; def iaddroff : ComplexPattern; // Load and Store Instruction Selection addressing modes. 
-def DForm : ComplexPattern; -def DSForm : ComplexPattern; -def DQForm : ComplexPattern; -def XForm : ComplexPattern; -def ForceXForm : ComplexPattern; -def PCRelForm : ComplexPattern; -def PDForm : ComplexPattern; +let WantsParent = true in { + def DForm : ComplexPattern; + def DSForm : ComplexPattern; + def DQForm : ComplexPattern; + def XForm : ComplexPattern; + def ForceXForm : ComplexPattern; + def PCRelForm : ComplexPattern; + def PDForm : ComplexPattern; +} //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 43dfc4108f8384..019d4cfa33fbaf 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -692,21 +692,23 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, InflateGPRC++; } - for (const auto *I = RC->getSuperClasses(); *I; ++I) { - if (getRegSizeInBits(**I) != getRegSizeInBits(*RC)) + for (unsigned SuperID : RC->superclasses()) { + if (getRegSizeInBits(*getRegClass(SuperID)) != getRegSizeInBits(*RC)) continue; - switch ((*I)->getID()) { + switch (SuperID) { case PPC::VSSRCRegClassID: - return Subtarget.hasP8Vector() ? *I : DefaultSuperclass; + return Subtarget.hasP8Vector() ? getRegClass(SuperID) + : DefaultSuperclass; case PPC::VSFRCRegClassID: case PPC::VSRCRegClassID: - return *I; + return getRegClass(SuperID); case PPC::VSRpRCRegClassID: - return Subtarget.pairedVectorMemops() ? *I : DefaultSuperclass; + return Subtarget.pairedVectorMemops() ? getRegClass(SuperID) + : DefaultSuperclass; case PPC::ACCRCRegClassID: case PPC::UACCRCRegClassID: - return Subtarget.hasMMA() ? *I : DefaultSuperclass; + return Subtarget.hasMMA() ? 
getRegClass(SuperID) : DefaultSuperclass; } } } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 1028149bf513f4..ffc477e9cc252b 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1785,7 +1785,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI); const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI); - auto storeRegToStackSlot = [&](decltype(UnmanagedCSI) CSInfo) { + auto storeRegsToStackSlots = [&](decltype(UnmanagedCSI) CSInfo) { for (auto &CS : CSInfo) { // Insert the spill to the stack frame. Register Reg = CS.getReg(); @@ -1794,8 +1794,8 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( CS.getFrameIdx(), RC, TRI, Register()); } }; - storeRegToStackSlot(UnmanagedCSI); - storeRegToStackSlot(RVVCSI); + storeRegsToStackSlots(UnmanagedCSI); + storeRegsToStackSlots(RVVCSI); return true; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index dcd3598f658f6a..4bcb0edc4b093d 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -51,13 +51,6 @@ static cl::opt EnableGlobalMerge("riscv-enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); -static cl::opt ForceEnableGlobalMergeExternalGlobals( - "riscv-force-enable-global-merge-external-globals", cl::Hidden, - cl::init(false), - cl::desc( - "If the global merge pass is enabled, force enable global merging of " - "external globals (overriding any logic that might disable it)")); - static cl::opt EnableMachineCombiner("riscv-enable-machine-combiner", cl::desc("Enable the machine combiner pass"), @@ -494,8 +487,7 @@ bool RISCVPassConfig::addPreISel() { // Investigating and addressing both items are TODO. 
addPass(createGlobalMergePass(TM, /* MaxOffset */ 2047, /* OnlyOptimizeForSize */ false, - /* MergeExternalByDefault */ - ForceEnableGlobalMergeExternalGlobals)); + /* MergeExternalByDefault */ true)); } return false; diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index dabf36480f1dcf..0df4c451894bec 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -247,6 +247,23 @@ static OperandInfo getOperandInfo(const MachineInstr &MI, llvm_unreachable("Configuration setting instructions do not read or write " "vector registers"); + // Vector Loads and Stores + // Vector Unit-Stride Instructions + // Vector Strided Instructions + /// Dest EEW encoded in the instruction and EMUL=(EEW/SEW)*LMUL + case RISCV::VSE8_V: + case RISCV::VSSE8_V: + return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3); + case RISCV::VSE16_V: + case RISCV::VSSE16_V: + return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4); + case RISCV::VSE32_V: + case RISCV::VSSE32_V: + return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5); + case RISCV::VSE64_V: + case RISCV::VSSE64_V: + return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6); + // Vector Integer Arithmetic Instructions // Vector Single-Width Integer Add and Subtract case RISCV::VADD_VI: @@ -546,8 +563,12 @@ static bool isSupportedInstr(const MachineInstr &MI) { // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions // FIXME: Add support // Vector Narrowing Integer Right Shift Instructions - // FIXME: Add support + case RISCV::VNSRL_WX: case RISCV::VNSRL_WI: + case RISCV::VNSRL_WV: + case RISCV::VNSRA_WI: + case RISCV::VNSRA_WV: + case RISCV::VNSRA_WX: // Vector Integer Compare Instructions // FIXME: Add support // Vector Integer Min/Max Instructions diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index 
ba9779cdc335d6..ea7af893ce103f 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -352,7 +352,8 @@ def X86cmpccxadd : SDNode<"X86ISD::CMPCCXADD", SDTX86Cmpccxadd, SDNPMemOperand]>; // Define X86-specific addressing mode. -def addr : ComplexPattern; +let WantsParent = true in +def addr : ComplexPattern; def lea32addr : ComplexPattern; @@ -378,7 +379,8 @@ def tls64addr : ComplexPattern; -def vectoraddr : ComplexPattern; +let WantsParent = true in +def vectoraddr : ComplexPattern; // A relocatable immediate is an operand that can be relocated by the linker to // an immediate, such as a regular symbol in non-PIC code. diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 50db211c99d882..164d4205955166 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -123,7 +123,8 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const X86Subtarget &Subtarget = MF.getSubtarget(); const TargetRegisterClass *Super = RC; - TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); + auto I = RC->superclasses().begin(); + auto E = RC->superclasses().end(); do { switch (Super->getID()) { case X86::FR32RegClassID: @@ -172,7 +173,12 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; } - Super = *I++; + if (I != E) { + Super = getRegClass(*I); + ++I; + } else { + Super = nullptr; + } } while (Super); return RC; } diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 5865eb7b70b608..7e43c03ee72cac 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -14,6 +14,7 @@ #include "XtensaISelLowering.h" #include "XtensaConstantPoolValue.h" #include "XtensaInstrInfo.h" +#include "XtensaMachineFunctionInfo.h" 
#include "XtensaSubtarget.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -133,6 +134,13 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); + // VASTART, VAARG and VACOPY need to deal with the Xtensa-specific varargs + // structure, but VAEND is a no-op. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + // Compute derived properties from the register classes computeRegisterProperties(STI.getRegisterInfo()); } @@ -217,12 +225,12 @@ void XtensaTargetLowering::LowerAsmOperandForConstraint( #include "XtensaGenCallingConv.inc" +static const MCPhysReg IntRegs[] = {Xtensa::A2, Xtensa::A3, Xtensa::A4, + Xtensa::A5, Xtensa::A6, Xtensa::A7}; + static bool CC_Xtensa_Custom(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const MCPhysReg IntRegs[] = {Xtensa::A2, Xtensa::A3, Xtensa::A4, - Xtensa::A5, Xtensa::A6, Xtensa::A7}; - if (ArgFlags.isByVal()) { Align ByValAlign = ArgFlags.getNonZeroByValAlign(); unsigned ByValSize = ArgFlags.getByValSize(); @@ -304,13 +312,11 @@ SDValue XtensaTargetLowering::LowerFormalArguments( SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + XtensaMachineFunctionInfo *XtensaFI = MF.getInfo(); // Used with vargs to acumulate store chains. std::vector OutChains; - if (IsVarArg) - report_fatal_error("Var arg not supported by FormalArguments Lowering"); - // Assign locations to all of the incoming arguments. 
SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, @@ -323,17 +329,14 @@ SDValue XtensaTargetLowering::LowerFormalArguments( // Arguments stored on registers if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); - const TargetRegisterClass *RC; - if (RegVT == MVT::i32) - RC = &Xtensa::ARRegClass; - else + if (RegVT != MVT::i32) report_fatal_error("RegVT not supported by FormalArguments Lowering"); // Transform the arguments stored on // physical registers into virtual ones - unsigned Register = MF.addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Register, RegVT); + Register Reg = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then @@ -378,6 +381,56 @@ SDValue XtensaTargetLowering::LowerFormalArguments( } } + if (IsVarArg) { + unsigned Idx = CCInfo.getFirstUnallocated(IntRegs); + unsigned ArgRegsNum = std::size(IntRegs); + const TargetRegisterClass *RC = &Xtensa::ARRegClass; + MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned RegSize = 4; + MVT RegTy = MVT::i32; + MVT FITy = getFrameIndexTy(DAG.getDataLayout()); + + XtensaFI->setVarArgsFirstGPR(Idx + 2); // 2 - number of a2 register + + XtensaFI->setVarArgsOnStackFrameIndex( + MFI.CreateFixedObject(4, CCInfo.getStackSize(), true)); + + // Offset of the first variable argument from stack pointer, and size of + // the vararg save area. For now, the varargs save area is either zero or + // large enough to hold a0-a7. + int VaArgOffset, VarArgsSaveSize; + + // If all registers are allocated, then all varargs must be passed on the + // stack and we don't need to save any argregs. 
+ if (ArgRegsNum == Idx) { + VaArgOffset = CCInfo.getStackSize(); + VarArgsSaveSize = 0; + } else { + VarArgsSaveSize = RegSize * (ArgRegsNum - Idx); + VaArgOffset = -VarArgsSaveSize; + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + XtensaFI->setVarArgsInRegsFrameIndex(FI); + + // Copy the integer registers that may have been used for passing varargs + // to the vararg save area. + for (unsigned I = Idx; I < ArgRegsNum; ++I, VaArgOffset += RegSize) { + const Register Reg = RegInfo.createVirtualRegister(RC); + RegInfo.addLiveIn(IntRegs[I], Reg); + + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); + FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + SDValue PtrOff = DAG.getFrameIndex(FI, FITy); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo::getFixedStack(MF, FI)); + OutChains.push_back(Store); + } + } + } + // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions if (!OutChains.empty()) { @@ -579,9 +632,6 @@ XtensaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - if (IsVarArg) - report_fatal_error("VarArg not supported"); - MachineFunction &MF = DAG.getMachineFunction(); // Assign locations to each returned value. 
@@ -859,6 +909,156 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue XtensaTargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + XtensaMachineFunctionInfo *XtensaFI = MF.getInfo(); + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(1); + EVT PtrVT = Addr.getValueType(); + SDLoc DL(Op); + + // Struct va_list_tag + // int32 *va_stk - points to the arguments passed in memory + // int32 *va_reg - points to the registers with arguments saved in memory + // int32 va_ndx - offset from va_stk or va_reg pointers which points to the + // next variable argument + + SDValue VAIndex; + SDValue StackOffsetFI = + DAG.getFrameIndex(XtensaFI->getVarArgsOnStackFrameIndex(), PtrVT); + unsigned ArgWords = XtensaFI->getVarArgsFirstGPR() - 2; + + // If first variable argument passed in registers (maximum words in registers + // is 6) then set va_ndx to the position of this argument in registers area + // stored in memory (va_reg pointer). Otherwise va_ndx should point to the + // position of the first variable argument on stack (va_stk pointer). 
+ if (ArgWords < 6) { + VAIndex = DAG.getConstant(ArgWords * 4, DL, MVT::i32); + } else { + VAIndex = DAG.getConstant(32, DL, MVT::i32); + } + + SDValue FrameIndex = + DAG.getFrameIndex(XtensaFI->getVarArgsInRegsFrameIndex(), PtrVT); + uint64_t FrameOffset = PtrVT.getStoreSize(); + const Value *SV = cast(Op.getOperand(2))->getValue(); + + // Store pointer to arguments given on stack (va_stk) + SDValue StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackOffsetFI, + DAG.getConstant(32, DL, PtrVT)); + + SDValue StoreStackPtr = + DAG.getStore(Chain, DL, StackPtr, Addr, MachinePointerInfo(SV)); + + uint64_t NextOffset = FrameOffset; + SDValue NextPtr = + DAG.getObjectPtrOffset(DL, Addr, TypeSize::getFixed(NextOffset)); + + // Store pointer to arguments given on registers (va_reg) + SDValue StoreRegPtr = DAG.getStore(StoreStackPtr, DL, FrameIndex, NextPtr, + MachinePointerInfo(SV, NextOffset)); + NextOffset += FrameOffset; + NextPtr = DAG.getObjectPtrOffset(DL, Addr, TypeSize::getFixed(NextOffset)); + + // Store third word : position in bytes of the first VA argument (va_ndx) + return DAG.getStore(StoreRegPtr, DL, VAIndex, NextPtr, + MachinePointerInfo(SV, NextOffset)); +} + +SDValue XtensaTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { + // Size of the va_list_tag structure + constexpr unsigned VAListSize = 3 * 4; + SDValue Chain = Op.getOperand(0); + SDValue DstPtr = Op.getOperand(1); + SDValue SrcPtr = Op.getOperand(2); + const Value *DstSV = cast(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast(Op.getOperand(4))->getValue(); + SDLoc DL(Op); + + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, + DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32), + Align(4), /*isVolatile*/ false, /*AlwaysInline*/ true, + /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV), + MachinePointerInfo(SrcSV)); +} + +SDValue XtensaTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + Type 
*Ty = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = Op.getValueType(); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + const Value *SV = cast(Node->getOperand(2))->getValue(); + SDLoc DL(Node); + auto &TD = DAG.getDataLayout(); + Align ArgAlignment = TD.getABITypeAlign(Ty); + unsigned ArgAlignInBytes = ArgAlignment.value(); + unsigned ArgSizeInBytes = TD.getTypeAllocSize(Ty); + unsigned VASizeInBytes = llvm::alignTo(ArgSizeInBytes, 4); + + // va_stk + SDValue VAStack = + DAG.getLoad(MVT::i32, DL, InChain, VAListPtr, MachinePointerInfo()); + InChain = VAStack.getValue(1); + + // va_reg + SDValue VARegPtr = + DAG.getObjectPtrOffset(DL, VAListPtr, TypeSize::getFixed(4)); + SDValue VAReg = + DAG.getLoad(MVT::i32, DL, InChain, VARegPtr, MachinePointerInfo()); + InChain = VAReg.getValue(1); + + // va_ndx + SDValue VarArgIndexPtr = + DAG.getObjectPtrOffset(DL, VARegPtr, TypeSize::getFixed(4)); + SDValue VAIndex = + DAG.getLoad(MVT::i32, DL, InChain, VarArgIndexPtr, MachinePointerInfo()); + InChain = VAIndex.getValue(1); + + SDValue OrigIndex = VAIndex; + + if (ArgAlignInBytes > 4) { + OrigIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex, + DAG.getConstant(ArgAlignInBytes - 1, DL, MVT::i32)); + OrigIndex = + DAG.getNode(ISD::AND, DL, PtrVT, OrigIndex, + DAG.getSignedConstant(-ArgAlignInBytes, DL, MVT::i32)); + } + + VAIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex, + DAG.getConstant(VASizeInBytes, DL, MVT::i32)); + + SDValue CC = DAG.getSetCC(DL, MVT::i32, OrigIndex, + DAG.getConstant(6 * 4, DL, MVT::i32), ISD::SETLE); + + SDValue StkIndex = + DAG.getNode(ISD::ADD, DL, PtrVT, VAIndex, + DAG.getConstant(32 + VASizeInBytes, DL, MVT::i32)); + + CC = DAG.getSetCC(DL, MVT::i32, VAIndex, DAG.getConstant(6 * 4, DL, MVT::i32), + ISD::SETLE); + + SDValue Array = DAG.getNode(ISD::SELECT, DL, MVT::i32, CC, VAReg, VAStack); + + VAIndex = DAG.getNode(ISD::SELECT, DL, MVT::i32, CC, VAIndex, StkIndex); + + CC = DAG.getSetCC(DL, MVT::i32, 
VAIndex, DAG.getConstant(6 * 4, DL, MVT::i32), + ISD::SETLE); + + SDValue VAIndexStore = DAG.getStore(InChain, DL, VAIndex, VarArgIndexPtr, + MachinePointerInfo(SV)); + InChain = VAIndexStore; + + SDValue Addr = DAG.getNode(ISD::SUB, DL, PtrVT, VAIndex, + DAG.getConstant(VASizeInBytes, DL, MVT::i32)); + + Addr = DAG.getNode(ISD::ADD, DL, PtrVT, Array, Addr); + + return DAG.getLoad(VT, DL, InChain, Addr, MachinePointerInfo()); +} + SDValue XtensaTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -1001,6 +1201,12 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, return LowerFRAMEADDR(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case ISD::VACOPY: + return LowerVACOPY(Op, DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index f1cd00c41437a4..cebd7d2016c8ee 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -148,6 +148,12 @@ class XtensaTargetLowering : public TargetLowering { SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h index c38c060b9387ff..c430562091ba75 100644 --- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h @@ -24,10 +24,15 @@ namespace llvm { class XtensaMachineFunctionInfo : 
public MachineFunctionInfo { /// FrameIndex of the spill slot for the scratch register in BranchRelaxation. int BranchRelaxationScratchFrameIndex = -1; + unsigned VarArgsFirstGPR; + int VarArgsOnStackFrameIndex; + int VarArgsInRegsFrameIndex; public: explicit XtensaMachineFunctionInfo(const Function &F, - const TargetSubtargetInfo *STI) {} + const TargetSubtargetInfo *STI) + : VarArgsFirstGPR(0), VarArgsOnStackFrameIndex(0), + VarArgsInRegsFrameIndex(0) {} int getBranchRelaxationScratchFrameIndex() const { return BranchRelaxationScratchFrameIndex; @@ -35,6 +40,16 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo { void setBranchRelaxationScratchFrameIndex(int Index) { BranchRelaxationScratchFrameIndex = Index; } + + unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } + void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } + + int getVarArgsOnStackFrameIndex() const { return VarArgsOnStackFrameIndex; } + void setVarArgsOnStackFrameIndex(int FI) { VarArgsOnStackFrameIndex = FI; } + + // Get and set the frame index of the first stack vararg. + int getVarArgsInRegsFrameIndex() const { return VarArgsInRegsFrameIndex; } + void setVarArgsInRegsFrameIndex(int FI) { VarArgsInRegsFrameIndex = FI; } }; } // namespace llvm diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index fe5ab0fabefa6e..50c9a565e7ae25 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -61,12 +61,20 @@ unsigned AArch64::getFMVPriority(ArrayRef Features) { return Priority + MaxFMVPriority * NumFeatures; } -uint64_t AArch64::getCpuSupportsMask(ArrayRef FeatureStrs) { +uint64_t AArch64::getCpuSupportsMask(ArrayRef Features) { + // Transitively enable the Arch Extensions which correspond to each feature. 
+ ExtensionSet FeatureBits; + for (const StringRef Feature : Features) + if (std::optional Info = parseFMVExtension(Feature)) + if (Info->ID) + FeatureBits.enable(*Info->ID); + + // Construct a bitmask for all the transitively enabled Arch Extensions. uint64_t FeaturesMask = 0; - for (const StringRef &FeatureStr : FeatureStrs) { - if (auto Ext = parseFMVExtension(FeatureStr)) - FeaturesMask |= (1ULL << Ext->Bit); - } + for (const FMVInfo &Info : getFMVInfo()) + if (Info.ID && FeatureBits.Enabled.test(*Info.ID)) + FeaturesMask |= (1ULL << Info.Bit); + return FeaturesMask; } diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index b3dcc0f9866842..58ff720516f384 100644 --- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -31,6 +31,10 @@ using namespace llvm::COFF; namespace { +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + enum { OPT_INVALID = 0, #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), @@ -38,12 +42,9 @@ enum { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -54,7 +55,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DllOptTable : public opt::GenericOptTable { public: - DllOptTable() : opt::GenericOptTable(InfoTable, false) {} + DllOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + false) {} }; // Opens a file. Path has to be resolved already. 
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 2e0841ba02b543..319aebffdbbba2 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -37,6 +37,10 @@ using namespace llvm::object; namespace { +#define OPTTABLE_STR_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_STR_TABLE_CODE + enum { OPT_INVALID = 0, #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), @@ -44,12 +48,9 @@ enum { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -60,7 +61,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class LibOptTable : public opt::GenericOptTable { public: - LibOptTable() : opt::GenericOptTable(InfoTable, true) {} + LibOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + true) {} }; } // namespace diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 778f83b7925691..09e8301b772d96 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -164,9 +164,9 @@ static cl::opt OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses.")); -// TODO: remove this flag. +// TODO: turn on and remove this flag. 
static cl::opt EnableInitializesImprovement( - "enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden, + "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE")); //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index f1568781252c06..555c8435dd330d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1375,6 +1375,16 @@ bool LoopVectorizationLegality::isFixedOrderRecurrence( } bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const { + // When vectorizing early exits, create predicates for the latch block only. + // The early exiting block must be a direct predecessor of the latch at the + // moment. + BasicBlock *Latch = TheLoop->getLoopLatch(); + if (hasUncountableEarlyExit()) { + assert( + is_contained(predecessors(Latch), getUncountableEarlyExitingBlock()) && + "Uncountable exiting block must be a direct predecessor of latch"); + return BB == Latch; + } return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } @@ -1788,13 +1798,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { HasUncountableEarlyExit = false; if (isa(PSE.getBackedgeTakenCount())) { + HasUncountableEarlyExit = true; if (!isVectorizableEarlyExitLoop()) { + UncountableExitingBlocks.clear(); + HasUncountableEarlyExit = false; if (DoExtraAnalysis) Result = false; else return false; - } else - HasUncountableEarlyExit = true; + } } // Go over each instruction and look at memory deps. 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index de164ee434d647..ed00c844285c62 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -385,6 +385,11 @@ static cl::opt UseWiderVFIfCallVariantsPresent( cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants")); +static cl::opt EnableEarlyExitVectorization( + "enable-early-exit-vectorization", cl::init(false), cl::Hidden, + cl::desc( + "Enable vectorization of early exit loops with uncountable exits.")); + // Likelyhood of bypassing the vectorized loop because assumptions about SCEV // variables not overflowing do not hold. See `emitSCEVChecks`. static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127}; @@ -1382,9 +1387,10 @@ class LoopVectorizationCostModel { LLVM_DEBUG(dbgs() << "LV: Loop does not require scalar epilogue\n"); return false; } - // If we might exit from anywhere but the latch, must run the exiting - // iteration in scalar form. - if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { + // If we might exit from anywhere but the latch and early exit vectorization + // is disabled, we must run the exiting iteration in scalar form. + if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch() && + !(EnableEarlyExitVectorization && Legal->hasUncountableEarlyExit())) { LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: not exiting " "from latch block\n"); return true; @@ -3656,10 +3662,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { // Start with the conditional branches exiting the loop. If the branch // condition is an instruction contained in the loop that is only used by the - // branch, it is uniform. + // branch, it is uniform. Note conditions from uncountable early exits are not + // uniform. 
SmallVector Exiting; TheLoop->getExitingBlocks(Exiting); for (BasicBlock *E : Exiting) { + if (Legal->hasUncountableEarlyExit() && TheLoop->getLoopLatch() != E) + continue; auto *Cmp = dyn_cast(E->getTerminator()->getOperand(0)); if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) AddToWorklistIfAllowed(Cmp); @@ -8239,8 +8248,11 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { // If source is an exiting block, we know the exit edge is dynamically dead // in the vector loop, and thus we don't need to restrict the mask. Avoid - // adding uses of an otherwise potentially dead instruction. - if (OrigLoop->isLoopExiting(Src)) + // adding uses of an otherwise potentially dead instruction unless we are + // vectorizing a loop with uncountable exits. In that case, we always + // materialize the mask. + if (OrigLoop->isLoopExiting(Src) && + Src != Legal->getUncountableEarlyExitingBlock()) return EdgeMaskCache[Edge] = SrcMask; VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition()); @@ -8931,14 +8943,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { static SetVector collectUsersInExitBlocks( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, const MapVector &Inductions) { + auto *MiddleVPBB = Plan.getMiddleBlock(); SetVector ExitUsersToFix; for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { - BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock(); - BasicBlock *ExitingBB = find_singleton( - to_vector(predecessors(ExitBB)), - [OrigLoop](BasicBlock *Pred, bool AllowRepeats) { - return OrigLoop->contains(Pred) ? 
Pred : nullptr; - }); for (VPRecipeBase &R : *ExitVPBB) { auto *ExitIRI = dyn_cast(&R); if (!ExitIRI) @@ -8946,35 +8953,48 @@ static SetVector collectUsersInExitBlocks( auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); if (!ExitPhi) break; - Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); - VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); - // Exit values for inductions are computed and updated outside of VPlan - // and independent of induction recipes. - // TODO: Compute induction exit values in VPlan. - if ((isa(V) && - !cast(V)->getTruncInst()) || - isa(V) || - (isa(IncomingValue) && - OrigLoop->contains(cast(IncomingValue)) && - any_of(IncomingValue->users(), [&Inductions](User *U) { - auto *P = dyn_cast(U); - return P && Inductions.contains(P); - }))) - continue; - ExitUsersToFix.insert(ExitIRI); - ExitIRI->addOperand(V); + for (VPBlockBase *PredVPBB : ExitVPBB->getPredecessors()) { + BasicBlock *ExitingBB = OrigLoop->getLoopLatch(); + if (PredVPBB != MiddleVPBB) { + SmallVector ExitingBlocks; + OrigLoop->getExitingBlocks(ExitingBlocks); + assert(ExitingBlocks.size() == 2 && "only support 2 exiting blocks"); + ExitingBB = ExitingBB == ExitingBlocks[0] ? ExitingBlocks[1] + : ExitingBlocks[0]; + } + Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); + VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); + // Exit values for inductions are computed and updated outside of VPlan + // and independent of induction recipes. + // TODO: Compute induction exit values in VPlan. 
+ if ((isa(V) && + !cast(V)->getTruncInst()) || + isa(V) || + (isa(IncomingValue) && + OrigLoop->contains(cast(IncomingValue)) && + any_of(IncomingValue->users(), [&Inductions](User *U) { + auto *P = dyn_cast(U); + return P && Inductions.contains(P); + }))) { + if (ExitVPBB->getSinglePredecessor() == MiddleVPBB) + continue; + } + ExitUsersToFix.insert(ExitIRI); + ExitIRI->addOperand(V); + } } } return ExitUsersToFix; } // Add exit values to \p Plan. Extracts are added for each entry in \p -// ExitUsersToFix if needed and their operands are updated. -static void +// ExitUsersToFix if needed and their operands are updated. Returns true if all +// exit users can be handled, otherwise return false. +static bool addUsersInExitBlocks(VPlan &Plan, const SetVector &ExitUsersToFix) { if (ExitUsersToFix.empty()) - return; + return true; auto *MiddleVPBB = Plan.getMiddleBlock(); VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); @@ -8988,14 +9008,18 @@ addUsersInExitBlocks(VPlan &Plan, if (V->isLiveIn()) continue; - assert(ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB && - "Exit value not handled yet for this edge."); + // Currently only live-ins can be used by exit values from blocks not + // exiting via the vector latch through to the middle block. 
+ if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB) + return false; + LLVMContext &Ctx = ExitIRI->getInstruction().getContext(); VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd, {V, Plan.getOrAddLiveIn(ConstantInt::get( IntegerType::get(Ctx, 32), 1))}); ExitIRI->setOperand(0, Ext); } + return true; } /// Handle users in the exit block for first order reductions in the original @@ -9268,11 +9292,23 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); + if (auto *UncountableExitingBlock = + Legal->getUncountableEarlyExitingBlock()) { + VPlanTransforms::handleUncountableEarlyExit( + *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder); + } addScalarResumePhis(RecipeBuilder, *Plan); SetVector ExitUsersToFix = collectUsersInExitBlocks( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); - addUsersInExitBlocks(*Plan, ExitUsersToFix); + if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) { + reportVectorizationFailure( + "Some exit values in loop with uncountable exit not supported yet", + "Some exit values in loop with uncountable exit not supported yet", + "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop); + return nullptr; + } + // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. 
@@ -10138,12 +10174,12 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } - if (LVL.hasUncountableEarlyExit()) { + if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) { reportVectorizationFailure("Auto-vectorization of loops with uncountable " - "early exit is not yet supported", + "early exit is not enabled", "Auto-vectorization of loops with uncountable " - "early exit is not yet supported", - "UncountableEarlyExitLoopsUnsupported", ORE, L); + "early exit is not enabled", + "UncountableEarlyExitLoopsDisabled", ORE, L); return false; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 5122232ffe9b8e..81c76bc99fbf74 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -861,14 +861,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, auto Plan = std::make_unique(Entry, VecPreheader, ScalarHeader); // Create SCEV and VPValue for the trip count. - - // Currently only loops with countable exits are vectorized, but calling - // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with - // uncountable exits whilst also ensuring the symbolic maximum and known - // back-edge taken count remain identical for loops with countable exits. + // We use the symbolic max backedge-taken-count, which works also when + // vectorizing loops with uncountable early exits. const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); - assert((!isa(BackedgeTakenCountSCEV) && - BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) && + assert(!isa(BackedgeTakenCountSCEV) && "Invalid loop count"); ScalarEvolution &SE = *PSE.getSE(); const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, @@ -903,7 +899,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, // 2) If we require a scalar epilogue, there is no conditional branch as // we unconditionally branch to the scalar preheader. Do nothing. 
// 3) Otherwise, construct a runtime check. - BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); + BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); // The connection order corresponds to the operands of the conditional branch. VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8794517b777f3b..7440a3a386fd2d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -621,6 +621,14 @@ class VPBlockBase { /// Remove all the successors of this block. void clearSuccessors() { Successors.clear(); } + /// Swap successors of the block. The block must have exactly 2 successors. + // TODO: This should be part of introducing conditional branch recipes rather + // than being independent. + void swapSuccessors() { + assert(Successors.size() == 2 && "must have 2 successors to swap"); + std::swap(Successors[0], Successors[1]); + } + /// The method which generates the output IR that correspond to this /// VPBlockBase, thereby "executing" the VPlan. virtual void execute(VPTransformState *State) = 0; @@ -1232,6 +1240,9 @@ class VPInstruction : public VPRecipeWithIRFlags, // operand). Only generates scalar values (either for the first lane only or // for all lanes, depending on its uses). PtrAdd, + // Returns a scalar boolean value, which is true if any lane of its single + // operand is true. + AnyOf, }; private: @@ -3884,10 +3895,10 @@ class VPlan { /// whether to execute the scalar tail loop or the exit block from the loop /// latch. 
const VPBasicBlock *getMiddleBlock() const { - return cast(getVectorLoopRegion()->getSingleSuccessor()); + return cast(getScalarPreheader()->getSinglePredecessor()); } VPBasicBlock *getMiddleBlock() { - return cast(getVectorLoopRegion()->getSingleSuccessor()); + return cast(getScalarPreheader()->getSinglePredecessor()); } /// Return the VPBasicBlock for the preheader of the scalar loop. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index e882368544e815..8fea2ca9461047 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -57,6 +57,7 @@ bool VPRecipeBase::mayWriteToMemory() const { case Instruction::Or: case Instruction::ICmp: case Instruction::Select: + case VPInstruction::AnyOf: case VPInstruction::Not: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: @@ -361,6 +362,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::PtrAdd: case VPInstruction::ExplicitVectorLength: + case VPInstruction::AnyOf: return true; default: return false; @@ -639,6 +641,10 @@ Value *VPInstruction::generate(VPTransformState &State) { } return NewPhi; } + case VPInstruction::AnyOf: { + Value *A = State.get(getOperand(0)); + return Builder.CreateOrReduce(A); + } default: llvm_unreachable("Unsupported opcode for instruction"); @@ -647,7 +653,8 @@ Value *VPInstruction::generate(VPTransformState &State) { bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractFromEnd || - getOpcode() == VPInstruction::ComputeReductionResult; + getOpcode() == VPInstruction::ComputeReductionResult || + getOpcode() == VPInstruction::AnyOf; } bool VPInstruction::isSingleScalar() const { @@ -710,6 +717,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { return false; case Instruction::ICmp: case 
Instruction::Select: + case Instruction::Or: case VPInstruction::PtrAdd: // TODO: Cover additional opcodes. return vputils::onlyFirstLaneUsed(this); @@ -805,6 +813,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::PtrAdd: O << "ptradd"; break; + case VPInstruction::AnyOf: + O << "any-of"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -822,12 +833,13 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, void VPIRInstruction::execute(VPTransformState &State) { assert((isa(&I) || getNumOperands() == 0) && "Only PHINodes can have extra operands"); - if (getNumOperands() == 1) { - VPValue *ExitValue = getOperand(0); + for (const auto &[Idx, Op] : enumerate(operands())) { + VPValue *ExitValue = Op; auto Lane = vputils::isUniformAfterVectorization(ExitValue) ? VPLane::getFirstLane() : VPLane::getLastLaneForVF(State.VF); - auto *PredVPBB = cast(getParent()->getSinglePredecessor()); + VPBlockBase *Pred = getParent()->getPredecessors()[Idx]; + auto *PredVPBB = Pred->getExitingBasicBlock(); BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; // Set insertion point in PredBB in case an extract needs to be generated. // TODO: Model extracts explicitly. @@ -860,11 +872,13 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, O << Indent << "IR " << I; if (getNumOperands() != 0) { - assert(getNumOperands() == 1 && "can have at most 1 operand"); - O << " (extra operand: "; - getOperand(0)->printAsOperand(O, SlotTracker); - O << " from "; - getParent()->getPredecessors()[0]->printAsOperand(O); + O << " (extra operand" << (getNumOperands() > 1 ? 
"s" : "") << ": "; + interleaveComma( + enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) { + Op.value()->printAsOperand(O, SlotTracker); + O << " from "; + getParent()->getPredecessors()[Op.index()]->printAsOperand(O); + }); O << ")"; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 922cba7831f4e9..e27c1bfba93525 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1858,3 +1858,62 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { } } } + +void VPlanTransforms::handleUncountableEarlyExit( + VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, + BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) { + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + auto *LatchVPBB = cast(LoopRegion->getExiting()); + VPBuilder Builder(LatchVPBB->getTerminator()); + auto *MiddleVPBB = Plan.getMiddleBlock(); + VPValue *IsEarlyExitTaken = nullptr; + + // Process the uncountable exiting block. Update IsEarlyExitTaken, which + // tracks if the uncountable early exit has been taken. Also split the middle + // block and have it conditionally branch to the early exit block if + // EarlyExitTaken. + auto *EarlyExitingBranch = + cast(UncountableExitingBlock->getTerminator()); + BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0); + BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1); + + // The early exit block may or may not be the same as the "countable" exit + // block. Creates a new VPIRBB for the early exit block in case it is distinct + // from the countable exit block. + // TODO: Introduce both exit blocks during VPlan skeleton construction. + VPIRBasicBlock *VPEarlyExitBlock; + if (OrigLoop->getUniqueExitBlock()) { + VPEarlyExitBlock = cast(MiddleVPBB->getSuccessors()[0]); + } else { + VPEarlyExitBlock = VPIRBasicBlock::fromBasicBlock( + !OrigLoop->contains(TrueSucc) ? 
TrueSucc : FalseSucc); + } + + VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask( + OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc); + auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond); + IsEarlyExitTaken = + Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond}); + + VPBasicBlock *NewMiddle = new VPBasicBlock("middle.split"); + VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle); + VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock); + NewMiddle->swapSuccessors(); + + VPBuilder MiddleBuilder(NewMiddle); + MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken}); + + // Replace the condition controlling the non-early exit from the vector loop + // with one exiting if either the original condition of the vector latch is + // true or the early exit has been taken. + auto *LatchExitingBranch = cast(LatchVPBB->getTerminator()); + assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount && + "Unexpected terminator"); + auto *IsLatchExitTaken = + Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0), + LatchExitingBranch->getOperand(1)); + auto *AnyExitTaken = Builder.createNaryOp( + Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken}); + Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken); + LatchExitingBranch->eraseFromParent(); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 9cf314a6a9f447..fddde868911665 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -124,6 +124,17 @@ struct VPlanTransforms { /// Remove dead recipes from \p Plan. 
static void removeDeadRecipes(VPlan &Plan); + /// Update \p Plan to account for the uncountable early exit block in \p + /// UncountableExitingBlock by + /// * updating the condition exiting the vector loop to include the early + /// exit conditions + /// * splitting the original middle block to branch to the early exit block + /// if taken. + static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, + Loop *OrigLoop, + BasicBlock *UncountableExitingBlock, + VPRecipeBuilder &RecipeBuilder); + /// Lower abstract recipes to concrete ones, that can be codegen'd. static void convertToConcreteRecipes(VPlan &Plan); }; diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 033900de55278c..9003642f1f93b2 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2848,6 +2848,12 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { if (OldCost < NewCost) return false; + // Canonicalize undef param to RHS to help further folds. 
+ if (isa(DstVec) && !isa(SrcVec)) { + ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); + std::swap(DstVec, SrcVec); + } + Value *Shuf = Builder.CreateShuffleVector(DstVec, SrcVec, Mask); replaceValue(I, *Shuf); diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll index fb366564723db6..a48a4e0e723ebc 100644 --- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll @@ -5,8 +5,10 @@ define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %cmp1 = icmp eq <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 @@ -71,8 +73,9 @@ define i1 @combine_setcc_ne_vecreduce_or_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp ne <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 @@ -129,9 +132,10 @@ define i1 @combine_setcc_ne_vecreduce_or_v64i1(<64 x i8> %a) { define i1 @combine_setcc_eq_vecreduce_and_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_and_v8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 +; CHECK-NEXT: uminv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp eq <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 @@ -188,9 +192,11 @@ define i1 @combine_setcc_eq_vecreduce_and_v64i1(<64 x i8> %a) { 
define i1 @combine_setcc_ne_vecreduce_and_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: uminv b0, v0.8b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %cmp1 = icmp ne <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll new file mode 100644 index 00000000000000..2ffba10e211007 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+sve2,+fp8 < %s | FileCheck %s +; RUN: llc -mattr=+sme2,+fp8 --force-streaming < %s | FileCheck %s + +target triple = "aarch64-linux" + +define @cvtn_bf16( %s1, %s2) { +; CHECK-LABEL: cvtn_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfcvtn z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtn.nxv8bf16( %s1, %s2) + ret %r +} + +define @cvtn_f16( %s1, %s2) { +; CHECK-LABEL: cvtn_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtn z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtn.nxv8f16( %s1, %s2) + ret %r +} + +define @cvtnb_f32( %s1, %s2) { +; CHECK-LABEL: cvtnb_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtnb z0.b, { z0.s, z1.s } +; CHECK-NEXT: ret + %r = call 
@llvm.aarch64.sve.fp8.cvtnb.nxv4f32( %s1, %s2) + ret %r +} + +define @cvtnt_f32( %d, %s1, %s2) { +; CHECK-LABEL: cvtnt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: fcvtnt z0.b, { z2.s, z3.s } +; CHECK-NEXT: ret + %r = call @llvm.aarch64.sve.fp8.cvtnt.nxv4f32( %d, %s1, %s2) + ret %r +} diff --git a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll index 5374d4823034ff..767ca91a58bb10 100644 --- a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll +++ b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll @@ -9,11 +9,13 @@ define i1 @unordered_floating_point_compare_on_v8f32(<8 x float> %a_vec) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 ; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %a_cmp = fcmp ule <8 x float> %a_vec, zeroinitializer %cmp_result = bitcast <8 x i1> %a_cmp to i8 diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll index 62f3e8d184d24d..8ca521327c2e31 100644 --- a/llvm/test/CodeGen/AArch64/reduce-and.ll +++ b/llvm/test/CodeGen/AArch64/reduce-and.ll @@ -20,11 +20,11 @@ define i1 @test_redand_v1i1(<1 x i1> %a) { define i1 @test_redand_v2i1(<2 x i1> %a) { ; CHECK-LABEL: test_redand_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: shl v0.2s, v0.2s, #31 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v2i1: @@ -42,11 +42,11 @@ define 
i1 @test_redand_v2i1(<2 x i1> %a) { define i1 @test_redand_v4i1(<4 x i1> %a) { ; CHECK-LABEL: test_redand_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: uminv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v4i1: @@ -68,11 +68,11 @@ define i1 @test_redand_v4i1(<4 x i1> %a) { define i1 @test_redand_v8i1(<8 x i1> %a) { ; CHECK-LABEL: test_redand_v8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: shl v0.8b, v0.8b, #7 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: uminv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v8i1: diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll index 485cb7c916140c..aac31ce8b71b75 100644 --- a/llvm/test/CodeGen/AArch64/reduce-or.ll +++ b/llvm/test/CodeGen/AArch64/reduce-or.ll @@ -22,8 +22,9 @@ define i1 @test_redor_v2i1(<2 x i1> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.2s, v0.2s, #31 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v2i1: @@ -43,8 +44,9 @@ define i1 @test_redor_v4i1(<4 x i1> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: umaxv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v4i1: @@ -68,8 +70,9 @@ define i1 @test_redor_v8i1(<8 x i1> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.8b, v0.8b, #7 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp 
d0, #0.0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v8i1: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll index f402463de7be81..0d291e0bf07983 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll @@ -49,14 +49,6 @@ define half @t3(half %x) { ; CHECK-NEXT: scvtf s0, w8 ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ret -; -; USE-NEON-NO-GPRS-LABEL: t3: -; USE-NEON-NO-GPRS: // %bb.0: // %entry -; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0 -; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0 -; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0 -; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0 -; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptosi half %x to i32 %conv1 = sitofp i32 %conv to half @@ -107,14 +99,6 @@ define half @t6(half %x) { ; CHECK-NEXT: ucvtf s0, w8 ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ret -; -; USE-NEON-NO-GPRS-LABEL: t6: -; USE-NEON-NO-GPRS: // %bb.0: // %entry -; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0 -; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0 -; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0 -; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0 -; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptoui half %x to i32 %conv1 = uitofp i32 %conv to half diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll index fd81deeb7d913b..7fa416e0dbcd5c 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll @@ -139,11 +139,11 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind { define i1 @test_v4i1(<4 x i1> %a) nounwind { ; CHECK-LABEL: test_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: 
cset w0, eq +; CHECK-NEXT: uminv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a) ret i1 %b diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll index 10a3ef1658a965..58020d28702b2f 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll @@ -15,15 +15,8 @@ declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a) declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a) declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) -declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a) - -define i32 @reduce_and_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v1i8: +define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w8, v0.b[0] @@ -36,14 +29,16 @@ define i32 @reduce_and_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v2i8: +define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v2: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 -; CHECK-NEXT: cmge v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq +; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 +; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 +; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <2 x i8> %a0, zeroinitializer %y = call i1 
@llvm.vector.reduce.and.v2i1(<2 x i1> %x) @@ -51,14 +46,16 @@ define i32 @reduce_and_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v4i8: +define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v4: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: cmge v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq +; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 +; CHECK-NEXT: uminv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 +; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <4 x i8> %a0, zeroinitializer %y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x) @@ -66,12 +63,14 @@ define i32 @reduce_and_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v8i8: +define i32 @reduce_and_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v8: ; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.8b, v0.8b, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq +; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 +; CHECK-NEXT: uminv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 +; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <8 x i8> %a0, zeroinitializer %y = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %x) @@ -79,8 +78,8 @@ define i32 @reduce_and_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v16i8: +define i32 @reduce_and_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v16: ; CHECK: // %bb.0: ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: uminv b0, v0.16b @@ -94,8 +93,8 @@ define i32 @reduce_and_v16i8(<16 x i8> %a0, i32 %a1, i32 
%a2) nounwind { ret i32 %z } -define i32 @reduce_and_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v32i8: +define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_and_v32: ; CHECK: // %bb.0: ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 @@ -110,182 +109,8 @@ define i32 @reduce_and_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_and_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[0] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: cmge v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <2 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <4 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.8h, v0.8h, #0 -; CHECK-NEXT: xtn v0.8b, v0.8h -; 
CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <8 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-NEXT: uminv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <16 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v1i32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <2 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <4 x i32> %a0, zeroinitializer - %y = call i1 
@llvm.vector.reduce.and.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v1.4s, v1.4s, #0 -; CHECK-NEXT: cmge v0.4s, v0.4s, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <8 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v0.2d, v0.2d, #0 -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <2 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_and_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_and_v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmge v1.2d, v1.2d, #0 -; CHECK-NEXT: cmge v0.2d, v0.2d, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, eq -; CHECK-NEXT: ret - %x = icmp slt <4 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 
- ret i32 %z -} - -define i32 @reduce_or_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v1i8: +define i32 @reduce_or_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w8, v0.b[0] @@ -298,13 +123,15 @@ define i32 @reduce_or_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v2i8: +define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v2: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <2 x i8> %a0, zeroinitializer @@ -313,13 +140,15 @@ define i32 @reduce_or_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v4i8: +define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v4: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: umaxv h0, v0.4h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <4 x i8> %a0, zeroinitializer @@ -328,11 +157,13 @@ define i32 @reduce_or_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v8i8: +define i32 @reduce_or_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v8: ; CHECK: // %bb.0: ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-NEXT: 
fcmp d0, #0.0 +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: csel w0, w0, w1, ne ; CHECK-NEXT: ret %x = icmp slt <8 x i8> %a0, zeroinitializer @@ -341,8 +172,8 @@ define i32 @reduce_or_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v16i8: +define i32 @reduce_or_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v16: ; CHECK: // %bb.0: ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: umaxv b0, v0.16b @@ -356,8 +187,8 @@ define i32 @reduce_or_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { ret i32 %z } -define i32 @reduce_or_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v32i8: +define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { +; CHECK-LABEL: reduce_or_v32: ; CHECK: // %bb.0: ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 @@ -371,457 +202,3 @@ define i32 @reduce_or_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { %z = select i1 %y, i32 %a1, i32 %a2 ret i32 %z } - -define i32 @reduce_or_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[0] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i16> %a0, zeroinitializer - %y = call i1 
@llvm.vector.reduce.or.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-NEXT: umaxv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <16 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v1i32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) 
nounwind { -; CHECK-LABEL: reduce_or_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; 
CHECK-NEXT: ret - %x = icmp slt <2 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_or_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_or_v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.2d, v1.2d, #0 -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v1i8: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.b[0] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sshr v0.2s, v0.2s, #24 -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: addv h0, v0.4h -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, 
#0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 -; CHECK-NEXT: addv b0, v0.8b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v16i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 -; CHECK-NEXT: addv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <16 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.16b, v1.16b, #0 -; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b -; CHECK-NEXT: addv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <32 x i8> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[0] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp 
slt <1 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: addv h0, v0.4h -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: addv h0, v0.8h -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 -; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-NEXT: addv b0, v0.16b -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; 
CHECK-NEXT: ret - %x = icmp slt <16 x i16> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v1i32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-NEXT: addv s0, v0.4s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i32> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: addv h0, v0.8h -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <8 x i32> %a0, zeroinitializer - %y = 
call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel w0, w0, w1, lt -; CHECK-NEXT: ret - %x = icmp slt <1 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-NEXT: addp d0, v0.2d -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <2 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} - -define i32 @reduce_xor_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind { -; CHECK-LABEL: reduce_xor_v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v1.2d, v1.2d, #0 -; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: addv s0, v0.4s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel w0, w0, w1, ne -; CHECK-NEXT: ret - %x = icmp slt <4 x i64> %a0, zeroinitializer - %y = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %x) - %z = select i1 %y, i32 %a1, i32 %a2 - ret i32 %z -} diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll index 2a21cc8d7c611d..809a6d6556a7be 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -202,8 +202,9 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-SD-NEXT: 
cmlt v0.4h, v0.4h, #0 -; CHECK-SD-NEXT: fcmp d0, #0.0 -; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: umaxv h0, v0.4h +; CHECK-SD-NEXT: fmov w8, s0 +; CHECK-SD-NEXT: and w0, w8, #0x1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i1: diff --git a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll index 593c9db090a26d..c0f1720e1cf8b3 100644 --- a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll +++ b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll @@ -62,11 +62,13 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) { ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.8b, v1.8h ; NEON-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxv b1, v1.8b ; NEON-FIXED-NEXT: umaxv b2, v2.8b ; NEON-FIXED-NEXT: fmov w8, s2 ; NEON-FIXED-NEXT: bfi x9, x8, #1, #3 ; NEON-FIXED-NEXT: ldrh w8, [x9] +; NEON-FIXED-NEXT: fmov w9, s1 +; NEON-FIXED-NEXT: tst w9, #0x1 ; NEON-FIXED-NEXT: csel w0, w8, w0, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -81,11 +83,13 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) { ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.8b, v1.8h ; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxv b1, v1.8b ; SVE-FIXED-NEXT: umaxv b2, v2.8b ; SVE-FIXED-NEXT: fmov w8, s2 ; SVE-FIXED-NEXT: bfi x9, x8, #1, #3 ; SVE-FIXED-NEXT: ldrh w8, [x9] +; SVE-FIXED-NEXT: fmov w9, s1 +; SVE-FIXED-NEXT: tst w9, #0x1 ; SVE-FIXED-NEXT: csel w0, w8, w0, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; SVE-FIXED-NEXT: ret @@ -106,11 +110,13 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) { ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.4h, v1.4s ; NEON-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxv h1, v1.4h ; NEON-FIXED-NEXT: umaxv h2, v2.4h ; 
NEON-FIXED-NEXT: fmov w8, s2 ; NEON-FIXED-NEXT: bfi x9, x8, #2, #2 ; NEON-FIXED-NEXT: ldr w8, [x9] +; NEON-FIXED-NEXT: fmov w9, s1 +; NEON-FIXED-NEXT: tst w9, #0x1 ; NEON-FIXED-NEXT: csel w0, w8, w0, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -125,11 +131,13 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) { ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.4h, v1.4s ; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxv h1, v1.4h ; SVE-FIXED-NEXT: umaxv h2, v2.4h ; SVE-FIXED-NEXT: fmov w8, s2 ; SVE-FIXED-NEXT: bfi x9, x8, #2, #2 ; SVE-FIXED-NEXT: ldr w8, [x9] +; SVE-FIXED-NEXT: fmov w9, s1 +; SVE-FIXED-NEXT: tst w9, #0x1 ; SVE-FIXED-NEXT: csel w0, w8, w0, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; SVE-FIXED-NEXT: ret @@ -150,11 +158,13 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.2s, v1.2d ; NEON-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s ; NEON-FIXED-NEXT: umaxp v2.2s, v2.2s, v2.2s ; NEON-FIXED-NEXT: fmov w8, s2 ; NEON-FIXED-NEXT: bfi x9, x8, #3, #1 ; NEON-FIXED-NEXT: ldr x8, [x9] +; NEON-FIXED-NEXT: fmov w9, s1 +; NEON-FIXED-NEXT: tst w9, #0x1 ; NEON-FIXED-NEXT: csel x0, x8, x0, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -169,11 +179,13 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.2s, v1.2d ; SVE-FIXED-NEXT: and v2.8b, v1.8b, v2.8b -; SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s ; SVE-FIXED-NEXT: umaxp v2.2s, v2.2s, v2.2s ; SVE-FIXED-NEXT: fmov w8, s2 ; SVE-FIXED-NEXT: bfi x9, x8, #3, #1 ; SVE-FIXED-NEXT: ldr x8, [x9] +; SVE-FIXED-NEXT: fmov w9, s1 +; SVE-FIXED-NEXT: tst w9, #0x1 ; SVE-FIXED-NEXT: csel x0, x8, x0, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; 
SVE-FIXED-NEXT: ret @@ -194,11 +206,13 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.4h, v1.4s ; NEON-FIXED-NEXT: and v3.8b, v1.8b, v3.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxv h1, v1.4h ; NEON-FIXED-NEXT: umaxv h3, v3.4h ; NEON-FIXED-NEXT: fmov w8, s3 ; NEON-FIXED-NEXT: bfi x9, x8, #2, #2 +; NEON-FIXED-NEXT: fmov w8, s1 ; NEON-FIXED-NEXT: ldr s0, [x9] +; NEON-FIXED-NEXT: tst w8, #0x1 ; NEON-FIXED-NEXT: fcsel s0, s0, s2, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -213,11 +227,13 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.4h, v1.4s ; SVE-FIXED-NEXT: and v3.8b, v1.8b, v3.8b -; SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxv h1, v1.4h ; SVE-FIXED-NEXT: umaxv h3, v3.4h ; SVE-FIXED-NEXT: fmov w8, s3 ; SVE-FIXED-NEXT: bfi x9, x8, #2, #2 +; SVE-FIXED-NEXT: fmov w8, s1 ; SVE-FIXED-NEXT: ldr s0, [x9] +; SVE-FIXED-NEXT: tst w8, #0x1 ; SVE-FIXED-NEXT: fcsel s0, s0, s2, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; SVE-FIXED-NEXT: ret @@ -238,11 +254,13 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v1.2s, v1.2d ; NEON-FIXED-NEXT: and v3.8b, v1.8b, v3.8b -; NEON-FIXED-NEXT: fcmp d1, #0.0 +; NEON-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s ; NEON-FIXED-NEXT: umaxp v3.2s, v3.2s, v3.2s ; NEON-FIXED-NEXT: fmov w8, s3 ; NEON-FIXED-NEXT: bfi x9, x8, #3, #1 +; NEON-FIXED-NEXT: fmov w8, s1 ; NEON-FIXED-NEXT: ldr d0, [x9] +; NEON-FIXED-NEXT: tst w8, #0x1 ; NEON-FIXED-NEXT: fcsel d0, d0, d2, ne ; NEON-FIXED-NEXT: add sp, sp, #16 ; NEON-FIXED-NEXT: ret @@ -257,11 +275,13 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; SVE-FIXED-NEXT: str q0, [sp] ; SVE-FIXED-NEXT: xtn v1.2s, v1.2d ; SVE-FIXED-NEXT: and v3.8b, v1.8b, v3.8b -; 
SVE-FIXED-NEXT: fcmp d1, #0.0 +; SVE-FIXED-NEXT: umaxp v1.2s, v1.2s, v1.2s ; SVE-FIXED-NEXT: umaxp v3.2s, v3.2s, v3.2s ; SVE-FIXED-NEXT: fmov w8, s3 ; SVE-FIXED-NEXT: bfi x9, x8, #3, #1 +; SVE-FIXED-NEXT: fmov w8, s1 ; SVE-FIXED-NEXT: ldr d0, [x9] +; SVE-FIXED-NEXT: tst w8, #0x1 ; SVE-FIXED-NEXT: fcsel d0, d0, d2, ne ; SVE-FIXED-NEXT: add sp, sp, #16 ; SVE-FIXED-NEXT: ret diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index 96bbcb7ed2149a..d316e10037757b 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -217,7 +217,7 @@ define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { ; AKF_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group -; ATTRIBUTOR_HSA-SAME: () #[[ATTR3:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { ; ATTRIBUTOR_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) ; ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)) @@ -235,7 +235,6 @@ attributes #1 = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" 
"amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll index 0e4a9791b6f57d..33e7e7a7a019e3 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll @@ -73,7 +73,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() { define void @func_uses_asm_virtreg_agpr() { ; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr( -; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: ret void ; @@ -83,7 +83,7 @@ define void @func_uses_asm_virtreg_agpr() { define void @func_uses_asm_physreg_agpr() { ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr( -; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: ret void ; @@ -93,7 +93,7 @@ define void @func_uses_asm_physreg_agpr() { define void @func_uses_asm_physreg_agpr_tuple() { ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple( -; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: ret void ; @@ -105,7 +105,7 @@ declare void @unknown() define amdgpu_kernel void @kernel_calls_extern() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern( -; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: ret void ; @@ -115,8 +115,8 @@ define amdgpu_kernel void @kernel_calls_extern() { define amdgpu_kernel void @kernel_calls_extern_marked_callsite() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite( -; CHECK-SAME: ) #[[ATTR4]] { -; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]] +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; call void @unknown() #0 @@ -125,7 +125,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() 
{ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect( -; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] { +; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: call void [[INDIRECT]]() ; CHECK-NEXT: ret void ; @@ -135,8 +135,8 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) { define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite( -; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]] +; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]] ; CHECK-NEXT: ret void ; call void %indirect() #0 @@ -155,7 +155,7 @@ define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() { define void @empty() { ; CHECK-LABEL: define void @empty( -; CHECK-SAME: ) #[[ATTR5:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR1]] { ; CHECK-NEXT: ret void ; ret void @@ -163,7 +163,7 @@ define void @empty() { define void @also_empty() { ; CHECK-LABEL: define void @also_empty( -; CHECK-SAME: ) #[[ATTR5]] { +; CHECK-SAME: ) #[[ATTR1]] { ; CHECK-NEXT: ret void ; ret void @@ -256,12 +256,9 @@ attributes #0 = { "amdgpu-no-agpr" } ;. 
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" } +; CHECK: attributes #[[ATTR2]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-agpr" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll b/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll index 28722021e0448f..7e0208cd1f45aa 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll @@ -117,14 +117,14 @@ define void @call_no_dispatch_id() { ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { 
"amdgpu-no-queue-ptr" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index ed136c58379cb3..ea3f08ede2c5dc 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -593,7 +593,7 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr -; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { ; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; ATTRIBUTOR_HSA-NEXT: store volatile ptr addrspace(4) [[IMPLICITARG_PTR]], ptr addrspace(1) undef, align 8 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -645,7 +645,7 @@ define internal void @defined.func() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@defined.func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -658,7 +658,7 @@ define void @func_call_external() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_external -; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @external.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -673,7 +673,7 @@ define void @func_call_defined() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_defined -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { ; ATTRIBUTOR_HSA-NEXT: call void @defined.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ 
-687,8 +687,8 @@ define void @func_call_asm() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { -; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR28:[0-9]+]] +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { +; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR26:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void asm sideeffect "", ""() #3 @@ -702,7 +702,7 @@ define amdgpu_kernel void @kern_call_external() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_external -; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: call void @external.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -717,7 +717,7 @@ define amdgpu_kernel void @func_kern_defined() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @defined.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -769,7 +769,7 @@ define float @func_indirect_call(ptr %fptr) #3 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call -; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] { +; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -788,7 +788,7 @@ define float @func_extern_call() #3 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_extern_call -; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @extern() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -806,7 +806,7 @@ define float 
@func_null_call(ptr %fptr) #3 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_null_call -; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] { +; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float null() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -827,7 +827,7 @@ define float @func_other_intrinsic_call(float %arg) #3 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call -; ATTRIBUTOR_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR17]] { +; ATTRIBUTOR_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR16]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]]) ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -845,7 +845,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -861,7 +861,7 @@ define void @func_sanitize_address() #4 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -877,7 +877,7 @@ define void @func_indirect_sanitize_address() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -893,7 +893,7 @@ 
define amdgpu_kernel void @kern_indirect_sanitize_address() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -912,7 +912,7 @@ define amdgpu_kernel void @kern_decl_sanitize_address() #3 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_decl_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: call void @extern_func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -928,7 +928,7 @@ define internal void @enqueue_block_def() #6 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -941,7 +941,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl -; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -956,7 +956,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR30:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR24:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -969,7 +969,7 @@ define void @unused_enqueue_block() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR30]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void 
@@ -980,7 +980,7 @@ define internal void @known_func() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR30]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR25]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -994,8 +994,8 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR30]] { -; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR29:[0-9]+]] +; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] { +; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR27:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void @known_func() #6 @@ -1024,37 +1024,34 @@ attributes #6 = { "enqueued-block" } ; AKF_HSA: attributes #[[ATTR7]] = { "enqueued-block" } ; AKF_HSA: attributes #[[ATTR8]] = { "amdgpu-calls" } ;. - ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-agpr" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { 
nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" 
"amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; 
ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" 
"amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-agpr" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR27:[0-9]+]] = { 
"amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR28]] = { nounwind } -; ATTRIBUTOR_HSA: attributes #[[ATTR29]] = { "enqueued-block" } +; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR21:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "amdgpu-no-agpr" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { nounwind } +; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "enqueued-block" } ;. ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 239bdfde323cf5..6896ac8d2e5dbd 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -478,17 +478,11 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 { ; No-op addrspacecast should not use queue ptr define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #1 { -; AKF_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast -; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr -; AKF_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 -; AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast -; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr -; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 -; ATTRIBUTOR_HSA-NEXT: ret void +; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast +; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4 +; HSA-NEXT: ret void ; %stof = addrspacecast ptr addrspace(1) %ptr to ptr store volatile i32 0, ptr %stof @@ -496,17 +490,11 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt } define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #1 { -; AKF_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast -; AKF_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { -; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr -; AKF_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4 -; 
AKF_HSA-NEXT: ret void -; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast -; ATTRIBUTOR_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { -; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr -; ATTRIBUTOR_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4 -; ATTRIBUTOR_HSA-NEXT: ret void +; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast +; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr +; HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4 +; HSA-NEXT: ret void ; %stof = addrspacecast ptr addrspace(4) %ptr to ptr %ld = load volatile i32, ptr %stof @@ -633,7 +621,7 @@ define void @use_alloca_func() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR14:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { ; ATTRIBUTOR_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) ; ATTRIBUTOR_HSA-NEXT: store i32 0, ptr addrspace(5) [[ALLOCA]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -668,7 +656,6 @@ attributes #1 = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll index 366432e0fc6cb1..678c3a0158ec17 100644 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-max-num-workgroups-propagate.ll @@ -27,7 +27,7 @@ define internal void @callee_1_2_3() { define amdgpu_kernel void @kernel_1_2_3() #0 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2_3( -; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: call void @callee_1_2_3() ; CHECK-NEXT: call void @extern_callee() ; CHECK-NEXT: call void @dummy() @@ -44,7 +44,7 @@ attributes #0 = {"amdgpu-max-num-workgroups"="1,2,3"} ; -> 100,10,99 define internal void @callee_merge_100_8_32__16_10_99() { ; CHECK-LABEL: define internal void @callee_merge_100_8_32__16_10_99( -; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void ; @@ -54,7 +54,7 @@ define internal void @callee_merge_100_8_32__16_10_99() { define amdgpu_kernel void @kernel_100_8_32() #1 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_100_8_32( -; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: call void @callee_merge_100_8_32__16_10_99() ; CHECK-NEXT: ret void ; @@ -64,7 +64,7 @@ define amdgpu_kernel void @kernel_100_8_32() #1 { define amdgpu_cs void @amdgpu_cs_100_8_32() #1 { ; CHECK-LABEL: define amdgpu_cs void @amdgpu_cs_100_8_32( -; CHECK-SAME: ) #[[ATTR3]] { +; CHECK-SAME: ) #[[ATTR4]] { ; CHECK-NEXT: call void @callee_merge_100_8_32__16_10_99() ; CHECK-NEXT: ret void ; @@ -76,7 +76,7 @@ attributes #1 = {"amdgpu-max-num-workgroups"="100,8,32"} define amdgpu_kernel void @kernel_16_10_99() #2 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_16_10_99( -; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: call void 
@callee_merge_100_8_32__16_10_99() ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void @@ -110,7 +110,7 @@ define internal void @callee_x_worst_case() { define amdgpu_kernel void @kernel_x_maximum() #3 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_x_maximum( -; CHECK-SAME: ) #[[ATTR5:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: call void @merge_to_worst_case() ; CHECK-NEXT: call void @callee_x_worst_case() ; CHECK-NEXT: call void @dummy() @@ -126,7 +126,7 @@ attributes #3 = {"amdgpu-max-num-workgroups"="4294967295,1,1"} define amdgpu_kernel void @kernel_y_maximum() #4 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_y_maximum( -; CHECK-SAME: ) #[[ATTR6:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: call void @merge_to_worst_case() ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void @@ -140,7 +140,7 @@ attributes #4 = {"amdgpu-max-num-workgroups"="1,4294967295,1"} define amdgpu_kernel void @kernel_z_maximum() #5 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_z_maximum( -; CHECK-SAME: ) #[[ATTR7:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR8:[0-9]+]] { ; CHECK-NEXT: call void @merge_to_worst_case() ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void @@ -155,7 +155,7 @@ attributes #5 = {"amdgpu-max-num-workgroups"="1,1,4294967295"} ; Make sure the attribute isn't lost from the callee. 
define internal void @annotated_callee_from_unannotated_kernel() #6 { ; CHECK-LABEL: define internal void @annotated_callee_from_unannotated_kernel( -; CHECK-SAME: ) #[[ATTR8:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR9:[0-9]+]] { ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void ; @@ -167,7 +167,7 @@ attributes #6 = {"amdgpu-max-num-workgroups"="42,99,123"} define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee() { ; CHECK-LABEL: define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee( -; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-SAME: ) #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: call void @annotated_callee_from_unannotated_kernel() ; CHECK-NEXT: ret void ; @@ -178,7 +178,7 @@ define amdgpu_kernel void @unannotated_kernel_calls_annotated_callee() { define internal void @annotated_callee_merge_caller() #7 { ; CHECK-LABEL: define internal void @annotated_callee_merge_caller( -; CHECK-SAME: ) #[[ATTR9:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR11:[0-9]+]] { ; CHECK-NEXT: call void @dummy() ; CHECK-NEXT: ret void ; @@ -190,7 +190,7 @@ attributes #7 = {"amdgpu-max-num-workgroups"="512,256,1024"} define amdgpu_kernel void @call_annotated_callee_merge_caller() #8 { ; CHECK-LABEL: define amdgpu_kernel void @call_annotated_callee_merge_caller( -; CHECK-SAME: ) #[[ATTR10:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR12:[0-9]+]] { ; CHECK-NEXT: call void @annotated_callee_merge_caller() ; CHECK-NEXT: ret void ; @@ -212,7 +212,7 @@ define internal void @called_by_explicit_worst_case() { define amdgpu_kernel void @kernel_explicit_worst_case() #9 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_explicit_worst_case( -; CHECK-SAME: ) #[[ATTR11:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: call void @called_by_explicit_worst_case() ; CHECK-NEXT: ret void ; @@ -223,16 +223,18 @@ define amdgpu_kernel void @kernel_explicit_worst_case() #9 { attributes #9 = {"amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295"} ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="1,2,3" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="100,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="100,8,32" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-max-num-workgroups"="16,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-max-num-workgroups"="4294967295,1,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-max-num-workgroups"="1,4294967295,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-max-num-workgroups"="1,1,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-max-num-workgroups"="42,99,123" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR9]] = { "amdgpu-max-num-workgroups"="256,128,1024" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR10]] = { "amdgpu-max-num-workgroups"="256,128,2048" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR11]] = { "amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="1,2,3" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="1,2,3" "amdgpu-waves-per-eu"="4,10" 
"uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="100,10,99" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-max-num-workgroups"="100,8,32" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-max-num-workgroups"="16,10,99" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-max-num-workgroups"="4294967295,1,1" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-max-num-workgroups"="1,4294967295,1" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-max-num-workgroups"="1,1,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-max-num-workgroups"="42,99,123" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR10]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR11]] = { "amdgpu-max-num-workgroups"="256,128,1024" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR12]] = { "amdgpu-max-num-workgroups"="256,128,2048" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR13]] = { "amdgpu-max-num-workgroups"="4294967295,4294967295,4294967295" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll index bcc960f6a56c2c..25da00e6bde35d 100644 --- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll +++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll @@ -27,12 +27,12 @@ define void @without_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) { define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -60,13 +60,13 @@ define void @with_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) { define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void 
@@ -93,12 +93,12 @@ define void @without_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) { define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(2) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(2) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -126,13 +126,13 @@ define void @with_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) { define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(2) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -159,12 +159,12 @@ define void @without_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) { define amdgpu_kernel void @without_group_to_flat_addrspacecast_cc_kernel(ptr addrspace(3) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_group_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr 
addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_group_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -192,13 +192,13 @@ define void @with_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) { define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel(ptr addrspace(3) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_group_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -225,12 +225,12 @@ define void @without_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) { define amdgpu_kernel void @without_constant_to_flat_addrspacecast_cc_kernel(ptr addrspace(4) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_constant_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(4) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_constant_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr 
addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(4) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -258,13 +258,13 @@ define void @with_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) { define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel(ptr addrspace(4) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_constant_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -291,12 +291,12 @@ define void @without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { define amdgpu_kernel void @without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4 ; GFX10-NEXT: ret void ; @@ -306,13 +306,13 @@ define amdgpu_kernel void 
@without_private_to_flat_addrspacecast_cc_kernel(ptr a define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @with_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @with_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -324,13 +324,13 @@ define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: ret void @@ -357,12 +357,12 @@ define void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { define amdgpu_kernel void @call_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void 
@call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -372,12 +372,12 @@ define amdgpu_kernel void @call_without_private_to_flat_addrspacecast_cc_kernel( define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @call_with_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_with_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -387,12 +387,12 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: 
ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -402,13 +402,13 @@ define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(ptr define void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @call_both_with_and_without_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_both_with_and_without_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void @@ -420,13 +420,13 @@ define void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrsp define amdgpu_kernel void @call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) 
[[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void @@ -453,12 +453,12 @@ define void @call_call_without_private_to_flat_addrspacecast(ptr addrspace(5) %p define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX9-NEXT: call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] { ; GFX10-NEXT: call void @call_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -468,12 +468,12 @@ define amdgpu_kernel void @call_call_without_private_to_flat_addrspacecast_cc_ke define void @call_call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @call_call_with_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_call_with_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -483,12 +483,12 @@ define void 
@call_call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -498,12 +498,12 @@ define amdgpu_kernel void @call_call_with_private_to_flat_addrspacecast_cc_kerne define void @call_call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @call_call_both_with_and_without_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_call_both_with_and_without_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -513,12 +513,12 @@ define void @call_call_both_with_and_without_private_to_flat_addrspacecast(ptr a define amdgpu_kernel void @call_call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void 
@call_call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_call_both_with_and_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @call_both_with_and_without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -528,14 +528,14 @@ define amdgpu_kernel void @call_call_both_with_and_without_private_to_flat_addrs define void @with_cast_call_without_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @with_cast_call_without_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) @@ -549,14 +549,14 @@ define void @with_cast_call_without_private_to_flat_addrspacecast(ptr addrspace( define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void 
@with_cast_call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: call void @without_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) @@ -570,14 +570,14 @@ define amdgpu_kernel void @with_cast_call_without_private_to_flat_addrspacecast_ define void @with_cast_call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @with_cast_call_with_private_to_flat_addrspacecast( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR2]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) @@ -591,14 +591,14 @@ define void @with_cast_call_with_private_to_flat_addrspacecast(ptr 
addrspace(5) define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_kernel( -; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr ; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4 ; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]]) @@ -614,12 +614,12 @@ define amdgpu_kernel void @with_cast_call_with_private_to_flat_addrspacecast_cc_ define amdgpu_kernel void @private_constant_expression_use(ptr addrspace(1) nocapture %out) { ; GFX9-LABEL: define amdgpu_kernel void @private_constant_expression_use( -; GFX9-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @private_constant_expression_use( -; GFX10-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(1) nocapture [[OUT:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8 ; GFX10-NEXT: ret void ; @@ -633,13 +633,13 @@ 
define amdgpu_kernel void @private_constant_expression_use(ptr addrspace(1) noca define void @with_indirect_call() { ; GFX9-LABEL: define void @with_indirect_call( -; GFX9-SAME: ) #[[ATTR4:[0-9]+]] { +; GFX9-SAME: ) #[[ATTR2:[0-9]+]] { ; GFX9-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8 ; GFX9-NEXT: call void [[FPTR]]() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @with_indirect_call( -; GFX10-SAME: ) #[[ATTR4:[0-9]+]] { +; GFX10-SAME: ) #[[ATTR2:[0-9]+]] { ; GFX10-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8 ; GFX10-NEXT: call void [[FPTR]]() ; GFX10-NEXT: ret void @@ -651,13 +651,13 @@ define void @with_indirect_call() { define amdgpu_kernel void @with_indirect_call_cc_kernel() { ; GFX9-LABEL: define amdgpu_kernel void @with_indirect_call_cc_kernel( -; GFX9-SAME: ) #[[ATTR5:[0-9]+]] { +; GFX9-SAME: ) #[[ATTR2]] { ; GFX9-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8 ; GFX9-NEXT: call void [[FPTR]]() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_indirect_call_cc_kernel( -; GFX10-SAME: ) #[[ATTR5:[0-9]+]] { +; GFX10-SAME: ) #[[ATTR2]] { ; GFX10-NEXT: [[FPTR:%.*]] = load ptr, ptr addrspace(4) @gv.fptr0, align 8 ; GFX10-NEXT: call void [[FPTR]]() ; GFX10-NEXT: ret void @@ -669,12 +669,12 @@ define amdgpu_kernel void @with_indirect_call_cc_kernel() { define void @call_with_indirect_call() { ; GFX9-LABEL: define void @call_with_indirect_call( -; GFX9-SAME: ) #[[ATTR4]] { +; GFX9-SAME: ) #[[ATTR2]] { ; GFX9-NEXT: call void @with_indirect_call() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_with_indirect_call( -; GFX10-SAME: ) #[[ATTR4]] { +; GFX10-SAME: ) #[[ATTR2]] { ; GFX10-NEXT: call void @with_indirect_call() ; GFX10-NEXT: ret void ; @@ -684,12 +684,12 @@ define void @call_with_indirect_call() { define amdgpu_kernel void @call_with_indirect_call_cc_kernel() { ; GFX9-LABEL: define amdgpu_kernel void @call_with_indirect_call_cc_kernel( -; GFX9-SAME: ) #[[ATTR5]] { 
+; GFX9-SAME: ) #[[ATTR2]] { ; GFX9-NEXT: call void @with_indirect_call() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_with_indirect_call_cc_kernel( -; GFX10-SAME: ) #[[ATTR5]] { +; GFX10-SAME: ) #[[ATTR2]] { ; GFX10-NEXT: call void @with_indirect_call() ; GFX10-NEXT: ret void ; @@ -723,7 +723,7 @@ define void @also_empty() { define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) { ; GFX9-LABEL: define amdgpu_kernel void @indirect_call_known_callees( -; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR6:[0-9]+]] { +; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; GFX9-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty ; GFX9-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty ; GFX9-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]] @@ -741,7 +741,7 @@ define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) { ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @indirect_call_known_callees( -; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR6:[0-9]+]] { +; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; GFX10-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty ; GFX10-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty ; GFX10-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]] @@ -767,13 +767,13 @@ declare i32 @llvm.amdgcn.workgroup.id.x() define void @use_intrinsic_workitem_id_x() { ; GFX9-LABEL: define void @use_intrinsic_workitem_id_x( -; GFX9-SAME: ) #[[ATTR8:[0-9]+]] { +; GFX9-SAME: ) #[[ATTR5:[0-9]+]] { ; GFX9-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; GFX9-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @use_intrinsic_workitem_id_x( -; GFX10-SAME: ) #[[ATTR8:[0-9]+]] { +; GFX10-SAME: ) #[[ATTR5:[0-9]+]] { ; GFX10-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; GFX10-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4 ; GFX10-NEXT: ret void @@ -785,13 +785,13 
@@ define void @use_intrinsic_workitem_id_x() { define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel() { ; GFX9-LABEL: define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel( -; GFX9-SAME: ) #[[ATTR1]] { +; GFX9-SAME: ) #[[ATTR0]] { ; GFX9-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; GFX9-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel( -; GFX10-SAME: ) #[[ATTR1]] { +; GFX10-SAME: ) #[[ATTR0]] { ; GFX10-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; GFX10-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4 ; GFX10-NEXT: ret void @@ -803,12 +803,12 @@ define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel() { define void @call_use_intrinsic_workitem_id_x() { ; GFX9-LABEL: define void @call_use_intrinsic_workitem_id_x( -; GFX9-SAME: ) #[[ATTR8]] { +; GFX9-SAME: ) #[[ATTR5]] { ; GFX9-NEXT: call void @use_intrinsic_workitem_id_x() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define void @call_use_intrinsic_workitem_id_x( -; GFX10-SAME: ) #[[ATTR8]] { +; GFX10-SAME: ) #[[ATTR5]] { ; GFX10-NEXT: call void @use_intrinsic_workitem_id_x() ; GFX10-NEXT: ret void ; @@ -818,12 +818,12 @@ define void @call_use_intrinsic_workitem_id_x() { define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel() { ; GFX9-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel( -; GFX9-SAME: ) #[[ATTR9:[0-9]+]] { +; GFX9-SAME: ) #[[ATTR5]] { ; GFX9-NEXT: call void @use_intrinsic_workitem_id_x() ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel( -; GFX10-SAME: ) #[[ATTR9:[0-9]+]] { +; GFX10-SAME: ) #[[ATTR5]] { ; GFX10-NEXT: call void @use_intrinsic_workitem_id_x() ; GFX10-NEXT: ret void ; @@ -833,13 +833,13 @@ define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel() { define amdgpu_kernel void 
@calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel( -; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]]) ; GFX9-NEXT: store volatile i32 7, ptr [[TMP1]], align 4 ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel( -; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]]) ; GFX10-NEXT: store volatile i32 7, ptr [[TMP1]], align 4 ; GFX10-NEXT: ret void @@ -851,12 +851,12 @@ define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) { ; GFX9-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel( -; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR3]] { +; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { ; GFX9-NEXT: call void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) [[PTR]]) ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel( -; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR3]] { +; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] { ; GFX10-NEXT: call void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) [[PTR]]) ; GFX10-NEXT: ret void ; @@ -866,12 +866,12 @@ define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) % define amdgpu_kernel void @with_inline_asm() { ; GFX9-LABEL: define amdgpu_kernel void @with_inline_asm( -; GFX9-SAME: ) #[[ATTR6]] { +; GFX9-SAME: ) #[[ATTR3]] { ; GFX9-NEXT: call void asm sideeffect " ; GFX9-NEXT: ret void ; ; GFX10-LABEL: define amdgpu_kernel void @with_inline_asm( -; GFX10-SAME: ) 
#[[ATTR6]] { +; GFX10-SAME: ) #[[ATTR3]] { ; GFX10-NEXT: call void asm sideeffect " ; GFX10-NEXT: ret void ; @@ -880,25 +880,17 @@ define amdgpu_kernel void @with_inline_asm() { } ;. -; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR4]] = { "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR5]] = { "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR6]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" } -; GFX9: attributes #[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; GFX9: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR2]] = { "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" 
"amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; GFX9: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" } +; GFX9: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } ;. -; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: 
attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR4]] = { "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR5]] = { "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR6]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" } -; GFX10: attributes 
#[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } -; GFX10: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" 
"amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR2]] = { "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } +; GFX10: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" } +; GFX10: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll index 2b9f579e6a1839..8481cea4d7c353 100644 --- a/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll +++ b/llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll @@ -51,7 +51,7 @@ bb5: ; preds = %bb5, %bb3 define amdgpu_kernel void @entry() { ; CHECK-LABEL: define {{[^@]+}}@entry -; CHECK-SAME: () #[[ATTR0]] { +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5) ; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr ; CHECK-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]]) @@ -63,5 +63,6 @@ define amdgpu_kernel void @entry() { ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll index c108b93766bcc8..510ee9c1a23fd2 100644 --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -27,7 +27,7 @@ define internal void @direct() { define amdgpu_kernel void @test_direct_indirect_call() { ; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call -; CHECK-SAME: () #[[ATTR1]] { +; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: call void @direct() ; CHECK-NEXT: ret void ; @@ -35,6 +35,7 @@ define amdgpu_kernel void @test_direct_indirect_call() { ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll index 908746a0c7784f..c430c41f59143b 100644 --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -42,6 +42,6 @@ attributes #0 = { "amdgpu-no-dispatch-id" } ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" } ;. 
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll index 5979f1bbacdd14..d5f45d70fb970b 100644 --- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll +++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll @@ -70,14 +70,32 @@ define void @use_everything_else() { } define amdgpu_kernel void @test_default_queue_offset_v4_0(ptr addrspace(1) %kernarg) { -; CHECK-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 -; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: call void @use_everything_else() -; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 -; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; CHECK-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; CHECK-NEXT: ret void +; V4-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 +; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { +; V4-NEXT: call void @use_everything_else() +; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 +; V4-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V4-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V4-NEXT: ret void +; +; V5-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 +; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { +; V5-NEXT: call void @use_everything_else() +; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 +; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) 
[[KERNARG]], align 8 +; V5-NEXT: ret void +; +; V6-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0 +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { +; V6-NEXT: call void @use_everything_else() +; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 +; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V6-NEXT: ret void ; call void @use_everything_else() %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -88,14 +106,32 @@ define amdgpu_kernel void @test_default_queue_offset_v4_0(ptr addrspace(1) %kern } define amdgpu_kernel void @test_default_queue_offset_v5_0(ptr addrspace(1) %kernarg) { -; CHECK-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 -; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] { -; CHECK-NEXT: call void @use_everything_else() -; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 -; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; CHECK-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; CHECK-NEXT: ret void +; V4-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 +; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] { +; V4-NEXT: call void @use_everything_else() +; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 +; V4-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V4-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V4-NEXT: ret void +; +; V5-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 
+; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { +; V5-NEXT: call void @use_everything_else() +; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 +; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V5-NEXT: ret void +; +; V6-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0 +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] { +; V6-NEXT: call void @use_everything_else() +; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 +; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; V6-NEXT: ret void ; call void @use_everything_else() %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -116,7 +152,7 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) % ; V4-NEXT: ret void ; ; V5-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { ; V5-NEXT: call void @use_everything_else() ; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40 @@ -125,7 +161,7 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) % ; V5-NEXT: ret void ; ; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0 -; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { ; V6-NEXT: call void @use_everything_else() ; V6-NEXT: 
[[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40 @@ -142,32 +178,14 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) % } define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %kernarg) { -; V4-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0 -; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3]] { -; V4-NEXT: call void @use_everything_else() -; V4-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V4-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 -; V4-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V4-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V4-NEXT: ret void -; -; V5-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { -; V5-NEXT: call void @use_everything_else() -; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 -; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V5-NEXT: ret void -; -; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0 -; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { -; V6-NEXT: call void @use_everything_else() -; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() -; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 -; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 -; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 -; V6-NEXT: ret void +; CHECK-LABEL: define 
{{[^@]+}}@test_completion_action_offset_v5_0 +; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @use_everything_else() +; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112 +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8 +; CHECK-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8 +; CHECK-NEXT: ret void ; call void @use_everything_else() %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() @@ -188,7 +206,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr ; V4-NEXT: ret void ; ; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { ; V5-NEXT: call void @use_everything_else() ; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 @@ -197,7 +215,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr ; V5-NEXT: ret void ; ; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0 -; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] { +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] { ; V6-NEXT: call void @use_everything_else() ; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32 @@ -224,7 +242,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr ; V4-NEXT: ret void ; ; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0 -; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) 
#[[ATTR5:[0-9]+]] { +; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { ; V5-NEXT: call void @use_everything_else() ; V5-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V5-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 @@ -233,7 +251,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr ; V5-NEXT: ret void ; ; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0 -; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] { +; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] { ; V6-NEXT: call void @use_everything_else() ; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() ; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104 @@ -258,25 +276,23 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo ;. ; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; V4: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; V4: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V4: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V4: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" 
"amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V4: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ; V4: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ;. ; V5: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; V5: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V5: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V5: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" 
"amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V5: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V5: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V5: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ;. ; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; V6: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; V6: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V6: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V6: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } -; V6: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" 
"uniform-work-group-size"="false" } +; V6: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } +; V6: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" } ;. ; V4: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400} ;. diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll index b283a8fca8a394..a27cf3c18a70cd 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll @@ -68,6 +68,6 @@ if.end: ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll index e7a7b8a335d0d3..22e3cc4b047b1a 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll @@ -6,14 +6,14 @@ ; GCN: define amdgpu_kernel void @caller(ptr addrspace(1) nocapture %p) local_unnamed_addr #1 { ; GCN: %mul.i = fmul float %load, 1.500000e+01 -; UNSAFE: attributes #0 = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } +; UNSAFE: attributes #0 = { nounwind "uniform-work-group-size"="false" "unsafe-fp-math"="true" } ; UNSAFE: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } -; NOINFS: attributes #0 = { nounwind "amdgpu-waves-per-eu"="4,10" "no-infs-fp-math"="true" "uniform-work-group-size"="false" } -; NOINFS: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="false" } +; NOINFS: attributes #0 = { nounwind "no-infs-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } +; NOINFS: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } -; NONANS: attributes #0 = { nounwind "amdgpu-waves-per-eu"="4,10" "no-nans-fp-math"="true" "uniform-work-group-size"="false" } -; NONANS: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="false" } +; NONANS: attributes #0 = { nounwind "no-nans-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" } +; NONANS: attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "uniform-work-group-size"="false" 
"unsafe-fp-math"="true" } declare void @extern() #0 @@ -32,5 +32,5 @@ entry: ret void } -attributes #0 = { nounwind } +attributes #0 = { nounwind "uniform-work-group-size"="false" "unsafe-fp-math"="true"} attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll index 51e0eb6d497945..1afd31c6d45e7d 100644 --- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll @@ -98,7 +98,7 @@ define amdgpu_kernel void @kernel_128_512() #5 { define amdgpu_kernel void @kernel_512_512() #6 { ; CHECK-LABEL: define {{[^@]+}}@kernel_512_512 -; CHECK-SAME: () #[[ATTR5]] { +; CHECK-SAME: () #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: call void @default_to_128_512() ; CHECK-NEXT: call void @flat_group_512_1024() ; CHECK-NEXT: ret void @@ -111,7 +111,7 @@ define amdgpu_kernel void @kernel_512_512() #6 { ; Called from kernels with 128,256 and 64,128 => 64,256 define internal void @default_to_64_256() { ; CHECK-LABEL: define {{[^@]+}}@default_to_64_256 -; CHECK-SAME: () #[[ATTR6:[0-9]+]] { +; CHECK-SAME: () #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -121,7 +121,7 @@ define internal void @default_to_64_256() { ; this should probably be illegal. 
define amdgpu_kernel void @kernel_128_256() #3 { ; CHECK-LABEL: define {{[^@]+}}@kernel_128_256 -; CHECK-SAME: () #[[ATTR7:[0-9]+]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: call void @default_to_64_256() ; CHECK-NEXT: ret void ; @@ -153,7 +153,7 @@ define internal void @merge_cycle_1() #3 { define amdgpu_kernel void @kernel_64_256() #7 { ; CHECK-LABEL: define {{[^@]+}}@kernel_64_256 -; CHECK-SAME: () #[[ATTR8:[0-9]+]] { +; CHECK-SAME: () #[[ATTR7]] { ; CHECK-NEXT: call void @merge_cycle_0() ; CHECK-NEXT: call void @default_captured_address() ; CHECK-NEXT: call void @externally_visible_default() @@ -188,7 +188,7 @@ define void @externally_visible_default() { ; 1,1024 -> 64,256 define internal i32 @bitcasted_function() { ; CHECK-LABEL: define {{[^@]+}}@bitcasted_function -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR7]] { ; CHECK-NEXT: ret i32 0 ; ret i32 0 @@ -207,9 +207,9 @@ attributes #7 = { "amdgpu-flat-work-group-size"="64,256" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" 
"amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-agpr" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" 
"amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" 
"uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll index eb4cf5c063d101..6a909f52082d67 100644 --- a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll @@ -117,7 +117,7 @@ define amdgpu_kernel void @kernel_2_9() #6 { define amdgpu_kernel void @kernel_9_9() #7 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_9_9 -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: call void @default_to_2_9() ; CHECK-NEXT: call void @flat_group_9_10() ; CHECK-NEXT: ret void @@ -140,7 +140,7 @@ define internal void @default_to_1_8_b() { ; this should probably be illegal. 
define amdgpu_kernel void @kernel_2_8() #4 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_2_8 -; CHECK-SAME: () #[[ATTR7:[0-9]+]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: call void @default_to_1_8_a() ; CHECK-NEXT: call void @default_to_1_8_b() ; CHECK-NEXT: ret void @@ -153,7 +153,7 @@ define amdgpu_kernel void @kernel_2_8() #4 { ; 1,2 -> 2,2 define internal void @merge_cycle_0() #1 { ; CHECK-LABEL: define internal void @merge_cycle_0 -; CHECK-SAME: () #[[ATTR5]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: call void @merge_cycle_1() ; CHECK-NEXT: ret void ; @@ -165,7 +165,7 @@ define internal void @merge_cycle_0() #1 { ; 2,8 -> 2,8 define internal void @merge_cycle_1() #4 { ; CHECK-LABEL: define internal void @merge_cycle_1 -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: call void @merge_cycle_0() ; CHECK-NEXT: ret void ; @@ -210,7 +210,7 @@ define void @externally_visible_default() { ; 1,10 -> 3,8 define internal i32 @bitcasted_function() { ; CHECK-LABEL: define internal i32 @bitcasted_function -; CHECK-SAME: () #[[ATTR10:[0-9]+]] { +; CHECK-SAME: () #[[ATTR8]] { ; CHECK-NEXT: ret i32 0 ; ret i32 0 @@ -235,7 +235,7 @@ define internal void @called_from_invalid_bounds_1() { ; Invalid range for amdgpu-waves-per-eu define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_0_8 -; CHECK-SAME: () #[[ATTR11:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @called_from_invalid_bounds_0() ; CHECK-NEXT: ret void ; @@ -246,7 +246,7 @@ define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 { ; Invalid range for amdgpu-waves-per-eu define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_1_123 -; CHECK-SAME: () #[[ATTR12:[0-9]+]] { +; CHECK-SAME: () #[[ATTR11:[0-9]+]] { ; CHECK-NEXT: call void @called_from_invalid_bounds_1() ; CHECK-NEXT: ret void ; @@ -260,7 +260,7 @@ define 
amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 { ; -> 2,10 define void @larger_group_size_implies_lower_minimum() #11 { ; CHECK-LABEL: define void @larger_group_size_implies_lower_minimum -; CHECK-SAME: () #[[ATTR13:[0-9]+]] { +; CHECK-SAME: () #[[ATTR12:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -268,7 +268,7 @@ define void @larger_group_size_implies_lower_minimum() #11 { define amdgpu_kernel void @kernel_3_6() #12 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_3_6 -; CHECK-SAME: () #[[ATTR14:[0-9]+]] { +; CHECK-SAME: () #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: call void @larger_group_size_implies_lower_minimum() ; CHECK-NEXT: ret void ; @@ -279,7 +279,7 @@ define amdgpu_kernel void @kernel_3_6() #12 { ; 3,6 -> 6,9 define internal void @refine_upper_func_3_6() #13 { ; CHECK-LABEL: define internal void @refine_upper_func_3_6 -; CHECK-SAME: () #[[ATTR15:[0-9]+]] { +; CHECK-SAME: () #[[ATTR14:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -288,7 +288,7 @@ define internal void @refine_upper_func_3_6() #13 { ; 4,8 -> 6,8 define internal void @refine_lower_func_4_8() #14 { ; CHECK-LABEL: define internal void @refine_lower_func_4_8 -; CHECK-SAME: () #[[ATTR16:[0-9]+]] { +; CHECK-SAME: () #[[ATTR15:[0-9]+]] { ; CHECK-NEXT: call void @refine_upper_func_3_6() ; CHECK-NEXT: ret void ; @@ -298,7 +298,7 @@ define internal void @refine_lower_func_4_8() #14 { define amdgpu_kernel void @kernel_foo_6_8() #15 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_foo_6_8 -; CHECK-SAME: () #[[ATTR16]] { +; CHECK-SAME: () #[[ATTR16:[0-9]+]] { ; CHECK-NEXT: call void @refine_upper_func_3_6() ; CHECK-NEXT: call void @refine_lower_func_4_8() ; CHECK-NEXT: call void @func_9_10_a() @@ -340,7 +340,7 @@ define internal void @func_9_10_a() #18 { ; 9,10 -> 9,9 define internal void @func_9_10_b() #18 { ; CHECK-LABEL: define internal void @func_9_10_b -; CHECK-SAME: () #[[ATTR20:[0-9]+]] { +; CHECK-SAME: () #[[ATTR19]] { ; CHECK-NEXT: ret void ; ret void @@ -348,7 +348,7 @@ define 
internal void @func_9_10_b() #18 { define amdgpu_kernel void @kernel_bar_8_9() #19 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_bar_8_9 -; CHECK-SAME: () #[[ATTR21:[0-9]+]] { +; CHECK-SAME: () #[[ATTR20:[0-9]+]] { ; CHECK-NEXT: call void @refine_upper_func_3_6() ; CHECK-NEXT: call void @func_5_5() ; CHECK-NEXT: call void @func_9_10_b() @@ -404,21 +404,20 @@ attributes #19 = { "amdgpu-waves-per-eu"="8,9" } ; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" 
"amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,2" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" 
"uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" 
"amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="0,8" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR14]] = { 
"amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,9" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" 
"amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" 
"amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" 
"amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR21]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" 
"uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll index 9758afa3b49912..72ae3966416f4e 100644 --- a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll @@ -7,7 +7,7 @@ @global.2 = internal addrspace(1) global %struct.foo { %struct.pluto zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 } ;. -; CHECK: @[[GLOBAL_2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(1) global [[STRUCT_FOO:%.*]] { [[STRUCT_PLUTO:%.*]] zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 } +; CHECK: @global.2 = internal addrspace(1) global %struct.foo { %struct.pluto zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 } ;. define void @hoge() { ; CHECK-LABEL: define void @hoge @@ -19,5 +19,5 @@ define void @hoge() { ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll index 8792e60bb0ca16..2850612d700817 100644 --- a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll @@ -178,7 +178,7 @@ define internal void @mutual_recursion_1(i16 %arg) { define amdgpu_kernel void @kernel_lds_recursion() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_lds_recursion( -; CHECK-SAME: ) #[[ATTR2]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] { +; CHECK-SAME: ) #[[ATTR5:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds) ] ; CHECK-NEXT: call void @mutual_recursion_0(i16 0) ; CHECK-NEXT: ret void @@ -191,13 +191,14 @@ define amdgpu_kernel void @kernel_lds_recursion() { !1 = !{i32 1, !"amdhsa_code_object_version", i32 400} ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" 
"uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" 
"amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-lds-size"="2" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. 
; CHECK: [[META0]] = !{i32 0, i32 1} ; CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll index 5326c9e2c12f08..b2006d1a1f302a 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll @@ -100,8 +100,13 @@ entry: } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; NO: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; NO: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" 
"amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +;. +; OW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } +;. +; CW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
; NO: [[META0]] = !{ptr @bar1, ptr @bar2} diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index fabdfff5272940..59d7fe107ee537 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -81,7 +81,7 @@ define amdgpu_kernel void @test_simple_indirect_call() { ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" 
"amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. ; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index c8e3f97f61515f..4993df7e1ba487 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -826,5 +826,5 @@ entry: ; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]] ; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind } -; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nounwind memory(read) "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nounwind memory(read) "uniform-work-group-size"="false" } attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll index 2ab48479fa1b08..ab5306bc2452da 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -7,7 +7,7 @@ @x = global i32 0 ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. define void @foo() #0 { ; CHECK-LABEL: define {{[^@]+}}@foo @@ -21,7 +21,7 @@ define void @foo() #0 { define amdgpu_kernel void @kernel1() #1 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: ret void ; @@ -31,6 +31,5 @@ define amdgpu_kernel void @kernel1() #1 { attributes #0 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll index cc58d34a8b255b..da0234c90363d3 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -2,8 +2,8 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor < %s | FileCheck %s ;. -; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr null -; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @G1 = global ptr null +; CHECK: @G2 = global i32 0 ;. define weak void @weak() { ; CHECK-LABEL: define {{[^@]+}}@weak @@ -87,7 +87,7 @@ define internal void @internal2() { define amdgpu_kernel void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 -; CHECK-SAME: () #[[ATTR3]] { +; CHECK-SAME: () #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: call void @internal2() ; CHECK-NEXT: ret void ; @@ -97,8 +97,9 @@ define amdgpu_kernel void @kernel2() #0 { attributes #0 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" 
"amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll index 33298bde89b975..9fe753fec0f913 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -7,7 +7,7 @@ @x = global i32 0 ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. define void @func1() #0 { ; CHECK-LABEL: define {{[^@]+}}@func1 @@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 { attributes #2 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll index 6fe85cd7c9e654..f0e0df00feff6b 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -7,7 +7,7 @@ @x = global i32 0 ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. define void @func() #0 { ; CHECK-LABEL: define {{[^@]+}}@func @@ -31,7 +31,7 @@ define amdgpu_kernel void @kernel1() #1 { define amdgpu_kernel void @kernel2() #2 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 -; CHECK-SAME: () #[[ATTR2:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @func() ; CHECK-NEXT: ret void ; @@ -41,7 +41,6 @@ define amdgpu_kernel void @kernel2() #2 { attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" 
"amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll index f6ab402e012325..de83f91fce261d 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll @@ -5,7 +5,7 @@ ; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. define void @func() #0 { ; CHECK-LABEL: define {{[^@]+}}@func @@ -52,8 +52,8 @@ attributes #0 = { nounwind } attributes #1 = { "uniform-work-group-size"="false" } attributes #2 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { nounwind "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size"="true" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll index 37e3376ef9d500..dc19f4d879e86d 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 { attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll index 7f83686bc756e8..51f060a342fa1e 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -3,7 +3,7 @@ @x = global i32 0 ;. -; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @x = global i32 0 ;. 
define void @func1() { ; CHECK-LABEL: define {{[^@]+}}@func1 @@ -49,7 +49,7 @@ define void @func3() { define amdgpu_kernel void @kernel3() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel3 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: call void @func2() ; CHECK-NEXT: call void @func3() ; CHECK-NEXT: ret void @@ -61,6 +61,5 @@ define amdgpu_kernel void @kernel3() #0 { attributes #0 = { "uniform-work-group-size"="false" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" 
"amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/ARM/dagcombine-ld-op-st.ll b/llvm/test/CodeGen/ARM/dagcombine-ld-op-st.ll new file mode 100644 index 00000000000000..efdfa10f7ca07f --- /dev/null +++ b/llvm/test/CodeGen/ARM/dagcombine-ld-op-st.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple armv7 -O1 | FileCheck %s -check-prefix=CHECK-LE-NORMAL +; RUN: llc < %s -mtriple armv7 -O1 -combiner-reduce-load-op-store-width-force-narrowing-profitable=1 | FileCheck %s -check-prefix=CHECK-LE-NARROW +; RUN: llc < %s -mtriple armv7eb -O1 | FileCheck %s -check-prefix=CHECK-BE-NORMAL +; RUN: llc < %s -mtriple armv7eb -O1 -combiner-reduce-load-op-store-width-force-narrowing-profitable=1 | FileCheck %s -check-prefix=CHECK-BE-NARROW + +; This is a reproducer for a bug when DAGCombiner::ReduceLoadOpStoreWidth +; would end up narrowing the load-op-store sequence into this SDNode sequence +; for little-endian +; +; t18: i32,ch = load<(load (s32) from %ir.p1 + 8, align 8)> t0, t17, undef:i32 +; t20: i32 = or t18, Constant:i32<65534> +; t21: ch = store<(store (s32) into %ir.p1 + 8, align 8)> t18:1, t20, t17, undef:i32 +; +; This was wrong since it accesses memory above %ir.p1+9 which is outside the +; "store size" for the original store. +; +; For big-endian we used to hit an assertion due to passing a negative offset +; to getMemBasePlusOffset (at least after commit 3e1b55cafc95d4ef4, while +; before that commit we got load/store instructions that accessed memory at a +; negative offset from %p1). 
+; +define i16 @test(ptr %p1) { +; CHECK-LE-NORMAL-LABEL: test: +; CHECK-LE-NORMAL: @ %bb.0: @ %entry +; CHECK-LE-NORMAL-NEXT: ldrh r1, [r0, #8] +; CHECK-LE-NORMAL-NEXT: movw r2, #65534 +; CHECK-LE-NORMAL-NEXT: orr r1, r1, r2 +; CHECK-LE-NORMAL-NEXT: strh r1, [r0, #8] +; CHECK-LE-NORMAL-NEXT: mov r0, #0 +; CHECK-LE-NORMAL-NEXT: bx lr +; +; CHECK-LE-NARROW-LABEL: test: +; CHECK-LE-NARROW: @ %bb.0: @ %entry +; CHECK-LE-NARROW-NEXT: ldrh r1, [r0, #8] +; CHECK-LE-NARROW-NEXT: movw r2, #65534 +; CHECK-LE-NARROW-NEXT: orr r1, r1, r2 +; CHECK-LE-NARROW-NEXT: strh r1, [r0, #8] +; CHECK-LE-NARROW-NEXT: mov r0, #0 +; CHECK-LE-NARROW-NEXT: bx lr +; +; CHECK-BE-NORMAL-LABEL: test: +; CHECK-BE-NORMAL: @ %bb.0: @ %entry +; CHECK-BE-NORMAL-NEXT: ldrh r1, [r0] +; CHECK-BE-NORMAL-NEXT: movw r2, #65534 +; CHECK-BE-NORMAL-NEXT: orr r1, r1, r2 +; CHECK-BE-NORMAL-NEXT: strh r1, [r0] +; CHECK-BE-NORMAL-NEXT: mov r0, #0 +; CHECK-BE-NORMAL-NEXT: bx lr +; +; CHECK-BE-NARROW-LABEL: test: +; CHECK-BE-NARROW: @ %bb.0: @ %entry +; CHECK-BE-NARROW-NEXT: ldrh r1, [r0] +; CHECK-BE-NARROW-NEXT: movw r2, #65534 +; CHECK-BE-NARROW-NEXT: orr r1, r1, r2 +; CHECK-BE-NARROW-NEXT: strh r1, [r0] +; CHECK-BE-NARROW-NEXT: mov r0, #0 +; CHECK-BE-NARROW-NEXT: bx lr +entry: + %load = load i80, ptr %p1, align 32 + %mask = shl i80 -1, 65 + %op = or i80 %load, %mask + store i80 %op, ptr %p1, align 32 + ret i16 0 +} + diff --git a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll index 156a34db827456..77fe1783bb5d5e 100644 --- a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll +++ b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -riscv-force-enable-global-merge-external-globals \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=SMALL-DATA -; RUN: llc 
-mtriple=riscv64 -global-merge-min-data-size=0 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=SMALL-DATA +; RUN: llc -mtriple=riscv64 -global-merge-min-data-size=0 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=MINSIZE @ig1 = internal global i32 0, align 4 diff --git a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll index e41f14d394b7ca..c29749c17a5b5c 100644 --- a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll +++ b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -riscv-force-enable-global-merge-external-globals \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=SMALL-DATA -; RUN: llc -mtriple=riscv64 -global-merge-min-data-size=5 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=SMALL-DATA +; RUN: llc -mtriple=riscv64 -global-merge-min-data-size=5 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=MINSIZE @ig1 = internal global i32 0, align 4 diff --git a/llvm/test/CodeGen/RISCV/global-merge-minsize.ll b/llvm/test/CodeGen/RISCV/global-merge-minsize.ll index 1d65d9d1732ba3..915dde388cffdc 100644 --- a/llvm/test/CodeGen/RISCV/global-merge-minsize.ll +++ b/llvm/test/CodeGen/RISCV/global-merge-minsize.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -riscv-force-enable-global-merge-external-globals \ +; RUN: llc -mtriple=riscv32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32 ; RUN: llc -mtriple=riscv32 
-global-merge-min-data-size=5 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32-MINSIZE +; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32-MINSIZE @ig1 = internal global i32 0, align 4 @ig2 = internal global i32 0, align 4 diff --git a/llvm/test/CodeGen/RISCV/global-merge-offset.ll b/llvm/test/CodeGen/RISCV/global-merge-offset.ll index bb8264ee438545..c1074bc8ca97ed 100644 --- a/llvm/test/CodeGen/RISCV/global-merge-offset.ll +++ b/llvm/test/CodeGen/RISCV/global-merge-offset.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: sed 's/ArrSize/100/g' %s | llc -mtriple=riscv32 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs | FileCheck %s +; RUN: -verify-machineinstrs | FileCheck %s ; RUN: sed 's/ArrSize/100/g' %s | llc -mtriple=riscv64 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs | FileCheck %s +; RUN: -verify-machineinstrs | FileCheck %s ; RUN: sed 's/ArrSize/101/g' %s | llc -mtriple=riscv32 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG +; RUN: -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG ; RUN: sed 's/ArrSize/101/g' %s | llc -mtriple=riscv64 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG +; RUN: -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG ; This test demonstrates that the MaxOffset is set correctly for RISC-V by ; constructing an input that is at the limit and comparing. 
diff --git a/llvm/test/CodeGen/RISCV/global-merge.ll b/llvm/test/CodeGen/RISCV/global-merge.ll index 327a6b54f4be25..31b3aa81b58dd9 100644 --- a/llvm/test/CodeGen/RISCV/global-merge.ll +++ b/llvm/test/CodeGen/RISCV/global-merge.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -global-merge-on-external=false \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -global-merge-on-external=false \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=CHECK-WEXTERN %s -; RUN: llc -mtriple=riscv64 \ -; RUN: -riscv-force-enable-global-merge-external-globals -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=CHECK-WEXTERN %s @ig1 = internal global i32 0, align 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 1c6e1a37fa8af5..ebcea741a2e8bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -1021,3 +1021,55 @@ define <8 x i32> @shuffle_repeat4_singlesrc_e32(<8 x i32> %v) { %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> ret <8 x i32> %out } + +define <8 x i32> @shuffle_zipeven_v8i32(<8 x i32> %v1, <8 x i32> %v2) { +; CHECK-LABEL: shuffle_zipeven_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v1, 
<8 x i32> %v2, <8 x i32> + ret <8 x i32> %out +} + +define <8 x i32> @shuffle_zipodd_v8i32(<8 x i32> %v1, <8 x i32> %v2) { +; CHECK-LABEL: shuffle_zipodd_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v10, v8, 1, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> + ret <8 x i32> %out +} + +define <16 x i64> @shuffle_zipeven_v16i64(<16 x i64> %v1, <16 x i64> %v2) { +; CHECK-LABEL: shuffle_zipeven_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 11 +; CHECK-NEXT: addi a0, a0, -1366 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t +; CHECK-NEXT: ret + %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> + ret <16 x i64> %out +} + +define <16 x i64> @shuffle_zipodd_v16i64(<16 x i64> %v1, <16 x i64> %v2) { +; CHECK-LABEL: shuffle_zipodd_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v16, v8, 1, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> + ret <16 x i64> %out +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 0215e6a80d09a4..e13482d23a26f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -982,6 +982,107 @@ define @vnsrl_wi( %a, %b ret %2 } +define @vnsrl_wx( %a, %b, iXLen %c, iXLen %vl) { +; NOVLOPT-LABEL: vnsrl_wx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsrl.wx v11, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsrl_wx: +; 
VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLOPT-NEXT: vnsrl.wx v11, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsrl.nxv4i16.nxv4i32( poison, %a, iXLen %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vnsrl_wv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vnsrl_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsrl.wv v12, v8, v11 +; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsrl_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLOPT-NEXT: vnsrl.wv v12, v8, v11 +; VLOPT-NEXT: vadd.vv v8, v12, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsrl.nxv4i16.nxv4i32.nxv4i16( poison, %a, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vnsra_wi( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vnsra_wi: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsra.wi v11, v8, 5 +; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsra_wi: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLOPT-NEXT: vnsra.wi v11, v8, 5 +; VLOPT-NEXT: vadd.vv v8, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsra.nxv4i16.nxv4i32( poison, %a, iXLen 5, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vnsra_wx( %a, %b, iXLen %c, iXLen %vl) { +; NOVLOPT-LABEL: vnsra_wx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsra.wx v11, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsra_wx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e16, m1, 
ta, ma +; VLOPT-NEXT: vnsra.wx v11, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsra.nxv4i16.nxv4i32( poison, %a, iXLen %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vnsra_wv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vnsra_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; NOVLOPT-NEXT: vnsra.wv v12, v8, v11 +; NOVLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnsra_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLOPT-NEXT: vnsra.wv v12, v8, v11 +; VLOPT-NEXT: vadd.vv v8, v12, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnsra.nxv4i16.nxv4i32.nxv4i16( poison, %a, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i16.nxv4i16( poison, %1, %b, iXLen %vl) + ret %2 +} + + define @vminu_vv( %a, %b, iXLen %vl) { ; NOVLOPT-LABEL: vminu_vv: ; NOVLOPT: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir index 1071ee53610854..814894f4acea3c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir @@ -392,13 +392,12 @@ body: | %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVZEXT_VF8_M1 $noreg, %x, 1, 6 /* e64 */, 0 ... 
-# TODO: VNSRL_WV isn't yet a supported instruction for VL reduction --- name: vnop_wv_vd body: | bb.0: ; CHECK-LABEL: name: vnop_wv_vd - ; CHECK: early-clobber %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: early-clobber %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %x:vr = PseudoVNSRL_WV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 @@ -483,3 +482,64 @@ body: | %x:vr = PseudoVADD_VV_MF4 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 %y:vr = PseudoVNSRL_WV_MF2 $noreg, $noreg, %x, 1, 3 /* e8 */, 0 ... +--- +name: vseN_v +body: | + bb.0: + ; CHECK-LABEL: name: vseN_v + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSE8_V_M1 %x, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + PseudoVSE8_V_M1 %x, $noreg, 1, 3 /* e8 */ +... +--- +name: vseN_v_incompatible_eew +body: | + bb.0: + ; CHECK-LABEL: name: vseN_v_incompatible_eew + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSE8_V_M1 %x, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 + PseudoVSE8_V_M1 %x, $noreg, 1, 3 /* e8 */ +... +--- +name: vseN_v_incompatible_emul +body: | + bb.0: + ; CHECK-LABEL: name: vseN_v_incompatible_emul + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSE8_V_MF2 %x, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + PseudoVSE8_V_MF2 %x, $noreg, 1, 3 /* e8 */ +... 
+--- +name: vsseN_v +body: | + bb.0: + ; CHECK-LABEL: name: vsseN_v + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSSE8_V_M1 %x, $noreg, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + PseudoVSSE8_V_M1 %x, $noreg, $noreg, 1, 3 /* e8 */ +... +--- +name: vsseN_v_incompatible_eew +body: | + bb.0: + ; CHECK-LABEL: name: vsseN_v_incompatible_eew + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSSE8_V_M1 %x, $noreg, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0 + PseudoVSSE8_V_M1 %x, $noreg, $noreg, 1, 3 /* e8 */ +... +--- +name: vsseN_v_incompatible_emul +body: | + bb.0: + ; CHECK-LABEL: name: vsseN_v_incompatible_emul + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSSE8_V_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + PseudoVSSE8_V_MF2 %x, $noreg, $noreg, 1, 3 /* e8 */ +... 
+ diff --git a/llvm/test/CodeGen/SPIRV/constant/local-arbitrary-width-integers-constants-type-promotion.ll b/llvm/test/CodeGen/SPIRV/constant/local-arbitrary-width-integers-constants-type-promotion.ll index 06ab469e700778..432fbe0a22975d 100644 --- a/llvm/test/CodeGen/SPIRV/constant/local-arbitrary-width-integers-constants-type-promotion.ll +++ b/llvm/test/CodeGen/SPIRV/constant/local-arbitrary-width-integers-constants-type-promotion.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + define i4 @getConstantI4() { ret i4 2 ; i4 => OpTypeInt 8 } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp-simple-hierarchy.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp-simple-hierarchy.ll index d5a8fb3e7baafa..368c5d4a32980e 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp-simple-hierarchy.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp-simple-hierarchy.ll @@ -1,6 +1,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-DAG: OpName %[[I9:.*]] "_ZN13BaseIncrement9incrementEPi" ; CHECK-DAG: OpName %[[I29:.*]] "_ZN12IncrementBy29incrementEPi" ; CHECK-DAG: OpName %[[I49:.*]] "_ZN12IncrementBy49incrementEPi" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll index 6aeb29df9f7bd4..75ad382f05ffd5 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll +++ 
b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll @@ -1,6 +1,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-DAG: OpCapability FunctionPointersINTEL ; CHECK-DAG: OpCapability Int64 ; CHECK: OpExtension "SPV_INTEL_function_pointers" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll index b238b07bbc04ec..d38de216c22301 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fun-ptr-addrcast.ll @@ -5,6 +5,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - --spirv-ext=+SPV_INTEL_function_pointers | FileCheck %s ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; Running with -verify-machineinstrs would lead to "Reading virtual register without a def" ; error, because OpConstantFunctionPointerINTEL forward-refers to a function definition. 
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_cooperative_matrix/cooperative_matrix.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_cooperative_matrix/cooperative_matrix.ll index e290c1eaeabad8..45e71d44fbf60d 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_cooperative_matrix/cooperative_matrix.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_cooperative_matrix/cooperative_matrix.ll @@ -2,6 +2,9 @@ ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_cooperative_matrix %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_cooperative_matrix %s -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-ERROR: LLVM ERROR: OpTypeCooperativeMatrixKHR type requires the following SPIR-V extension: SPV_KHR_cooperative_matrix ; CHECK: OpCapability CooperativeMatrixKHR diff --git a/llvm/test/CodeGen/SPIRV/instructions/vector-shuffle.ll b/llvm/test/CodeGen/SPIRV/instructions/vector-shuffle.ll index e22e02f76d01d5..9c35d4ee9b1c5d 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/vector-shuffle.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/vector-shuffle.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-DAG: OpName [[SHFv4:%.+]] "shuffle_v4" ; CHECK-DAG: OpName [[INSv4:%.+]] "insert_v4" ; CHECK-DAG: OpName [[EXTv4:%.+]] "extract_v4" diff --git a/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll b/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll index 485da19fd6da0a..d897ccd02ed964 100644 --- a/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll +++ b/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; TODO: This test currently fails with 
LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK: OpDecorate %[[#SpecConst:]] SpecId 0 ; CHECK: %[[#SpecConst]] = OpSpecConstant %[[#]] 70 ; CHECK: %[[#]] = OpPhi %[[#]] %[[#]] %[[#]] %[[#SpecConst]] %[[#]] diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll index fe71ce862dfc30..481bad9a26b7bc 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse-subbyte.ll @@ -7,6 +7,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_KHR_bit_instructions -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_KHR_bit_instructions -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-SPIRV: OpCapability BitInstructions ; CHECK-SPIRV: OpExtension "SPV_KHR_bit_instructions" ; CHECK-SPIRV: %[[#CharTy:]] = OpTypeInt 8 0 diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse_i2.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse_i2.ll index 1840ad5411f477..9d6a5e0dc0d324 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse_i2.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpBitReverse_i2.ll @@ -7,6 +7,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_KHR_bit_instructions -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_KHR_bit_instructions -o - -filetype=obj | spirv-val %} +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-SPIRV: OpCapability BitInstructions ; CHECK-SPIRV: OpExtension "SPV_KHR_bit_instructions" ; CHECK-SPIRV: %[[#CharTy:]] = OpTypeInt 8 0 diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy.ll 
b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy.ll index d2d5483ed36185..11b613b956e06d 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGroupAsyncCopy.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-SPIRV-DAG: %[[#]] = OpGroupAsyncCopy %[[#]] %[[#Scope:]] ; CHECK-SPIRV-DAG: %[[#Scope]] = OpConstant %[[#]] diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpVectorExtractDynamic.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpVectorExtractDynamic.ll index c770ab7f8fb5b8..03731b6d67565f 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpVectorExtractDynamic.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpVectorExtractDynamic.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-SPIRV: OpName %[[#vec:]] "vec" ; CHECK-SPIRV: OpName %[[#index:]] "index" ; CHECK-SPIRV: OpName %[[#res:]] "res" diff --git a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll index 437e161864eca5..79c2824c3dde1f 100644 --- a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll +++ b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll @@ -8,6 +8,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT +; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled +; XFAIL: expensive_checks + ; CHECK-DAG: OpName %[[#Struct:]] "struct" ; CHECK-DAG: OpName %[[#Arg:]] "arg" ; CHECK-DAG: OpName %[[#QArg:]] "qarg" diff --git a/llvm/test/CodeGen/X86/store_op_load_fold.ll b/llvm/test/CodeGen/X86/store_op_load_fold.ll index 
bd7068535eabcc..2915d1a7d7ae12 100644 --- a/llvm/test/CodeGen/X86/store_op_load_fold.ll +++ b/llvm/test/CodeGen/X86/store_op_load_fold.ll @@ -23,7 +23,10 @@ define void @test2() nounwind uwtable ssp { ; CHECK-LABEL: test2: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl L_s2$non_lazy_ptr, %eax -; CHECK-NEXT: andl $-262144, 20(%eax) ## imm = 0xFFFC0000 +; CHECK-NEXT: movzbl 22(%eax), %ecx +; CHECK-NEXT: andl $-4, %ecx +; CHECK-NEXT: movb %cl, 22(%eax) +; CHECK-NEXT: movw $0, 20(%eax) ; CHECK-NEXT: retl %bf.load35 = load i56, ptr getelementptr inbounds (%struct.S2, ptr @s2, i32 0, i32 5), align 16 %bf.clear36 = and i56 %bf.load35, -1125895611875329 diff --git a/llvm/test/CodeGen/Xtensa/vararg.ll b/llvm/test/CodeGen/Xtensa/vararg.ll new file mode 100644 index 00000000000000..d85752e11fa6bb --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/vararg.ll @@ -0,0 +1,532 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s --mtriple=xtensa | FileCheck %s + +define void @vararg(...) { +; CHECK-LABEL: vararg: +; CHECK: .cfi_startproc +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: s32i a7, a1, 20 +; CHECK-NEXT: s32i a6, a1, 16 +; CHECK-NEXT: s32i a5, a1, 12 +; CHECK-NEXT: s32i a4, a1, 8 +; CHECK-NEXT: s32i a3, a1, 4 +; CHECK-NEXT: s32i a2, a1, 0 +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + ret void +} + +declare void @llvm.va_start(ptr) nounwind +declare void @llvm.va_end(ptr) nounwind +declare void @f_i32(i32) nounwind +declare void @f_i64(i64) nounwind + +define void @vararg_fixed_0(...) 
nounwind { +; CHECK-LABEL: vararg_fixed_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -48 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 32 +; CHECK-NEXT: s32i a6, a1, 28 +; CHECK-NEXT: s32i a5, a1, 24 +; CHECK-NEXT: s32i a4, a1, 20 +; CHECK-NEXT: s32i a3, a1, 16 +; CHECK-NEXT: s32i a2, a1, 12 +; CHECK-NEXT: addi a10, a1, 12 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 48 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 4 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 40 +; CHECK-NEXT: .LBB1_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a2, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI1_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB1_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB1_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB1_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB1_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB1_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB1_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB1_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB1_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; 
CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI1_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 48 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %0 = va_arg ptr %list, i32 + call void @f_i32(i32 %0) + %1 = va_arg ptr %list, i64 + call void @f_i64(i64 %1) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_1(i32 %a1, ...) nounwind { +; CHECK-LABEL: vararg_fixed_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 28 +; CHECK-NEXT: s32i a6, a1, 24 +; CHECK-NEXT: s32i a5, a1, 20 +; CHECK-NEXT: s32i a4, a1, 16 +; CHECK-NEXT: s32i a3, a1, 12 +; CHECK-NEXT: addi a10, a1, 12 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 8 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB2_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 44 +; CHECK-NEXT: .LBB2_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI2_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB2_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB2_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB2_8 +; CHECK-NEXT: # %bb.7: 
+; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB2_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB2_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB2_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB2_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB2_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI2_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_4(i32 %a1, i32 %a2, i32 %a3, i32 %a4, ...) 
nounwind { +; CHECK-LABEL: vararg_fixed_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 16 +; CHECK-NEXT: s32i a6, a1, 12 +; CHECK-NEXT: addi a10, a1, 12 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 20 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB3_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB3_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 56 +; CHECK-NEXT: .LBB3_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI3_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB3_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB3_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB3_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB3_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB3_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB3_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB3_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB3_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI3_1 +; 
CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_5(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind { +; CHECK-LABEL: vararg_fixed_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 12 +; CHECK-NEXT: addi a9, a1, 12 +; CHECK-NEXT: s32i a9, a1, 4 +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a12, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB4_2: # %entry +; CHECK-NEXT: blt a12, a12, .LBB4_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: or a9, a12, a12 +; CHECK-NEXT: j .LBB4_5 +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: movi a9, 60 +; CHECK-NEXT: .LBB4_5: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI4_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB4_7 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB4_7: # %entry +; CHECK-NEXT: bge a12, a10, .LBB4_9 +; CHECK-NEXT: # %bb.8: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB4_9: # %entry +; 
CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB4_11 +; CHECK-NEXT: # %bb.10: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB4_11: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB4_13 +; CHECK-NEXT: # %bb.12: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB4_13: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI4_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_6(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, ...) 
nounwind { +; CHECK-LABEL: vararg_fixed_6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: addi a10, a1, 0 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 36 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB5_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB5_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB5_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 72 +; CHECK-NEXT: .LBB5_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI5_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB5_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB5_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB5_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB5_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB5_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB5_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB5_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB5_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI5_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded 
Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_7(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, ...) nounwind { +; CHECK-LABEL: vararg_fixed_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: addi a10, a1, 0 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 20 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 36 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB6_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB6_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB6_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 72 +; CHECK-NEXT: .LBB6_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI6_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB6_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB6_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB6_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB6_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, 
a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB6_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB6_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB6_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB6_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI6_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} diff --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s index 512906e8a669ae..b0dba632e6ec5c 100644 --- a/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s +++ b/llvm/test/MC/AArch64/SME/streaming-mode-neon-negative.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme 2>&1 < %s| FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,-neon 2>&1 < %s| FileCheck %s // ------------------------------------------------------------------------- // // Check FABD is illegal in streaming mode @@ -12,7 +12,7 @@ fabd s0, s1, s2 // Check non-scalar v8.6a BFloat16 instructions are illegal in streaming mode bfcvtn v5.4h, v5.4s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: neon +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: bf16 neon // CHECK-NEXT: bfcvtn v5.4h, v5.4s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME/streaming-sve-feature.s 
b/llvm/test/MC/AArch64/SME/streaming-sve-feature.s index 90b1b6bd64ad6c..e4fdd09889f370 100644 --- a/llvm/test/MC/AArch64/SME/streaming-sve-feature.s +++ b/llvm/test/MC/AArch64/SME/streaming-sve-feature.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -triple=aarch64 -mattr=+sme < %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -triple=aarch64 -mattr=-neon,+sme < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme,-neon < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR // Verify NEON is disabled when targeting streaming mode, if it's not // explicitly requested. diff --git a/llvm/test/MC/AArch64/armv8a-fpmul.s b/llvm/test/MC/AArch64/armv8a-fpmul.s index 6f06ad52e0e6bd..3a451a91e53d80 100644 --- a/llvm/test/MC/AArch64/armv8a-fpmul.s +++ b/llvm/test/MC/AArch64/armv8a-fpmul.s @@ -3,7 +3,7 @@ // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOFP16FML // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,+fullfp16 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOFP16FML // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,+fp16fml,-fullfp16 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOFP16FML -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-neon,+fp16fml < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-NEON +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,+fp16fml,-neon < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-FP16FML-NOR-NEON // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-neon < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-FP16FML-NOR-NEON //------------------------------------------------------------------------------ @@ -37,15 +37,6 @@ FMLSL2 V0.4S, V1.4H, V2.4H //CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}} //CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}} -//CHECK-NO-NEON: 
error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} - //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} @@ -111,23 +102,6 @@ fmlsl2 V0.4s, v1.4h, v2.h[5] //CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}} //CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} -//CHECK-NO-NEON: error: instruction requires: neon{{$}} - //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} //CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}} diff --git 
a/llvm/test/MC/M68k/Control/bsr.s b/llvm/test/MC/M68k/Control/bsr.s index a70c7fb9a96edf..6b7a7d5c6ecd94 100644 --- a/llvm/test/MC/M68k/Control/bsr.s +++ b/llvm/test/MC/M68k/Control/bsr.s @@ -8,10 +8,6 @@ ; CHECK-SAME: encoding: [0x61,0x00,A,A] ; CHECK: fixup A - offset: 2, value: .LBB0_2, kind: FK_PCRel_2 bsr.w .LBB0_2 - ; CHECK: bsr.l .LBB0_3 - ; CHECK-SAME: encoding: [0x61,0xff,A,A,A,A] - ; CHECK: fixup A - offset: 2, value: .LBB0_3, kind: FK_PCRel_4 - bsr.l .LBB0_3 .LBB0_1: ; CHECK: add.l #0, %d0 ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x00] @@ -26,10 +22,3 @@ ; CHECK: rts ; CHECK-SAME: encoding: [0x4e,0x75] rts -.LBB0_3: - ; CHECK: add.l #1, %d0 - ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x01] - add.l #1, %d0 - ; CHECK: rts - ; CHECK-SAME: encoding: [0x4e,0x75] - rts diff --git a/llvm/test/MC/M68k/Control/bsr32.s b/llvm/test/MC/M68k/Control/bsr32.s new file mode 100644 index 00000000000000..58ed4a29a35318 --- /dev/null +++ b/llvm/test/MC/M68k/Control/bsr32.s @@ -0,0 +1,35 @@ +; RUN: llvm-mc -triple=m68k --mcpu=M68020 -show-encoding %s | FileCheck %s + + ; CHECK: bsr.b .LBB0_1 + ; CHECK-SAME: encoding: [0x61,A] + ; CHECK: fixup A - offset: 1, value: .LBB0_1-1, kind: FK_PCRel_1 + bsr.b .LBB0_1 + ; CHECK: bsr.w .LBB0_2 + ; CHECK-SAME: encoding: [0x61,0x00,A,A] + ; CHECK: fixup A - offset: 2, value: .LBB0_2, kind: FK_PCRel_2 + bsr.w .LBB0_2 + ; CHECK: bsr.l .LBB0_3 + ; CHECK-SAME: encoding: [0x61,0xff,A,A,A,A] + ; CHECK: fixup A - offset: 2, value: .LBB0_3, kind: FK_PCRel_4 + bsr.l .LBB0_3 +.LBB0_1: + ; CHECK: add.l #0, %d0 + ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x00] + add.l #0, %d0 + ; CHECK: rts + ; CHECK-SAME: encoding: [0x4e,0x75] + rts +.LBB0_2: + ; CHECK: add.l #1, %d0 + ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x01] + add.l #1, %d0 + ; CHECK: rts + ; CHECK-SAME: encoding: [0x4e,0x75] + rts +.LBB0_3: + ; CHECK: add.l #1, %d0 + ; CHECK-SAME: encoding: [0xd0,0xbc,0x00,0x00,0x00,0x01] + add.l #1, %d0 + ; CHECK: rts + ; 
CHECK-SAME: encoding: [0x4e,0x75] + rts diff --git a/llvm/test/MC/M68k/Relaxations/PIC/branch.s b/llvm/test/MC/M68k/Relaxations/PIC/branch.s new file mode 100644 index 00000000000000..5035ca3ce50a7b --- /dev/null +++ b/llvm/test/MC/M68k/Relaxations/PIC/branch.s @@ -0,0 +1,38 @@ +; RUN: llvm-mc -triple=m68k -motorola-integers -filetype=obj --position-independent < %s \ +; RUN: | llvm-objdump -d - | FileCheck %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; CHECK-LABEL: : +TIGHT: + ; CHECK: bra $7f + ; INSTR: bra .LBB0_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 + bra .LBB0_2 + .space 0x7F ; i8::MAX +.LBB0_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED: + ; CHECK: bra $82 + ; INSTR: bra .LBB1_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 + bra .LBB1_2 + .space 0x80 ; Greater than i8::MAX +.LBB1_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +ZERO: + ; CHECK: bra $2 + ; INSTR: bra .LBB3_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 + bra .LBB3_1 +.LBB3_1: + add.l #0, %d0 + rts + + diff --git a/llvm/test/MC/M68k/Relaxations/PIC/branch32.s b/llvm/test/MC/M68k/Relaxations/PIC/branch32.s new file mode 100644 index 00000000000000..7e40f055aff13d --- /dev/null +++ b/llvm/test/MC/M68k/Relaxations/PIC/branch32.s @@ -0,0 +1,53 @@ +; RUN: llvm-mc -triple=m68k --mcpu=M68020 -motorola-integers -filetype=obj --position-independent < %s \ +; RUN: | llvm-objdump -d - | FileCheck %s +; RUN: llvm-mc -triple m68k --mcpu=M68020 -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; Branch relocations are relaxed as part of object layout, so -show-encoding still +; shows them as 1-byte relocactions + +; CHECK-LABEL: : +TIGHT: + ; CHECK: bra $7f + ; INSTR: bra .LBB0_2 ; encoding: [0x60,A] + ; 
FIXUP: fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 + bra .LBB0_2 + .space 0x7F ; i8::MAX +.LBB0_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED: + ; CHECK: bra $82 + ; INSTR: bra .LBB1_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 + bra .LBB1_2 + .space 0x80 ; Greater than i8::MAX +.LBB1_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED_32: + ; CHECK: bra $ff + ; CHECK-NEXT: 00 00 + ; CHECK-NEXT: 80 04 + ; INSTR: bra .LBB2_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 + bra .LBB2_1 + .space 0x8000 ; Greater than i16::MAX. +.LBB2_1: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +ZERO: + ; CHECK: bra $2 + ; INSTR: bra .LBB3_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 + bra .LBB3_1 +.LBB3_1: + add.l #0, %d0 + rts + diff --git a/llvm/test/MC/M68k/Relaxations/PIC/bsr.s b/llvm/test/MC/M68k/Relaxations/PIC/bsr.s new file mode 100644 index 00000000000000..67adcd545450e9 --- /dev/null +++ b/llvm/test/MC/M68k/Relaxations/PIC/bsr.s @@ -0,0 +1,51 @@ +; RUN: llvm-mc -triple=m68k -motorola-integers -filetype=obj --position-independent < %s \ +; RUN: | llvm-objdump -d - | FileCheck %s + +; CHECK-LABEL: : +TIGHT: + ; CHECK: bsr.w $7a + bsr.w .LBB0_2 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 +.LBB0_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED: + ; CHECK: bsr.b $82 + bsr.b .LBB1_2 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 + move.l $0, $0 +.LBB1_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +ZERO: + ; CHECK: bsr.w $2 + bsr.w .LBB2_1 +.LBB2_1: + add.l #0, %d0 + rts diff --git a/llvm/test/MC/M68k/Relaxations/branch32.s 
b/llvm/test/MC/M68k/Relaxations/branch32.s new file mode 100644 index 00000000000000..ba5c371dada8de --- /dev/null +++ b/llvm/test/MC/M68k/Relaxations/branch32.s @@ -0,0 +1,50 @@ +; RUN: llvm-mc -triple=m68k --mcpu=M68020 -motorola-integers -filetype=obj < %s \ +; RUN: | llvm-objdump -d - | FileCheck %s +; RUN: llvm-mc -triple m68k --mcpu=M68020 -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; CHECK-LABEL: : +TIGHT: + ; CHECK: bra $7f + ; INSTR: bra .LBB0_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 + bra .LBB0_2 + .space 0x7F ; i8::MAX +.LBB0_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED: + ; CHECK: bra $82 + ; INSTR: bra .LBB1_2 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 + bra .LBB1_2 + .space 0x80 ; Greater than i8::MAX +.LBB1_2: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +RELAXED_32: + ; CHECK: bra $ff + ; CHECK-NEXT: 00 00 + ; CHECK-NEXT: 80 04 + ; INSTR: bra .LBB2_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 + bra .LBB2_1 + .space 0x8000 ; Greater than i16::MAX. 
+.LBB2_1: + add.l #0, %d0 + rts + +; CHECK-LABEL: : +ZERO: + ; CHECK: bra $2 + ; INSTR: bra .LBB3_1 ; encoding: [0x60,A] + ; FIXUP: fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 + bra .LBB3_1 +.LBB3_1: + add.l #0, %d0 + rts + diff --git a/llvm/test/MC/M68k/Relocations/PIC/data-abs.s b/llvm/test/MC/M68k/Relocations/PIC/data-abs.s new file mode 100644 index 00000000000000..0df09c8cb5ccb5 --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/data-abs.s @@ -0,0 +1,26 @@ +; RUN: llvm-mc -triple m68k -filetype=obj --position-independent %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_32 dst 0x0 +; INSTR: move.l dst, %d0 +; FIXUP: fixup A - offset: 2, value: dst, kind: FK_Data_4 +move.l dst, %d0 + +; Relocating immediate values + +; RELOC: R_68K_8 str8 0x0 +; INSTR: move.b #str8, (4,%sp) +; FIXUP: fixup A - offset: 3, value: str8, kind: FK_Data_1 +move.b #str8, (4,%sp) + +; RELOC: R_68K_16 str16 0x0 +; INSTR: move.w #str16, (4,%sp) +; FIXUP: fixup A - offset: 2, value: str16, kind: FK_Data_2 +move.w #str16, (4,%sp) + +; RELOC: R_68K_32 str32 0x0 +; INSTR: move.l #str32, (4,%sp) +; FIXUP: fixup A - offset: 2, value: str32, kind: FK_Data_4 +move.l #str32, (4,%sp) diff --git a/llvm/test/MC/M68k/Relocations/PIC/data-gotoff.s b/llvm/test/MC/M68k/Relocations/PIC/data-gotoff.s new file mode 100644 index 00000000000000..ca1ce52de1ca75 --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/data-gotoff.s @@ -0,0 +1,19 @@ +; RUN: llvm-mc -triple m68k -filetype=obj --position-independent %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_GOTOFF8 dst1 0x0 +; INSTR: move.l (dst1@GOTOFF,%a5,%d0), %d0 +; FIXUP: fixup A - 
offset: 3, value: dst1@GOTOFF, kind: FK_Data_1 +move.l (dst1@GOTOFF,%a5,%d0), %d0 + +; RELOC: R_68K_GOTOFF16 dst2 0x0 +; INSTR: move.l (dst2@GOTOFF,%a5), %d0 +; FIXUP: fixup A - offset: 2, value: dst2@GOTOFF, kind: FK_Data_2 +move.l (dst2@GOTOFF,%a5), %d0 + +; RELOC: R_68K_GOTPCREL16 dst3 0x0 +; INSTR: lea (dst3@GOTPCREL,%pc), %a5 +; FIXUP: fixup A - offset: 2, value: dst3@GOTPCREL, kind: FK_PCRel_2 +lea (dst3@GOTPCREL,%pc), %a5 diff --git a/llvm/test/MC/M68k/Relocations/PIC/data-gotpcrel.s b/llvm/test/MC/M68k/Relocations/PIC/data-gotpcrel.s new file mode 100644 index 00000000000000..9f79dd94f8198a --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/data-gotpcrel.s @@ -0,0 +1,14 @@ +; RUN: llvm-mc -triple m68k -filetype=obj --position-independent %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_GOTPCREL8 dst1 0x1 +; INSTR: move.l (dst1@GOTPCREL,%pc,%d0), %a0 +; FIXUP: fixup A - offset: 3, value: dst1@GOTPCREL+1, kind: FK_PCRel_1 +move.l (dst1@GOTPCREL,%pc,%d0), %a0 + +; RELOC: R_68K_GOTPCREL16 dst2 0x0 +; INSTR: move.l (dst2@GOTPCREL,%pc), %a0 +; FIXUP: fixup A - offset: 2, value: dst2@GOTPCREL, kind: FK_PCRel_2 +move.l (dst2@GOTPCREL,%pc), %a0 diff --git a/llvm/test/MC/M68k/Relocations/PIC/data-pc-rel.s b/llvm/test/MC/M68k/Relocations/PIC/data-pc-rel.s new file mode 100644 index 00000000000000..ef57ca589e64fe --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/data-pc-rel.s @@ -0,0 +1,20 @@ +; RUN: llvm-mc -triple m68k -filetype=obj --position-independent %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k -show-encoding --position-independent %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_PC8 dst1 0x1 +; INSTR: move.l (dst1,%pc,%a0), %a0 +; FIXUP: fixup A - offset: 3, value: dst1+1, kind: 
FK_PCRel_1 +move.l (dst1,%pc,%a0), %a0 + +; RELOC: R_68K_PC16 dst2 0x0 +; INSTR: move.l (dst2,%pc), %a0 +; FIXUP: fixup A - offset: 2, value: dst2, kind: FK_PCRel_2 +move.l (dst2,%pc), %a0 + +; Shouldn't have any relocation +; RELOC-NOT: R_68K_PC +; INSTR: move.l (0,%pc), %a0 +; FIXUP-NOT: fixup +move.l (0,%pc), %a0 diff --git a/llvm/test/MC/M68k/Relocations/PIC/text-plt.s b/llvm/test/MC/M68k/Relocations/PIC/text-plt.s new file mode 100644 index 00000000000000..0ccd1758004a97 --- /dev/null +++ b/llvm/test/MC/M68k/Relocations/PIC/text-plt.s @@ -0,0 +1,14 @@ +; RUN: llvm-mc -triple m68k --mcpu=M68020 --position-independent -filetype=obj %s -o - \ +; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s +; RUN: llvm-mc -triple m68k --mcpu=M68020 --position-independent -show-encoding %s -o - \ +; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s + +; RELOC: R_68K_PLT16 target 0x0 +; INSTR: jsr (target@PLT,%pc) +; FIXUP: fixup A - offset: 2, value: target@PLT, kind: FK_PCRel_2 +jsr (target@PLT,%pc) + +; RELOC: R_68K_PLT32 __tls_get_addr 0x0 +; INSTR: bsr.l __tls_get_addr@PLT +; FIXUP: fixup A - offset: 2, value: __tls_get_addr@PLT, kind: FK_PCRel_4 +bsr.l __tls_get_addr@PLT diff --git a/llvm/test/MC/M68k/Relocations/text-plt.s b/llvm/test/MC/M68k/Relocations/text-plt.s index 9513519c33c670..7de04b8b2182a5 100644 --- a/llvm/test/MC/M68k/Relocations/text-plt.s +++ b/llvm/test/MC/M68k/Relocations/text-plt.s @@ -1,6 +1,6 @@ -; RUN: llvm-mc -triple m68k -filetype=obj %s -o - \ +; RUN: llvm-mc -triple m68k --mcpu=M68020 -filetype=obj %s -o - \ ; RUN: | llvm-readobj -r - | FileCheck -check-prefix=RELOC %s -; RUN: llvm-mc -triple m68k -show-encoding %s -o - \ +; RUN: llvm-mc -triple m68k --mcpu=M68020 -show-encoding %s -o - \ ; RUN: | FileCheck -check-prefix=INSTR -check-prefix=FIXUP %s ; RELOC: R_68K_PLT16 target 0x0 diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll index 
21433477c1d7a3..2a99693523d3cf 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; REQUIRES: asserts -; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 -disable-output 2>&1 | FileCheck %s declare void @init_mem(ptr, i64); @@ -11,7 +11,7 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check' ; CHECK: Found an early exit loop with symbolic max backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32))) ; CHECK-NEXT: LV: We can vectorize this loop! -; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK-NOT: LV: Not vectorizing: entry: %p1 = alloca [1024 x i32] %p2 = alloca [1024 x i32] @@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1' ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 ; CHECK-NEXT: LV: We can vectorize this loop! -; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK-NOT: LV: Not vectorizing entry: %p1 = alloca [1024 x i8] %p2 = alloca [1024 x i8] @@ -141,7 +141,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit' ; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 ; CHECK-NEXT: LV: We can vectorize this loop! 
-; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet. entry: %p1 = alloca [1024 x i8] call void @init_mem(ptr %p1, i64 1024) diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 52f82d007de4df..08a333fa865154 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S < %s -p loop-vectorize | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 | FileCheck %s declare void @init_mem(ptr, i64); @@ -11,21 +11,47 @@ define i64 @same_exit_block_phi_of_consts() { ; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> 
[[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, 
[[LOOP1]] ], [ 1, [[LOOP_INC]] ], [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[MIDDLE_SPLIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -62,19 +88,45 @@ define i64 @diff_exit_block_phi_of_consts() { ; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64 +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] 
], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: loop.early.exit: ; CHECK-NEXT: ret i64 0 ; CHECK: loop.end: @@ -119,22 +171,66 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) ; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023 +; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[END]] to i10 +; CHECK-NEXT: [[TMP20:%.*]] = zext i10 [[TMP19]] to i64 +; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP20]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 
[[TMP2]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD3]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_SPLIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP16]], label [[FOUND:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] +; CHECK-NEXT: 
br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]] ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]] +; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND]], label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32 ; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY1]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: found: ; CHECK-NEXT: ret i32 1 ; CHECK: exit: @@ -183,14 +279,33 @@ define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SVAL:%.*]] = load i32, ptr [[S]], align 4 ; CHECK-NEXT: [[COND:%.*]] = 
icmp eq i32 [[SVAL]], 0 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[COND]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 276 +; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_SPLIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP1]], label [[EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 266, [[MIDDLE_BLOCK]] ], [ -10, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT:%.*]] +; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT]] ; CHECK: for.inc: ; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IND_NEXT]], 266 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], 
!llvm.loop [[LOOP9:![0-9]+]] ; CHECK: early.exit: ; CHECK-NEXT: tail call void @abort() ; CHECK-NEXT: unreachable @@ -218,3 +333,15 @@ early.exit: for.end: ret i32 0 } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll new file mode 100644 index 00000000000000..368c766e6b3c70 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S -enable-early-exit-vectorization -debug %s 2>&1 | FileCheck %s + +; REQUIRES: asserts + +declare void @init(ptr) + +define i64 @multi_exiting_to_different_exits_live_in_exit_values() { +; CHECK: multi_exiting_to_different_exits_live_in_exit_values +; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 +; CHECK-NEXT: IR call void @init(ptr %src) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; 
CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<%3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p2 = phi i64 [ 1, %loop.latch ] (extra operand: ir<1> from middle.block) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p1 = phi i64 [ 0, %loop.header ] (extra operand: ir<0> from middle.split) +; CHECK-NEXT: No successors +; CHECK-NEXT: } +entry: + %src = alloca [128 x i32] + call void 
@init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %e1, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %e2, label %loop.header + +e1: + %p1 = phi i64 [ 0, %loop.header ] + ret i64 %p1 + +e2: + %p2 = phi i64 [ 1, %loop.latch ] + ret i64 %p2 +} + +define i64 @multi_exiting_to_same_exit_live_in_exit_values() { +; CHECK: multi_exiting_to_same_exit_live_in_exit_values +; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 +; CHECK-NEXT: IR call void @init(ptr %src) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<%3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT 
branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from middle.split) +; CHECK-NEXT: No successors +; CHECK-NEXT: } + +entry: + %src = alloca [128 x i32] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %exit, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %exit, label %loop.header + +exit: + %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] + ret i64 %p +} + +define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() { +; CHECK: multi_exiting_to_same_exit_live_in_exit_values_2 +; CHECK-LABEL: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 +; CHECK-NEXT: IR call void @init(ptr %src) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): 
vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<%3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> +; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> +; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<10> +; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%c.1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not vp<[[NOT1]]> +; CHECK-NEXT: EMIT vp<[[EA_TAKEN:%.+]]> = any-of vp<[[NOT2]]> +; CHECK-NEXT: EMIT vp<[[LATCH_CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]> +; CHECK-NEXT: EMIT vp<[[EC:%.+]]> = or vp<[[EA_TAKEN]]>, vp<[[LATCH_CMP]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[EC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.split +; CHECK-EMPTY: +; CHECK-NEXT: middle.split: +; CHECK-NEXT: EMIT branch-on-cond vp<[[EA_TAKEN]]> +; CHECK-NEXT: Successor(s): ir-bb, middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<128>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] (extra operands: ir<1> from middle.block, ir<0> from middle.split) +; CHECK-NEXT: No successors +; CHECK-NEXT: } + +entry: + %src = alloca [128 x i32] + call void @init(ptr %src) + br label %loop.header + +loop.header: + %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] + %gep.src = getelementptr 
inbounds i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src + %c.1 = icmp eq i32 %l, 10 + br i1 %c.1, label %exit, label %loop.latch + +loop.latch: + %inc = add nuw i64 %iv, 1 + %c.2 = icmp eq i64 %inc, 128 + br i1 %c.2, label %exit, label %loop.header + +exit: + %p = phi i64 [ 0, %loop.header ], [ 1, %loop.latch ] + ret i64 %p + +; uselistorder directives + uselistorder label %exit, { 1, 0 } +} diff --git a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll index cd91d07120f9ee..5b2a95f1b368c3 100644 --- a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S < %s -p loop-vectorize | FileCheck %s +; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 | FileCheck %s declare void @init_mem(ptr, i64); diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll index 3cf2940e86808e..80a21cef4e38ad 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll @@ -474,8 +474,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] ; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] -; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> -; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32> +; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> ; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> 
[[V2]], <4 x float> [[TMP3]], <4 x i32> ; SSE-NEXT: ret <4 x float> [[V3]] ; @@ -489,7 +488,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] -; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> +; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2 ; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> ; AVX-NEXT: ret <4 x float> [[V3]] diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll index a08506840572c5..307fbf711cdc2d 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll @@ -474,7 +474,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] ; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] -; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP1]], <4 x i32> +; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> ; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32> ; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> ; SSE-NEXT: ret <4 x float> [[V3]] @@ -489,7 +489,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 
x i32> ; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] -; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP1]], <4 x i32> +; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> ; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2 ; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> ; AVX-NEXT: ret <4 x float> [[V3]] diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp index 594b52326871dd..913077eb0b06d5 100644 --- a/llvm/tools/dsymutil/dsymutil.cpp +++ b/llvm/tools/dsymutil/dsymutil.cpp @@ -64,12 +64,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -80,7 +81,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DsymutilOptTable : public opt::GenericOptTable { public: - DsymutilOptTable() : opt::GenericOptTable(InfoTable) {} + DsymutilOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // namespace diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp index d33459b194c9a3..9e3800f5bfbbc6 100644 --- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp +++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp @@ -51,12 +51,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef 
OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -67,7 +68,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class CGDataOptTable : public opt::GenericOptTable { public: - CGDataOptTable() : GenericOptTable(InfoTable) {} + CGDataOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-cvtres/llvm-cvtres.cpp b/llvm/tools/llvm-cvtres/llvm-cvtres.cpp index 0c10769a9488ea..8ef8d6e239cfe3 100644 --- a/llvm/tools/llvm-cvtres/llvm-cvtres.cpp +++ b/llvm/tools/llvm-cvtres/llvm-cvtres.cpp @@ -42,12 +42,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -58,7 +59,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class CvtResOptTable : public opt::GenericOptTable { public: - CvtResOptTable() : opt::GenericOptTable(InfoTable, true) {} + CvtResOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + true) {} }; } diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp index 41b379e8fd396b..1467093e78c0b4 100644 --- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp +++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp @@ -31,12 +31,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); 
+#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -47,7 +48,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class CxxfiltOptTable : public opt::GenericOptTable { public: - CxxfiltOptTable() : opt::GenericOptTable(InfoTable) { + CxxfiltOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp b/llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp index 77862737bccd2f..934833bf6fe426 100644 --- a/llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp +++ b/llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp @@ -37,12 +37,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -53,7 +54,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DebuginfodFindOptTable : public opt::GenericOptTable { public: - DebuginfodFindOptTable() : GenericOptTable(InfoTable) {} + DebuginfodFindOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp b/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp index 44d656148a4e2c..2859a36c80b0b3 100644 --- a/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp +++ b/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp @@ -36,12 +36,13 @@ 
enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -52,7 +53,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DebuginfodOptTable : public opt::GenericOptTable { public: - DebuginfodOptTable() : GenericOptTable(InfoTable) {} + DebuginfodOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp b/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp index 7b777b1845f8a4..0180abb834f9d3 100644 --- a/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp +++ b/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp @@ -38,12 +38,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Options.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Options.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -54,7 +55,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DwarfutilOptTable : public opt::GenericOptTable { public: - DwarfutilOptTable() : opt::GenericOptTable(InfoTable) {} + DwarfutilOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // namespace diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp index 60a89cb13c57a0..e34fcadfde5fc9 100644 --- 
a/llvm/tools/llvm-dwp/llvm-dwp.cpp +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -47,12 +47,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -63,7 +64,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class DwpOptTable : public opt::GenericOptTable { public: - DwpOptTable() : GenericOptTable(InfoTable) {} + DwpOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index 4d441465c47fbf..42900159ce9667 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -64,12 +64,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE const opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -79,7 +80,8 @@ const opt::OptTable::Info InfoTable[] = { class GSYMUtilOptTable : public llvm::opt::GenericOptTable { public: - GSYMUtilOptTable() : GenericOptTable(InfoTable) { + GSYMUtilOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-ifs/llvm-ifs.cpp b/llvm/tools/llvm-ifs/llvm-ifs.cpp index b76ea8dec0c98c..e12016c51e906c 100644 --- a/llvm/tools/llvm-ifs/llvm-ifs.cpp +++ b/llvm/tools/llvm-ifs/llvm-ifs.cpp @@ -59,12 +59,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -74,7 +75,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class IFSOptTable : public opt::GenericOptTable { public: - IFSOptTable() : opt::GenericOptTable(InfoTable) { + IFSOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp index 3d3f3f0af4b7a7..94247118dc4eb2 100644 --- a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp +++ b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp @@ -48,12 +48,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -63,7 +64,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class LibtoolDarwinOptTable : public opt::GenericOptTable { public: - LibtoolDarwinOptTable() : GenericOptTable(InfoTable) {} + LibtoolDarwinOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp index 711a9185e155f6..3c0197e8b7bac7 100644 --- a/llvm/tools/llvm-lipo/llvm-lipo.cpp +++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp @@ -72,12 +72,13 @@ enum LipoID { }; namespace lipo { -#define PREFIX(NAME, VALUE) \ - static constexpr llvm::StringLiteral NAME##_init[] = VALUE; \ - static constexpr llvm::ArrayRef NAME( \ - NAME##_init, std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "LipoOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "LipoOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info LipoInfoTable[] = { @@ -89,7 +90,9 @@ static constexpr opt::OptTable::Info LipoInfoTable[] = { class LipoOptTable : public opt::GenericOptTable { public: - LipoOptTable() : opt::GenericOptTable(lipo::LipoInfoTable) {} + LipoOptTable() + : opt::GenericOptTable(lipo::OptionStrTable, lipo::OptionPrefixesTable, + lipo::LipoInfoTable) {} }; enum class LipoAction { diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp index db69109e2d1fab..1aa41096002eec 100644 --- a/llvm/tools/llvm-ml/llvm-ml.cpp +++ b/llvm/tools/llvm-ml/llvm-ml.cpp @@ -58,12 +58,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE 
+#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -73,7 +74,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class MLOptTable : public opt::GenericOptTable { public: - MLOptTable() : opt::GenericOptTable(InfoTable, /*IgnoreCase=*/false) {} + MLOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + /*IgnoreCase=*/false) {} }; } // namespace diff --git a/llvm/tools/llvm-mt/llvm-mt.cpp b/llvm/tools/llvm-mt/llvm-mt.cpp index 8b793b877642cb..3bd1bc786f86d3 100644 --- a/llvm/tools/llvm-mt/llvm-mt.cpp +++ b/llvm/tools/llvm-mt/llvm-mt.cpp @@ -40,12 +40,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -56,7 +57,9 @@ static constexpr opt::OptTable::Info InfoTable[] = { class CvtResOptTable : public opt::GenericOptTable { public: - CvtResOptTable() : opt::GenericOptTable(InfoTable, true) {} + CvtResOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + true) {} }; } // namespace diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp index d3e8d4c5ed987c..e7c3e36dd38d2f 100644 --- a/llvm/tools/llvm-nm/llvm-nm.cpp +++ b/llvm/tools/llvm-nm/llvm-nm.cpp @@ -65,12 +65,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef 
OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -80,7 +81,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class NmOptTable : public opt::GenericOptTable { public: - NmOptTable() : opt::GenericOptTable(InfoTable) { + NmOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index 104d802b1e1eeb..0925fc55317f7d 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -39,12 +39,13 @@ enum ObjcopyID { }; namespace objcopy_opt { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "ObjcopyOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "ObjcopyOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info ObjcopyInfoTable[] = { #define OPTION(...) 
\ @@ -56,7 +57,10 @@ static constexpr opt::OptTable::Info ObjcopyInfoTable[] = { class ObjcopyOptTable : public opt::GenericOptTable { public: - ObjcopyOptTable() : opt::GenericOptTable(objcopy_opt::ObjcopyInfoTable) { + ObjcopyOptTable() + : opt::GenericOptTable(objcopy_opt::OptionStrTable, + objcopy_opt::OptionPrefixesTable, + objcopy_opt::ObjcopyInfoTable) { setGroupedShortOptions(true); setDashDashParsing(true); } @@ -71,13 +75,13 @@ enum InstallNameToolID { }; namespace install_name_tool { +#define OPTTABLE_STR_TABLE_CODE +#include "InstallNameToolOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "InstallNameToolOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InstallNameToolInfoTable[] = { #define OPTION(...) \ @@ -90,7 +94,9 @@ static constexpr opt::OptTable::Info InstallNameToolInfoTable[] = { class InstallNameToolOptTable : public opt::GenericOptTable { public: InstallNameToolOptTable() - : GenericOptTable(install_name_tool::InstallNameToolInfoTable) {} + : GenericOptTable(install_name_tool::OptionStrTable, + install_name_tool::OptionPrefixesTable, + install_name_tool::InstallNameToolInfoTable) {} }; enum BitcodeStripID { @@ -102,13 +108,13 @@ enum BitcodeStripID { }; namespace bitcode_strip { +#define OPTTABLE_STR_TABLE_CODE +#include "BitcodeStripOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "BitcodeStripOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info BitcodeStripInfoTable[] = { #define OPTION(...) 
\ @@ -121,7 +127,9 @@ static constexpr opt::OptTable::Info BitcodeStripInfoTable[] = { class BitcodeStripOptTable : public opt::GenericOptTable { public: BitcodeStripOptTable() - : opt::GenericOptTable(bitcode_strip::BitcodeStripInfoTable) {} + : opt::GenericOptTable(bitcode_strip::OptionStrTable, + bitcode_strip::OptionPrefixesTable, + bitcode_strip::BitcodeStripInfoTable) {} }; enum StripID { @@ -132,12 +140,13 @@ enum StripID { }; namespace strip { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE +#include "StripOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE #include "StripOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info StripInfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(STRIP_, __VA_ARGS__), @@ -148,7 +157,9 @@ static constexpr opt::OptTable::Info StripInfoTable[] = { class StripOptTable : public opt::GenericOptTable { public: - StripOptTable() : GenericOptTable(strip::StripInfoTable) { + StripOptTable() + : GenericOptTable(strip::OptionStrTable, strip::OptionPrefixesTable, + strip::StripInfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 246d5cfa05818a..1e74cb80b21187 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -99,10 +99,11 @@ namespace { class CommonOptTable : public opt::GenericOptTable { public: - CommonOptTable(ArrayRef OptionInfos, const char *Usage, + CommonOptTable(const char *StrTable, ArrayRef PrefixesTable, + ArrayRef OptionInfos, const char *Usage, const char *Description) - : opt::GenericOptTable(OptionInfos), Usage(Usage), - Description(Description) { + : opt::GenericOptTable(StrTable, PrefixesTable, OptionInfos), + Usage(Usage), 
Description(Description) { setGroupedShortOptions(true); } @@ -121,12 +122,13 @@ class CommonOptTable : public opt::GenericOptTable { // ObjdumpOptID is in ObjdumpOptID.h namespace objdump_opt { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "ObjdumpOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "ObjdumpOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info ObjdumpInfoTable[] = { #define OPTION(...) \ @@ -139,9 +141,10 @@ static constexpr opt::OptTable::Info ObjdumpInfoTable[] = { class ObjdumpOptTable : public CommonOptTable { public: ObjdumpOptTable() - : CommonOptTable(objdump_opt::ObjdumpInfoTable, - " [options] ", - "llvm object file dumper") {} + : CommonOptTable( + objdump_opt::OptionStrTable, objdump_opt::OptionPrefixesTable, + objdump_opt::ObjdumpInfoTable, " [options] ", + "llvm object file dumper") {} }; enum OtoolOptID { @@ -152,12 +155,13 @@ enum OtoolOptID { }; namespace otool { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE +#include "OtoolOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE #include "OtoolOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info OtoolInfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(OTOOL_, __VA_ARGS__), @@ -169,7 +173,8 @@ static constexpr opt::OptTable::Info OtoolInfoTable[] = { class OtoolOptTable : public CommonOptTable { public: OtoolOptTable() - : CommonOptTable(otool::OtoolInfoTable, " [option...] [file...]", + : CommonOptTable(otool::OptionStrTable, otool::OptionPrefixesTable, + otool::OtoolInfoTable, " [option...] 
[file...]", "Mach-O object file displaying tool") {} }; diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index f34d99e10f3163..6b9e2349899a44 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -21,6 +21,7 @@ #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/MemProfReader.h" +#include "llvm/ProfileData/MemProfYAML.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/ProfileData/SampleProfWriter.h" diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index 4bc9d90095575b..a77188c462afe0 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -57,12 +57,13 @@ enum ID { }; namespace rc_opt { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -73,7 +74,10 @@ static constexpr opt::OptTable::Info InfoTable[] = { class RcOptTable : public opt::GenericOptTable { public: - RcOptTable() : GenericOptTable(rc_opt::InfoTable, /* IgnoreCase = */ true) {} + RcOptTable() + : GenericOptTable(rc_opt::OptionStrTable, rc_opt::OptionPrefixesTable, + rc_opt::InfoTable, + /* IgnoreCase = */ true) {} }; enum Windres_ID { @@ -84,12 +88,13 @@ enum Windres_ID { }; namespace windres_opt { -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE +#include "WindresOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE #include "WindresOpts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) \ @@ -102,7 +107,10 @@ static constexpr opt::OptTable::Info InfoTable[] = { class WindresOptTable : public opt::GenericOptTable { public: WindresOptTable() - : GenericOptTable(windres_opt::InfoTable, /* IgnoreCase = */ false) {} + : GenericOptTable(windres_opt::OptionStrTable, + windres_opt::OptionPrefixesTable, + windres_opt::InfoTable, + /* IgnoreCase = */ false) {} }; static ExitOnError ExitOnErr; diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index 3e76cda2dd4335..2f77e5d350553d 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -59,12 +59,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr 
opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -74,7 +75,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class ReadobjOptTable : public opt::GenericOptTable { public: - ReadobjOptTable() : opt::GenericOptTable(InfoTable) { + ReadobjOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp index 04282d3e4877c1..b5574ea41e332b 100644 --- a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp +++ b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp @@ -45,12 +45,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "TapiOpts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "TapiOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -60,7 +61,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class TAPIOptTable : public opt::GenericOptTable { public: - TAPIOptTable() : opt::GenericOptTable(InfoTable) { + TAPIOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-size/llvm-size.cpp b/llvm/tools/llvm-size/llvm-size.cpp index 4a1b0e879036cc..0d7bf248326702 100644 --- a/llvm/tools/llvm-size/llvm-size.cpp +++ b/llvm/tools/llvm-size/llvm-size.cpp @@ -45,12 +45,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -60,7 +61,10 @@ static constexpr opt::OptTable::Info InfoTable[] = { class SizeOptTable : public opt::GenericOptTable { public: - SizeOptTable() : GenericOptTable(InfoTable) { setGroupedShortOptions(true); } + SizeOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { + setGroupedShortOptions(true); + } }; enum OutputFormatTy { berkeley, sysv, darwin }; diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp index d4305096b60a0b..9979b93de84270 100644 --- a/llvm/tools/llvm-strings/llvm-strings.cpp +++ b/llvm/tools/llvm-strings/llvm-strings.cpp @@ -38,12 +38,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -54,7 +55,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class StringsOptTable : public opt::GenericOptTable { public: - StringsOptTable() : GenericOptTable(InfoTable) { + StringsOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); setDashDashParsing(true); } diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 3e41a85d646956..3ba7f59d5b8475 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -56,12 +56,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include 
"Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -72,7 +73,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class SymbolizerOptTable : public opt::GenericOptTable { public: - SymbolizerOptTable() : GenericOptTable(InfoTable) { + SymbolizerOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp index a091e37ff4026d..ca0b4247221968 100644 --- a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp +++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp @@ -33,12 +33,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE using namespace llvm::opt; static constexpr opt::OptTable::Info InfoTable[] = { @@ -49,7 +50,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class TLICheckerOptTable : public opt::GenericOptTable { public: - TLICheckerOptTable() : GenericOptTable(InfoTable) {} + TLICheckerOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // end anonymous namespace diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp index 39feff62391fe1..727b94b8477cc5 100644 --- a/llvm/tools/sancov/sancov.cpp +++ b/llvm/tools/sancov/sancov.cpp @@ -67,12 +67,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, 
\ - std::size(NAME##_init) - 1); +#define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE static constexpr opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), @@ -82,7 +83,8 @@ static constexpr opt::OptTable::Info InfoTable[] = { class SancovOptTable : public opt::GenericOptTable { public: - SancovOptTable() : GenericOptTable(InfoTable) {} + SancovOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} }; } // namespace diff --git a/llvm/unittests/CodeGen/MachineInstrTest.cpp b/llvm/unittests/CodeGen/MachineInstrTest.cpp index c32c2ce859af5d..ab28963b39311a 100644 --- a/llvm/unittests/CodeGen/MachineInstrTest.cpp +++ b/llvm/unittests/CodeGen/MachineInstrTest.cpp @@ -584,7 +584,7 @@ TEST(MachineInstrTest, SpliceOperands) { // test tied operands MCRegisterClass MRC{ 0, 0, 0, 0, 0, 0, 0, 0, /*Allocatable=*/true, /*BaseClass=*/true}; - TargetRegisterClass RC{&MRC, 0, 0, {}, 0, 0, 0, 0, 0, 0, 0}; + TargetRegisterClass RC{&MRC, 0, 0, {}, 0, 0, 0, 0, 0, 0, 0, 0}; // MachineRegisterInfo will be very upset if these registers aren't // allocatable. 
assert(RC.isAllocatable() && "unusable TargetRegisterClass"); diff --git a/llvm/unittests/Option/OptionMarshallingTest.cpp b/llvm/unittests/Option/OptionMarshallingTest.cpp index 2ec422f1a09843..08c3b019689f8c 100644 --- a/llvm/unittests/Option/OptionMarshallingTest.cpp +++ b/llvm/unittests/Option/OptionMarshallingTest.cpp @@ -9,29 +9,37 @@ #include "llvm/ADT/StringRef.h" #include "gtest/gtest.h" +#define OPTTABLE_STR_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_STR_TABLE_CODE + struct OptionWithMarshallingInfo { - llvm::StringLiteral PrefixedName; + int PrefixedNameOffset; const char *KeyPath; const char *ImpliedCheck; const char *ImpliedValue; + + llvm::StringRef getPrefixedName() const { + return &OptionStrTable[PrefixedNameOffset]; + } }; static const OptionWithMarshallingInfo MarshallingTable[] = { #define OPTION_WITH_MARSHALLING( \ - PREFIX_TYPE, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ + PREFIX_TYPE, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ - {PREFIXED_NAME, #KEYPATH, #IMPLIED_CHECK, #IMPLIED_VALUE}, + {PREFIXED_NAME_OFFSET, #KEYPATH, #IMPLIED_CHECK, #IMPLIED_VALUE}, #include "Opts.inc" #undef OPTION_WITH_MARSHALLING }; TEST(OptionMarshalling, EmittedOrderSameAsDefinitionOrder) { - ASSERT_EQ(MarshallingTable[0].PrefixedName, "-marshalled-flag-d"); - ASSERT_EQ(MarshallingTable[1].PrefixedName, "-marshalled-flag-c"); - ASSERT_EQ(MarshallingTable[2].PrefixedName, "-marshalled-flag-b"); - ASSERT_EQ(MarshallingTable[3].PrefixedName, "-marshalled-flag-a"); + ASSERT_EQ(MarshallingTable[0].getPrefixedName(), "-marshalled-flag-d"); + ASSERT_EQ(MarshallingTable[1].getPrefixedName(), "-marshalled-flag-c"); + 
ASSERT_EQ(MarshallingTable[2].getPrefixedName(), "-marshalled-flag-b"); + ASSERT_EQ(MarshallingTable[3].getPrefixedName(), "-marshalled-flag-a"); } TEST(OptionMarshalling, EmittedSpecifiedKeyPath) { diff --git a/llvm/unittests/Option/OptionParsingTest.cpp b/llvm/unittests/Option/OptionParsingTest.cpp index cd8743e49d4fde..3da015e343eb97 100644 --- a/llvm/unittests/Option/OptionParsingTest.cpp +++ b/llvm/unittests/Option/OptionParsingTest.cpp @@ -20,6 +20,10 @@ using namespace llvm::opt; #pragma clang diagnostic ignored "-Wdeprecated-declarations" #endif +#define OPTTABLE_STR_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_STR_TABLE_CODE + enum ID { OPT_INVALID = 0, // This is not an option ID. #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), @@ -28,20 +32,13 @@ enum ID { #undef OPTION }; -#define PREFIX(NAME, VALUE) \ - static constexpr StringLiteral NAME##_init[] = VALUE; \ - static constexpr ArrayRef NAME(NAME##_init, \ - std::size(NAME##_init) - 1); +#define OPTTABLE_PREFIXES_TABLE_CODE #include "Opts.inc" -#undef PREFIX +#undef OPTTABLE_PREFIXES_TABLE_CODE -static constexpr const StringLiteral PrefixTable_init[] = -#define PREFIX_UNION(VALUES) VALUES +#define OPTTABLE_PREFIXES_UNION_CODE #include "Opts.inc" -#undef PREFIX_UNION - ; -static constexpr const ArrayRef - PrefixTable(PrefixTable_init, std::size(PrefixTable_init) - 1); +#undef OPTTABLE_PREFIXES_UNION_CODE enum OptionFlags { OptFlag1 = (1 << 4), @@ -64,13 +61,15 @@ namespace { class TestOptTable : public GenericOptTable { public: TestOptTable(bool IgnoreCase = false) - : GenericOptTable(InfoTable, IgnoreCase) {} + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + IgnoreCase) {} }; class TestPrecomputedOptTable : public PrecomputedOptTable { public: TestPrecomputedOptTable(bool IgnoreCase = false) - : PrecomputedOptTable(InfoTable, PrefixTable, IgnoreCase) {} + : PrecomputedOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + OptionPrefixesUnion, IgnoreCase) {} }; } diff --git 
a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 5886d3e2bcf842..456b093362b50f 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -16,6 +16,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/ProfileData/MemProfData.inc" #include "llvm/ProfileData/MemProfReader.h" +#include "llvm/ProfileData/MemProfYAML.h" #include "llvm/Support/raw_ostream.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -161,9 +162,8 @@ TEST(MemProf, FillsValue) { /*KeepName=*/true); llvm::DenseMap Records; - for (const auto &Pair : Reader) { + for (const auto &Pair : Reader) Records.insert({Pair.first, Pair.second}); - } // Mock program pseudocode and expected memprof record contents. // @@ -396,9 +396,8 @@ TEST(MemProf, SymbolizationFilter) { RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM); llvm::SmallVector Records; - for (const auto &KeyRecordPair : Reader) { + for (const auto &KeyRecordPair : Reader) Records.push_back(KeyRecordPair.second); - } ASSERT_THAT(Records, SizeIs(1)); ASSERT_THAT(Records[0].AllocSites, SizeIs(1)); @@ -428,9 +427,8 @@ TEST(MemProf, BaseMemProfReader) { MemProfReader Reader(std::move(MemProfData)); llvm::SmallVector Records; - for (const auto &KeyRecordPair : Reader) { + for (const auto &KeyRecordPair : Reader) Records.push_back(KeyRecordPair.second); - } ASSERT_THAT(Records, SizeIs(1)); ASSERT_THAT(Records[0].AllocSites, SizeIs(1)); @@ -463,9 +461,8 @@ TEST(MemProf, BaseMemProfReaderWithCSIdMap) { MemProfReader Reader(std::move(MemProfData)); llvm::SmallVector Records; - for (const auto &KeyRecordPair : Reader) { + for (const auto &KeyRecordPair : Reader) Records.push_back(KeyRecordPair.second); - } ASSERT_THAT(Records, SizeIs(1)); ASSERT_THAT(Records[0].AllocSites, SizeIs(1)); @@ -748,7 +745,7 @@ TEST(MemProf, YAMLParser) { // Verify the entire contents of MemProfData.Records. 
ASSERT_THAT(MemProfData.Records, SizeIs(1)); - const auto &[GUID, Record] = *MemProfData.Records.begin(); + const auto &[GUID, Record] = MemProfData.Records.front(); EXPECT_EQ(GUID, 0xdeadbeef12345678ULL); ASSERT_THAT(Record.AllocSites, SizeIs(2)); EXPECT_EQ(Record.AllocSites[0].CSId, hashCallStack(CS1)); diff --git a/llvm/utils/TableGen/Basic/SDNodeProperties.h b/llvm/utils/TableGen/Basic/SDNodeProperties.h index 1fe4044edcea23..97813067341fc8 100644 --- a/llvm/utils/TableGen/Basic/SDNodeProperties.h +++ b/llvm/utils/TableGen/Basic/SDNodeProperties.h @@ -28,8 +28,6 @@ enum SDNP { SDNPSideEffect, SDNPMemOperand, SDNPVariadic, - SDNPWantRoot, - SDNPWantParent }; unsigned parseSDPatternOperatorProperties(const Record *R); diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.cpp b/llvm/utils/TableGen/Common/CodeGenTarget.cpp index 4e75db689a0b57..f01b8a962bfbb1 100644 --- a/llvm/utils/TableGen/Common/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/Common/CodeGenTarget.cpp @@ -424,14 +424,13 @@ ComplexPattern::ComplexPattern(const Record *R) { Properties |= 1 << SDNPMemOperand; } else if (Prop->getName() == "SDNPVariadic") { Properties |= 1 << SDNPVariadic; - } else if (Prop->getName() == "SDNPWantRoot") { - Properties |= 1 << SDNPWantRoot; - } else if (Prop->getName() == "SDNPWantParent") { - Properties |= 1 << SDNPWantParent; } else { PrintFatalError(R->getLoc(), "Unsupported SD Node property '" + Prop->getName() + "' on ComplexPattern '" + R->getName() + "'!"); } } + + WantsRoot = R->getValueAsBit("WantsRoot"); + WantsParent = R->getValueAsBit("WantsParent"); } diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.h b/llvm/utils/TableGen/Common/CodeGenTarget.h index 8bcb2f677a00b0..c3c7f7e362dbff 100644 --- a/llvm/utils/TableGen/Common/CodeGenTarget.h +++ b/llvm/utils/TableGen/Common/CodeGenTarget.h @@ -243,6 +243,8 @@ class ComplexPattern { std::vector RootNodes; unsigned Properties; // Node properties unsigned Complexity; + bool WantsRoot; + bool WantsParent; public: 
ComplexPattern(const Record *R); @@ -253,6 +255,8 @@ class ComplexPattern { const ArrayRef getRootNodes() const { return RootNodes; } bool hasProperty(enum SDNP Prop) const { return Properties & (1 << Prop); } unsigned getComplexity() const { return Complexity; } + bool wantsRoot() const { return WantsRoot; } + bool wantsParent() const { return WantsParent; } }; } // namespace llvm diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp index f2385bdf1dde41..f056306b2f8571 100644 --- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -1189,12 +1189,12 @@ void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) { OS << "("; // If the complex pattern wants the root of the match, pass it in as the // first argument. - if (P.hasProperty(SDNPWantRoot)) + if (P.wantsRoot()) OS << "Root, "; // If the complex pattern wants the parent of the operand being matched, // pass it in as the next argument. 
- if (P.hasProperty(SDNPWantParent)) + if (P.wantsParent()) OS << "Parent, "; OS << "N"; diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp index 48f5818938ae86..eca828cad5f4d2 100644 --- a/llvm/utils/TableGen/OptionParserEmitter.cpp +++ b/llvm/utils/TableGen/OptionParserEmitter.cpp @@ -9,9 +9,11 @@ #include "Common/OptEmitter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/StringToOffsetTable.h" #include "llvm/TableGen/TableGenBackend.h" #include #include @@ -26,6 +28,15 @@ static std::string getOptionName(const Record &R) { return std::string(R.getValueAsString("EnumName")); } +static raw_ostream &writeStrTableOffset(raw_ostream &OS, + const StringToOffsetTable &Table, + llvm::StringRef Str) { + OS << Table.GetStringOffset(Str) << " /* "; + OS.write_escaped(Str); + OS << " */"; + return OS; +} + static raw_ostream &writeCstring(raw_ostream &OS, llvm::StringRef Str) { OS << '"'; OS.write_escaped(Str); @@ -260,59 +271,84 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { // Generate prefix groups. 
typedef SmallVector, 2> PrefixKeyT; - typedef std::map PrefixesT; + typedef std::map PrefixesT; PrefixesT Prefixes; - Prefixes.insert(std::pair(PrefixKeyT(), "prefix_0")); - unsigned CurPrefix = 0; + Prefixes.insert({PrefixKeyT(), 0}); for (const Record &R : llvm::make_pointee_range(Opts)) { std::vector RPrefixes = R.getValueAsListOfStrings("Prefixes"); PrefixKeyT PrefixKey(RPrefixes.begin(), RPrefixes.end()); - unsigned NewPrefix = CurPrefix + 1; - std::string Prefix = (Twine("prefix_") + Twine(NewPrefix)).str(); - if (Prefixes.insert(std::pair(PrefixKey, Prefix)).second) - CurPrefix = NewPrefix; + Prefixes.insert({PrefixKey, 0}); } DenseSet PrefixesUnionSet; - for (const auto &Prefix : Prefixes) - PrefixesUnionSet.insert(Prefix.first.begin(), Prefix.first.end()); + for (const auto &[Prefix, _] : Prefixes) + PrefixesUnionSet.insert(Prefix.begin(), Prefix.end()); SmallVector PrefixesUnion(PrefixesUnionSet.begin(), PrefixesUnionSet.end()); array_pod_sort(PrefixesUnion.begin(), PrefixesUnion.end()); + llvm::StringToOffsetTable Table; + // Make sure the empty string is the zero-th one in the table. This both makes + // it easy to check for empty strings (zero offset == empty) and makes + // initialization cheaper for empty strings. + Table.GetOrAddStringOffset(""); + // We can add all the prefixes via the union. + for (const auto &Prefix : PrefixesUnion) + Table.GetOrAddStringOffset(Prefix); + for (const Record &R : llvm::make_pointee_range(Groups)) + Table.GetOrAddStringOffset(R.getValueAsString("Name")); + for (const Record &R : llvm::make_pointee_range(Opts)) + Table.GetOrAddStringOffset(getOptionPrefixedName(R)); + + // Dump string table. + OS << "/////////\n"; + OS << "// String table\n\n"; + OS << "#ifdef OPTTABLE_STR_TABLE_CODE\n"; + Table.EmitStringLiteralDef(OS, "static constexpr char OptionStrTable[]", + /*Indent=*/""); + OS << "#endif // OPTTABLE_STR_TABLE_CODE\n\n"; + // Dump prefixes. 
OS << "/////////\n"; OS << "// Prefixes\n\n"; - OS << "#ifdef PREFIX\n"; - OS << "#define COMMA ,\n"; - for (const auto &Prefix : Prefixes) { - OS << "PREFIX("; - - // Prefix name. - OS << Prefix.second; - - // Prefix values. - OS << ", {"; - for (const auto &PrefixKey : Prefix.first) - OS << "llvm::StringLiteral(\"" << PrefixKey << "\") COMMA "; - // Append an empty element to avoid ending up with an empty array. - OS << "llvm::StringLiteral(\"\")})\n"; + OS << "#ifdef OPTTABLE_PREFIXES_TABLE_CODE\n"; + OS << "static constexpr unsigned OptionPrefixesTable[] = {\n"; + { + // Ensure the first prefix set is always empty. + assert(!Prefixes.empty() && + "We should always emit an empty set of prefixes"); + assert(Prefixes.begin()->first.empty() && + "First prefix set should always be empty"); + llvm::ListSeparator Sep(",\n"); + unsigned CurIndex = 0; + for (auto &[Prefix, PrefixIndex] : Prefixes) { + // First emit the number of prefix strings in this list of prefixes. + OS << Sep << " " << Prefix.size() << " /* prefixes */"; + PrefixIndex = CurIndex; + assert((CurIndex == 0 || !Prefix.empty()) && + "Only first prefix set should be empty!"); + for (const auto &PrefixKey : Prefix) + OS << ", " << *Table.GetStringOffset(PrefixKey) << " /* '" << PrefixKey + << "' */"; + CurIndex += Prefix.size() + 1; + } } - OS << "#undef COMMA\n"; - OS << "#endif // PREFIX\n\n"; + OS << "\n};\n"; + OS << "#endif // OPTTABLE_PREFIXES_TABLE_CODE\n\n"; - // Dump prefix unions. + // Dump prefixes union. 
OS << "/////////\n"; OS << "// Prefix Union\n\n"; - OS << "#ifdef PREFIX_UNION\n"; - OS << "#define COMMA ,\n"; - OS << "PREFIX_UNION({\n"; - for (const auto &Prefix : PrefixesUnion) { - OS << "llvm::StringLiteral(\"" << Prefix << "\") COMMA "; + OS << "#ifdef OPTTABLE_PREFIXES_UNION_CODE\n"; + OS << "static constexpr unsigned OptionPrefixesUnion[] = {\n"; + { + llvm::ListSeparator Sep(", "); + for (auto Prefix : PrefixesUnion) + OS << Sep << " " << *Table.GetStringOffset(Prefix) << " /* '" << Prefix + << "' */"; } - OS << "llvm::StringLiteral(\"\")})\n"; - OS << "#undef COMMA\n"; - OS << "#endif // PREFIX_UNION\n\n"; + OS << "\n};\n"; + OS << "#endif // OPTTABLE_PREFIXES_UNION_CODE\n\n"; // Dump groups. OS << "/////////\n"; @@ -337,11 +373,12 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { // Start a single option entry. OS << "OPTION("; - // The option prefix; - OS << "llvm::ArrayRef()"; + // A zero prefix offset corresponds to an empty set of prefixes. + OS << "0 /* no prefixes */"; - // The option string. - OS << ", \"" << R.getValueAsString("Name") << '"'; + // The option string offset. + OS << ", "; + writeStrTableOffset(OS, Table, R.getValueAsString("Name")); // The option identifier name. OS << ", " << getOptionName(R); @@ -387,7 +424,7 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { OS << Prefixes[PrefixKeyT(RPrefixes.begin(), RPrefixes.end())] << ", "; // The option prefixed name. - writeCstring(OS, getOptionPrefixedName(R)); + writeStrTableOffset(OS, Table, getOptionPrefixedName(R)); // The option identifier name. 
OS << ", " << getOptionName(R); diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index a6f87119aca5ba..bfcd52da1c39cb 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -1286,9 +1286,6 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) { } OS << "};\n"; - OS << "\nstatic const TargetRegisterClass *const " - << "NullRegClasses[] = { nullptr };\n\n"; - // Emit register class bit mask tables. The first bit mask emitted for a // register class, RC, is the set of sub-classes, including RC itself. // @@ -1340,19 +1337,18 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) { SuperRegIdxSeqs.emit(OS, printSubRegIndex); OS << "};\n\n"; - // Emit NULL terminated super-class lists. + // Emit super-class lists. for (const auto &RC : RegisterClasses) { ArrayRef Supers = RC.getSuperClasses(); - // Skip classes without supers. We can reuse NullRegClasses. + // Skip classes without supers. if (Supers.empty()) continue; - OS << "static const TargetRegisterClass *const " << RC.getName() - << "Superclasses[] = {\n"; + OS << "static unsigned const " << RC.getName() << "Superclasses[] = {\n"; for (const auto *Super : Supers) - OS << " &" << Super->getQualifiedName() << "RegClass,\n"; - OS << " nullptr\n};\n\n"; + OS << " " << Super->getQualifiedIdName() << ",\n"; + OS << "};\n\n"; } // Emit methods. @@ -1406,9 +1402,10 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) { << (RC.CoveredBySubRegs ? 
"true" : "false") << ", /* CoveredBySubRegs */\n "; if (RC.getSuperClasses().empty()) - OS << "NullRegClasses,\n "; + OS << "nullptr, "; else - OS << RC.getName() << "Superclasses,\n "; + OS << RC.getName() << "Superclasses, "; + OS << RC.getSuperClasses().size() << ",\n "; if (RC.AltOrderSelect.empty()) OS << "nullptr\n"; else diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py index 76b2a3e26be28a..19264bca6ce8f6 100755 --- a/llvm/utils/git/code-format-helper.py +++ b/llvm/utils/git/code-format-helper.py @@ -10,6 +10,8 @@ import argparse import os +import re +import shlex import subprocess import sys from typing import List, Optional @@ -312,7 +314,112 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str return None -ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper()) +class UndefGetFormatHelper(FormatHelper): + name = "undef deprecator" + friendly_name = "undef deprecator" + + @property + def instructions(self) -> str: + return " ".join(shlex.quote(c) for c in self.cmd) + + def filter_changed_files(self, changed_files: List[str]) -> List[str]: + filtered_files = [] + for path in changed_files: + _, ext = os.path.splitext(path) + if ext in (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm", ".ll"): + filtered_files.append(path) + return filtered_files + + def has_tool(self) -> bool: + return True + + def pr_comment_text_for_diff(self, diff: str) -> str: + return f""" +:warning: {self.name} found issues in your code. :warning: + +
+ +You can test this locally with the following command: + + +``````````bash +{self.instructions} +`````````` + +
+ +{diff} +""" + + def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]: + files = self.filter_changed_files(changed_files) + + # Use git to find files that have had a change in the number of undefs + regex = "([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)" + cmd = ["git", "diff", "-U0", "--pickaxe-regex", "-S", regex] + + if args.start_rev and args.end_rev: + cmd.append(args.start_rev) + cmd.append(args.end_rev) + + cmd += files + self.cmd = cmd + + if args.verbose: + print(f"Running: {self.instructions}") + + proc = subprocess.run( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" + ) + sys.stdout.write(proc.stderr) + stdout = proc.stdout + + files = [] + # Split the diff so we have one array entry per file. + # Each file is prefixed like: + # diff --git a/file b/file + for file in re.split("^diff --git ", stdout, 0, re.MULTILINE): + # search for additions of undef + if re.search("^[+].*" + regex, file, re.MULTILINE): + files.append(re.match("a/([^ ]+)", file.splitlines()[0])[1]) + + if not files: + return None + + files = "\n".join(" - " + f for f in files) + report = f""" +The following files introduce new uses of undef: +{files} + +[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead. + +In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead. + +For example, this is considered a bad practice: +```llvm +define void @fn() {{ + ... + br i1 undef, ... +}} +``` + +Please use the following instead: +```llvm +define void @fn(i1 %cond) {{ + ... + br i1 %cond, ... 
+}} +``` + +Please refer to the [Undefined Behavior Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information. +""" + if args.verbose: + print(f"error: {self.name} failed") + print(report) + return report + + +ALL_FORMATTERS = (DarkerFormatHelper(), ClangFormatHelper(), UndefGetFormatHelper()) def hook_main(): diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn index 0b72d01f2279bd..21e348da90b905 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn @@ -45,6 +45,7 @@ static_library("modernize") { "UseEmplaceCheck.cpp", "UseEqualsDefaultCheck.cpp", "UseEqualsDeleteCheck.cpp", + "UseIntegerSignComparisonCheck.cpp", "UseNodiscardCheck.cpp", "UseNoexceptCheck.cpp", "UseNullptrCheck.cpp", diff --git a/mlir/include/mlir-c/Dialect/EmitC.h b/mlir/include/mlir-c/Dialect/EmitC.h new file mode 100644 index 00000000000000..82e698344bf1e7 --- /dev/null +++ b/mlir/include/mlir-c/Dialect/EmitC.h @@ -0,0 +1,26 @@ +//===-- mlir-c/Dialect/EmitC.h - C API for EmitC dialect ----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_C_DIALECT_EmitC_H +#define MLIR_C_DIALECT_EmitC_H + +#include "mlir-c/IR.h" +#include "mlir-c/Support.h" + +#ifdef __cplusplus +extern "C" { +#endif + +MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(EmitC, emitc); + +#ifdef __cplusplus +} +#endif + +#endif // MLIR_C_DIALECT_EmitC_H diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 296a3c305e5bf4..14880a1a66ba57 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -2029,6 +2029,107 @@ def NVVM_CpAsyncBulkTensorPrefetchOp : }]; } +// List of modes supported for TMA Store and Reduction Ops +def TMAStoreModeTile : I32EnumAttrCase<"TILE", 0, "tile">; +def TMAStoreModeIm2Col : I32EnumAttrCase<"IM2COL", 1, "im2col">; + +def TMAStoreMode : I32EnumAttr<"TMAStoreMode", "NVVM TMA Store Mode", + [TMAStoreModeTile, TMAStoreModeIm2Col]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} +def TMAStoreModeAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + +// List of Reduction Ops supported with TMA Store +def TMAReduxKindAdd : I32EnumAttrCase<"ADD", 0, "add">; +def TMAReduxKindMin : I32EnumAttrCase<"MIN", 1, "min">; +def TMAReduxKindMax : I32EnumAttrCase<"MAX", 2, "max">; +def TMAReduxKindInc : I32EnumAttrCase<"INC", 3, "inc">; +def TMAReduxKindDec : I32EnumAttrCase<"DEC", 4, "dec">; +def TMAReduxKindAnd : I32EnumAttrCase<"AND", 5, "and">; +def TMAReduxKindOr : I32EnumAttrCase<"OR", 6, "or">; +def TMAReduxKindXor : I32EnumAttrCase<"XOR", 7, "xor">; + +def TMAReduxKind : I32EnumAttr<"TMAReduxKind", "NVVM TMA redux kind", + [TMAReduxKindAdd, TMAReduxKindMax, TMAReduxKindMin, + TMAReduxKindInc, TMAReduxKindDec, TMAReduxKindAnd, + TMAReduxKindOr, TMAReduxKindXor]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} 
+def TMAReduxKindAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + +def NVVM_CpAsyncBulkTensorReduceOp : + NVVM_Op<"cp.async.bulk.tensor.reduce", [AttrSizedOperandSegments]> { + let arguments = (ins + LLVM_AnyPointer:$tmaDescriptor, + LLVM_PointerShared:$srcMem, + TMAReduxKindAttr:$redKind, + DefaultValuedAttr:$mode, + Variadic:$coordinates, + Optional:$l2CacheHint); + + let description = [{ + Initiates an asynchronous reduction operation of tensor data in + global memory with tensor data in shared memory. + + The `mode` attribute indicates whether the copy mode is tile or im2col. + The `redOp` attribute specifies the reduction operations applied. + The supported reduction operations are: + {add, min, max, inc, dec, and, or, xor} + + The `l2CacheHint` operand is optional, and it is used to specify cache + eviction policy that may be used during the memory access. + + [For more information, see PTX ISA] + (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-reduce-async-bulk-tensor) + }]; + + let assemblyFormat = [{ + $tmaDescriptor `,` + $srcMem `,` + `box` `[`$coordinates `]` + (`l2_cache_hint` `=` $l2CacheHint^ )? 
+ attr-dict `:` type($tmaDescriptor) `,` type($srcMem) + }]; + + let extraClassDeclaration = [{ + static llvm::Intrinsic::ID getIntrinsicID(int tensorDims, + NVVM::TMAReduxKind kind, + bool isIm2Col); + }]; + + let hasVerifier = 1; + + string llvmBuilder = [{ + // Arguments to the intrinsic: + // shared_mem_ptr, tmaDesc, tensorDims + // cache_hint(if applicable) and flag(boolean) + llvm::SmallVector translatedOperands; + translatedOperands.push_back($srcMem); + translatedOperands.push_back($tmaDescriptor); + + for (auto v : op.getCoordinates()) + translatedOperands.push_back(moduleTranslation.lookupValue(v)); + + llvm::LLVMContext &ctx = moduleTranslation.getLLVMContext(); + auto *i64Undef = llvm::UndefValue::get(llvm::IntegerType::get(ctx, 64)); + + bool isCacheHint = op.getL2CacheHint() ? true : false; + translatedOperands.push_back(isCacheHint ? $l2CacheHint : i64Undef); + translatedOperands.push_back(builder.getInt1(isCacheHint)); + + auto intId = NVVM::CpAsyncBulkTensorReduceOp::getIntrinsicID( + op.getCoordinates().size(), $redKind, + (op.getMode() == NVVM::TMAStoreMode::IM2COL)); + createIntrinsicCall(builder, intId, translatedOperands); + }]; +} + //===----------------------------------------------------------------------===// // NVVM Wgmma Ops //===----------------------------------------------------------------------===// diff --git a/mlir/lib/CAPI/Dialect/CMakeLists.txt b/mlir/lib/CAPI/Dialect/CMakeLists.txt index 4e141b60ff8cc9..5ad4bafedf6c48 100644 --- a/mlir/lib/CAPI/Dialect/CMakeLists.txt +++ b/mlir/lib/CAPI/Dialect/CMakeLists.txt @@ -40,6 +40,15 @@ add_mlir_upstream_c_api_library(MLIRCAPIControlFlow MLIRControlFlowDialect ) +add_mlir_upstream_c_api_library(MLIRCAPIEmitC + EmitC.cpp + + PARTIAL_SOURCES_INTENDED + LINK_LIBS PUBLIC + MLIRCAPIIR + MLIREmitCDialect +) + add_mlir_upstream_c_api_library(MLIRCAPIMath Math.cpp diff --git a/mlir/lib/CAPI/Dialect/EmitC.cpp b/mlir/lib/CAPI/Dialect/EmitC.cpp new file mode 100644 index 
00000000000000..3dcb7038a57981 --- /dev/null +++ b/mlir/lib/CAPI/Dialect/EmitC.cpp @@ -0,0 +1,13 @@ +//===- EmitC.cpp - C Interface for EmitC dialect --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir-c/Dialect/EmitC.h" +#include "mlir/CAPI/Registration.h" +#include "mlir/Dialect/EmitC/IR/EmitC.h" + +MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(EmitC, emitc, mlir::emitc::EmitCDialect) diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index ca04af0b060b4f..8b09c0f386d6b6 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -75,21 +75,26 @@ ParseResult VoteBallotOp::parse(OpAsmParser &parser, OperationState &result) { void VoteBallotOp::print(OpAsmPrinter &p) { printNVVMIntrinsicOp(p, *this); } -// This verifier is shared across: -// CpAsyncBulkTensorGlobalToSharedClusterOp (TMA Load) and -// CpAsyncBulkTensorPrefetchOp (TMA Prefetch) Ops. +// This verifier is shared among the following Ops: +// CpAsyncBulkTensorGlobalToSharedClusterOp (TMA Load) +// CpAsyncBulkTensorPrefetchOp (TMA Prefetch) +// CpAsyncBulkTensorReduceOp (TMA Store-Reduce) static LogicalResult CpAsyncBulkTensorCommonVerifier(size_t tensorDims, + bool isIm2Col, size_t numIm2ColOffsets, Location loc) { if (tensorDims < 1 || tensorDims > 5) return emitError(loc, "expects coordinates between 1 to 5 dimension"); - if (numIm2ColOffsets) { + // For Im2Col mode, there are two constraints: + if (isIm2Col) { + // 1. Tensor must always be at least 3-d. if (tensorDims < 3) return emitError( loc, "to use im2col mode, the tensor has to be at least 3-dimensional"); - if (tensorDims != (numIm2ColOffsets + 2)) + // 2. 
When there are Im2ColOffsets, they must be (Dims - 2) in number. + if (numIm2ColOffsets && (tensorDims != (numIm2ColOffsets + 2))) return emitError( loc, "im2col offsets must be 2 less than number of coordinates"); } @@ -97,8 +102,10 @@ static LogicalResult CpAsyncBulkTensorCommonVerifier(size_t tensorDims, } LogicalResult CpAsyncBulkTensorGlobalToSharedClusterOp::verify() { - return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), - getIm2colOffsets().size(), getLoc()); + size_t numIm2ColOffsets = getIm2colOffsets().size(); + bool isIm2Col = numIm2ColOffsets > 0; + return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), isIm2Col, + numIm2ColOffsets, getLoc()); } LogicalResult CpAsyncBulkTensorSharedCTAToGlobalOp::verify() { @@ -119,8 +126,16 @@ LogicalResult CpAsyncOp::verify() { } LogicalResult CpAsyncBulkTensorPrefetchOp::verify() { - return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), - getIm2colOffsets().size(), getLoc()); + size_t numIm2ColOffsets = getIm2colOffsets().size(); + bool isIm2Col = numIm2ColOffsets > 0; + return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), isIm2Col, + numIm2ColOffsets, getLoc()); +} + +LogicalResult CpAsyncBulkTensorReduceOp::verify() { + bool isIm2Col = (getMode() == TMAStoreMode::IM2COL); + return CpAsyncBulkTensorCommonVerifier(getCoordinates().size(), isIm2Col, 0, + getLoc()); } // Given the element type of an operand and whether or not it is an accumulator, @@ -1094,6 +1109,55 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims, } } +#define CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, mode) \ + llvm::Intrinsic::nvvm_cp_async_bulk_tensor_##op##_##mode##_##dim##d + +#define CP_ASYNC_BULK_TENSOR_REDUCE(op, dim, is_im2col) \ + is_im2col ? 
CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, im2col) \ + : CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, tile) + +#define GET_CP_ASYNC_BULK_TENSOR_ID(op, dims, is_im2col) \ + [&]() -> auto { \ + switch (dims) { \ + case 1: \ + return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 1, tile); \ + case 2: \ + return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 2, tile); \ + case 3: \ + return CP_ASYNC_BULK_TENSOR_REDUCE(op, 3, is_im2col); \ + case 4: \ + return CP_ASYNC_BULK_TENSOR_REDUCE(op, 4, is_im2col); \ + case 5: \ + return CP_ASYNC_BULK_TENSOR_REDUCE(op, 5, is_im2col); \ + default: \ + llvm_unreachable("Invalid TensorDim in CpAsyncBulkTensorReduceOp."); \ + } \ + }() + +llvm::Intrinsic::ID CpAsyncBulkTensorReduceOp::getIntrinsicID( + int tensorDims, NVVM::TMAReduxKind kind, bool isIm2Col) { + using RedTy = NVVM::TMAReduxKind; + switch (kind) { + case RedTy::ADD: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_add, tensorDims, isIm2Col); + case RedTy::MIN: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_min, tensorDims, isIm2Col); + case RedTy::MAX: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_max, tensorDims, isIm2Col); + case RedTy::INC: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_inc, tensorDims, isIm2Col); + case RedTy::DEC: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_dec, tensorDims, isIm2Col); + case RedTy::AND: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_and, tensorDims, isIm2Col); + case RedTy::OR: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_or, tensorDims, isIm2Col); + case RedTy::XOR: + return GET_CP_ASYNC_BULK_TENSOR_ID(reduce_xor, tensorDims, isIm2Col); + } + llvm_unreachable("Invalid Reduction Op for CpAsyncBulkTensorReduceOp"); +} + //===----------------------------------------------------------------------===// // NVVMDialect initialization, type parsing, and registration. 
//===----------------------------------------------------------------------===// diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index e1b870b53ad25c..10866c11bdb71b 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -352,6 +352,14 @@ declare_mlir_python_sources( dialects/quant.py _mlir_libs/_mlir/dialects/quant.pyi) +declare_mlir_dialect_python_bindings( + ADD_TO_PARENT MLIRPythonSources.Dialects + ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" + TD_FILE dialects/EmitC.td + SOURCES + dialects/emitc.py + DIALECT_NAME emitc) + declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" diff --git a/mlir/python/mlir/dialects/EmitC.td b/mlir/python/mlir/dialects/EmitC.td new file mode 100644 index 00000000000000..ff0a56d1550148 --- /dev/null +++ b/mlir/python/mlir/dialects/EmitC.td @@ -0,0 +1,14 @@ +//===-- EmitC.td - Entry point for EmitC bind --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef PYTHON_BINDINGS_EMITC +#define PYTHON_BINDINGS_EMITC + +include "mlir/Dialect/EmitC/IR/EmitC.td" + +#endif diff --git a/mlir/python/mlir/dialects/emitc.py b/mlir/python/mlir/dialects/emitc.py new file mode 100644 index 00000000000000..99c3286e576f1e --- /dev/null +++ b/mlir/python/mlir/dialects/emitc.py @@ -0,0 +1,5 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from ._emitc_ops_gen import * diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir index 25435cf51a6156..cfa5b5d569ca9d 100644 --- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir @@ -31,83 +31,6 @@ func.func @vectorize_1d_tensor_extract(%arg0: tensor<3xf32>, %arg1: tensor<4x3xi // ----- -#map = affine_map<() -> ()> -func.func @extract_scalar_from_0d_into_0d(%src: tensor, %init: tensor) -> tensor { - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = [] - } outs(%init : tensor) { - ^bb0(%in: f32): - %1 = tensor.extract %src[] : tensor - linalg.yield %1 : f32 - } -> tensor - - return %res : tensor -} - -// CHECK-LABEL: func.func @extract_scalar_from_0d_into_0d( -// CHECK-SAME: %[[SRC:.*]]: tensor, -// CHECK-SAME: %[[INIT:.*]]: tensor) -> tensor { -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector -// CHECK: vector.transfer_write %[[READ]], %[[INIT]][] : vector, tensor - -// ----- - -#map = affine_map<(n) -> (n)> -func.func @extract_scalar_from_0d_into_1d(%src: tensor, %init: tensor<1xf32>) -> tensor<1xf32> { - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = ["parallel"] - } outs(%init : tensor<1xf32>) { - ^bb0(%in: f32): - %1 = tensor.extract %src[] : tensor - linalg.yield %1 : f32 - } -> tensor<1xf32> - - return %res : tensor<1xf32> -} -// CHECK-LABEL: func.func @extract_scalar_from_0d_into_1d( -// CHECK-SAME: %[[SRC:.*]]: tensor, -// CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> { -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector -// CHECK: %[[READ_BCAST:.*]] = vector.broadcast 
%[[READ]] : vector to vector<1xf32> -// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32> - -// ----- - -#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { - %c0 = arith.constant 1 : index - %c1 = arith.constant 2 : index - - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = ["parallel", "parallel", "parallel"] - } outs(%init : tensor<1x1x3xf32>) { - ^bb0(%arg4: f32): - %1 = tensor.extract %src[%c0, %c1] : tensor<3x3xf32> - linalg.yield %1 : f32 - } -> tensor<1x1x3xf32> - - return %res : tensor<1x1x3xf32> -} - -// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_scalar_broadcast( -// CHECK-SAME: %[[SRC:.*]]: tensor<3x3xf32>, -// CHECK-SAME: %[[INIT:.*]]: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector -// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> -// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> - -// ----- - #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> func.func @vectorize_nd_tensor_extract_transfer_read_basic( %arg0: tensor<3x3x3xf32>, @@ -144,37 +67,6 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic( // CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[IDX1]], %[[IDX2]], %[[IDX3]]], %[[CST]] {in_bounds = [true, true, true]} : tensor<3x3x3xf32>, vector<1x1x3xf32> // CHECK: vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, 
true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> -// Same as example above, but reading into a column tensor. - -// TODO: Currently this fails to vectorise when the indices are non-constant. - -func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( - %input: tensor<3x3x3xf32>, - %output: tensor<3x1x1xf32>) -> tensor<3x1x1xf32> { - - %c0 = arith.constant 0 : index - %res = linalg.generic { - indexing_maps = [#map], - iterator_types = ["parallel", "parallel", "parallel"] - } outs(%output : tensor<3x1x1xf32>) { - ^bb0(%out: f32): - %5 = tensor.extract %input[%c0, %c0, %c0] : tensor<3x3x3xf32> - linalg.yield %5 : f32 - } -> tensor<3x1x1xf32> - - return %res : tensor<3x1x1xf32> -} - -// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( -// CHECK-SAME: %[[INPUT:.*]]: tensor<3x3x3xf32>, -// CHECK-SAME: %[[OUTPUT:.*]]: tensor<3x1x1xf32>) -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector -// CHECK: %[[BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<3x1x1xf32> -// CHECK: %[[RES:.*]] = vector.transfer_write %[[BCAST]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32> -// CHECK: return %[[RES]] : tensor<3x1x1xf32> - // ----- func.func @vectorize_nd_tensor_extract_transfer_read_complex(%6: tensor<45x80x16xf32>, %arg0: index, %arg2: index, %arg1: index, %arg4: index, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> { @@ -620,26 +512,6 @@ func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1: // ----- -#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -func.func @vectorize_0d_tensor_extract(%arg0: tensor, %arg2: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { - %2 = linalg.generic { - indexing_maps = [#map1], - iterator_types = ["parallel", 
"parallel", "parallel"] - } outs(%arg2 : tensor<1x1x3xf32>) { - ^bb0(%arg4: f32): - %7 = tensor.extract %arg0[] : tensor - linalg.yield %7 : f32 - } -> tensor<1x1x3xf32> - return %2 : tensor<1x1x3xf32> -} - -// CHECK-LABEL: func.func @vectorize_0d_tensor_extract( -// CHECK-SAME: %[[ARG_0:.*]]: tensor -// CHECK: %[[EXTRACT:.*]] = vector.transfer_read %[[ARG_0]][], %{{.+}} : tensor -// CHECK: vector.broadcast %[[EXTRACT]] : vector to vector<1x1x3xf32> - -// ----- - #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> #map1 = affine_map<(d0, d1, d2) -> (d0 + d1 + d2)> func.func @vectorize_reverse_like_tensor_extract(%arg0: tensor<1x2x3xf32>, %arg1: tensor<1x1x3xf32>, %arg2: index) -> tensor<1x1x3xf32> { @@ -674,17 +546,118 @@ func.func @vectorize_reverse_like_tensor_extract(%arg0: tensor<1x2x3xf32>, %arg1 // CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]] [%[[T3]]], %[[MASK]], %[[PASSTHRU]] // CHECK: vector.transfer_write %[[GATHER]] +//===----------------------------------------------------------------------===// +// Scalar load + broadcast +//===----------------------------------------------------------------------===// + +// ----- + +#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { + %c0 = arith.constant 1 : index + %c1 = arith.constant 2 : index + + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<1x1x3xf32>) { + ^bb0(%arg4: f32): + %1 = tensor.extract %src[%c0, %c1] : tensor<3x3xf32> + linalg.yield %1 : f32 + } -> tensor<1x1x3xf32> + + return %res : tensor<1x1x3xf32> +} + +// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_scalar_broadcast( +// CHECK-SAME: %[[SRC:.*]]: tensor<3x3xf32>, +// CHECK-SAME: %[[INIT:.*]]: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: 
%[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector +// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> +// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> + // ----- -func.func @vectorize_scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> { +#map = affine_map<() -> ()> +func.func @extract_scalar_from_0d_into_0d(%src: tensor, %init: tensor) -> tensor { + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = [] + } outs(%init : tensor) { + ^bb0(%in: f32): + %1 = tensor.extract %src[] : tensor + linalg.yield %1 : f32 + } -> tensor + + return %res : tensor +} + +// CHECK-LABEL: func.func @extract_scalar_from_0d_into_0d( +// CHECK-SAME: %[[SRC:.*]]: tensor, +// CHECK-SAME: %[[INIT:.*]]: tensor) -> tensor { +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector +// CHECK: vector.transfer_write %[[READ]], %[[INIT]][] : vector, tensor + +// ----- + +#map = affine_map<(n) -> (n)> +func.func @extract_scalar_from_0d_into_1d(%src: tensor, %init: tensor<1xf32>) -> tensor<1xf32> { + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = ["parallel"] + } outs(%init : tensor<1xf32>) { + ^bb0(%in: f32): + %1 = tensor.extract %src[] : tensor + linalg.yield %1 : f32 + } -> tensor<1xf32> + + return %res : tensor<1xf32> +} +// CHECK-LABEL: func.func @extract_scalar_from_0d_into_1d( +// CHECK-SAME: %[[SRC:.*]]: tensor, +// CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[PAD:.*]] = arith.constant 
0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector +// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1xf32> +// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32> + +// ----- + +#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @vectorize_0d_tensor_extract(%src: tensor, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> { + %res = linalg.generic { + indexing_maps = [#map1], + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<1x1x3xf32>) { + ^bb0(%arg4: f32): + %1 = tensor.extract %src[] : tensor + linalg.yield %1 : f32 + } -> tensor<1x1x3xf32> + return %res : tensor<1x1x3xf32> +} + +// CHECK-LABEL: func.func @vectorize_0d_tensor_extract( +// CHECK-SAME: %[[SRC:.*]]: tensor +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %{{.+}} : tensor +// CHECK: vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> + +// ----- + +func.func @scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> { %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index %src = arith.constant dense<[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> %res = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>], - iterator_types = ["parallel", "parallel", "parallel"]} - outs(%init : tensor<1x1x4xi32>) { + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<1x1x4xi32>) { ^bb0(%out: i32): %idx = linalg.index 0 : index @@ -695,13 +668,45 @@ func.func @vectorize_scalar_read_with_broadcast_from_column_tensor(%init: tensor return %res : tensor<1x1x4xi32> } -// CHECK-LABEL: func.func @vectorize_scalar_read_with_broadcast_from_column_tensor( +// CHECK-LABEL: func.func @scalar_read_with_broadcast_from_column_tensor // CHECK-SAME: %[[INIT:.*]]: tensor<1x1x4xi32>) -> 
tensor<1x1x4xi32> { -// CHECK: %[[PAD:.*]] = arith.constant 0 : i32 -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[SRC:.*]] = arith.constant dense<{{\[\[}}0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> -// CHECK: %[[IDX_VEC:.*]] = arith.constant dense<0> : vector<1xindex> +// CHECK-DAG: %[[PAD:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[SRC:.*]] = arith.constant dense<{{\[\[}}0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> +// CHECK-DAG: %[[IDX_VEC:.*]] = arith.constant dense<0> : vector<1xindex> // CHECK: %[[IDX_ELT:.*]] = vector.extract %[[IDX_VEC]][0] : index from vector<1xindex> // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{\[}}%[[IDX_ELT]], %[[C0]]], %[[PAD]] : tensor<15x1xi32>, vector // CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x4xi32> // CHECK: %[[RES:.*]] = vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x4xi32>, tensor<1x1x4xi32> + +// ----- + +// TODO: Currently this fails to vectorise when the indices are non-constant. 
+ +#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( + %src: tensor<3x3x3xf32>, + %init: tensor<3x1x1xf32>) -> tensor<3x1x1xf32> { + + %c0 = arith.constant 0 : index + + %res = linalg.generic { + indexing_maps = [#map], + iterator_types = ["parallel", "parallel", "parallel"] + } outs(%init : tensor<3x1x1xf32>) { + ^bb0(%out: f32): + %1 = tensor.extract %src[%c0, %c0, %c0] : tensor<3x3x3xf32> + linalg.yield %1 : f32 + } -> tensor<3x1x1xf32> + + return %res : tensor<3x1x1xf32> +} + +// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( +// CHECK-SAME: %[[SRC:.*]]: tensor<3x3x3xf32>, +// CHECK-SAME: %[[INIT:.*]]: tensor<3x1x1xf32>) +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector +// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<3x1x1xf32> +// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32> diff --git a/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir new file mode 100644 index 00000000000000..3809bc0bce8974 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/tma_store_reduce.mlir @@ -0,0 +1,313 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file --verify-diagnostics %s | FileCheck %s + +// CHECK-LABEL: define void @tma_store_reduce_1d( +llvm.func @tma_store_reduce_1d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.1d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.1d(ptr addrspace(3) 
%[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] 
l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.1d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, 
!llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +// CHECK-LABEL: define void @tma_store_reduce_2d( +llvm.func @tma_store_reduce_2d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.2d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce 
%tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void 
@llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.2d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +// CHECK-LABEL: define void @tma_store_reduce_3d_tile( +llvm.func @tma_store_reduce_3d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.3d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 
%[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + 
nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, 
%src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// CHECK-LABEL: define void @tma_store_reduce_3d_im2col( +llvm.func @tma_store_reduce_3d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.3d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], 
i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: 
call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.3d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : 
!llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +// CHECK-LABEL: define void @tma_store_reduce_4d_tile( +llvm.func @tma_store_reduce_4d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.4d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i32 %[[D3:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 
%[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 
%[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, 
box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// CHECK-LABEL: define void @tma_store_reduce_4d_im2col( +llvm.func @tma_store_reduce_4d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.4d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i32 %[[D3:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : 
!llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], 
i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.4d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + 
nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +// CHECK-LABEL: define void @tma_store_reduce_5d_tile( +llvm.func @tma_store_reduce_5d_tile(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %d4 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.5d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i32 %[[D3:.*]], i32 %[[D4:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 
%[[D4]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.tile.5d(ptr 
addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.tile.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = 
#nvvm.tma_redux_kind} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// CHECK-LABEL: define void @tma_store_reduce_5d_im2col( +llvm.func @tma_store_reduce_5d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %d2 : i32, %d3 : i32, %d4 : i32, %ch : i64) { + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.5d(ptr addrspace(3) %[[SRC:.*]], ptr %[[DST:.*]], i32 %[[D0:.*]], i32 %[[D1:.*]], i32 %[[D2:.*]], i32 %[[D3:.*]], i32 %[[D4:.*]], i64 %[[CH:.*]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 %[[CH]], i1 true) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, 
%d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] l2_cache_hint = %ch {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.add.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.min.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.max.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], 
i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.inc.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.dec.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.and.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.or.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + // CHECK: call void @llvm.nvvm.cp.async.bulk.tensor.reduce.xor.im2col.5d(ptr addrspace(3) %[[SRC]], ptr %[[DST]], i32 %[[D0]], i32 %[[D1]], i32 %[[D2]], i32 %[[D3]], i32 %[[D4]], i64 undef, i1 false) + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = 
#nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1, %d2, %d3, %d4] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode} : !llvm.ptr, !llvm.ptr<3> + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir index 58282adf4dda85..194011d1621a1c 100644 --- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir @@ -55,3 +55,19 @@ llvm.func @tma_prefetch_5d_im2col(%tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, % nvvm.cp.async.bulk.tensor.prefetch %tma_desc, box[%d0, %d1, %d2, %d3, %d4] im2col[%off0, %off1] : !llvm.ptr llvm.return } + +// ----- + +llvm.func @tma_reduce_0d(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %ch : i64) { + // expected-error @below {{expects coordinates between 1 to 5 dimension}} + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[] {redKind = #nvvm.tma_redux_kind}: !llvm.ptr, !llvm.ptr<3> + llvm.return +} + +// ----- + +llvm.func @tma_reduce_2d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 : i32, %d1 : i32, %ch : i64) { + // expected-error @below {{to use im2col mode, the tensor has to be at least 3-dimensional}} + nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> + llvm.return +} diff --git a/mlir/test/python/dialects/emitc_dialect.py b/mlir/test/python/dialects/emitc_dialect.py new file mode 100644 index 00000000000000..0c42c2d4084f19 --- /dev/null +++ b/mlir/test/python/dialects/emitc_dialect.py @@ -0,0 +1,31 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +import mlir.dialects.emitc as emitc + + +def run(f): + print("\nTEST:", f.__name__) + with Context() 
as ctx, Location.unknown(): + module = Module.create() + with InsertionPoint(module.body): + f(ctx) + print(module) + + +# CHECK-LABEL: TEST: testConstantOp +@run +def testConstantOp(ctx): + i32 = IntegerType.get_signless(32) + a = emitc.ConstantOp(result=i32, value=IntegerAttr.get(i32, 42)) + # CHECK: %{{.*}} = "emitc.constant"() <{value = 42 : i32}> : () -> i32 + + +# CHECK-LABEL: TEST: testAddOp +@run +def testAddOp(ctx): + i32 = IntegerType.get_signless(32) + lhs = emitc.ConstantOp(result=i32, value=IntegerAttr.get(i32, 0)) + rhs = emitc.ConstantOp(result=i32, value=IntegerAttr.get(i32, 0)) + a = emitc.AddOp(i32, lhs, rhs) + # CHECK: %{{.*}} = emitc.add %{{.*}}, %{{.*}} : (i32, i32) -> i32 diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 544becfa30b40f..b5be59051cdfa7 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -529,6 +529,24 @@ mlir_c_api_cc_library( ], ) +mlir_c_api_cc_library( + name = "CAPIEmitC", + srcs = [ + "lib/CAPI/Dialect/EmitC.cpp", + ], + hdrs = ["include/mlir-c/Dialect/EmitC.h"], + capi_deps = [ + ":CAPIIR", + ], + header_deps = [ + ":EmitCPassIncGen", + ], + includes = ["include"], + deps = [ + ":EmitCDialect", + ], +) + mlir_c_api_cc_library( name = "CAPILinalg", srcs = [ diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel index 254cab0db4a5d6..3d7a6164a4fa54 100644 --- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel @@ -539,6 +539,36 @@ filegroup( ], ) +##---------------------------------------------------------------------------## +# EmitC dialect. 
+##---------------------------------------------------------------------------## + +gentbl_filegroup( + name = "EmitCPyGen", + tbl_outs = [ + ( + [ + "-gen-python-op-bindings", + "-bind-dialect=emitc", + ], + "mlir/dialects/_emitc_ops_gen.py", + ), + ], + tblgen = "//mlir:mlir-tblgen", + td_file = "mlir/dialects/EmitC.td", + deps = [ + "//mlir:EmitCTdFiles", + ], +) + +filegroup( + name = "EmitCPyFiles", + srcs = [ + "mlir/dialects/emitc.py", + ":EmitCPyGen", + ], +) + ##---------------------------------------------------------------------------## # Index dialect. ##---------------------------------------------------------------------------##