From 642701952d409c6bdc07f208a5eceaa5a1190b4e Mon Sep 17 00:00:00 2001 From: orbiri Date: Mon, 18 Nov 2024 18:57:14 +0200 Subject: [PATCH 1/7] [CIR] Force cir.cmp to always return bool (#1110) It was always the intention for `cir.cmp` operations to return a bool result. Due to missing constraints, a bug slipped into codegen that created `cir.cmp` operations whose result type matched the original AST expression type. In C, as opposed to C++, boolean expressions have type `int`. This resulted in extra operations being codegened around boolean expressions and their usage. This commit both enforces the bool result type in the `cir.cmp` op definition and fixes the mentioned codegen bug. --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 3 +- clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 13 +- clang/test/CIR/CodeGen/bf16-ops.c | 289 +++++++++--------- clang/test/CIR/CodeGen/float16-ops.c | 144 ++++----- clang/test/CIR/CodeGen/fp16-ops.c | 72 ++--- clang/test/CIR/IR/invalid.cir | 10 + clang/test/CIR/Lowering/ThroughMLIR/doWhile.c | 79 +++-- clang/test/CIR/Lowering/ThroughMLIR/if.c | 156 +++++----- clang/test/CIR/Lowering/ThroughMLIR/while.c | 109 +++---- clang/test/CIR/Lowering/dot.cir | 12 +- clang/test/CIR/Lowering/goto.cir | 7 +- clang/test/CIR/Lowering/loops-with-break.cir | 47 ++- .../test/CIR/Lowering/loops-with-continue.cir | 47 ++- clang/test/CIR/Lowering/switch.cir | 5 +- clang/test/CIR/Transforms/mem2reg.c | 78 +++-- clang/test/CIR/Transforms/scf-prepare.cir | 33 +- clang/test/CIR/Transforms/simpl.c | 10 +- clang/test/CIR/Transforms/switch.cir | 5 +- 18 files changed, 531 insertions(+), 588 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index d97cbfe47a76..3d135e59e6ba 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -1268,8 +1268,7 @@ def CmpOp : CIR_Op<"cmp", [Pure, SameTypeOperands]> { ``` }]; - // TODO: get more accurate than CIR_AnyType - let results = (outs CIR_AnyType:$result); + let results = (outs CIR_BoolType:$result); let arguments = (ins Arg:$kind, CIR_AnyType:$lhs, CIR_AnyType:$rhs); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 6c4441ba0a1c..ee2a0c32cbff 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -947,12 +947,11 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { // Other kinds of vectors. Element-wise comparison returning // a vector. cir::CmpOpKind Kind = ClangCmpToCIRCmp(E->getOpcode()); - return Builder.create<cir::CmpOp>(CGF.getLoc(BOInfo.Loc), - CGF.getCIRType(BOInfo.FullType), - Kind, BOInfo.LHS, BOInfo.RHS); + Result = Builder.create<cir::CmpOp>( + CGF.getLoc(BOInfo.Loc), CGF.getCIRType(BOInfo.FullType), Kind, + BOInfo.LHS, BOInfo.RHS); } - } - if (BOInfo.isFixedPointOp()) { + } else if (BOInfo.isFixedPointOp()) { assert(0 && "not implemented"); } else { // FIXME(cir): handle another if above for CIR equivalent on @@ -966,9 +965,7 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { } cir::CmpOpKind Kind = ClangCmpToCIRCmp(E->getOpcode()); - return Builder.create<cir::CmpOp>(CGF.getLoc(BOInfo.Loc), - CGF.getCIRType(BOInfo.FullType), Kind, - BOInfo.LHS, BOInfo.RHS); + Result = Builder.createCompare(CGF.getLoc(BOInfo.Loc), Kind, LHS, RHS); } } else { // Complex Comparison: can only be an equality comparison. 
assert(0 && "not implemented"); diff --git a/clang/test/CIR/CodeGen/bf16-ops.c b/clang/test/CIR/CodeGen/bf16-ops.c index 479be9980546..406446b778eb 100644 --- a/clang/test/CIR/CodeGen/bf16-ops.c +++ b/clang/test/CIR/CodeGen/bf16-ops.c @@ -481,11 +481,11 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fsub bfloat %{{.+}}, %[[#A]] test = (h2 < h0); - // NONATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NONATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i - // NATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, %{{.+}} @@ -494,13 +494,13 @@ void foo(void) { test = (h2 < (__bf16)42.0); // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 - // NONATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#C]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 - // NATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + // NATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#C]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, 0xR4228 @@ -508,12 +508,12 @@ void foo(void) { test = (h2 < f0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM: %{{.+}} = fcmp olt float %[[#LHS]], %{{.+}} @@ -523,12 +523,12 @@ void foo(void) { test = (f2 < h0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // 
NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#RHS]] @@ -538,12 +538,12 @@ void foo(void) { test = (i0 < h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM: %{{.+}} = fcmp olt bfloat %[[#LHS]], %{{.+}} @@ -553,12 +553,12 @@ void foo(void) { test = (h0 < i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt bfloat %{{.+}}, %[[#RHS]] @@ -567,11 +567,11 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt bfloat %{{.+}}, %[[#RHS]] test = (h0 > h2); - // NONATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NONATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i - // NATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %{{.+}}, %{{.+}} @@ -580,13 +580,13 @@ void foo(void) { test = ((__bf16)42.0 > h2); // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 - // NONATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + // NONATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.bf16, 
!cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#C]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 - // NATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + // NATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#C]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp ogt bfloat 0xR4228, %{{.+}} @@ -594,12 +594,12 @@ void foo(void) { test = (h0 > f2); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}} @@ -609,12 +609,12 @@ void foo(void) { test = (f0 > h2); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]] @@ -624,12 +624,12 @@ void foo(void) { test = (i0 > h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %[[#LHS]], %{{.+}} @@ -639,12 +639,12 @@ void foo(void) { test = (h0 > i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), 
!cir.bf16 - // NONATIVE: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt bfloat %{{.+}}, %[[#RHS]] @@ -653,11 +653,11 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt bfloat %{{.+}}, %[[#RHS]] test = (h2 <= h0); - // NONATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NONATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i - // NATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, %{{.+}} @@ -666,13 +666,13 @@ void foo(void) { test = (h2 <= (__bf16)42.0); // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 - // NONATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#C]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 - // NATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + // NATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#C]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, 0xR4228 @@ -680,12 +680,12 @@ void foo(void) { test = (h2 <= f0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float // 
NONATIVE-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}} @@ -695,12 +695,12 @@ void foo(void) { test = (f2 <= h0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]] @@ -710,12 +710,12 @@ void foo(void) { test = (i0 <= h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM: %{{.+}} = fcmp ole bfloat %[[#LHS]], %{{.+}} @@ -725,12 +725,12 @@ void foo(void) { test = (h0 <= i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole bfloat %{{.+}}, %[[#RHS]] @@ -739,12 +739,13 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole bfloat %{{.+}}, %[[#RHS]] test = (h0 >= h2); - // NONATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NONATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %[[#B:]] = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-NEXT: %{{.+}} = cir.get_global @test : !cir.ptr<!u32i> - // NATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.bf16, 
!s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, %{{.+}} @@ -754,14 +755,14 @@ void foo(void) { // NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.bf16 - // NONATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#D]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#D]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.bf16 - // NATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#D]] : !s32i), !u32i + // NATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#D]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, 0xRC000 @@ -769,12 +770,12 @@ void foo(void) { test = (h0 >= f2); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}} @@ -784,12 +785,12 @@ void foo(void) { test = (f0 >= h2); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]] @@ -799,12 +800,12 @@ void foo(void) { test = (i0 >= h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE: %[[#B:]] = cir.cmp(ge, 
%[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM: %{{.+}} = fcmp oge bfloat %[[#LHS]], %{{.+}} @@ -814,12 +815,12 @@ void foo(void) { test = (h0 >= i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oge bfloat %{{.+}}, %[[#RHS]] @@ -828,11 +829,11 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oge bfloat %{{.+}}, %[[#RHS]] test = (h1 == h2); - // NONATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NONATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i - // NATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %{{.+}}, %{{.+}} @@ -841,13 +842,13 @@ void foo(void) { test = (h1 == (__bf16)1.0); // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 - // NONATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#C]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 - // NATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + // NATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#C]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp oeq bfloat 
%{{.+}}, 0xR3F80 @@ -855,12 +856,12 @@ void foo(void) { test = (h1 == f1); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM: %{{.+}} = fcmp oeq float %[[#A]], %{{.+}} @@ -870,12 +871,12 @@ void foo(void) { test = (f1 == h1); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]] @@ -885,12 +886,12 @@ void foo(void) { test = (i0 == h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %[[#LHS]], %{{.+}} @@ -900,12 +901,12 @@ void foo(void) { test = (h0 == i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] 
= cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq bfloat %{{.+}}, %[[#RHS]] @@ -914,11 +915,11 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq bfloat %{{.+}}, %[[#RHS]] test = (h1 != h2); - // NONATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NONATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i - // NATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#A]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, %{{.+}} @@ -926,13 +927,13 @@ void foo(void) { test = (h1 != (__bf16)1.0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.double), !cir.bf16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 - // NATIVE-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + // NATIVE-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#C]] : !cir.bool), !s32i // NONATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, 0xR3F80 @@ -940,12 +941,12 @@ void foo(void) { test = (h1 != f1); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}} @@ -955,12 +956,12 @@ void foo(void) { test = (f1 != h1); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, 
%[[#A]]) : !cir.float, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#RHS]] @@ -970,12 +971,12 @@ void foo(void) { test = (i0 != h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM: %{{.+}} = fcmp une bfloat %[[#LHS]], %{{.+}} @@ -985,12 +986,12 @@ void foo(void) { test = (h0 != i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i - // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool + // NATIVE-NEXT: %{{.+}} = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une bfloat %{{.+}}, %[[#RHS]] diff --git a/clang/test/CIR/CodeGen/float16-ops.c b/clang/test/CIR/CodeGen/float16-ops.c index 5b3b7127476b..43f686a8b360 100644 --- a/clang/test/CIR/CodeGen/float16-ops.c +++ b/clang/test/CIR/CodeGen/float16-ops.c @@ -477,10 +477,10 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fsub half %{{.+}}, %[[#A]] test = (h2 < h0); - // NONATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i - // NATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, %{{.+}} @@ -490,12 +490,12 @@ void foo(void) { test = (h2 < (_Float16)42.0); // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // NONATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : 
!s32i), !u32i // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // NATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, 0xH5140 @@ -504,11 +504,11 @@ void foo(void) { test = (h2 < f0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float @@ -519,11 +519,11 @@ void foo(void) { test = (f2 < h0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.=}} to float @@ -534,11 +534,11 @@ void foo(void) { test = (i0 < h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half @@ -549,11 +549,11 @@ void foo(void) { test = (h0 < i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half @@ -563,10 +563,10 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt half %{{.+}}, %[[#A]] test = (h0 > h2); - // NONATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) 
: !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i - // NATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp ogt half %{{.+}}, %{{.+}} @@ -576,12 +576,12 @@ void foo(void) { test = ((_Float16)42.0 > h2); // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // NONATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // NATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp ogt half 0xH5140, %{{.+}} @@ -590,11 +590,11 @@ void foo(void) { test = (h0 > f2); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.=}} to float @@ -605,11 +605,11 @@ void foo(void) { test = (f0 > h2); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float @@ -620,11 +620,11 @@ void foo(void) { test = (i0 > h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -635,11 +635,11 @@ void foo(void) { test = (h0 > i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE: %[[#B:]] = cir.cmp(gt, 
%{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half @@ -649,10 +649,10 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt half %{{.+}}, %[[#RHS]] test = (h2 <= h0); - // NONATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i - // NATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, %{{.+}} @@ -662,12 +662,12 @@ void foo(void) { test = (h2 <= (_Float16)42.0); // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // NONATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // NATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, 0xH5140 @@ -676,11 +676,11 @@ void foo(void) { test = (h2 <= f0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float @@ -691,11 +691,11 @@ void foo(void) { test = (f2 <= h0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float @@ -706,11 +706,11 @@ void foo(void) { test = (i0 <= h0); 
// NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -721,11 +721,11 @@ void foo(void) { test = (h0 <= i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half @@ -735,11 +735,11 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole half %{{.+}}, %[[#RHS]] test = (h0 >= h2); - // NONATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // NONATIVE-NEXT: %{{.+}} = cir.get_global @test : !cir.ptr<!u32i> - // NATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, %{{.+}} @@ -750,13 +750,13 @@ void foo(void) { // NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.f16 - // NONATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#D]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.f16 - // NATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#D]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, 0xHC000 @@ -765,11 +765,11 @@ void foo(void) { test = (h0 >= f2); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : 
!cir.float, !s32i + // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float @@ -780,11 +780,11 @@ void foo(void) { test = (f0 >= h2); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float @@ -795,11 +795,11 @@ void foo(void) { test = (i0 >= h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -810,11 +810,11 @@ void foo(void) { test = (h0 >= i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half @@ -824,10 +824,10 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oge half %{{.+}}, %[[#RHS]] test = (h1 == h2); - // NONATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i - // NATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, %{{.+}} @@ -837,12 +837,12 @@ void foo(void) { test = (h1 == (_Float16)1.0); // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // NONATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double // NATIVE-NEXT: %[[#B:]] = 
cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // NATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, 0xH3C00 @@ -851,11 +851,11 @@ void foo(void) { test = (h1 == f1); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float @@ -866,11 +866,11 @@ void foo(void) { test = (f1 == h1); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float @@ -881,11 +881,11 @@ void foo(void) { test = (i0 == h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -896,11 +896,11 @@ void foo(void) { test = (h0 == i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half @@ -910,10 +910,10 @@ void foo(void) { // NATIVE-LLVM-NEXT: %{{.=}} = fcmp oeq half %{{.+}}, %[[#RHS]] test = (h1 != h2); - // NONATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i - 
// NATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, %{{.+}} @@ -922,12 +922,12 @@ void foo(void) { test = (h1 != (_Float16)1.0); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.double), !cir.f16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // NATIVE-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // NONATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, 0xH3C00 @@ -936,11 +936,11 @@ void foo(void) { test = (h1 != f1); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float @@ -951,11 +951,11 @@ void foo(void) { test = (f1 != h1); // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float @@ -966,11 +966,11 @@ void foo(void) { test = (i0 != h0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -981,11 +981,11 @@ void foo(void) { test = (h0 != i0); // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NONATIVE-NEXT: %{{.+}} = 
cir.cast(integral, %[[#B]] : !s32i), !u32i // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half diff --git a/clang/test/CIR/CodeGen/fp16-ops.c b/clang/test/CIR/CodeGen/fp16-ops.c index 04cf64700d74..708d5db1dde0 100644 --- a/clang/test/CIR/CodeGen/fp16-ops.c +++ b/clang/test/CIR/CodeGen/fp16-ops.c @@ -228,7 +228,7 @@ void foo(void) { // CHECK-LLVM-NEXT: %{{.+}} = fsub half %{{.+}}, %[[#A]] test = (h2 < h0); - // CHECK: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp olt half %{{.+}}, %{{.+}} @@ -236,14 +236,14 @@ void foo(void) { test = (h2 < (__fp16)42.0); // CHECK: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // CHECK-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // CHECK-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp olt half %{{.+}}, 0xH5140 test = (h2 < f0); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // CHECK: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#A:]] = fpext half %{{.+}} to float @@ -251,7 +251,7 @@ void foo(void) { test = (f2 < h0); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#A:]] = fpext half %{{.=}} to float @@ -259,7 +259,7 @@ void foo(void) { test = (i0 < h0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half @@ -267,14 +267,14 @@ void foo(void) { test = (h0 < i0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half // CHECK-LLVM-NEXT: %{{.+}} = fcmp olt half %{{.+}}, %[[#A]] test = (h0 > h2); - // CHECK: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp ogt half %{{.+}}, %{{.+}} @@ -282,14 +282,14 @@ void foo(void) { test = ((__fp16)42.0 > h2); // CHECK: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // CHECK-NEXT: %[[#B:]] = 
cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // CHECK: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp ogt half 0xH5140, %{{.+}} test = (h0 > f2); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // CHECK: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.=}} to float @@ -297,7 +297,7 @@ void foo(void) { test = (f0 > h2); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float @@ -305,7 +305,7 @@ void foo(void) { test = (i0 > h0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -313,14 +313,14 @@ void foo(void) { test = (h0 > i0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half // CHECK-LLVM-NEXT: %{{.+}} = fcmp ogt half %{{.+}}, %[[#RHS]] test = (h2 <= h0); - // CHECK: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp ole half %{{.+}}, %{{.+}} @@ -328,14 +328,14 @@ void foo(void) { test = (h2 <= (__fp16)42.0); // CHECK: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double // CHECK-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // CHECK-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp ole half %{{.+}}, 0xH5140 test = (h2 <= f0); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !s32i + // CHECK: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float @@ -343,7 +343,7 @@ void foo(void) { test = (f2 <= h0); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float @@ -351,7 +351,7 @@ void 
foo(void) { test = (i0 <= h0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -359,14 +359,14 @@ void foo(void) { test = (h0 <= i0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half // CHECK-LLVM-NEXT: %{{.+}} = fcmp ole half %{{.+}}, %[[#RHS]] test = (h0 >= h2); - // CHECK: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp oge half %{{.+}}, %{{.+}} @@ -375,14 +375,14 @@ void foo(void) { // CHECK: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double // CHECK-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double // CHECK-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.f16 - // CHECK-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#D]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp oge half %{{.+}}, 0xHC000 test = (h0 >= f2); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !s32i + // CHECK: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float @@ -390,7 +390,7 @@ void foo(void) { test = (f0 >= h2); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float @@ -398,7 +398,7 @@ void foo(void) { test = (i0 >= h0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -406,14 +406,14 @@ void foo(void) { test = (h0 >= i0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half // CHECK-LLVM-NEXT: %{{.+}} = fcmp oge half %{{.+}}, %[[#RHS]] test = (h1 == h2); - // CHECK: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // 
CHECK-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, %{{.+}} @@ -421,14 +421,14 @@ void foo(void) { test = (h1 == (__fp16)1.0); // CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double // CHECK-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // CHECK-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, 0xH3C00 test = (h1 == f1); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !s32i + // CHECK: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float @@ -436,7 +436,7 @@ void foo(void) { test = (f1 == h1); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float @@ -444,7 +444,7 @@ void foo(void) { test = (i0 == h0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -452,14 +452,14 @@ void foo(void) { test = (h0 == i0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half // CHECK-LLVM-NEXT: %{{.=}} = fcmp oeq half %{{.+}}, %[[#RHS]] test = (h1 != h2); - // CHECK: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp une half %{{.+}}, %{{.+}} @@ -467,14 +467,14 @@ void foo(void) { test = (h1 != (__fp16)1.0); // CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double // CHECK-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 - // CHECK-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i // CHECK-LLVM: %{{.+}} = fcmp une half %{{.+}}, 0xH3C00 test = (h1 != f1); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !s32i + // CHECK: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.=}} to float @@ -482,7 +482,7 @@ void foo(void) { test = (f1 != h1); // CHECK: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float - // CHECK-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // 
CHECK-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#A:]] = fpext half %{{.+}} to float @@ -490,7 +490,7 @@ void foo(void) { test = (i0 != h0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // CHECK: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half @@ -498,7 +498,7 @@ void foo(void) { test = (h0 != i0); // CHECK: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 - // CHECK-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // CHECK-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool // CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i // CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half diff --git a/clang/test/CIR/IR/invalid.cir b/clang/test/CIR/IR/invalid.cir index 6acb9592246a..af516b2aaed6 100644 --- a/clang/test/CIR/IR/invalid.cir +++ b/clang/test/CIR/IR/invalid.cir @@ -1437,3 +1437,13 @@ cir.global external @f = #cir.fp<0x7FC00000 : !cir.float> : !cir.float // expected-error @below {{unexpected decimal integer literal for a floating point value}} // expected-note @below {{add a trailing dot to make the literal a float}} cir.global external @f = #cir.fp<42> : !cir.float + +// ----- + +// Verify +!s32i = !cir.int +cir.func @cast0(%arg0: !s32i, %arg1: !s32i) { + // expected-error @below {{custom op 'cir.cmp' invalid kind of Type specified}} + %1 = cir.cmp(eq, %arg0, %arg1): !s32i, !s32i + cir.return +} diff --git a/clang/test/CIR/Lowering/ThroughMLIR/doWhile.c b/clang/test/CIR/Lowering/ThroughMLIR/doWhile.c index b6069e8a787e..8cc32dc96c94 100644 --- a/clang/test/CIR/Lowering/ThroughMLIR/doWhile.c +++ b/clang/test/CIR/Lowering/ThroughMLIR/doWhile.c @@ -43,10 +43,7 @@ void nestedDoWhile() { // CHECK: %[[VAR4:.+]] = memref.load %[[ALLOC1]][] : memref // CHECK: %[[C10_I32:.+]] = arith.constant 10 : i32 // CHECK: %[[CMP:.+]] = arith.cmpi sle, %[[VAR4]], %[[C10_I32]] : i32 -// CHECK: %[[EXT:.+]] = arith.extui %[[CMP]] : i1 to i32 -// CHECK: %[[C0_I32_3:.+]] = arith.constant 0 : i32 -// CHECK: %[[NE:.+]] = arith.cmpi ne, %[[EXT]], %[[C0_I32_3]] : i32 -// CHECK: %[[EXT1:.+]] = arith.extui %[[NE]] : i1 to i8 +// CHECK: %[[EXT1:.+]] = arith.extui %[[CMP]] : i1 to i8 // CHECK: %[[TRUNC:.+]] = arith.trunci %[[EXT1]] : i8 to i1 // CHECK: scf.condition(%[[TRUNC]]) // CHECK: } do { @@ -59,49 +56,43 @@ void nestedDoWhile() { // CHECK: return %[[RET]] : i32 // CHECK: func.func @nestedDoWhile() { -// CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref -// CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 -// CHECK: memref.store %[[C0_I32]], %[[alloca]][] : memref +// CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref +// CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +// CHECK: memref.store %[[C0_I32]], %[[alloca]][] : memref // CHECK: memref.alloca_scope { -// CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref +// CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref // CHECK: scf.while : () -> () { -// CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref -// CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 -// CHECK: %[[ONE:.+]] = arith.addi %[[ZERO]], %[[C1_I32]] : i32 -// CHECK: memref.store %[[ONE]], %[[alloca]][] : memref 
-// CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 -// CHECK: memref.store %[[C0_I32_1]], %[[alloca_0]][] : memref +// CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +// CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +// CHECK: %[[ONE:.+]] = arith.addi %[[ZERO]], %[[C1_I32]] : i32 +// CHECK: memref.store %[[ONE]], %[[alloca]][] : memref +// CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 +// CHECK: memref.store %[[C0_I32_1]], %[[alloca_0]][] : memref // CHECK: memref.alloca_scope { // CHECK: scf.while : () -> () { -// CHECK: %[[EIGHT:.+]] = memref.load %[[alloca_0]][] : memref -// CHECK: %[[C2_I32_3:.+]] = arith.constant 2 : i32 -// CHECK: %[[NINE:.+]] = arith.cmpi slt, %[[EIGHT]], %[[C2_I32_3]] : i32 -// CHECK: %[[TEN:.+]] = arith.extui %9 : i1 to i32 -// CHECK: %[[C0_I32_4:.+]] = arith.constant 0 : i32 -// CHECK: %[[ELEVEN:.+]] = arith.cmpi ne, %[[TEN]], %[[C0_I32_4]] : i32 -// CHECK: %[[TWELVE:.+]] = arith.extui %[[ELEVEN]] : i1 to i8 -// CHECK: %[[THIRTEEN:.+]] = arith.trunci %[[TWELVE]] : i8 to i1 -// CHECK: scf.condition(%[[THIRTEEN]]) +// CHECK: %[[EIGHT:.+]] = memref.load %[[alloca_0]][] : memref +// CHECK: %[[C2_I32_3:.+]] = arith.constant 2 : i32 +// CHECK: %[[NINE:.+]] = arith.cmpi slt, %[[EIGHT]], %[[C2_I32_3]] : i32 +// CHECK: %[[TWELVE:.+]] = arith.extui %[[NINE]] : i1 to i8 +// CHECK: %[[THIRTEEN:.+]] = arith.trunci %[[TWELVE]] : i8 to i1 +// CHECK: scf.condition(%[[THIRTEEN]]) // CHECK: } do { -// CHECK: %[[EIGHT]] = memref.load %[[alloca_0]][] : memref -// CHECK: %[[C1_I32_3:.+]] = arith.constant 1 : i32 -// CHECK: %[[NINE]] = arith.addi %[[EIGHT]], %[[C1_I32_3]] : i32 -// CHECK: memref.store %[[NINE]], %[[alloca_0]][] : memref -// CHECK: scf.yield -// CHECK: } -// CHECK: } -// CHECK: %[[TWO:.+]] = memref.load %[[alloca]][] : memref -// CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 -// CHECK: %[[THREE:.+]] = arith.cmpi slt, %[[TWO]], %[[C2_I32]] : i32 -// CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i32 -// CHECK: %[[C0_I32_2:.+]] = arith.constant 0 : i32 -// CHECK: %[[FIVE:.+]] = arith.cmpi ne, %[[FOUR]], %[[C0_I32_2]] : i32 -// CHECK: %[[SIX:.+]] = arith.extui %[[FIVE]] : i1 to i8 -// CHECK: %[[SEVEN:.+]] = arith.trunci %[[SIX]] : i8 to i1 -// CHECK: scf.condition(%[[SEVEN]]) +// CHECK: %[[EIGHT]] = memref.load %[[alloca_0]][] : memref +// CHECK: %[[C1_I32_3:.+]] = arith.constant 1 : i32 +// CHECK: %[[NINE]] = arith.addi %[[EIGHT]], %[[C1_I32_3]] : i32 +// CHECK: memref.store %[[NINE]], %[[alloca_0]][] : memref +// CHECK: scf.yield +// CHECK: } +// CHECK: } +// CHECK: %[[TWO:.+]] = memref.load %[[alloca]][] : memref +// CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +// CHECK: %[[THREE:.+]] = arith.cmpi slt, %[[TWO]], %[[C2_I32]] : i32 +// CHECK: %[[SIX:.+]] = arith.extui %[[THREE]] : i1 to i8 +// CHECK: %[[SEVEN:.+]] = arith.trunci %[[SIX]] : i8 to i1 +// CHECK: scf.condition(%[[SEVEN]]) // CHECK: } do { -// CHECK: scf.yield -// CHECK: } -// CHECK: } -// CHECK: return -// CHECK: } \ No newline at end of file +// CHECK: scf.yield +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/if.c b/clang/test/CIR/Lowering/ThroughMLIR/if.c index 4ff228514cd6..8e88346c727f 100644 --- a/clang/test/CIR/Lowering/ThroughMLIR/if.c +++ b/clang/test/CIR/Lowering/ThroughMLIR/if.c @@ -13,34 +13,31 @@ void foo() { //CHECK: func.func @foo() { //CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref -//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref -//CHECK: %[[C2_I32:.+]] = 
arith.constant 2 : i32 -//CHECK: memref.store %[[C2_I32]], %[[alloca]][] : memref -//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 -//CHECK: memref.store %[[C0_I32]], %[[alloca_0]][] : memref +//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +//CHECK: memref.store %[[C2_I32]], %[[alloca]][] : memref +//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +//CHECK: memref.store %[[C0_I32]], %[[alloca_0]][] : memref //CHECK: memref.alloca_scope { -//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref -//CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 -//CHECK: %[[ONE:.+]] = arith.cmpi sgt, %[[ZERO]], %[[C0_I32_1]] : i32 -//CHECK: %[[TWO:.+]] = arith.extui %[[ONE]] : i1 to i32 -//CHECK: %[[C0_I32_2:.+]] = arith.constant 0 : i32 -//CHECK: %[[THREE:.+]] = arith.cmpi ne, %[[TWO]], %[[C0_I32_2]] : i32 -//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i8 -//CHECK: %[[FIVE:.+]] = arith.trunci %[[FOUR]] : i8 to i1 +//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 +//CHECK: %[[ONE:.+]] = arith.cmpi sgt, %[[ZERO]], %[[C0_I32_1]] : i32 +//CHECK: %[[FOUR:.+]] = arith.extui %[[ONE]] : i1 to i8 +//CHECK: %[[FIVE:.+]] = arith.trunci %[[FOUR]] : i8 to i1 //CHECK: scf.if %[[FIVE]] { -//CHECK: %[[SIX:.+]] = memref.load %[[alloca_0]][] : memref -//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 -//CHECK: %[[SEVEN:.+]] = arith.addi %[[SIX]], %[[C1_I32]] : i32 -//CHECK: memref.store %[[SEVEN]], %[[alloca_0]][] : memref +//CHECK: %[[SIX:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: %[[SEVEN:.+]] = arith.addi %[[SIX]], %[[C1_I32]] : i32 +//CHECK: memref.store %[[SEVEN]], %[[alloca_0]][] : memref //CHECK: } else { -//CHECK: %[[SIX:.+]] = memref.load %[[alloca_0]][] : memref -//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 -//CHECK: %[[SEVEN:.+]] = arith.subi %[[SIX]], %[[C1_I32]] : i32 -//CHECK: memref.store %[[SEVEN]], %[[alloca_0]][] : memref -//CHECK: } -//CHECK: } -//CHECK: return -//CHECK: } +//CHECK: %[[SIX:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: %[[SEVEN:.+]] = arith.subi %[[SIX]], %[[C1_I32]] : i32 +//CHECK: memref.store %[[SEVEN]], %[[alloca_0]][] : memref +//CHECK: } +//CHECK: } +//CHECK: return +//CHECK: } void foo2() { int a = 2; @@ -51,30 +48,27 @@ void foo2() { } //CHECK: func.func @foo2() { -//CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref -//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref -//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 -//CHECK: memref.store %[[C2_I32]], %[[alloca]][] : memref -//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 -//CHECK: memref.store %[[C0_I32]], %[[alloca_0]][] : memref +//CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +//CHECK: memref.store %[[C2_I32]], %[[alloca]][] : memref +//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +//CHECK: memref.store %[[C0_I32]], %[[alloca_0]][] : memref //CHECK: memref.alloca_scope { -//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref -//CHECK: %[[C3_I32:.+]] = arith.constant 3 : i32 -//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C3_I32]] : i32 -//CHECK: %[[TWO:.+]] = arith.extui %[[ONE]] : i1 to i32 -//CHECK: %[[C0_I32_1]] = arith.constant 0 : i32 -//CHECK: 
%[[THREE:.+]] = arith.cmpi ne, %[[TWO]], %[[C0_I32_1]] : i32 -//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i8 -//CHECK: %[[FIVE]] = arith.trunci %[[FOUR]] : i8 to i1 +//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C3_I32:.+]] = arith.constant 3 : i32 +//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C3_I32]] : i32 +//CHECK: %[[FOUR:.+]] = arith.extui %[[ONE]] : i1 to i8 +//CHECK: %[[FIVE]] = arith.trunci %[[FOUR]] : i8 to i1 //CHECK: scf.if %[[FIVE]] { -//CHECK: %[[SIX:.+]] = memref.load %[[alloca_0]][] : memref -//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 -//CHECK: %[[SEVEN:.+]] = arith.addi %[[SIX]], %[[C1_I32]] : i32 -//CHECK: memref.store %[[SEVEN]], %[[alloca_0]][] : memref -//CHECK: } -//CHECK: } -//CHECK: return -//CHECK: } +//CHECK: %[[SIX:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: %[[SEVEN:.+]] = arith.addi %[[SIX]], %[[C1_I32]] : i32 +//CHECK: memref.store %[[SEVEN]], %[[alloca_0]][] : memref +//CHECK: } +//CHECK: } +//CHECK: return +//CHECK: } void foo3() { int a = 2; @@ -93,45 +87,39 @@ void foo3() { //CHECK: func.func @foo3() { //CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref //CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref -//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 -//CHECK: memref.store %[[C2_I32]], %[[alloca]][] : memref -//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 -//CHECK: memref.store %[[C0_I32]], %[[alloca_0]][] : memref +//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +//CHECK: memref.store %[[C2_I32]], %[[alloca]][] : memref +//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +//CHECK: memref.store %[[C0_I32]], %[[alloca_0]][] : memref //CHECK: memref.alloca_scope { -//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref -//CHECK: %[[C3_I32:.+]] = arith.constant 3 : i32 -//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C3_I32]] : i32 -//CHECK: %[[TWO:.+]] = arith.extui %[[ONE]] : i1 to i32 -//CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 -//CHECK: %[[THREE:.+]] = arith.cmpi ne, %[[TWO:.+]], %[[C0_I32_1]] : i32 -//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i8 -//CHECK: %[[FIVE]] = arith.trunci %[[FOUR]] : i8 to i1 +//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C3_I32:.+]] = arith.constant 3 : i32 +//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C3_I32]] : i32 +//CHECK: %[[FOUR:.+]] = arith.extui %[[ONE]] : i1 to i8 +//CHECK: %[[FIVE]] = arith.trunci %[[FOUR]] : i8 to i1 //CHECK: scf.if %[[FIVE]] { -//CHECK: %[[alloca_2:.+]] = memref.alloca() {alignment = 4 : i64} : memref -//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 -//CHECK: memref.store %[[C1_I32]], %[[alloca_2]][] : memref +//CHECK: %[[alloca_2:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: memref.store %[[C1_I32]], %[[alloca_2]][] : memref //CHECK: memref.alloca_scope { -//CHECK: %[[SIX:.+]] = memref.load %[[alloca_2]][] : memref -//CHECK: %[[C2_I32_3:.+]] = arith.constant 2 : i32 -//CHECK: %[[SEVEN:.+]] = arith.cmpi sgt, %[[SIX]], %[[C2_I32_3]] : i32 -//CHECK: %[[EIGHT:.+]] = arith.extui %[[SEVEN]] : i1 to i32 -//CHECK: %[[C0_I32_4:.+]] = arith.constant 0 : i32 -//CHECK: %[[NINE:.+]] = arith.cmpi ne, %[[EIGHT]], %[[C0_I32_4]] : i32 -//CHECK: %[[TEN:.+]] = arith.extui %[[NINE]] : i1 to i8 -//CHECK: %[[ELEVEN:.+]] = arith.trunci %[[TEN]] : i8 to i1 +//CHECK: %[[SIX:.+]] = memref.load %[[alloca_2]][] : memref +//CHECK: 
%[[C2_I32_3:.+]] = arith.constant 2 : i32 +//CHECK: %[[SEVEN:.+]] = arith.cmpi sgt, %[[SIX]], %[[C2_I32_3]] : i32 +//CHECK: %[[TEN:.+]] = arith.extui %[[SEVEN]] : i1 to i8 +//CHECK: %[[ELEVEN:.+]] = arith.trunci %[[TEN]] : i8 to i1 //CHECK: scf.if %[[ELEVEN]] { -//CHECK: %[[TWELVE:.+]] = memref.load %[[alloca_0]][] : memref -//CHECK: %[[C1_I32_5:.+]] = arith.constant 1 : i32 -//CHECK: %[[THIRTEEN:.+]] = arith.addi %[[TWELVE]], %[[C1_I32_5]] : i32 -//CHECK: memref.store %[[THIRTEEN]], %[[alloca_0]][] : memref +//CHECK: %[[TWELVE:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32_5:.+]] = arith.constant 1 : i32 +//CHECK: %[[THIRTEEN:.+]] = arith.addi %[[TWELVE]], %[[C1_I32_5]] : i32 +//CHECK: memref.store %[[THIRTEEN]], %[[alloca_0]][] : memref //CHECK: } else { -//CHECK: %[[TWELVE:.+]] = memref.load %[[alloca_0]][] : memref -//CHECK: %[[C1_I32_5:.+]] = arith.constant 1 : i32 -//CHECK: %[[THIRTEEN:.+]] = arith.subi %[[TWELVE]], %[[C1_I32_5]] : i32 -//CHECK: memref.store %[[THIRTEEN]], %[[alloca_0]][] : memref -//CHECK: } -//CHECK: } -//CHECK: } -//CHECK: } -//CHECK: return -//CHECK: } +//CHECK: %[[TWELVE:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32_5:.+]] = arith.constant 1 : i32 +//CHECK: %[[THIRTEEN:.+]] = arith.subi %[[TWELVE]], %[[C1_I32_5]] : i32 +//CHECK: memref.store %[[THIRTEEN]], %[[alloca_0]][] : memref +//CHECK: } +//CHECK: } +//CHECK: } +//CHECK: } +//CHECK: return +//CHECK: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/while.c b/clang/test/CIR/Lowering/ThroughMLIR/while.c index 40ad92de95e4..8cc1f7bca30d 100644 --- a/clang/test/CIR/Lowering/ThroughMLIR/while.c +++ b/clang/test/CIR/Lowering/ThroughMLIR/while.c @@ -21,75 +21,66 @@ void nestedWhile() { //CHECK: func.func @singleWhile() { //CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref -//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 -//CHECK: memref.store %[[C0_I32]], %[[alloca]][] : memref +//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +//CHECK: memref.store %[[C0_I32]], %[[alloca]][] : memref //CHECK: memref.alloca_scope { //CHECK: scf.while : () -> () { -//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref -//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 -//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO:.+]], %[[C2_I32]] : i32 -//CHECK: %[[TWO:.+]] = arith.extui %[[ONE:.+]] : i1 to i32 -//CHECK: %[[C0_I32_0:.+]] = arith.constant 0 : i32 -//CHECK: %[[THREE:.+]] = arith.cmpi ne, %[[TWO:.+]], %[[C0_I32_0]] : i32 -//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE:.+]] : i1 to i8 -//CHECK: %[[FIVE:.+]] = arith.trunci %[[FOUR:.+]] : i8 to i1 -//CHECK: scf.condition(%[[FIVE]]) +//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO:.+]], %[[C2_I32]] : i32 +//CHECK: %[[FOUR:.+]] = arith.extui %[[ONE:.+]] : i1 to i8 +//CHECK: %[[FIVE:.+]] = arith.trunci %[[FOUR:.+]] : i8 to i1 +//CHECK: scf.condition(%[[FIVE]]) //CHECK: } do { -//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref -//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 -//CHECK: %[[ONE:.+]] = arith.addi %0, %[[C1_I32:.+]] : i32 -//CHECK: memref.store %[[ONE:.+]], %[[alloca]][] : memref -//CHECK: scf.yield -//CHECK: } -//CHECK: } -//CHECK: return -//CHECK: } +//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: %[[ONE:.+]] = arith.addi %0, %[[C1_I32:.+]] : i32 +//CHECK: memref.store %[[ONE:.+]], %[[alloca]][] : memref +//CHECK: scf.yield 
+//CHECK: } +//CHECK: } +//CHECK: return +//CHECK: } //CHECK: func.func @nestedWhile() { //CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref //CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 -//CHECK: memref.store %[[C0_I32]], %[[alloca]][] : memref +//CHECK: memref.store %[[C0_I32]], %[[alloca]][] : memref //CHECK: memref.alloca_scope { -//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref //CHECK: scf.while : () -> () { -//CHECK: %[[ZERO:.+]] = memref.load %alloca[] : memref -//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 -//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C2_I32]] : i32 -//CHECK: %[[TWO:.+]] = arith.extui %[[ONE]] : i1 to i32 -//CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 -//CHECK: %[[THREE:.+]] = arith.cmpi ne, %[[TWO]], %[[C0_I32_1]] : i32 -//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i8 -//CHECK: %[[FIVE:.+]] = arith.trunci %[[FOUR]] : i8 to i1 -//CHECK: scf.condition(%[[FIVE]]) +//CHECK: %[[ZERO:.+]] = memref.load %alloca[] : memref +//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C2_I32]] : i32 +//CHECK: %[[FOUR:.+]] = arith.extui %[[ONE]] : i1 to i8 +//CHECK: %[[FIVE:.+]] = arith.trunci %[[FOUR]] : i8 to i1 +//CHECK: scf.condition(%[[FIVE]]) //CHECK: } do { -//CHECK: %[[C0_I32_1]] = arith.constant 0 : i32 -//CHECK: memref.store %[[C0_I32_1]], %[[alloca_0]][] : memref +//CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 +//CHECK: memref.store %[[C0_I32_1]], %[[alloca_0]][] : memref //CHECK: memref.alloca_scope { //CHECK: scf.while : () -> () { -//CHECK: %[[TWO]] = memref.load %[[alloca_0]][] : memref -//CHECK: %[[C2_I32]] = arith.constant 2 : i32 -//CHECK: %[[THREE]] = arith.cmpi slt, %[[TWO]], %[[C2_I32]] : i32 -//CHECK: %[[FOUR]] = arith.extui %[[THREE]] : i1 to i32 -//CHECK: %[[C0_I32_2:.+]] = arith.constant 0 : i32 -//CHECK: %[[FIVE]] = arith.cmpi ne, %[[FOUR]], %[[C0_I32_2]] : i32 -//CHECK: %[[SIX:.+]] = arith.extui %[[FIVE]] : i1 to i8 -//CHECK: %[[SEVEN:.+]] = arith.trunci %[[SIX]] : i8 to i1 -//CHECK: scf.condition(%[[SEVEN]]) +//CHECK: %{{.*}} = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C2_I32]] = arith.constant 2 : i32 +//CHECK: %{{.*}} = arith.cmpi slt, %{{.*}}, %[[C2_I32]] : i32 +//CHECK: %[[SIX:.+]] = arith.extui %{{.*}} : i1 to i8 +//CHECK: %[[SEVEN:.+]] = arith.trunci %[[SIX]] : i8 to i1 +//CHECK: scf.condition(%[[SEVEN]]) //CHECK: } do { -//CHECK: %[[TWO]] = memref.load %[[alloca_0]][] : memref -//CHECK: %[[C1_I32_2:.+]] = arith.constant 1 : i32 -//CHECK: %[[THREE]] = arith.addi %[[TWO]], %[[C1_I32_2]] : i32 -//CHECK: memref.store %[[THREE]], %[[alloca_0]][] : memref -//CHECK: scf.yield -//CHECK: } -//CHECK: } -//CHECK: %[[ZERO]] = memref.load %[[alloca]][] : memref -//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 -//CHECK: %[[ONE]] = arith.addi %[[ZERO]], %[[C1_I32]] : i32 -//CHECK: memref.store %[[ONE]], %[[alloca]][] : memref -//CHECK: scf.yield -//CHECK: } -//CHECK: } -//CHECK: return -//CHECK: } -//CHECK: } \ No newline at end of file +//CHECK: %{{.*}} = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32_2:.+]] = arith.constant 1 : i32 +//CHECK: %{{.*}} = arith.addi %{{.*}}, %[[C1_I32_2]] : i32 +//CHECK: memref.store %{{.*}}, %[[alloca_0]][] : memref +//CHECK: scf.yield +//CHECK: } +//CHECK: } +//CHECK: %[[ZERO]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: %[[ONE]] = arith.addi %[[ZERO]], 
%[[C1_I32]] : i32 +//CHECK: memref.store %[[ONE]], %[[alloca]][] : memref +//CHECK: scf.yield +//CHECK: } +//CHECK: } +//CHECK: return +//CHECK: } +//CHECK: } diff --git a/clang/test/CIR/Lowering/dot.cir b/clang/test/CIR/Lowering/dot.cir index ad1241e1cad3..4c1586d8eaa5 100644 --- a/clang/test/CIR/Lowering/dot.cir +++ b/clang/test/CIR/Lowering/dot.cir @@ -21,9 +21,8 @@ module { cir.for : cond { %10 = cir.load %8 : !cir.ptr, !s32i %11 = cir.load %2 : !cir.ptr, !s32i - %12 = cir.cmp(lt, %10, %11) : !s32i, !s32i - %13 = cir.cast(int_to_bool, %12 : !s32i), !cir.bool - cir.condition(%13) + %12 = cir.cmp(lt, %10, %11) : !s32i, !cir.bool + cir.condition(%12) } body { %10 = cir.load %0 : !cir.ptr>, !cir.ptr %11 = cir.load %8 : !cir.ptr, !s32i @@ -79,10 +78,7 @@ module { // MLIR: %[[VAL_17:.*]] = llvm.load %[[VAL_2]] {alignment = 4 : i64} : !llvm.ptr -> i32 // MLIR: %[[VAL_18:.*]] = llvm.load %[[VAL_8]] {alignment = 4 : i64} : !llvm.ptr -> i32 // MLIR: %[[VAL_19:.*]] = llvm.icmp "slt" %[[VAL_17]], %[[VAL_18]] : i32 -// MLIR: %[[VAL_20:.*]] = llvm.zext %[[VAL_19]] : i1 to i32 -// MLIR: %[[VAL_21:.*]] = llvm.mlir.constant(0 : i32) : i32 -// MLIR: %[[VAL_22:.*]] = llvm.icmp "ne" %[[VAL_20]], %[[VAL_21]] : i32 -// MLIR: llvm.cond_br %[[VAL_22]], ^bb3, ^bb5 +// MLIR: llvm.cond_br %[[VAL_19]], ^bb3, ^bb5 // MLIR: ^bb3: // MLIR: %[[VAL_23:.*]] = llvm.load %[[VAL_4]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr // MLIR: %[[VAL_24:.*]] = llvm.load %[[VAL_2]] {alignment = 4 : i64} : !llvm.ptr -> i32 @@ -112,4 +108,4 @@ module { // MLIR: llvm.store %[[VAL_39]], %[[VAL_10]] {{.*}}: f64, !llvm.ptr // MLIR: %[[VAL_40:.*]] = llvm.load %[[VAL_10]] {alignment = 8 : i64} : !llvm.ptr -> f64 // MLIR: llvm.return %[[VAL_40]] : f64 -// MLIR: } \ No newline at end of file +// MLIR: } diff --git a/clang/test/CIR/Lowering/goto.cir b/clang/test/CIR/Lowering/goto.cir index f09626ec122f..cd3a57d2e713 100644 --- a/clang/test/CIR/Lowering/goto.cir +++ b/clang/test/CIR/Lowering/goto.cir @@ -3,7 +3,7 @@ !s32i = !cir.int module { - + cir.func @gotoFromIf(%arg0: !s32i) -> !s32i { %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} @@ -11,9 +11,8 @@ module { cir.scope { %6 = cir.load %0 : !cir.ptr, !s32i %7 = cir.const #cir.int<5> : !s32i - %8 = cir.cmp(gt, %6, %7) : !s32i, !s32i - %9 = cir.cast(int_to_bool, %8 : !s32i), !cir.bool - cir.if %9 { + %8 = cir.cmp(gt, %6, %7) : !s32i, !cir.bool + cir.if %8 { cir.goto "err" } } diff --git a/clang/test/CIR/Lowering/loops-with-break.cir b/clang/test/CIR/Lowering/loops-with-break.cir index 6a7ef3e8c023..813d9aed05d5 100644 --- a/clang/test/CIR/Lowering/loops-with-break.cir +++ b/clang/test/CIR/Lowering/loops-with-break.cir @@ -11,17 +11,15 @@ module { cir.for : cond { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<10> : !s32i - %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.condition(%5) + %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool + cir.condition(%4) } body { cir.scope { cir.scope { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<5> : !s32i - %4 = cir.cmp(eq, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.if %5 { + %4 = cir.cmp(eq, %2, %3) : !s32i, !cir.bool + cir.if %4 { cir.break } } @@ -73,9 +71,8 @@ module { cir.for : cond { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<10> : !s32i - %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - 
cir.condition(%5) + %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool + cir.condition(%4) } body { cir.scope { cir.scope { @@ -85,17 +82,15 @@ module { cir.for : cond { %4 = cir.load %2 : !cir.ptr, !s32i %5 = cir.const #cir.int<10> : !s32i - %6 = cir.cmp(lt, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.condition(%7) + %6 = cir.cmp(lt, %4, %5) : !s32i, !cir.bool + cir.condition(%6) } body { cir.scope { cir.scope { %4 = cir.load %2 : !cir.ptr, !s32i %5 = cir.const #cir.int<5> : !s32i - %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.if %7 { + %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool + cir.if %6 { cir.break } } @@ -174,9 +169,8 @@ module { cir.while { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<10> : !s32i - %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.condition(%5) + %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool + cir.condition(%4) } do { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.unary(inc, %2) : !s32i, !s32i @@ -184,9 +178,8 @@ module { cir.scope { %4 = cir.load %0 : !cir.ptr, !s32i %5 = cir.const #cir.int<5> : !s32i - %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.if %7 { + %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool + cir.if %6 { cir.break } } @@ -233,9 +226,8 @@ cir.func @testDoWhile() { cir.scope { %4 = cir.load %0 : !cir.ptr, !s32i %5 = cir.const #cir.int<5> : !s32i - %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.if %7 { + %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool + cir.if %6 { cir.break } } @@ -243,9 +235,8 @@ cir.func @testDoWhile() { } while { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<10> : !s32i - %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.condition(%5) + %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool + cir.condition(%4) } } cir.return @@ -275,4 +266,4 @@ cir.func @testDoWhile() { // [...] 
// CHECK: } -} \ No newline at end of file +} diff --git a/clang/test/CIR/Lowering/loops-with-continue.cir b/clang/test/CIR/Lowering/loops-with-continue.cir index 0371d416b61d..f6a91dcab560 100644 --- a/clang/test/CIR/Lowering/loops-with-continue.cir +++ b/clang/test/CIR/Lowering/loops-with-continue.cir @@ -11,17 +11,15 @@ module { cir.for : cond { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<10> : !s32i - %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.condition(%5) + %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool + cir.condition(%4) } body { cir.scope { cir.scope { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<5> : !s32i - %4 = cir.cmp(eq, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.if %5 { + %4 = cir.cmp(eq, %2, %3) : !s32i, !cir.bool + cir.if %4 { cir.continue } } @@ -74,9 +72,8 @@ module { cir.for : cond { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<10> : !s32i - %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.condition(%5) + %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool + cir.condition(%4) } body { cir.scope { cir.scope { @@ -86,17 +83,15 @@ module { cir.for : cond { %4 = cir.load %2 : !cir.ptr, !s32i %5 = cir.const #cir.int<10> : !s32i - %6 = cir.cmp(lt, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.condition(%7) + %6 = cir.cmp(lt, %4, %5) : !s32i, !cir.bool + cir.condition(%6) } body { cir.scope { cir.scope { %4 = cir.load %2 : !cir.ptr, !s32i %5 = cir.const #cir.int<5> : !s32i - %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.if %7 { + %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool + cir.if %6 { cir.continue } } @@ -174,9 +169,8 @@ cir.func @testWhile() { cir.while { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<10> : !s32i - %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.condition(%5) + %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool + cir.condition(%4) } do { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.unary(inc, %2) : !s32i, !s32i @@ -184,9 +178,8 @@ cir.func @testWhile() { cir.scope { %4 = cir.load %0 : !cir.ptr, !s32i %5 = cir.const #cir.int<5> : !s32i - %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.if %7 { + %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool + cir.if %6 { cir.continue } } @@ -230,9 +223,8 @@ cir.func @testWhile() { cir.scope { %4 = cir.load %0 : !cir.ptr, !s32i %5 = cir.const #cir.int<5> : !s32i - %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.if %7 { + %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool + cir.if %6 { cir.continue } } @@ -240,9 +232,8 @@ cir.func @testWhile() { } while { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<10> : !s32i - %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.condition(%5) + %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool + cir.condition(%4) } } cir.return @@ -271,4 +262,4 @@ cir.func @testWhile() { // [...] 
// CHECK: } -} \ No newline at end of file +} diff --git a/clang/test/CIR/Lowering/switch.cir b/clang/test/CIR/Lowering/switch.cir index 81cc6efdc92d..9434b7337f7e 100644 --- a/clang/test/CIR/Lowering/switch.cir +++ b/clang/test/CIR/Lowering/switch.cir @@ -154,9 +154,8 @@ module { cir.scope { %6 = cir.load %1 : !cir.ptr, !s32i %7 = cir.const #cir.int<0> : !s32i - %8 = cir.cmp(ge, %6, %7) : !s32i, !s32i - %9 = cir.cast(int_to_bool, %8 : !s32i), !cir.bool - cir.if %9 { + %8 = cir.cmp(ge, %6, %7) : !s32i, !cir.bool + cir.if %8 { cir.break } } diff --git a/clang/test/CIR/Transforms/mem2reg.c b/clang/test/CIR/Transforms/mem2reg.c index 5d8d2f59b35b..b60d9eb0d1e9 100644 --- a/clang/test/CIR/Transforms/mem2reg.c +++ b/clang/test/CIR/Transforms/mem2reg.c @@ -41,9 +41,8 @@ void alloca_in_loop(int* ar, int n) { // BEFORE: cir.for : cond { // BEFORE: %4 = cir.load %2 : !cir.ptr, !s32i // BEFORE: %5 = cir.load %1 : !cir.ptr, !s32i -// BEFORE: %6 = cir.cmp(lt, %4, %5) : !s32i, !s32i -// BEFORE: %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool -// BEFORE: cir.condition(%7) +// BEFORE: %6 = cir.cmp(lt, %4, %5) : !s32i, !cir.bool +// BEFORE: cir.condition(%6) // BEFORE: } body { // BEFORE: cir.scope { // BEFORE: %4 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} @@ -71,21 +70,20 @@ void alloca_in_loop(int* ar, int n) { // MEM2REG: %0 = cir.const #cir.int<0> : !s32i // MEM2REG: cir.br ^bb2(%0 : !s32i) // MEM2REG: ^bb2(%1: !s32i{{.*}}): // 2 preds: ^bb1, ^bb6 -// MEM2REG: %2 = cir.cmp(lt, %1, %arg1) : !s32i, !s32i -// MEM2REG: %3 = cir.cast(int_to_bool, %2 : !s32i), !cir.bool -// MEM2REG: cir.brcond %3 ^bb3, ^bb7 +// MEM2REG: %2 = cir.cmp(lt, %1, %arg1) : !s32i, !cir.bool +// MEM2REG: cir.brcond %2 ^bb3, ^bb7 // MEM2REG: ^bb3: // pred: ^bb2 // MEM2REG: cir.br ^bb4 // MEM2REG: ^bb4: // pred: ^bb3 -// MEM2REG: %4 = cir.const #cir.int<4> : !s32i -// MEM2REG: %5 = cir.ptr_stride(%arg0 : !cir.ptr, %1 : !s32i), !cir.ptr -// MEM2REG: cir.store %4, %5 : !s32i, !cir.ptr +// MEM2REG: %3 = cir.const #cir.int<4> : !s32i +// MEM2REG: %4 = cir.ptr_stride(%arg0 : !cir.ptr, %1 : !s32i), !cir.ptr +// MEM2REG: cir.store %3, %4 : !s32i, !cir.ptr // MEM2REG: cir.br ^bb5 // MEM2REG: ^bb5: // pred: ^bb4 // MEM2REG: cir.br ^bb6 // MEM2REG: ^bb6: // pred: ^bb5 -// MEM2REG: %6 = cir.unary(inc, %1) : !s32i, !s32i -// MEM2REG: cir.br ^bb2(%6 : !s32i) +// MEM2REG: %5 = cir.unary(inc, %1) : !s32i, !s32i +// MEM2REG: cir.br ^bb2(%5 : !s32i) // MEM2REG: ^bb7: // pred: ^bb2 // MEM2REG: cir.br ^bb8 // MEM2REG: ^bb8: // pred: ^bb7 @@ -116,24 +114,23 @@ int alloca_in_ifelse(int x) { // BEFORE: cir.scope { // BEFORE: %9 = cir.load %0 : !cir.ptr, !s32i // BEFORE: %10 = cir.const #cir.int<42> : !s32i -// BEFORE: %11 = cir.cmp(gt, %9, %10) : !s32i, !s32i -// BEFORE: %12 = cir.cast(int_to_bool, %11 : !s32i), !cir.bool -// BEFORE: cir.if %12 { -// BEFORE: %13 = cir.alloca !s32i, !cir.ptr, ["z", init] {alignment = 4 : i64} -// BEFORE: %14 = cir.const #cir.int<2> : !s32i -// BEFORE: cir.store %14, %13 : !s32i, !cir.ptr -// BEFORE: %15 = cir.load %0 : !cir.ptr, !s32i -// BEFORE: %16 = cir.load %13 : !cir.ptr, !s32i -// BEFORE: %17 = cir.binop(mul, %15, %16) nsw : !s32i -// BEFORE: cir.store %17, %2 : !s32i, !cir.ptr +// BEFORE: %11 = cir.cmp(gt, %9, %10) : !s32i, !cir.bool +// BEFORE: cir.if %11 { +// BEFORE: %12 = cir.alloca !s32i, !cir.ptr, ["z", init] {alignment = 4 : i64} +// BEFORE: %13 = cir.const #cir.int<2> : !s32i +// BEFORE: cir.store %13, %12 : !s32i, !cir.ptr +// BEFORE: %14 = cir.load %0 : !cir.ptr, !s32i +// BEFORE: %15 
= cir.load %12 : !cir.ptr, !s32i +// BEFORE: %16 = cir.binop(mul, %14, %15) nsw : !s32i +// BEFORE: cir.store %16, %2 : !s32i, !cir.ptr // BEFORE: } else { -// BEFORE: %13 = cir.alloca !s32i, !cir.ptr, ["z", init] {alignment = 4 : i64} -// BEFORE: %14 = cir.const #cir.int<3> : !s32i -// BEFORE: cir.store %14, %13 : !s32i, !cir.ptr -// BEFORE: %15 = cir.load %0 : !cir.ptr, !s32i -// BEFORE: %16 = cir.load %13 : !cir.ptr, !s32i -// BEFORE: %17 = cir.binop(mul, %15, %16) nsw : !s32i -// BEFORE: cir.store %17, %2 : !s32i, !cir.ptr +// BEFORE: %12 = cir.alloca !s32i, !cir.ptr, ["z", init] {alignment = 4 : i64} +// BEFORE: %13 = cir.const #cir.int<3> : !s32i +// BEFORE: cir.store %13, %12 : !s32i, !cir.ptr +// BEFORE: %14 = cir.load %0 : !cir.ptr, !s32i +// BEFORE: %15 = cir.load %12 : !cir.ptr, !s32i +// BEFORE: %16 = cir.binop(mul, %14, %15) nsw : !s32i +// BEFORE: cir.store %16, %2 : !s32i, !cir.ptr // BEFORE: } // BEFORE: } // BEFORE: %4 = cir.load %2 : !cir.ptr, !s32i @@ -150,23 +147,22 @@ int alloca_in_ifelse(int x) { // MEM2REG: cir.br ^bb1 // MEM2REG: ^bb1: // pred: ^bb0 // MEM2REG: %1 = cir.const #cir.int<42> : !s32i -// MEM2REG: %2 = cir.cmp(gt, %arg0, %1) : !s32i, !s32i -// MEM2REG: %3 = cir.cast(int_to_bool, %2 : !s32i), !cir.bool -// MEM2REG: cir.brcond %3 ^bb2, ^bb3 +// MEM2REG: %2 = cir.cmp(gt, %arg0, %1) : !s32i, !cir.bool +// MEM2REG: cir.brcond %2 ^bb2, ^bb3 // MEM2REG: ^bb2: // pred: ^bb1 -// MEM2REG: %4 = cir.const #cir.int<2> : !s32i -// MEM2REG: %5 = cir.binop(mul, %arg0, %4) nsw : !s32i -// MEM2REG: cir.br ^bb4(%5 : !s32i) +// MEM2REG: %3 = cir.const #cir.int<2> : !s32i +// MEM2REG: %4 = cir.binop(mul, %arg0, %3) nsw : !s32i +// MEM2REG: cir.br ^bb4(%4 : !s32i) // MEM2REG: ^bb3: // pred: ^bb1 -// MEM2REG: %6 = cir.const #cir.int<3> : !s32i -// MEM2REG: %7 = cir.binop(mul, %arg0, %6) nsw : !s32i -// MEM2REG: cir.br ^bb4(%7 : !s32i) -// MEM2REG: ^bb4(%8: !s32i{{.*}}): // 2 preds: ^bb2, ^bb3 +// MEM2REG: %5 = cir.const #cir.int<3> : !s32i +// MEM2REG: %6 = cir.binop(mul, %arg0, %5) nsw : !s32i +// MEM2REG: cir.br ^bb4(%6 : !s32i) +// MEM2REG: ^bb4(%7: !s32i{{.*}}): // 2 preds: ^bb2, ^bb3 // MEM2REG: cir.br ^bb5 // MEM2REG: ^bb5: // pred: ^bb4 -// MEM2REG: %9 = cir.const #cir.int<1> : !s32i -// MEM2REG: %10 = cir.binop(add, %8, %9) nsw : !s32i -// MEM2REG: cir.return %10 : !s32i +// MEM2REG: %8 = cir.const #cir.int<1> : !s32i +// MEM2REG: %9 = cir.binop(add, %7, %8) nsw : !s32i +// MEM2REG: cir.return %9 : !s32i // MEM2REG: } diff --git a/clang/test/CIR/Transforms/scf-prepare.cir b/clang/test/CIR/Transforms/scf-prepare.cir index 063420b1c516..3e1551de25c4 100644 --- a/clang/test/CIR/Transforms/scf-prepare.cir +++ b/clang/test/CIR/Transforms/scf-prepare.cir @@ -14,7 +14,7 @@ module { // CHECK: %[[BOUND:.*]] = cir.load %[[BOUND_ADDR:.*]] : !cir.ptr, !s32i // CHECK: cir.for : cond { // CHECK: %[[IV:.*]] = cir.load %[[IV_ADDR:.*]] : !cir.ptr, !s32i - // CHECK: %[[COND:.*]] = cir.cmp(lt, %[[IV]], %4) : !s32i, !s32i + // CHECK: %[[COND:.*]] = cir.cmp(lt, %[[IV]], %4) : !s32i, !cir.bool %0 = cir.alloca !s32i, !cir.ptr, ["l", init] {alignment = 4 : i64} %1 = cir.alloca !s32i, !cir.ptr, ["u", init] {alignment = 4 : i64} @@ -27,9 +27,8 @@ module { cir.for : cond { %4 = cir.load %1 : !cir.ptr, !s32i %5 = cir.load %2 : !cir.ptr, !s32i - %6 = cir.cmp(gt, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.condition(%7) + %6 = cir.cmp(gt, %4, %5) : !s32i, !cir.bool + cir.condition(%6) } body { %4 = cir.const #cir.int<3> : !s32i %5 = cir.get_global @a : !cir.ptr> 
@@ -57,8 +56,8 @@ module { // CHECK: %[[BOUND:.*]] = cir.const #cir.int<50> : !s32i // CHECK: cir.for : cond { // CHECK: %[[IV:.*]] = cir.load %[[IV_ADDR:.*]] : !cir.ptr, !s32i - // CHECK: %[[COND:.*]] = cir.cmp(le, %[[IV]], %[[BOUND]]) : !s32i, !s32i - + // CHECK: %[[COND:.*]] = cir.cmp(le, %[[IV]], %[[BOUND]]) : !s32i, !cir.bool + cir.scope { %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} %1 = cir.const #cir.int<0> : !s32i @@ -66,9 +65,8 @@ module { cir.for : cond { %2 = cir.const #cir.int<50> : !s32i %3 = cir.load %0 : !cir.ptr, !s32i - %4 = cir.cmp(ge, %2, %3) : !s32i, !s32i - %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool - cir.condition(%5) + %4 = cir.cmp(ge, %2, %3) : !s32i, !cir.bool + cir.condition(%4) } body { %2 = cir.const #cir.int<3> : !s32i %3 = cir.get_global @a : !cir.ptr> @@ -99,7 +97,7 @@ module { // CHECK: cir.for : cond { // CHECK: %[[BOUND:.*]] = cir.load %[[BOUND_ADDR:.*]] : !cir.ptr, !s32i // CHECK: %[[IV:.*]] = cir.load %[[IV_ADDR:.*]] : !cir.ptr, !s32i - // CHECK: %[[COND:.*]] = cir.cmp(lt, %[[IV]], %[[BOUND]]) : !s32i, !s32i + // CHECK: %[[COND:.*]] = cir.cmp(lt, %[[IV]], %[[BOUND]]) : !s32i, !cir.bool %0 = cir.alloca !s32i, !cir.ptr, ["l", init] {alignment = 4 : i64} %1 = cir.alloca !s32i, !cir.ptr, ["u", init] {alignment = 4 : i64} @@ -112,9 +110,8 @@ module { cir.for : cond { %4 = cir.load %1 : !cir.ptr, !s32i %5 = cir.load %2 : !cir.ptr, !s32i - %6 = cir.cmp(gt, %4, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.condition(%7) + %6 = cir.cmp(gt, %4, %5) : !s32i, !cir.bool + cir.condition(%6) } body { cir.scope { %4 = cir.load %1 : !cir.ptr, !s32i @@ -157,9 +154,8 @@ module { %3 = cir.const #cir.int<100> : !s32i %4 = cir.const #cir.int<1> : !s32i %5 = cir.binop(sub, %3, %4) nsw : !s32i - %6 = cir.cmp(lt, %2, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.condition(%7) + %6 = cir.cmp(lt, %2, %5) : !s32i, !cir.bool + cir.condition(%6) } body { cir.scope { } @@ -192,9 +188,8 @@ module { %2 = cir.load %0 : !cir.ptr, !s32i %3 = cir.const #cir.int<100> : !s32i %5 = cir.binop(sub, %3, %arg0) nsw : !s32i - %6 = cir.cmp(lt, %2, %5) : !s32i, !s32i - %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool - cir.condition(%7) + %6 = cir.cmp(lt, %2, %5) : !s32i, !cir.bool + cir.condition(%6) } body { cir.scope { } diff --git a/clang/test/CIR/Transforms/simpl.c b/clang/test/CIR/Transforms/simpl.c index dda9f495ca4c..90d65d651369 100644 --- a/clang/test/CIR/Transforms/simpl.c +++ b/clang/test/CIR/Transforms/simpl.c @@ -18,8 +18,9 @@ int foo(int* ptr) { // BEFORE: cir.func {{.*@foo}} // BEFORE: [[X0:%.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr // BEFORE: [[X1:%.*]] = cir.const #cir.ptr : !cir.ptr -// BEFORE: [[X2:%.*]] = cir.cmp(eq, [[X0]], [[X1]]) : !cir.ptr, !s32i -// BEFORE: [[X3:%.*]] = cir.cast(int_to_bool, [[X2]] : !s32i), !cir.bool +// BEFORE: [[X2:%.*]] = cir.cmp(eq, [[X0]], [[X1]]) : !cir.ptr, !cir.bool +// BEFORE: [[BOOL_TO_INT:%.*]] = cir.cast(bool_to_int, [[X2]] : !cir.bool), !s32i +// BEFORE: [[X3:%.*]] = cir.cast(int_to_bool, [[BOOL_TO_INT]] : !s32i), !cir.bool // BEFORE: [[X4:%.*]] = cir.unary(not, [[X3]]) : !cir.bool, !cir.bool // BEFORE: [[X5:%.*]] = cir.cast(bool_to_int, [[X4]] : !cir.bool), !s32i // BEFORE: [[X6:%.*]] = cir.cast(int_to_bool, [[X5]] : !s32i), !cir.bool @@ -33,6 +34,5 @@ int foo(int* ptr) { // AFTER: [[X0:%.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr // AFTER: [[X1:%.*]] = cir.const #cir.ptr : !cir.ptr -// AFTER: [[X2:%.*]] = cir.cmp(eq, [[X0]], [[X1]]) : !cir.ptr, 
!s32i -// AFTER: [[X3:%.*]] = cir.cast(int_to_bool, [[X2]] : !s32i), !cir.bool -// AFTER: cir.if [[X3]] \ No newline at end of file +// AFTER: [[X2:%.*]] = cir.cmp(eq, [[X0]], [[X1]]) : !cir.ptr, !cir.bool +// AFTER: cir.if [[X2]] diff --git a/clang/test/CIR/Transforms/switch.cir b/clang/test/CIR/Transforms/switch.cir index f7cc8fb31196..77ca59836f48 100644 --- a/clang/test/CIR/Transforms/switch.cir +++ b/clang/test/CIR/Transforms/switch.cir @@ -174,9 +174,8 @@ module { cir.scope { %6 = cir.load %1 : !cir.ptr, !s32i %7 = cir.const #cir.int<0> : !s32i - %8 = cir.cmp(ge, %6, %7) : !s32i, !s32i - %9 = cir.cast(int_to_bool, %8 : !s32i), !cir.bool - cir.if %9 { + %8 = cir.cmp(ge, %6, %7) : !s32i, !cir.bool + cir.if %8 { cir.break } } From da601b374deea6665f710f7e432dfa82f457059e Mon Sep 17 00:00:00 2001 From: PikachuHy Date: Tue, 19 Nov 2024 00:59:23 +0800 Subject: [PATCH 2/7] [cherry-pick][mlir][llvm] Add support for memset.inline (#115711) (#1135) support `llvm.intr.memset.inline` in llvm-project repo before we add support for `__builtin_memset_inline` in clangir cc @bcardosolopes (cherry picked from commit 30753afc2a3171e962e261622781852a01fbec72) --- .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td | 26 ++ mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp | 4 + mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 281 +++++++++++++----- .../Dialect/LLVMIR/mem2reg-intrinsics.mlir | 132 ++++++++ mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir | 217 ++++++++++++++ mlir/test/Target/LLVMIR/Import/intrinsic.ll | 4 + .../test/Target/LLVMIR/llvmir-intrinsics.mlir | 4 + 7 files changed, 598 insertions(+), 70 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index d07ebbacc604..857859384058 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -256,6 +256,32 @@ def LLVM_MemsetOp : LLVM_ZeroResultIntrOp<"memset", [0, 2], ]; } +def LLVM_MemsetInlineOp : LLVM_ZeroResultIntrOp<"memset.inline", [0, 2], + [DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods], + /*requiresAccessGroup=*/1, /*requiresAliasAnalysis=*/1, + /*requiresOpBundles=*/0, /*immArgPositions=*/[2, 3], + /*immArgAttrNames=*/["len", "isVolatile"]> { + dag args = (ins Arg:$dst, + I8:$val, APIntAttr:$len, I1Attr:$isVolatile); + // Append the alias attributes defined by LLVM_IntrOpBase. 
+ let arguments = !con(args, aliasAttrs); + let builders = [ + OpBuilder<(ins "Value":$dst, "Value":$val, "IntegerAttr":$len, + "bool":$isVolatile), [{ + build($_builder, $_state, dst, val, len, + $_builder.getBoolAttr(isVolatile)); + }]>, + OpBuilder<(ins "Value":$dst, "Value":$val, "IntegerAttr":$len, + "IntegerAttr":$isVolatile), [{ + build($_builder, $_state, dst, val, len, isVolatile, + /*access_groups=*/nullptr, /*alias_scopes=*/nullptr, + /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr); + }]> + ]; +} + def LLVM_NoAliasScopeDeclOp : LLVM_ZeroResultIntrOp<"experimental.noalias.scope.decl"> { let arguments = (ins LLVM_AliasScopeAttr:$scope); diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp index cff16afc73af..a59900745d02 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp @@ -94,6 +94,10 @@ SmallVector mlir::LLVM::MemsetOp::getAccessedOperands() { return {getDst()}; } +SmallVector mlir::LLVM::MemsetInlineOp::getAccessedOperands() { + return {getDst()}; +} + SmallVector mlir::LLVM::CallOp::getAccessedOperands() { return llvm::to_vector( llvm::make_filter_range(getArgOperands(), [](Value arg) { diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index 5dc506c14ef9..8b164cf59a07 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -904,6 +904,76 @@ std::optional getStaticMemIntrLen(LLVM::MemcpyInlineOp op) { return memIntrLen.getZExtValue(); } +/// Returns the length of the given memory intrinsic in bytes if it can be known +/// at compile-time on a best-effort basis, nothing otherwise. +/// Because MemsetInlineOp has its length encoded as an attribute, this requires +/// specialized handling. +template <> +std::optional getStaticMemIntrLen(LLVM::MemsetInlineOp op) { + APInt memIntrLen = op.getLen(); + if (memIntrLen.getBitWidth() > 64) + return {}; + return memIntrLen.getZExtValue(); +} + +/// Returns an integer attribute representing the length of a memset intrinsic +template +IntegerAttr createMemsetLenAttr(MemsetIntr op) { + IntegerAttr memsetLenAttr; + bool successfulMatch = + matchPattern(op.getLen(), m_Constant(&memsetLenAttr)); + (void)successfulMatch; + assert(successfulMatch); + return memsetLenAttr; +} + +/// Returns an integer attribute representing the length of a memset intrinsic +/// Because MemsetInlineOp has its length encoded as an attribute, this requires +/// specialized handling. +template <> +IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) { + return op.getLenAttr(); +} + +/// Creates a memset intrinsic of that matches the `toReplace` intrinsic +/// using the provided parameters. There are template specializations for +/// MemsetOp and MemsetInlineOp. 
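+///
+/// The operand/attribute split is visible at the C level (a sketch; the
+/// ClangIR builtin wiring lands later in this series):
+///
+///   memset(p, v, n);                   // n may be a runtime value, so
+///                                      // llvm.intr.memset carries it as an
+///                                      // SSA operand
+///   __builtin_memset_inline(p, v, 16); // the length must be an integer
+///                                      // constant expression, so
+///                                      // llvm.intr.memset.inline carries it
+///                                      // as the `len` attribute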
+template +void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace, + IntegerAttr memsetLenAttr, uint64_t newMemsetSize, + DenseMap &subslots, + Attribute index); + +template <> +void createMemsetIntr(OpBuilder &builder, LLVM::MemsetOp toReplace, + IntegerAttr memsetLenAttr, uint64_t newMemsetSize, + DenseMap &subslots, + Attribute index) { + Value newMemsetSizeValue = + builder + .create( + toReplace.getLen().getLoc(), + IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize)) + .getResult(); + + builder.create(toReplace.getLoc(), subslots.at(index).ptr, + toReplace.getVal(), newMemsetSizeValue, + toReplace.getIsVolatile()); +} + +template <> +void createMemsetIntr(OpBuilder &builder, LLVM::MemsetInlineOp toReplace, + IntegerAttr memsetLenAttr, uint64_t newMemsetSize, + DenseMap &subslots, + Attribute index) { + auto newMemsetSizeValue = + IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize); + + builder.create( + toReplace.getLoc(), subslots.at(index).ptr, toReplace.getVal(), + newMemsetSizeValue, toReplace.getIsVolatile()); +} + } // namespace /// Returns whether one can be sure the memory intrinsic does not write outside @@ -931,23 +1001,37 @@ static bool areAllIndicesI32(const DestructurableMemorySlot &slot) { } //===----------------------------------------------------------------------===// -// Interfaces for memset +// Interfaces for memset and memset.inline //===----------------------------------------------------------------------===// -bool LLVM::MemsetOp::loadsFrom(const MemorySlot &slot) { return false; } +template +static bool memsetCanRewire(MemsetIntr op, const DestructurableMemorySlot &slot, + SmallPtrSetImpl &usedIndices, + SmallVectorImpl &mustBeSafelyUsed, + const DataLayout &dataLayout) { + if (&slot.elemType.getDialect() != op.getOperation()->getDialect()) + return false; -bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) { - return getDst() == slot.ptr; + if (op.getIsVolatile()) + return false; + + if (!cast(slot.elemType).getSubelementIndexMap()) + return false; + + if (!areAllIndicesI32(slot)) + return false; + + return definitelyWritesOnlyWithinSlot(op, slot, dataLayout); } -Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder, - Value reachingDef, - const DataLayout &dataLayout) { +template +static Value memsetGetStored(MemsetIntr op, const MemorySlot &slot, + OpBuilder &builder) { // TODO: Support non-integer types. return TypeSwitch(slot.elemType) .Case([&](IntegerType intType) -> Value { if (intType.getWidth() == 8) - return getVal(); + return op.getVal(); assert(intType.getWidth() % 8 == 0); @@ -955,14 +1039,14 @@ Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder, // or-ing it with the previous value. 
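        // In C, the equivalent splat for a 32-bit slot reads (a sketch;
        // `splat32` is a hypothetical helper, not part of this patch):
        //   uint32_t splat32(uint8_t v) {
        //     uint32_t x = v;   // zext: byte 0
        //     x |= x << 8;      // bytes 0-1
        //     x |= x << 16;     // bytes 0-3
        //     return x;
        //   }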
uint64_t coveredBits = 8; Value currentValue = - builder.create(getLoc(), intType, getVal()); + builder.create(op.getLoc(), intType, op.getVal()); while (coveredBits < intType.getWidth()) { - Value shiftBy = - builder.create(getLoc(), intType, coveredBits); + Value shiftBy = builder.create(op.getLoc(), intType, + coveredBits); Value shifted = - builder.create(getLoc(), currentValue, shiftBy); + builder.create(op.getLoc(), currentValue, shiftBy); currentValue = - builder.create(getLoc(), currentValue, shifted); + builder.create(op.getLoc(), currentValue, shifted); coveredBits *= 2; } @@ -974,10 +1058,12 @@ Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder, }); } -bool LLVM::MemsetOp::canUsesBeRemoved( - const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, - SmallVectorImpl &newBlockingUses, - const DataLayout &dataLayout) { +template +static bool +memsetCanUsesBeRemoved(MemsetIntr op, const MemorySlot &slot, + const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { // TODO: Support non-integer types. bool canConvertType = TypeSwitch(slot.elemType) @@ -988,62 +1074,28 @@ bool LLVM::MemsetOp::canUsesBeRemoved( if (!canConvertType) return false; - if (getIsVolatile()) + if (op.getIsVolatile()) return false; - return getStaticMemIntrLen(*this) == dataLayout.getTypeSize(slot.elemType); -} - -DeletionKind LLVM::MemsetOp::removeBlockingUses( - const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, - OpBuilder &builder, Value reachingDefinition, - const DataLayout &dataLayout) { - return DeletionKind::Delete; -} - -LogicalResult LLVM::MemsetOp::ensureOnlySafeAccesses( - const MemorySlot &slot, SmallVectorImpl &mustBeSafelyUsed, - const DataLayout &dataLayout) { - return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout)); + return getStaticMemIntrLen(op) == dataLayout.getTypeSize(slot.elemType); } -bool LLVM::MemsetOp::canRewire(const DestructurableMemorySlot &slot, - SmallPtrSetImpl &usedIndices, - SmallVectorImpl &mustBeSafelyUsed, - const DataLayout &dataLayout) { - if (&slot.elemType.getDialect() != getOperation()->getDialect()) - return false; - - if (getIsVolatile()) - return false; - - if (!cast(slot.elemType).getSubelementIndexMap()) - return false; - - if (!areAllIndicesI32(slot)) - return false; - - return definitelyWritesOnlyWithinSlot(*this, slot, dataLayout); -} +template +static DeletionKind +memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot, + DenseMap &subslots, OpBuilder &builder, + const DataLayout &dataLayout) { -DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot, - DenseMap &subslots, - OpBuilder &builder, - const DataLayout &dataLayout) { std::optional> types = cast(slot.elemType).getSubelementIndexMap(); - IntegerAttr memsetLenAttr; - bool successfulMatch = - matchPattern(getLen(), m_Constant(&memsetLenAttr)); - (void)successfulMatch; - assert(successfulMatch); + IntegerAttr memsetLenAttr = createMemsetLenAttr(op); bool packed = false; if (auto structType = dyn_cast(slot.elemType)) packed = structType.isPacked(); - Type i32 = IntegerType::get(getContext(), 32); + Type i32 = IntegerType::get(op.getContext(), 32); uint64_t memsetLen = memsetLenAttr.getValue().getZExtValue(); uint64_t covered = 0; for (size_t i = 0; i < types->size(); i++) { @@ -1063,16 +1115,8 @@ DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot, // Otherwise, only compute its offset within the original memset. 
if (subslots.contains(index)) { uint64_t newMemsetSize = std::min(memsetLen - covered, typeSize); - - Value newMemsetSizeValue = - builder - .create( - getLen().getLoc(), - IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize)) - .getResult(); - - builder.create(getLoc(), subslots.at(index).ptr, getVal(), - newMemsetSizeValue, getIsVolatile()); + createMemsetIntr(builder, op, memsetLenAttr, newMemsetSize, subslots, + index); } covered += typeSize; @@ -1081,6 +1125,103 @@ DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot, return DeletionKind::Delete; } +bool LLVM::MemsetOp::loadsFrom(const MemorySlot &slot) { return false; } + +bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) { + return getDst() == slot.ptr; +} + +Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder, + Value reachingDef, + const DataLayout &dataLayout) { + return memsetGetStored(*this, slot, builder); +} + +bool LLVM::MemsetOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { + return memsetCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses, + dataLayout); +} + +DeletionKind LLVM::MemsetOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + OpBuilder &builder, Value reachingDefinition, + const DataLayout &dataLayout) { + return DeletionKind::Delete; +} + +LogicalResult LLVM::MemsetOp::ensureOnlySafeAccesses( + const MemorySlot &slot, SmallVectorImpl &mustBeSafelyUsed, + const DataLayout &dataLayout) { + return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout)); +} + +bool LLVM::MemsetOp::canRewire(const DestructurableMemorySlot &slot, + SmallPtrSetImpl &usedIndices, + SmallVectorImpl &mustBeSafelyUsed, + const DataLayout &dataLayout) { + return memsetCanRewire(*this, slot, usedIndices, mustBeSafelyUsed, + dataLayout); +} + +DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot, + DenseMap &subslots, + OpBuilder &builder, + const DataLayout &dataLayout) { + return memsetRewire(*this, slot, subslots, builder, dataLayout); +} + +bool LLVM::MemsetInlineOp::loadsFrom(const MemorySlot &slot) { return false; } + +bool LLVM::MemsetInlineOp::storesTo(const MemorySlot &slot) { + return getDst() == slot.ptr; +} + +Value LLVM::MemsetInlineOp::getStored(const MemorySlot &slot, + OpBuilder &builder, Value reachingDef, + const DataLayout &dataLayout) { + return memsetGetStored(*this, slot, builder); +} + +bool LLVM::MemsetInlineOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { + return memsetCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses, + dataLayout); +} + +DeletionKind LLVM::MemsetInlineOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + OpBuilder &builder, Value reachingDefinition, + const DataLayout &dataLayout) { + return DeletionKind::Delete; +} + +LogicalResult LLVM::MemsetInlineOp::ensureOnlySafeAccesses( + const MemorySlot &slot, SmallVectorImpl &mustBeSafelyUsed, + const DataLayout &dataLayout) { + return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout)); +} + +bool LLVM::MemsetInlineOp::canRewire( + const DestructurableMemorySlot &slot, + SmallPtrSetImpl &usedIndices, + SmallVectorImpl &mustBeSafelyUsed, + const DataLayout &dataLayout) { + return memsetCanRewire(*this, slot, usedIndices, mustBeSafelyUsed, + dataLayout); +} + 
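+// For intuition (a sketch, not code from this patch): once SROA has split an
+// aggregate slot, rewiring turns one whole-aggregate fill such as
+//
+//   struct P { int a; float b; } p;
+//   __builtin_memset_inline(&p, 42, 8);  // one 8-byte fill
+//
+// into one fill per surviving subslot (two 4-byte memset.inline ops here),
+// with each new constant length clamped to its field, as the SROA tests
+// below check.
+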
+DeletionKind +LLVM::MemsetInlineOp::rewire(const DestructurableMemorySlot &slot, + DenseMap &subslots, + OpBuilder &builder, const DataLayout &dataLayout) { + return memsetRewire(*this, slot, subslots, builder, dataLayout); +} + //===----------------------------------------------------------------------===// // Interfaces for memcpy/memmove //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir index 4fc80a87f20d..646667505a37 100644 --- a/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir +++ b/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir @@ -23,6 +23,28 @@ llvm.func @basic_memset(%memset_value: i8) -> i32 { // ----- +// CHECK-LABEL: llvm.func @basic_memset_inline +// CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8) +llvm.func @basic_memset_inline(%memset_value: i8) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 4 : i32}> : (!llvm.ptr, i8) -> () + // CHECK-NOT: "llvm.intr.memset.inline" + // CHECK: %[[VALUE_8:.*]] = llvm.zext %[[MEMSET_VALUE]] : i8 to i32 + // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i32) : i32 + // CHECK: %[[SHIFTED_8:.*]] = llvm.shl %[[VALUE_8]], %[[C8]] + // CHECK: %[[VALUE_16:.*]] = llvm.or %[[VALUE_8]], %[[SHIFTED_8]] + // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i32) : i32 + // CHECK: %[[SHIFTED_16:.*]] = llvm.shl %[[VALUE_16]], %[[C16]] + // CHECK: %[[VALUE_32:.*]] = llvm.or %[[VALUE_16]], %[[SHIFTED_16]] + // CHECK-NOT: "llvm.intr.memset.inline" + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: llvm.return %[[VALUE_32]] : i32 + llvm.return %2 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @basic_memset_constant llvm.func @basic_memset_constant() -> i32 { %0 = llvm.mlir.constant(1 : i32) : i32 @@ -45,6 +67,27 @@ llvm.func @basic_memset_constant() -> i32 { // ----- +// CHECK-LABEL: llvm.func @basic_memset_inline_constant +llvm.func @basic_memset_inline_constant() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> () + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[C42:.*]] = llvm.mlir.constant(42 : i8) : i8 + // CHECK: %[[VALUE_42:.*]] = llvm.zext %[[C42]] : i8 to i32 + // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i32) : i32 + // CHECK: %[[SHIFTED_42:.*]] = llvm.shl %[[VALUE_42]], %[[C8]] : i32 + // CHECK: %[[OR0:.*]] = llvm.or %[[VALUE_42]], %[[SHIFTED_42]] : i32 + // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i32) : i32 + // CHECK: %[[SHIFTED:.*]] = llvm.shl %[[OR0]], %[[C16]] : i32 + // CHECK: %[[RES:..*]] = llvm.or %[[OR0]], %[[SHIFTED]] : i32 + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %2 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @exotic_target_memset // CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8) llvm.func @exotic_target_memset(%memset_value: i8) -> i40 { @@ -71,6 +114,31 @@ llvm.func @exotic_target_memset(%memset_value: i8) -> i40 { // ----- +// CHECK-LABEL: llvm.func @exotic_target_memset_inline +// CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8) +llvm.func @exotic_target_memset_inline(%memset_value: i8) -> i40 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i40 {alignment = 4 : i64} : 
(i32) -> !llvm.ptr + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 5}> : (!llvm.ptr, i8) -> () + // CHECK-NOT: "llvm.intr.memset.inline" + // CHECK: %[[VALUE_8:.*]] = llvm.zext %[[MEMSET_VALUE]] : i8 to i40 + // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i40) : i40 + // CHECK: %[[SHIFTED_8:.*]] = llvm.shl %[[VALUE_8]], %[[C8]] + // CHECK: %[[VALUE_16:.*]] = llvm.or %[[VALUE_8]], %[[SHIFTED_8]] + // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i40) : i40 + // CHECK: %[[SHIFTED_16:.*]] = llvm.shl %[[VALUE_16]], %[[C16]] + // CHECK: %[[VALUE_32:.*]] = llvm.or %[[VALUE_16]], %[[SHIFTED_16]] + // CHECK: %[[C32:.*]] = llvm.mlir.constant(32 : i40) : i40 + // CHECK: %[[SHIFTED_COMPL:.*]] = llvm.shl %[[VALUE_32]], %[[C32]] + // CHECK: %[[VALUE_COMPL:.*]] = llvm.or %[[VALUE_32]], %[[SHIFTED_COMPL]] + // CHECK-NOT: "llvm.intr.memset.inline" + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i40 + // CHECK: llvm.return %[[VALUE_COMPL]] : i40 + llvm.return %2 : i40 +} + +// ----- + // CHECK-LABEL: llvm.func @no_volatile_memset llvm.func @no_volatile_memset() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -89,6 +157,22 @@ llvm.func @no_volatile_memset() -> i32 { // ----- +// CHECK-LABEL: llvm.func @no_volatile_memset_inline +llvm.func @no_volatile_memset_inline() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 4 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 4}> : (!llvm.ptr, i8) -> () + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + llvm.return %2 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @no_partial_memset llvm.func @no_partial_memset() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -107,6 +191,22 @@ llvm.func @no_partial_memset() -> i32 { // ----- +// CHECK-LABEL: llvm.func @no_partial_memset_inline +llvm.func @no_partial_memset_inline() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 2}> : (!llvm.ptr, i8) -> () + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + llvm.return %2 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @no_overflowing_memset llvm.func @no_overflowing_memset() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -125,6 +225,22 @@ llvm.func @no_overflowing_memset() -> i32 { // ----- +// CHECK-LABEL: llvm.func @no_overflowing_memset_inline +llvm.func @no_overflowing_memset_inline() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: 
%[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> () + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + llvm.return %2 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @only_byte_aligned_integers_memset llvm.func @only_byte_aligned_integers_memset() -> i10 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -143,6 +259,22 @@ llvm.func @only_byte_aligned_integers_memset() -> i10 { // ----- +// CHECK-LABEL: llvm.func @only_byte_aligned_integers_memset_inline +llvm.func @only_byte_aligned_integers_memset_inline() -> i10 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i10 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x i10 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 2}> : (!llvm.ptr, i8) -> () + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i10 + llvm.return %2 : i10 +} + +// ----- + // CHECK-LABEL: llvm.func @basic_memcpy // CHECK-SAME: (%[[SOURCE:.*]]: !llvm.ptr) llvm.func @basic_memcpy(%source: !llvm.ptr) -> i32 { diff --git a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir index ba73025814cc..6dc8a97884ee 100644 --- a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir +++ b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir @@ -21,6 +21,25 @@ llvm.func @memset() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline +llvm.func @memset_inline() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + // After SROA, only one i32 will be actually used, so only 4 bytes will be set. + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // 16 bytes means it will span over the first 4 i32 entries. 
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 16}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_partial llvm.func @memset_partial() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -43,6 +62,26 @@ llvm.func @memset_partial() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_partial +llvm.func @memset_inline_partial() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + // After SROA, only the second i32 will be actually used. As the memset writes up + // to half of it, only 2 bytes will be set. + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // 6 bytes means it will span over the first i32 and half of the second i32. + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_full llvm.func @memset_full() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -64,6 +103,25 @@ llvm.func @memset_full() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_full +llvm.func @memset_inline_full() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + // After SROA, only one i32 will be actually used, so only 4 bytes will be set. + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // 40 bytes means it will span over the entire array. 
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 40}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_too_much llvm.func @memset_too_much() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -84,6 +142,24 @@ llvm.func @memset_too_much() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_too_much +llvm.func @memset_inline_too_much() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32> + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // 41 bytes means it will span over the entire array, and then some. + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 41 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 41}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_no_volatile llvm.func @memset_no_volatile() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -103,6 +179,23 @@ llvm.func @memset_no_volatile() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_no_volatile +llvm.func @memset_inline_no_volatile() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32> + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 16 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 16}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @indirect_memset llvm.func @indirect_memset() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -123,6 +216,24 @@ llvm.func @indirect_memset() -> i32 { // ----- +// CHECK-LABEL: llvm.func @indirect_memset_inline +llvm.func @indirect_memset_inline() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // This memset will only cover the selected element. 
+ %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)> + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}> + "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> () + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @invalid_indirect_memset llvm.func @invalid_indirect_memset() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -144,6 +255,25 @@ llvm.func @invalid_indirect_memset() -> i32 { // ----- +// CHECK-LABEL: llvm.func @invalid_indirect_memset_inline +llvm.func @invalid_indirect_memset_inline() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.struct<"foo", (i32, i32)> + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // This memset will go slightly beyond one of the elements. + // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 0] + %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)> + // CHECK: "llvm.intr.memset.inline"(%[[GEP]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}> + "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> () + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_double_use llvm.func @memset_double_use() -> i32 { // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -176,6 +306,35 @@ llvm.func @memset_double_use() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_double_use +llvm.func @memset_inline_double_use() -> i32 { + // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32 + // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // We expect two generated memset, one for each field. + // CHECK-NOT: "llvm.intr.memset.inline" + // After SROA, only one i32 will be actually used, so only 4 bytes will be set. + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}> + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}> + // CHECK-NOT: "llvm.intr.memset.inline" + // 8 bytes means it will span over the two i32 entries. + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)> + %3 = llvm.load %2 : !llvm.ptr -> i32 + %4 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)> + %5 = llvm.load %4 : !llvm.ptr -> f32 + // We use this exotic bitcast to use the f32 easily. Semantics do not matter here. 
+ %6 = llvm.bitcast %5 : f32 to i32 + %7 = llvm.add %3, %6 : i32 + llvm.return %7 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_considers_alignment llvm.func @memset_considers_alignment() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -207,6 +366,35 @@ llvm.func @memset_considers_alignment() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_considers_alignment +llvm.func @memset_inline_considers_alignment() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + // After SROA, only 32-bit values will be actually used, so only 4 bytes will be set. + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // 8 bytes means it will span over the i8 and the i32 entry. + // Because of padding, the f32 entry will not be touched. + // Even though the two i32 are used, only one memset should be generated, + // as the second i32 is not touched by the initial memset. + // CHECK-NOT: "llvm.intr.memset.inline" + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}> + // CHECK-NOT: "llvm.intr.memset.inline" + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)> + %3 = llvm.load %2 : !llvm.ptr -> i32 + %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)> + %5 = llvm.load %4 : !llvm.ptr -> f32 + // We use this exotic bitcast to use the f32 easily. Semantics do not matter here. + %6 = llvm.bitcast %5 : f32 to i32 + %7 = llvm.add %3, %6 : i32 + llvm.return %7 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_considers_packing llvm.func @memset_considers_packing() -> i32 { // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -239,6 +427,35 @@ llvm.func @memset_considers_packing() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_considers_packing +llvm.func @memset_inline_considers_packing() -> i32 { + // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32 + // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // Now all fields are touched by the memset. + // CHECK-NOT: "llvm.intr.memset.inline" + // After SROA, only 32-bit values will be actually used, so only 4 bytes will be set. + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}> + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 3 : i64}> + // CHECK-NOT: "llvm.intr.memset.inline" + // 8 bytes means it will span over all the fields, because there is no padding as the struct is packed. 
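+  // Of those 8 bytes, 1 lands on the (unused) i8, 4 on the i32, and only the
+  // remaining 3 reach the f32, which is why the second fill above is emitted
+  // with len = 3.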
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)> + %3 = llvm.load %2 : !llvm.ptr -> i32 + %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)> + %5 = llvm.load %4 : !llvm.ptr -> f32 + // We use this exotic bitcast to use the f32 easily. Semantics do not matter here. + %6 = llvm.bitcast %5 : f32 to i32 + %7 = llvm.add %3, %6 : i32 + llvm.return %7 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memcpy_dest // CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr) llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 { diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll index 606b11175f57..e857e252ff08 100644 --- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -505,6 +505,10 @@ define void @memmove_test(i32 %0, ptr %1, ptr %2) { define void @memset_test(i32 %0, ptr %1, i8 %2) { ; CHECK: "llvm.intr.memset"(%{{.*}}, %{{.*}}, %{{.*}}) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> () call void @llvm.memset.p0.i32(ptr %1, i8 %2, i32 %0, i1 false) + ; CHECK: "llvm.intr.memset.inline"(%{{.*}}, %{{.*}}) <{isVolatile = false, len = 10 : i64}> : (!llvm.ptr, i8) -> () + call void @llvm.memset.inline.p0.i64(ptr %1, i8 %2, i64 10, i1 false) + ; CHECK: "llvm.intr.memset.inline"(%{{.*}}, %{{.*}}) <{isVolatile = false, len = 10 : i32}> : (!llvm.ptr, i8) -> () + call void @llvm.memset.inline.p0.i32(ptr %1, i8 %2, i32 10, i1 false) ret void } diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index cb712eb4e126..9d45f219cf74 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -533,6 +533,10 @@ llvm.func @memset_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: i8) { %i1 = llvm.mlir.constant(false) : i1 // CHECK: call void @llvm.memset.p0.i32(ptr %{{.*}}, i8 %{{.*}}, i32 %{{.*}}, i1 false "llvm.intr.memset"(%arg2, %arg3, %arg0) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> () + // CHECK: call void @llvm.memset.inline.p0.i32(ptr %{{.*}}, i8 %{{.*}}, i32 10, i1 true + "llvm.intr.memset.inline"(%arg2, %arg3) <{isVolatile = true, len = 10 : i32}> : (!llvm.ptr, i8) -> () + // CHECK: call void @llvm.memset.inline.p0.i64(ptr %{{.*}}, i8 %{{.*}}, i64 10, i1 true + "llvm.intr.memset.inline"(%arg2, %arg3) <{isVolatile = true, len = 10 : i64}> : (!llvm.ptr, i8) -> () llvm.return } From affa8f84393683e4a2f63436594129af3a769774 Mon Sep 17 00:00:00 2001 From: PikachuHy Date: Wed, 20 Nov 2024 01:40:34 +0800 Subject: [PATCH 3/7] [CIR][CIRGen] Support __builtin_memset_inline (#1114) --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 31 +++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenBuilder.h | 7 +++++ clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 16 ++++++++-- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 29 ++++++++++++----- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 10 ++++++ clang/test/CIR/CodeGen/builtins-memory.c | 20 ++++++++++++ 6 files changed, 103 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 3d135e59e6ba..fc485c38f453 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4147,6 +4147,37 @@ def MemSetOp : CIR_Op<"libc.memset"> { let hasVerifier = 
0;
}

+//===----------------------------------------------------------------------===//
+// MemSetInlineOp
+//===----------------------------------------------------------------------===//
+
+def MemSetInlineOp : CIR_Op<"memset_inline"> {
+  let arguments = (ins Arg:$dst,
+                       SInt32:$val,
+                       I64Attr:$len);
+  let summary = "Fill a block of memory with constant length without calling "
+                "any external function";
+  let description = [{
+    Given the CIR pointer, `dst`, `cir.memset_inline` will set the first `len`
+    bytes of the memory pointed to by `dst` to the specified `val`.
+
+    The `len` argument must be a constant integer specifying the number of
+    bytes to fill.
+
+    Examples:
+
+    ```mlir
+    // Set 2 bytes from a struct to 0
+    cir.memset_inline 2 bytes from %struct set to %zero : !cir.ptr, !s32i
+    ```
+  }];
+
+  let assemblyFormat = [{
+    $len `bytes` `from` $dst `set` `to` $val attr-dict
+    `:` qualified(type($dst)) `,` type($val)
+  }];
+  let hasVerifier = 0;
+}

 //===----------------------------------------------------------------------===//
 // MemChrOp
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index aedf2c390911..e66b40700875 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -623,6 +623,13 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
     return create(loc, dst, val, len);
   }
 
+  cir::MemSetInlineOp createMemSetInline(mlir::Location loc, mlir::Value dst,
+                                         mlir::Value val,
+                                         mlir::IntegerAttr len) {
+    val = createIntCast(val, cir::IntType::get(getContext(), 32, true));
+    return create(loc, dst, val, len);
+  }
+
   mlir::Value createNeg(mlir::Value value) {
 
     if (auto intTy = mlir::dyn_cast(value.getType())) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 55619a8d935a..e7a07f1523c3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1554,8 +1554,20 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(Dest.getPointer());
   }
 
-  case Builtin::BI__builtin_memset_inline:
-    llvm_unreachable("BI__builtin_memset_inline NYI");
+  case Builtin::BI__builtin_memset_inline: {
+    Address Dest = emitPointerWithAlignment(E->getArg(0));
+    mlir::Value ByteVal = emitScalarExpr(E->getArg(1));
+    uint64_t size =
+        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
+    emitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
+                        E->getArg(0)->getExprLoc(), FD, 0);
+    builder.createMemSetInline(
+        getLoc(E->getSourceRange()), Dest.getPointer(), ByteVal,
+        mlir::IntegerAttr::get(mlir::IntegerType::get(builder.getContext(), 64),
+                               size));
+    // __builtin_memset_inline has no return value
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__builtin___memset_chk: {
     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
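    // e.g. (a sketch): __builtin_memset_chk(p, 0, 4, 8) can be lowered to a
    // plain memset because the constant fill size (4) provably fits in the
    // constant destination size (8); without that guarantee the checked form
    // must be preserved.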
llvm::APSInt size; diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index e0a87dd78413..035be9cca2da 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -777,6 +777,18 @@ mlir::LogicalResult CIRToLLVMMemSetOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMMemSetInlineOpLowering::matchAndRewrite( + cir::MemSetInlineOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + auto converted = rewriter.create( + op.getLoc(), mlir::IntegerType::get(op.getContext(), 8), + adaptor.getVal()); + rewriter.replaceOpWithNewOp( + op, adaptor.getDst(), converted, adaptor.getLenAttr(), + /*isVolatile=*/false); + return mlir::success(); +} + static mlir::Value getLLVMIntCast(mlir::ConversionPatternRewriter &rewriter, mlir::Value llvmSrc, mlir::Type llvmDstIntTy, bool isUnsigned, uint64_t cirSrcWidth, @@ -1851,8 +1863,8 @@ mlir::LogicalResult CIRToLLVMVAArgOpLowering::matchAndRewrite( return op.emitError("cir.vaarg lowering is NYI"); } - /// Returns the name used for the linkage attribute. This *must* correspond - /// to the name of the attribute in ODS. +/// Returns the name used for the linkage attribute. This *must* correspond +/// to the name of the attribute in ODS. StringRef CIRToLLVMFuncOpLowering::getLinkageAttrNameString() { return "linkage"; } @@ -1886,8 +1898,8 @@ void CIRToLLVMFuncOpLowering::lowerFuncAttributes( } } - /// When do module translation, we can only translate LLVM-compatible types. - /// Here we lower possible OpenCLKernelMetadataAttr to use the converted type. +/// When do module translation, we can only translate LLVM-compatible types. +/// Here we lower possible OpenCLKernelMetadataAttr to use the converted type. void CIRToLLVMFuncOpLowering::lowerFuncOpenCLKernelMetadata( mlir::NamedAttribute &extraAttrsEntry) const { const auto attrKey = cir::OpenCLKernelMetadataAttr::getMnemonic(); @@ -2100,8 +2112,8 @@ mlir::LogicalResult CIRToLLVMSwitchFlatOpLowering::matchAndRewrite( return mlir::success(); } - /// Replace CIR global with a region initialized LLVM global and update - /// insertion point to the end of the initializer block. +/// Replace CIR global with a region initialized LLVM global and update +/// insertion point to the end of the initializer block. 
void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp( cir::GlobalOp op, mlir::ConversionPatternRewriter &rewriter) const { const auto llvmType = getTypeConverter()->convertType(op.getSymType()); @@ -3890,8 +3902,9 @@ void populateCIRToLLVMConversionPatterns( CIRToLLVMBaseClassAddrOpLowering, CIRToLLVMDerivedClassAddrOpLowering, CIRToLLVMVTTAddrPointOpLowering, CIRToLLVMIsFPClassOpLowering, CIRToLLVMAbsOpLowering, CIRToLLVMMemMoveOpLowering, - CIRToLLVMMemSetOpLowering, CIRToLLVMMemCpyInlineOpLowering, - CIRToLLVMSignBitOpLowering, CIRToLLVMPtrMaskOpLowering + CIRToLLVMMemSetOpLowering, CIRToLLVMMemSetInlineOpLowering, + CIRToLLVMMemCpyInlineOpLowering, CIRToLLVMSignBitOpLowering, + CIRToLLVMPtrMaskOpLowering #define GET_BUILTIN_LOWERING_LIST #include "clang/CIR/Dialect/IR/CIRBuiltinsLowering.inc" #undef GET_BUILTIN_LOWERING_LIST diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index d1488ec8f6f5..a88c30d3dd15 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -125,6 +125,16 @@ class CIRToLLVMMemSetOpLowering mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMMemSetInlineOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::MemSetInlineOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override; +}; + class CIRToLLVMPtrStrideOpLowering : public mlir::OpConversionPattern { public: diff --git a/clang/test/CIR/CodeGen/builtins-memory.c b/clang/test/CIR/CodeGen/builtins-memory.c index 472d2103a960..9c7a74301aaa 100644 --- a/clang/test/CIR/CodeGen/builtins-memory.c +++ b/clang/test/CIR/CodeGen/builtins-memory.c @@ -210,3 +210,23 @@ void test_memcpy_inline_aligned_buffers(unsigned long long *dst, const unsigned // COM: LLVM: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 {{%.*}}, ptr align 8 {{%.*}}, i64 4, i1 false) __builtin_memcpy_inline(dst, src, 4); } + +void test_memset_inline(void *dst, int val) { + + // CIR-LABEL: test_memset_inline + // CIR: cir.memset_inline 0 bytes from {{%.*}} set to {{%.*}} : !cir.ptr, !s32i + + // LLVM-LABEL: test_memset_inline + // LLVM: call void @llvm.memset.inline.p0.i64(ptr {{%.*}}, i8 {{%.*}}, i64 0, i1 false) + __builtin_memset_inline(dst, val, 0); + + // CIR: cir.memset_inline 1 bytes from {{%.*}} set to {{%.*}} : !cir.ptr, !s32i + + // LLVM: call void @llvm.memset.inline.p0.i64(ptr {{%.*}}, i8 {{%.*}}, i64 1, i1 false) + __builtin_memset_inline(dst, val, 1); + + // CIR: cir.memset_inline 4 bytes from {{%.*}} set to {{%.*}} : !cir.ptr, !s32i + + // LLVM: call void @llvm.memset.inline.p0.i64(ptr {{%.*}}, i8 {{%.*}}, i64 4, i1 false) + __builtin_memset_inline(dst, val, 4); +} From e57a9daf04b2c93dae22bf27b35aaa7206bc98bb Mon Sep 17 00:00:00 2001 From: PikachuHy Date: Wed, 20 Nov 2024 01:56:38 +0800 Subject: [PATCH 4/7] [CIR][CIRGen][TBAA] Initial TBAA support (#1116) This is the first patch to support TBAA, following the discussion at https://github.com/llvm/clangir/pull/1076#discussion_r1835031415 - add skeleton for CIRGen, utilizing `decorateOperationWithTBAA` - add empty implementation in `CIRGenTBAA` - introduce `CIR_TBAAAttr` with empty body - attach `CIR_TBAAAttr` to `LoadOp` and `StoreOp` - no handling of vtable pointer - no LLVM lowering --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 12 +- .../include/clang/CIR/Dialect/IR/CIRAttrs.td | 3 + 
clang/include/clang/CIR/Dialect/IR/CIROps.td | 15 +- clang/include/clang/CIR/MissingFeatures.h | 1 + clang/lib/CIR/CodeGen/CIRGenAtomic.cpp | 5 +- clang/lib/CIR/CodeGen/CIRGenBuilder.h | 4 +- clang/lib/CIR/CodeGen/CIRGenClass.cpp | 16 +- clang/lib/CIR/CodeGen/CIRGenDecl.cpp | 2 +- clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 262 ++++++++++-------- clang/lib/CIR/CodeGen/CIRGenExprAgg.cpp | 11 +- clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 6 +- clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 5 +- clang/lib/CIR/CodeGen/CIRGenFunction.cpp | 3 +- clang/lib/CIR/CodeGen/CIRGenFunction.h | 14 +- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 71 ++++- clang/lib/CIR/CodeGen/CIRGenModule.h | 47 +++- clang/lib/CIR/CodeGen/CIRGenTBAA.cpp | 64 +++++ clang/lib/CIR/CodeGen/CIRGenTBAA.h | 167 ++++++++++- clang/lib/CIR/CodeGen/CIRGenValue.h | 19 +- clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 4 + clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp | 7 +- clang/test/CIR/CodeGen/tbaa.c | 22 ++ 22 files changed, 579 insertions(+), 181 deletions(-) create mode 100644 clang/test/CIR/CodeGen/tbaa.c diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index a8589baa5ae0..b19fe5884e86 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -158,7 +158,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { llvm_unreachable("Zero initializer for given type is NYI"); } - mlir::Value createLoad(mlir::Location loc, mlir::Value ptr, + cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, bool isVolatile = false, uint64_t alignment = 0) { mlir::IntegerAttr intAttr; if (alignment) @@ -167,7 +167,9 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { return create(loc, ptr, /*isDeref=*/false, isVolatile, /*alignment=*/intAttr, - /*mem_order=*/cir::MemOrderAttr{}); + /*mem_order=*/ + cir::MemOrderAttr{}, + /*tbaa=*/mlir::ArrayAttr{}); } mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr, @@ -353,7 +355,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { if (mlir::cast(dst.getType()).getPointee() != val.getType()) dst = createPtrBitcast(dst, val.getType()); - return create(loc, val, dst, _volatile, align, order); + return create(loc, val, dst, _volatile, align, order, + /*tbaa=*/mlir::ArrayAttr{}); } mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType, @@ -400,7 +403,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { /// Create a copy with inferred length. 
cir::CopyOp createCopy(mlir::Value dst, mlir::Value src, bool isVolatile = false) { - return create(dst.getLoc(), dst, src, isVolatile); + return create(dst.getLoc(), dst, src, isVolatile, + /*tbaa=*/mlir::ArrayAttr{}); } cir::MemCpyOp createMemCpy(mlir::Location loc, mlir::Value dst, diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td index 463bdd5cec7a..d0ac1d00c4b5 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td @@ -1216,6 +1216,9 @@ def GlobalAnnotationValuesAttr : CIR_Attr<"GlobalAnnotationValues", let genVerifyDecl = 1; } +def CIR_TBAAAttr : CIR_Attr<"TBAA", "tbaa", []> { +} + include "clang/CIR/Dialect/IR/CIROpenCLAttrs.td" #endif // MLIR_CIR_DIALECT_CIR_ATTRS diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index fc485c38f453..bd2f34dbfaaf 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -587,7 +587,8 @@ def LoadOp : CIR_Op<"load", [ [MemRead]>:$addr, UnitAttr:$isDeref, UnitAttr:$is_volatile, OptionalAttr:$alignment, - OptionalAttr:$mem_order + OptionalAttr:$mem_order, + OptionalAttr:$tbaa ); let results = (outs CIR_AnyType:$result); @@ -597,6 +598,7 @@ def LoadOp : CIR_Op<"load", [ (`align` `(` $alignment^ `)`)? (`atomic` `(` $mem_order^ `)`)? $addr `:` qualified(type($addr)) `,` type($result) attr-dict + (`tbaa` `(` $tbaa^ `)`)? }]; let extraClassDeclaration = [{ @@ -654,13 +656,15 @@ def StoreOp : CIR_Op<"store", [ [MemWrite]>:$addr, UnitAttr:$is_volatile, OptionalAttr:$alignment, - OptionalAttr:$mem_order); + OptionalAttr:$mem_order, + OptionalAttr:$tbaa); let assemblyFormat = [{ (`volatile` $is_volatile^)? (`align` `(` $alignment^ `)`)? (`atomic` `(` $mem_order^ `)`)? $value `,` $addr attr-dict `:` type($value) `,` qualified(type($addr)) + (`tbaa` `(` $tbaa^ `)`)? }]; let extraClassDeclaration = [{ @@ -3980,7 +3984,8 @@ def CopyOp : CIR_Op<"copy", DeclareOpInterfaceMethods]> { let arguments = (ins Arg:$dst, Arg:$src, - UnitAttr:$is_volatile); + UnitAttr:$is_volatile, + OptionalAttr:$tbaa); let summary = "Copies contents from a CIR pointer to another"; let description = [{ Given two CIR pointers, `src` and `dst`, `cir.copy` will copy the memory @@ -3999,7 +4004,9 @@ def CopyOp : CIR_Op<"copy", }]; let assemblyFormat = [{$src `to` $dst (`volatile` $is_volatile^)? - attr-dict `:` qualified(type($dst)) }]; + attr-dict `:` qualified(type($dst)) + (`tbaa` `(` $tbaa^ `)`)? 
+ }]; let hasVerifier = 1; let extraClassDeclaration = [{ diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 7d59e10809eb..f6e7b34ad197 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -58,6 +58,7 @@ struct MissingFeatures { // sanitizer related type check features static bool emitTypeCheck() { return false; } static bool tbaa() { return false; } + static bool tbaa_struct() { return false; } static bool cleanups() { return false; } static bool emitNullabilityCheck() { return false; } static bool ptrAuth() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 41fcd60179d0..607f62cea8ca 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -585,7 +585,7 @@ static void emitAtomicOp(CIRGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__atomic_load: case AtomicExpr::AO__scoped_atomic_load_n: case AtomicExpr::AO__scoped_atomic_load: { - auto *load = builder.createLoad(loc, Ptr).getDefiningOp(); + auto load = builder.createLoad(loc, Ptr); // FIXME(cir): add scope information. assert(!cir::MissingFeatures::syncScopeID()); load->setAttr("mem_order", orderAttr); @@ -1462,8 +1462,7 @@ void CIRGenFunction::emitAtomicStore(RValue rvalue, LValue dest, if (IsVolatile) store.setIsVolatile(true); - // DecorateInstructionWithTBAA - assert(!cir::MissingFeatures::tbaa()); + CGM.decorateOperationWithTBAA(store, dest.getTBAAInfo()); return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index e66b40700875..ef098ceaabe2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -832,7 +832,7 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { addr.getAlignment()); } - mlir::Value createLoad(mlir::Location loc, Address addr, + cir::LoadOp createLoad(mlir::Location loc, Address addr, bool isVolatile = false) { auto ptrTy = mlir::dyn_cast(addr.getPointer().getType()); if (addr.getElementType() != ptrTy.getPointee()) @@ -842,7 +842,7 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { return create( loc, addr.getElementType(), addr.getPointer(), /*isDeref=*/false, /*is_volatile=*/isVolatile, /*alignment=*/mlir::IntegerAttr{}, - /*mem_order=*/cir::MemOrderAttr{}); + /*mem_order=*/cir::MemOrderAttr{}, /*tbaa=*/mlir::ArrayAttr{}); } mlir::Value createAlignedLoad(mlir::Location loc, mlir::Type ty, diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index 8a20664fd2a8..c54d06c1f9d1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -751,8 +751,14 @@ void CIRGenFunction::initializeVTablePointer(mlir::Location loc, assert(!cir::MissingFeatures::addressSpace()); VTableField = builder.createElementBitCast(loc, VTableField, VTableAddressPoint.getType()); - builder.createStore(loc, VTableAddressPoint, VTableField); - assert(!cir::MissingFeatures::tbaa()); + auto storeOp = builder.createStore(loc, VTableAddressPoint, VTableField); + TBAAAccessInfo TBAAInfo = + CGM.getTBAAVTablePtrAccessInfo(VTableAddressPoint.getType()); + CGM.decorateOperationWithTBAA(storeOp, TBAAInfo); + if (CGM.getCodeGenOpts().OptimizationLevel > 0 && + CGM.getCodeGenOpts().StrictVTablePointers) { + assert(!cir::MissingFeatures::createInvariantGroup()); + } } void CIRGenFunction::initializeVTablePointers(mlir::Location loc, @@ -1659,14 +1665,16 
@@ mlir::Value CIRGenFunction::getVTablePtr(mlir::Location Loc, Address This, Address CIRGenFunction::emitCXXMemberDataPointerAddress( const Expr *E, Address base, mlir::Value memberPtr, - const MemberPointerType *memberPtrType, LValueBaseInfo *baseInfo) { + const MemberPointerType *memberPtrType, LValueBaseInfo *baseInfo, + TBAAAccessInfo *tbaaInfo) { assert(!cir::MissingFeatures::cxxABI()); auto op = builder.createGetIndirectMember(getLoc(E->getSourceRange()), base.getPointer(), memberPtr); QualType memberType = memberPtrType->getPointeeType(); - CharUnits memberAlign = CGM.getNaturalTypeAlignment(memberType, baseInfo); + CharUnits memberAlign = + CGM.getNaturalTypeAlignment(memberType, baseInfo, tbaaInfo); memberAlign = CGM.getDynamicOffsetAlignment( base.getAlignment(), memberPtrType->getClass()->getAsCXXRecordDecl(), memberAlign); diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index 6fa387483492..c4d53a8477ec 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -327,7 +327,7 @@ void CIRGenFunction::emitAutoVarInit(const AutoVarEmission &emission) { // its removal/optimization to the CIR lowering. if (!constant || isa(Init)) { initializeWhatIsTechnicallyUninitialized(Loc); - LValue lv = LValue::makeAddr(Loc, type, AlignmentSource::Decl); + LValue lv = makeAddrLValue(Loc, type, AlignmentSource::Decl); emitExprAsInit(Init, &D, lv); // In case lv has uses it means we indeed initialized something // out of it while trying to build the expression, mark it as such. diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 22ab92a3f57b..6090e7411a47 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -136,20 +136,26 @@ static Address emitPointerWithAlignment(const Expr *expr, CE->getSubExpr()->getType()->getAs()) { if (PtrTy->getPointeeType()->isVoidType()) break; - assert(!cir::MissingFeatures::tbaa()); LValueBaseInfo innerBaseInfo; + TBAAAccessInfo innerTBAAInfo; Address addr = cgf.emitPointerWithAlignment( - CE->getSubExpr(), &innerBaseInfo, tbaaInfo, isKnownNonNull); + CE->getSubExpr(), &innerBaseInfo, &innerTBAAInfo, isKnownNonNull); if (baseInfo) *baseInfo = innerBaseInfo; + if (tbaaInfo) { + *tbaaInfo = innerTBAAInfo; + } if (isa(CE)) { - assert(!cir::MissingFeatures::tbaa()); LValueBaseInfo TargetTypeBaseInfo; + TBAAAccessInfo TargetTypeTBAAInfo; CharUnits Align = cgf.CGM.getNaturalPointeeTypeAlignment( - expr->getType(), &TargetTypeBaseInfo); + expr->getType(), &TargetTypeBaseInfo, &TargetTypeTBAAInfo); + if (tbaaInfo) + *tbaaInfo = + cgf.CGM.mergeTBAAInfoForCast(*tbaaInfo, TargetTypeTBAAInfo); // If the source l-value is opaque, honor the alignment of the // casted-to type. @@ -188,9 +194,15 @@ static Address emitPointerWithAlignment(const Expr *expr, // TODO: Support accesses to members of base classes in TBAA. For now, we // conservatively pretend that the complete object is of the base class // type. 
- assert(!cir::MissingFeatures::tbaa()); - Address Addr = cgf.emitPointerWithAlignment(CE->getSubExpr(), baseInfo); - auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl(); + if (tbaaInfo) { + *tbaaInfo = cgf.CGM.getTBAAAccessInfo(expr->getType()); + } + Address Addr = cgf.emitPointerWithAlignment( + CE->getSubExpr(), baseInfo, nullptr, + (KnownNonNull_t)(isKnownNonNull || + CE->getCastKind() == CK_UncheckedDerivedToBase)); + const auto *Derived = + CE->getSubExpr()->getType()->getPointeeCXXRecordDecl(); return cgf.getAddressOfBaseClass( Addr, Derived, CE->path_begin(), CE->path_end(), cgf.shouldNullCheckClassCastValue(CE), CE->getExprLoc()); @@ -210,7 +222,8 @@ static Address emitPointerWithAlignment(const Expr *expr, LValue LV = cgf.emitLValue(UO->getSubExpr()); if (baseInfo) *baseInfo = LV.getBaseInfo(); - assert(!cir::MissingFeatures::tbaa()); + if (tbaaInfo) + *tbaaInfo = LV.getTBAAInfo(); return LV.getAddress(); } } @@ -288,7 +301,7 @@ LValue CIRGenFunction::emitLValueForBitField(LValue base, QualType fieldType = field->getType().withCVRQualifiers(base.getVRQualifiers()); - assert(!cir::MissingFeatures::tbaa() && "NYI TBAA for bit fields"); + // TODO(cir): Support TBAA for bit fields. LValueBaseInfo fieldBaseInfo(BaseInfo.getAlignmentSource()); return LValue::MakeBitfield(Addr, info, fieldType, fieldBaseInfo, TBAAAccessInfo()); @@ -306,15 +319,34 @@ LValue CIRGenFunction::emitLValueForField(LValue base, const FieldDecl *field) { const RecordDecl *rec = field->getParent(); AlignmentSource BaseAlignSource = BaseInfo.getAlignmentSource(); LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(BaseAlignSource)); - if (cir::MissingFeatures::tbaa() || rec->hasAttr() || + TBAAAccessInfo FieldTBAAInfo; + if (base.getTBAAInfo().isMayAlias() || rec->hasAttr() || FieldType->isVectorType()) { - assert(!cir::MissingFeatures::tbaa() && "NYI"); + FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo(); } else if (rec->isUnion()) { - assert(!cir::MissingFeatures::tbaa() && "NYI"); + FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo(); } else { // If no base type been assigned for the base access, then try to generate // one for this base lvalue. - assert(!cir::MissingFeatures::tbaa() && "NYI"); + FieldTBAAInfo = base.getTBAAInfo(); + if (!FieldTBAAInfo.baseType) { + FieldTBAAInfo.baseType = CGM.getTBAABaseTypeInfo(base.getType()); + assert(!FieldTBAAInfo.offset && + "Nonzero offset for an access with no base type!"); + } + + // Adjust offset to be relative to the base type. + const ASTRecordLayout &Layout = + getContext().getASTRecordLayout(field->getParent()); + unsigned CharWidth = getContext().getCharWidth(); + if (FieldTBAAInfo.baseType) + FieldTBAAInfo.offset += + Layout.getFieldOffset(field->getFieldIndex()) / CharWidth; + + // Update the final access type and size. + FieldTBAAInfo.accessType = CGM.getTBAAAccessInfo(FieldType).accessType; + FieldTBAAInfo.size = + getContext().getTypeSizeInChars(FieldType).getQuantity(); } Address addr = base.getAddress(); @@ -365,12 +397,12 @@ LValue CIRGenFunction::emitLValueForField(LValue base, const FieldDecl *field) { // If this is a reference field, load the reference right now. 
if (FieldType->isReferenceType()) { - assert(!cir::MissingFeatures::tbaa()); - LValue RefLVal = makeAddrLValue(addr, FieldType, FieldBaseInfo); + LValue RefLVal = + makeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); if (RecordCVR & Qualifiers::Volatile) RefLVal.getQuals().addVolatile(); addr = emitLoadOfReference(RefLVal, getLoc(field->getSourceRange()), - &FieldBaseInfo); + &FieldBaseInfo, &FieldTBAAInfo); // Qualifiers on the struct don't apply to the referencee. RecordCVR = 0; @@ -387,10 +419,7 @@ LValue CIRGenFunction::emitLValueForField(LValue base, const FieldDecl *field) { if (field->hasAttr()) llvm_unreachable("NYI"); - if (cir::MissingFeatures::tbaa()) - // Next line should take a TBAA object - llvm_unreachable("NYI"); - LValue LV = makeAddrLValue(addr, FieldType, FieldBaseInfo); + LValue LV = makeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); LV.getQuals().addCVRQualifiers(RecordCVR); // __weak attribute on a field is ignored. @@ -423,8 +452,8 @@ LValue CIRGenFunction::emitLValueForFieldInitialization( LValueBaseInfo BaseInfo = Base.getBaseInfo(); AlignmentSource FieldAlignSource = BaseInfo.getAlignmentSource(); LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(FieldAlignSource)); - assert(!cir::MissingFeatures::tbaa() && "NYI"); - return makeAddrLValue(V, FieldType, FieldBaseInfo); + return makeAddrLValue(V, FieldType, FieldBaseInfo, + CGM.getTBAAInfoForSubobject(Base, FieldType)); } LValue CIRGenFunction::emitCompoundLiteralLValue(const CompoundLiteralExpr *E) { @@ -628,14 +657,13 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, } assert(currSrcLoc && "must pass in source location"); - builder.createStore(*currSrcLoc, value, addr, isVolatile); + auto storeOp = builder.createStore(*currSrcLoc, value, addr, isVolatile); if (isNontemporal) { llvm_unreachable("NYI"); } - if (cir::MissingFeatures::tbaa()) - llvm_unreachable("NYI"); + CGM.decorateOperationWithTBAA(storeOp, tbaaInfo); } void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue, @@ -1111,12 +1139,12 @@ CIRGenFunction::emitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { auto memberPtr = emitScalarExpr(E->getRHS()); LValueBaseInfo baseInfo; - // TODO(cir): add TBAA - assert(!cir::MissingFeatures::tbaa()); - auto memberAddr = emitCXXMemberDataPointerAddress(E, baseAddr, memberPtr, - memberPtrTy, &baseInfo); + TBAAAccessInfo tbaaInfo; + auto memberAddr = emitCXXMemberDataPointerAddress( + E, baseAddr, memberPtr, memberPtrTy, &baseInfo, &tbaaInfo); - return makeAddrLValue(memberAddr, memberPtrTy->getPointeeType(), baseInfo); + return makeAddrLValue(memberAddr, memberPtrTy->getPointeeType(), baseInfo, + tbaaInfo); } LValue CIRGenFunction::emitExtVectorElementExpr(const ExtVectorElementExpr *E) { @@ -1128,11 +1156,10 @@ LValue CIRGenFunction::emitExtVectorElementExpr(const ExtVectorElementExpr *E) { // If it is a pointer to a vector, emit the address and form an lvalue with // it. 
LValueBaseInfo BaseInfo; - // TODO(cir): Support TBAA - assert(!cir::MissingFeatures::tbaa()); - Address Ptr = emitPointerWithAlignment(E->getBase(), &BaseInfo); + TBAAAccessInfo TBAAInfo; + Address Ptr = emitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo); const auto *PT = E->getBase()->getType()->castAs(); - base = makeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo); + base = makeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo, TBAAInfo); base.getQuals().removeObjCGCAttr(); } else if (E->getBase()->isGLValue()) { // Otherwise, if the base is an lvalue ( as in the case of foo.x.x), @@ -1271,8 +1298,9 @@ LValue CIRGenFunction::emitUnaryOpLValue(const UnaryOperator *E) { assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type"); LValueBaseInfo BaseInfo; - // TODO: add TBAAInfo - Address Addr = emitPointerWithAlignment(E->getSubExpr(), &BaseInfo); + TBAAAccessInfo TBAAInfo; + Address Addr = + emitPointerWithAlignment(E->getSubExpr(), &BaseInfo, &TBAAInfo); // Tag 'load' with deref attribute. if (auto loadOp = @@ -1280,7 +1308,7 @@ LValue CIRGenFunction::emitUnaryOpLValue(const UnaryOperator *E) { loadOp.setIsDerefAttr(mlir::UnitAttr::get(&getMLIRContext())); } - LValue LV = LValue::makeAddr(Addr, T, BaseInfo); + LValue LV = LValue::makeAddr(Addr, T, BaseInfo, TBAAInfo); // TODO: set addr space // TODO: ObjC/GC/__weak write barrier stuff. return LV; @@ -1305,9 +1333,8 @@ LValue CIRGenFunction::emitUnaryOpLValue(const UnaryOperator *E) { (E->getOpcode() == UO_Real ? emitAddrOfRealComponent(Loc, LV.getAddress(), LV.getType()) : emitAddrOfImagComponent(Loc, LV.getAddress(), LV.getType())); - // TODO(cir): TBAA info. - assert(!cir::MissingFeatures::tbaa()); - LValue ElemLV = makeAddrLValue(Component, T, LV.getBaseInfo()); + LValue ElemLV = makeAddrLValue(Component, T, LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, T)); ElemLV.getQuals().addQualifiers(LV.getQuals()); return ElemLV; } @@ -1527,7 +1554,8 @@ void CIRGenFunction::emitIgnoredExpr(const Expr *E) { } Address CIRGenFunction::emitArrayToPointerDecay(const Expr *E, - LValueBaseInfo *BaseInfo) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { assert(E->getType()->isArrayType() && "Array to pointer decay must have array source type!"); @@ -1559,7 +1587,8 @@ Address CIRGenFunction::emitArrayToPointerDecay(const Expr *E, QualType EltType = E->getType()->castAsArrayTypeUnsafe()->getElementType(); if (BaseInfo) *BaseInfo = LV.getBaseInfo(); - assert(!cir::MissingFeatures::tbaa() && "NYI"); + if (TBAAInfo) + *TBAAInfo = CGM.getTBAAAccessInfo(EltType); mlir::Value ptr = CGM.getBuilder().maybeBuildArrayDecay( CGM.getLoc(E->getSourceRange()), Addr.getPointer(), @@ -1755,15 +1784,16 @@ LValue CIRGenFunction::emitArraySubscriptExpr(const ArraySubscriptExpr *E, llvm_unreachable("extvector subscript is NYI"); } - assert(!cir::MissingFeatures::tbaa() && "TBAA is NYI"); LValueBaseInfo EltBaseInfo; + TBAAAccessInfo EltTBAAInfo; + Address Addr = Address::invalid(); if (const VariableArrayType *vla = getContext().getAsVariableArrayType(E->getType())) { // The base must be a pointer, which is not an aggregate. Emit // it. It needs to be emitted first in case it's what captures // the VLA bounds. - Addr = emitPointerWithAlignment(E->getBase(), &EltBaseInfo); + Addr = emitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto Idx = EmitIdxAfterBase(/*Promote*/ true); // The element count here is the total number of non-VLA elements. 
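(For intuition: the `emitLValueForField` hunk above fills the new TBAA descriptor roughly as follows. This is a worked example, not part of the patch; `Pair` is a hypothetical record and the byte offsets assume the usual Itanium layout.)

    // Hypothetical record, for illustration only.
    struct Pair {
      int a;   // field 0, byte offset 0
      float b; // field 1, byte offset 4
    };
    // For an access to p.b, emitLValueForField computes:
    //   FieldTBAAInfo.baseType   = TBAA attribute for Pair   (CGM.getTBAABaseTypeInfo)
    //   FieldTBAAInfo.offset    += 4                         (Layout.getFieldOffset(1) / CharWidth)
    //   FieldTBAAInfo.accessType = TBAA attribute for float  (CGM.getTBAAAccessInfo)
    //   FieldTBAAInfo.size       = 4                         (getTypeSizeInChars(float))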
@@ -1806,13 +1836,10 @@ LValue CIRGenFunction::emitArraySubscriptExpr(const ArraySubscriptExpr *E, CGM.getLoc(E->getExprLoc()), /*shouldDecay=*/true, &arrayType, E->getBase()); EltBaseInfo = ArrayLV.getBaseInfo(); - // TODO(cir): EltTBAAInfo - assert(!cir::MissingFeatures::tbaa() && "TBAA is NYI"); + EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType()); } else { // The base must be a pointer; emit it with an estimate of its alignment. - // TODO(cir): EltTBAAInfo - assert(!cir::MissingFeatures::tbaa() && "TBAA is NYI"); - Addr = emitPointerWithAlignment(E->getBase(), &EltBaseInfo); + Addr = emitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto Idx = EmitIdxAfterBase(/*Promote*/ true); QualType ptrType = E->getBase()->getType(); Addr = emitArraySubscriptPtr( @@ -1822,7 +1849,7 @@ LValue CIRGenFunction::emitArraySubscriptExpr(const ArraySubscriptExpr *E, &ptrType, E->getBase()); } - LValue LV = LValue::makeAddr(Addr, E->getType(), EltBaseInfo); + LValue LV = LValue::makeAddr(Addr, E->getType(), EltBaseInfo, EltTBAAInfo); if (getLangOpts().ObjC && getLangOpts().getGC() != LangOptions::NonGC) { llvm_unreachable("ObjC is NYI"); @@ -1967,8 +1994,8 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *E) { // TODO: Support accesses to members of base classes in TBAA. For now, we // conservatively pretend that the complete object is of the base class // type. - assert(!cir::MissingFeatures::tbaa()); - return makeAddrLValue(Base, E->getType(), LV.getBaseInfo()); + return makeAddrLValue(Base, E->getType(), LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, E->getType())); } case CK_ToUnion: assert(0 && "NYI"); @@ -1986,10 +2013,9 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *E) { auto DestAS = builder.getAddrSpaceAttr(E->getType().getAddressSpace()); mlir::Value V = getTargetHooks().performAddrSpaceCast( *this, LV.getPointer(), SrcAS, DestAS, ConvertType(DestTy)); - assert(!cir::MissingFeatures::tbaa()); return makeAddrLValue(Address(V, getTypes().convertTypeForMem(E->getType()), LV.getAddress().getAlignment()), - E->getType(), LV.getBaseInfo()); + E->getType(), LV.getBaseInfo(), LV.getTBAAInfo()); } case CK_ObjCObjectLValueCast: { assert(0 && "NYI"); @@ -2070,7 +2096,8 @@ LValue CIRGenFunction::emitMemberExpr(const MemberExpr *E) { LValue BaseLV; if (E->isArrow()) { LValueBaseInfo BaseInfo; - Address Addr = emitPointerWithAlignment(BaseExpr, &BaseInfo); + TBAAAccessInfo TBAAInfo; + Address Addr = emitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); QualType PtrTy = BaseExpr->getType()->getPointeeType(); SanitizerSet SkippedChecks; bool IsBaseCXXThis = isWrappedCXXThis(BaseExpr); @@ -2080,7 +2107,7 @@ LValue CIRGenFunction::emitMemberExpr(const MemberExpr *E) { SkippedChecks.set(SanitizerKind::Null, true); emitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy, /*Alignment=*/CharUnits::Zero(), SkippedChecks); - BaseLV = makeAddrLValue(Addr, PtrTy, BaseInfo); + BaseLV = makeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); } else BaseLV = emitCheckedLValue(BaseExpr, TCK_MemberAccess); @@ -2434,56 +2461,55 @@ CIRGenFunction::emitConditionalBlocks(const AbstractConditionalOperator *E, } }; - Info.Result = builder - .create( - loc, condV, /*trueBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - CIRGenFunction::LexicalScope lexScope{ - *this, loc, b.getInsertionBlock()}; - CGF.currLexScope->setAsTernary(); - - assert( - !cir::MissingFeatures::incrementProfileCounter()); - eval.begin(CGF); - Info.LHS = BranchGenFunc(CGF, trueExpr); 
- auto lhs = Info.LHS->getPointer(); - eval.end(CGF); - - if (lhs) { - yieldTy = lhs.getType(); - b.create(loc, lhs); - return; - } - // If LHS or RHS is a throw or void expression we need - // to patch arms as to properly match yield types. - insertPoints.push_back(b.saveInsertionPoint()); - }, - /*falseBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - CIRGenFunction::LexicalScope lexScope{ - *this, loc, b.getInsertionBlock()}; - CGF.currLexScope->setAsTernary(); - - assert( - !cir::MissingFeatures::incrementProfileCounter()); - eval.begin(CGF); - Info.RHS = BranchGenFunc(CGF, falseExpr); - auto rhs = Info.RHS->getPointer(); - eval.end(CGF); - - if (rhs) { - yieldTy = rhs.getType(); - b.create(loc, rhs); - } else { - // If LHS or RHS is a throw or void expression we - // need to patch arms as to properly match yield - // types. - insertPoints.push_back(b.saveInsertionPoint()); - } - - patchVoidOrThrowSites(); - }) - .getResult(); + Info.Result = + builder + .create( + loc, condV, /*trueBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{*this, loc, + b.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + + assert(!cir::MissingFeatures::incrementProfileCounter()); + eval.begin(CGF); + Info.LHS = BranchGenFunc(CGF, trueExpr); + auto lhs = Info.LHS->getPointer(); + eval.end(CGF); + + if (lhs) { + yieldTy = lhs.getType(); + b.create(loc, lhs); + return; + } + // If LHS or RHS is a throw or void expression we need + // to patch arms as to properly match yield types. + insertPoints.push_back(b.saveInsertionPoint()); + }, + /*falseBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{*this, loc, + b.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + + assert(!cir::MissingFeatures::incrementProfileCounter()); + eval.begin(CGF); + Info.RHS = BranchGenFunc(CGF, falseExpr); + auto rhs = Info.RHS->getPointer(); + eval.end(CGF); + + if (rhs) { + yieldTy = rhs.getType(); + b.create(loc, rhs); + } else { + // If LHS or RHS is a throw or void expression we + // need to patch arms as to properly match yield + // types. + insertPoints.push_back(b.saveInsertionPoint()); + } + + patchVoidOrThrowSites(); + }) + .getResult(); return Info; } @@ -2518,8 +2544,10 @@ LValue CIRGenFunction::emitConditionalOperatorLValue( AlignmentSource alignSource = std::max(Info.LHS->getBaseInfo().getAlignmentSource(), Info.RHS->getBaseInfo().getAlignmentSource()); - assert(!cir::MissingFeatures::tbaa()); - return makeAddrLValue(result, expr->getType(), LValueBaseInfo(alignSource)); + TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForConditionalOperator( + Info.LHS->getTBAAInfo(), Info.RHS->getTBAAInfo()); + return makeAddrLValue(result, expr->getType(), LValueBaseInfo(alignSource), + TBAAInfo); } else { llvm_unreachable("NYI"); } @@ -2623,7 +2651,7 @@ LValue CIRGenFunction::emitLValue(const Expr *E) { return emitStmtExprLValue(cast(E)); } - return LValue::makeAddr(Address::invalid(), E->getType()); + llvm_unreachable("NYI"); } /// Given the address of a temporary variable, produce an r-value of its type. 
@@ -2894,17 +2922,16 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, Ptr = builder.create(loc, ElemPtrTy, cir::CastKind::bitcast, Ptr); } - - mlir::Value Load = builder.CIRBaseBuilderTy::createLoad(loc, Ptr, isVolatile); + auto loadOp = builder.CIRBaseBuilderTy::createLoad(loc, Ptr, isVolatile); if (isNontemporal) { llvm_unreachable("NYI"); } + CGM.decorateOperationWithTBAA(loadOp, tbaaInfo); - assert(!cir::MissingFeatures::tbaa() && "NYI"); assert(!cir::MissingFeatures::emitScalarRangeCheck() && "NYI"); - return emitFromMemory(Load, ty); + return emitFromMemory(loadOp, ty); } // Note: this function also emit constructor calls to support a MSVC extensions @@ -2954,8 +2981,7 @@ Address CIRGenFunction::emitLoadOfReference(LValue refLVal, mlir::Location loc, builder.create(loc, refLVal.getAddress().getElementType(), refLVal.getAddress().getPointer()); - // TODO(cir): DecorateInstructionWithTBAA relevant for us? - assert(!cir::MissingFeatures::tbaa()); + CGM.decorateOperationWithTBAA(load, refLVal.getTBAAInfo()); QualType pointeeType = refLVal.getType()->getPointeeType(); CharUnits align = @@ -2967,9 +2993,11 @@ Address CIRGenFunction::emitLoadOfReference(LValue refLVal, mlir::Location loc, LValue CIRGenFunction::emitLoadOfReferenceLValue(LValue RefLVal, mlir::Location Loc) { LValueBaseInfo PointeeBaseInfo; - Address PointeeAddr = emitLoadOfReference(RefLVal, Loc, &PointeeBaseInfo); + TBAAAccessInfo PointeeTBAAInfo; + Address PointeeAddr = + emitLoadOfReference(RefLVal, Loc, &PointeeBaseInfo, &PointeeTBAAInfo); return makeAddrLValue(PointeeAddr, RefLVal.getType()->getPointeeType(), - PointeeBaseInfo); + PointeeBaseInfo, PointeeTBAAInfo); } void CIRGenFunction::emitUnreachable(SourceLocation Loc) { diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAgg.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAgg.cpp index f13cb8600f9a..32f343ffd605 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprAgg.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprAgg.cpp @@ -1708,13 +1708,18 @@ void CIRGenFunction::emitAggregateCopy(LValue Dest, LValue Src, QualType Ty, } } - builder.createCopy(DestPtr.getPointer(), SrcPtr.getPointer(), isVolatile); + auto copyOp = + builder.createCopy(DestPtr.getPointer(), SrcPtr.getPointer(), isVolatile); // Determine the metadata to describe the position of any padding in this // memcpy, as well as the TBAA tags for the members of the struct, in case // the optimizer wishes to expand it in to scalar memory operations. 
- if (CGM.getCodeGenOpts().NewStructPathTBAA || cir::MissingFeatures::tbaa()) - llvm_unreachable("TBAA is NYI"); + assert(!cir::MissingFeatures::tbaa_struct() && "tbaa.struct NYI"); + if (CGM.getCodeGenOpts().NewStructPathTBAA) { + TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForMemoryTransfer( + Dest.getTBAAInfo(), Src.getTBAAInfo()); + CGM.decorateOperationWithTBAA(copyOp, TBAAInfo); + } } AggValueSlot::Overlap_t diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index be3ec6071def..e336594388ae 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -218,9 +218,9 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorMemberCallExpr( LValue This; if (IsArrow) { LValueBaseInfo BaseInfo; - assert(!cir::MissingFeatures::tbaa()); - Address ThisValue = emitPointerWithAlignment(Base, &BaseInfo); - This = makeAddrLValue(ThisValue, Base->getType(), BaseInfo); + TBAAAccessInfo TBAAInfo; + Address ThisValue = emitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); + This = makeAddrLValue(ThisValue, Base->getType(), BaseInfo, TBAAInfo); } else { This = emitLValue(Base); } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index ee2a0c32cbff..7d51dc05e7c1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -1604,10 +1604,7 @@ mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { Address DestAddr = SourceAddr.withPointer(DestPtr).withElementType(DestElemTy); LValue DestLVal = CGF.makeAddrLValue(DestAddr, DestTy); - - if (Kind == CK_LValueToRValueBitCast) - assert(!cir::MissingFeatures::tbaa()); - + DestLVal.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); return emitLoadOfLValue(DestLVal, CE->getExprLoc()); } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index b31a4ba325ae..1c84cb3ca71b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -824,14 +824,13 @@ LValue CIRGenFunction::MakeNaturalAlignPointeeAddrLValue(mlir::Value val, TBAAAccessInfo tbaaInfo; CharUnits align = CGM.getNaturalTypeAlignment(ty, &baseInfo, &tbaaInfo, /* for PointeeType= */ true); - return makeAddrLValue(Address(val, align), ty, baseInfo); + return makeAddrLValue(Address(val, align), ty, baseInfo, tbaaInfo); } LValue CIRGenFunction::MakeNaturalAlignAddrLValue(mlir::Value val, QualType ty) { LValueBaseInfo baseInfo; TBAAAccessInfo tbaaInfo; - assert(!cir::MissingFeatures::tbaa()); CharUnits alignment = CGM.getNaturalTypeAlignment(ty, &baseInfo, &tbaaInfo); Address addr(val, getTypes().convertTypeForMem(ty), alignment); return LValue::makeAddr(addr, ty, getContext(), baseInfo, tbaaInfo); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 8d4fabeff642..e5db0a01e429 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -886,7 +886,8 @@ class CIRGenFunction : public CIRGenTypeCache { LValue emitLoadOfReferenceLValue(Address RefAddr, mlir::Location Loc, QualType RefTy, AlignmentSource Source = AlignmentSource::Type) { - LValue RefLVal = makeAddrLValue(RefAddr, RefTy, LValueBaseInfo(Source)); + LValue RefLVal = makeAddrLValue(RefAddr, RefTy, LValueBaseInfo(Source), + CGM.getTBAAAccessInfo(RefTy)); return emitLoadOfReferenceLValue(RefLVal, Loc); } void emitImplicitAssignmentOperatorBody(FunctionArgList &Args); @@ -909,7 +910,8 @@ class CIRGenFunction : 
public CIRGenTypeCache { /// TODO: Add TBAAAccessInfo Address emitCXXMemberDataPointerAddress( const Expr *E, Address base, mlir::Value memberPtr, - const MemberPointerType *memberPtrType, LValueBaseInfo *baseInfo); + const MemberPointerType *memberPtrType, LValueBaseInfo *baseInfo, + TBAAAccessInfo *tbaaInfo); /// Generate a call of the given function, expecting the given /// result type, and using the given argument list which specifies both the @@ -1649,9 +1651,8 @@ class CIRGenFunction : public CIRGenTypeCache { QualType DstTy, SourceLocation Loc); LValue makeAddrLValue(Address addr, clang::QualType ty, - LValueBaseInfo baseInfo) { - return LValue::makeAddr(addr, ty, getContext(), baseInfo, - CGM.getTBAAAccessInfo(ty)); + LValueBaseInfo baseInfo, TBAAAccessInfo tbaaInfo) { + return LValue::makeAddr(addr, ty, getContext(), baseInfo, tbaaInfo); } LValue makeAddrLValue(Address addr, clang::QualType ty, @@ -1744,7 +1745,8 @@ class CIRGenFunction : public CIRGenTypeCache { /// TODO(cir): add TBAAAccessInfo Address emitArrayToPointerDecay(const Expr *Array, - LValueBaseInfo *BaseInfo = nullptr); + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); /// Emits the code necessary to evaluate an arbitrary expression into the /// given memory location. diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 97678fa2ad8a..b9e332d4b27a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -205,6 +205,11 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &context, /*line=*/0, /*col=*/0)); } + if (langOpts.Sanitize.has(SanitizerKind::Thread) || + (!codeGenOpts.RelaxedAliasing && codeGenOpts.OptimizationLevel > 0)) { + tbaa.reset(new CIRGenTBAA(&context, astctx, genTypes, theModule, + codeGenOpts, langOpts)); + } } CIRGenModule::~CIRGenModule() {} @@ -258,6 +263,9 @@ CharUnits CIRGenModule::getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *tbaaInfo, bool forPointeeType) { + if (tbaaInfo) { + *tbaaInfo = getTBAAAccessInfo(T); + } // FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But // that doesn't return the information we need to compute BaseInfo. 
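(Note on the hunk below: every `CIRGenModule` TBAA helper degrades gracefully when the `tbaa` member was never constructed, that is, when ThreadSanitizer is off and the build uses `-O0` or relaxed aliasing, so call sites can decorate unconditionally. A minimal sketch of the intended call-site pattern, reusing names from this patch but otherwise illustrative:)

    // Sketch only: how codegen consumes these helpers at a store site.
    TBAAAccessInfo info = CGM.getTBAAAccessInfo(ty); // empty TBAAAccessInfo() when TBAA is off
    auto storeOp = builder.createStore(loc, value, addr);
    CGM.decorateOperationWithTBAA(storeOp, info);    // no-op when the access tag is null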
@@ -3513,8 +3521,69 @@ void CIRGenModule::emitGlobalAnnotations() {
   deferredAnnotations.clear();
 }

+cir::TBAAAttr CIRGenModule::getTBAATypeInfo(QualType QTy) {
+  if (!tbaa) {
+    return nullptr;
+  }
+  return tbaa->getTypeInfo(QTy);
+}
+
 TBAAAccessInfo CIRGenModule::getTBAAAccessInfo(QualType accessType) {
+  if (!tbaa) {
+    return TBAAAccessInfo();
+  }
+  if (getLangOpts().CUDAIsDevice) {
+    llvm_unreachable("NYI");
+  }
+  return tbaa->getAccessInfo(accessType);
+}
+
+TBAAAccessInfo
+CIRGenModule::getTBAAVTablePtrAccessInfo(mlir::Type VTablePtrType) {
   if (!tbaa)
     return TBAAAccessInfo();
-  llvm_unreachable("NYI");
+  return tbaa->getVTablePtrAccessInfo(VTablePtrType);
+}
+
+mlir::ArrayAttr CIRGenModule::getTBAAStructInfo(QualType QTy) {
+  if (!tbaa)
+    return nullptr;
+  return tbaa->getTBAAStructInfo(QTy);
+}
+
+cir::TBAAAttr CIRGenModule::getTBAABaseTypeInfo(QualType QTy) {
+  if (!tbaa) {
+    return nullptr;
+  }
+  return tbaa->getBaseTypeInfo(QTy);
+}
+
+mlir::ArrayAttr CIRGenModule::getTBAAAccessTagInfo(TBAAAccessInfo tbaaInfo) {
+  if (!tbaa) {
+    return nullptr;
+  }
+  return tbaa->getAccessTagInfo(tbaaInfo);
+}
+
+TBAAAccessInfo CIRGenModule::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
+                                                  TBAAAccessInfo TargetInfo) {
+  if (!tbaa)
+    return TBAAAccessInfo();
+  return tbaa->mergeTBAAInfoForCast(SourceInfo, TargetInfo);
+}
+
+TBAAAccessInfo
+CIRGenModule::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
+                                                  TBAAAccessInfo InfoB) {
+  if (!tbaa)
+    return TBAAAccessInfo();
+  return tbaa->mergeTBAAInfoForConditionalOperator(InfoA, InfoB);
+}
+
+TBAAAccessInfo
+CIRGenModule::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+                                             TBAAAccessInfo SrcInfo) {
+  if (!tbaa)
+    return TBAAAccessInfo();
+  return tbaa->mergeTBAAInfoForMemoryTransfer(DestInfo, SrcInfo);
 }
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 961a999990b6..61d975491f33 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -39,6 +39,7 @@
 #include "llvm/ADT/SmallPtrSet.h"

 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/MLIRContext.h"
@@ -506,10 +507,52 @@ class CIRGenModule : public CIRGenTypeCache {
     return VTables.getItaniumVTableContext();
   }

-  /// getTBAAAccessInfo - Gte TBAA information that describes an access to an
-  /// object of the given type.
+  /// Get attribute used to describe accesses to objects of
+  /// the given type.
+  cir::TBAAAttr getTBAATypeInfo(QualType QTy);
+
+  /// Get TBAA information that describes an access to an object of the given
+  /// type.
   TBAAAccessInfo getTBAAAccessInfo(QualType accessType);

+  /// Get the TBAA information that describes an access to a virtual table
+  /// pointer.
+  TBAAAccessInfo getTBAAVTablePtrAccessInfo(mlir::Type VTablePtrType);
+
+  mlir::ArrayAttr getTBAAStructInfo(QualType QTy);
+
+  /// Get metadata that describes the given base access type. Return null if
+  /// the type is not suitable for use in TBAA access tags.
+  cir::TBAAAttr getTBAABaseTypeInfo(QualType QTy);
+
+  mlir::ArrayAttr getTBAAAccessTagInfo(TBAAAccessInfo tbaaInfo);
+
+  /// Get merged TBAA information for the purposes of type casts.
+  TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
+                                      TBAAAccessInfo TargetInfo);
+
+  /// Get merged TBAA information for the purposes of conditional operator.
+  TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
+                                                     TBAAAccessInfo InfoB);
+
+  /// Get merged TBAA information for the purposes of memory transfer calls.
+  TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
+                                                TBAAAccessInfo SrcInfo);
+
+  /// Get TBAA information for an access with a given base lvalue.
+  TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType) {
+    if (Base.getTBAAInfo().isMayAlias())
+      return TBAAAccessInfo::getMayAliasInfo();
+    return getTBAAAccessInfo(AccessType);
+  }
+
+  template <typename Op>
+  void decorateOperationWithTBAA(Op op, TBAAAccessInfo tbaaInfo) {
+    if (auto tag = getTBAAAccessTagInfo(tbaaInfo)) {
+      op.setTbaaAttr(tag);
+    }
+  }
+
   /// This contains all the decls which have definitions but which are deferred
   /// for emission and therefore should only be output if they are actually
   /// used. If a decl is in this, then it is known to have not been referenced
diff --git a/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp b/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp
index e69de29bb2d1..c3083e93eeb1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp
@@ -0,0 +1,64 @@
+#include "CIRGenTBAA.h"
+#include "CIRGenCXXABI.h"
+#include "CIRGenTypes.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/Interfaces/DataLayoutInterfaces.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecordLayout.h"
+#include "llvm/Support/ErrorHandling.h"
+namespace clang::CIRGen {
+
+cir::TBAAAttr tbaa_NYI(mlir::MLIRContext *ctx) {
+  return cir::TBAAAttr::get(ctx);
+}
+
+CIRGenTBAA::CIRGenTBAA(mlir::MLIRContext *ctx, clang::ASTContext &context,
+                       CIRGenTypes &types, mlir::ModuleOp moduleOp,
+                       const clang::CodeGenOptions &codeGenOpts,
+                       const clang::LangOptions &features)
+    : ctx(ctx), context(context), types(types), moduleOp(moduleOp),
+      codeGenOpts(codeGenOpts), features(features) {}
+
+cir::TBAAAttr CIRGenTBAA::getTypeInfo(clang::QualType qty) {
+  return tbaa_NYI(ctx);
+}
+
+TBAAAccessInfo CIRGenTBAA::getAccessInfo(clang::QualType accessType) {
+  return TBAAAccessInfo();
+}
+
+TBAAAccessInfo CIRGenTBAA::getVTablePtrAccessInfo(mlir::Type vtablePtrType) {
+  return TBAAAccessInfo();
+}
+
+mlir::ArrayAttr CIRGenTBAA::getTBAAStructInfo(clang::QualType qty) {
+  return mlir::ArrayAttr::get(ctx, {});
+}
+
+cir::TBAAAttr CIRGenTBAA::getBaseTypeInfo(clang::QualType qty) {
+  return tbaa_NYI(ctx);
+}
+
+mlir::ArrayAttr CIRGenTBAA::getAccessTagInfo(TBAAAccessInfo tbaaInfo) {
+  return mlir::ArrayAttr::get(ctx, {tbaa_NYI(ctx)});
+}
+
+TBAAAccessInfo CIRGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo sourceInfo,
+                                                TBAAAccessInfo targetInfo) {
+  return TBAAAccessInfo();
+}
+
+TBAAAccessInfo
+CIRGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo infoA,
+                                                TBAAAccessInfo infoB) {
+  return TBAAAccessInfo();
+}
+
+TBAAAccessInfo
+CIRGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo destInfo,
+                                           TBAAAccessInfo srcInfo) {
+  return TBAAAccessInfo();
+}
+
+} // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenTBAA.h b/clang/lib/CIR/CodeGen/CIRGenTBAA.h
index 2b33f0da16d4..b6a392bd164c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTBAA.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTBAA.h
@@ -13,16 +13,169 @@
 #ifndef LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENTBAA_H
 #define LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENTBAA_H
-
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/MLIRContext.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
 namespace clang::CIRGen {
+class CIRGenTypes;
+enum class TBAAAccessKind : unsigned {
+  Ordinary,
+  MayAlias,
+  Incomplete,
+};
+// Describes a memory access in terms of TBAA.
+struct TBAAAccessInfo {
+  TBAAAccessInfo(TBAAAccessKind kind, cir::TBAAAttr baseType,
+                 cir::TBAAAttr accessType, uint64_t offset, uint64_t size)
+      : kind(kind), baseType(baseType), accessType(accessType),
+        offset(offset), size(size) {}

-// TBAAAccessInfo - Describes a memory access in terms of TBAA.
-struct TBAAAccessInfo {};
+  TBAAAccessInfo(cir::TBAAAttr baseType, cir::TBAAAttr accessType,
+                 uint64_t offset, uint64_t size)
+      : kind(TBAAAccessKind::Ordinary), baseType(baseType),
+        accessType(accessType), offset(offset), size(size) {}

-/// CIRGenTBAA - This class organizes the cross-module state that is used while
-/// lowering AST types to LLVM types.
-class CIRGenTBAA {};
+  explicit TBAAAccessInfo(cir::TBAAAttr accessType, uint64_t size)
+      : TBAAAccessInfo(TBAAAccessKind::Ordinary, /* baseType= */ {}, accessType,
+                       /* offset= */ 0, size) {}

-} // namespace clang::CIRGen
+  TBAAAccessInfo()
+      : TBAAAccessInfo(/* accessType= */ nullptr, /* size= */ 0) {}
+
+  static TBAAAccessInfo getMayAliasInfo() {
+    return TBAAAccessInfo(TBAAAccessKind::MayAlias, /* baseType= */ {},
+                          /* accessType= */ nullptr,
+                          /* offset= */ 0, /* size= */ 0);
+  }
+
+  bool isMayAlias() const { return kind == TBAAAccessKind::MayAlias; }
+
+  static TBAAAccessInfo getIncompleteInfo() {
+    return TBAAAccessInfo(TBAAAccessKind::Incomplete, /* baseType= */ {},
+                          /* accessType= */ {},
+                          /* offset= */ 0, /* size= */ 0);
+  }
+
+  bool isIncomplete() const { return kind == TBAAAccessKind::Incomplete; }
+
+  bool operator==(const TBAAAccessInfo &other) const {
+    return kind == other.kind && baseType == other.baseType &&
+           accessType == other.accessType && offset == other.offset &&
+           size == other.size;
+  }
+
+  bool operator!=(const TBAAAccessInfo &other) const {
+    return !(*this == other);
+  }
+
+  explicit operator bool() const { return *this != TBAAAccessInfo(); }
+
+  /// The kind of the access descriptor.
+  TBAAAccessKind kind;
+
+  /// The base/leading access type. May be null if this access
+  /// descriptor represents an access that is not considered to be an access
+  /// to an aggregate or union member.
+  cir::TBAAAttr baseType;
+
+  /// The final access type. May be null if there is no TBAA
+  /// information available about this access.
+  cir::TBAAAttr accessType;
+
+  /// The byte offset of the final access within the base one. Must be
+  /// zero if the base access type is not specified.
+  uint64_t offset;
+  /// The size of access, in bytes.
+  uint64_t size;
+};
+
+/// This class organizes the cross-module state that is used while lowering AST
+/// types to LLVM types.
+class CIRGenTBAA {
+  mlir::MLIRContext *ctx;
+  clang::ASTContext &context;
+  CIRGenTypes &types;
+  mlir::ModuleOp moduleOp;
+  const clang::CodeGenOptions &codeGenOpts;
+  const clang::LangOptions &features;
+
+public:
+  CIRGenTBAA(mlir::MLIRContext *ctx, clang::ASTContext &context,
+             CIRGenTypes &types, mlir::ModuleOp moduleOp,
+             const clang::CodeGenOptions &codeGenOpts,
+             const clang::LangOptions &features);
+
+  /// Get attribute used to describe accesses to objects of the given type.
+  cir::TBAAAttr getTypeInfo(clang::QualType qty);
+
+  /// Get TBAA information that describes an access to an object of the given
+  /// type.
+  TBAAAccessInfo getAccessInfo(clang::QualType accessType);
+
+  /// Get the TBAA information that describes an access to a virtual table
+  /// pointer.
+  TBAAAccessInfo getVTablePtrAccessInfo(mlir::Type vtablePtrType);
+
+  /// Get the TBAAStruct attributes to be used for a memcpy of the given type.
+  mlir::ArrayAttr getTBAAStructInfo(clang::QualType qty);
+
+  /// Get attribute that describes the given base access type. Return null if
+  /// the type is not suitable for use in TBAA access tags.
+  cir::TBAAAttr getBaseTypeInfo(clang::QualType qty);
+
+  /// Get TBAA tag for a given memory access.
+  mlir::ArrayAttr getAccessTagInfo(TBAAAccessInfo tbaaInfo);
+
+  /// Get merged TBAA information for the purpose of type casts.
+  TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo sourceInfo,
+                                      TBAAAccessInfo targetInfo);
+
+  /// Get merged TBAA information for the purpose of conditional operator.
+  TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo infoA,
+                                                     TBAAAccessInfo infoB);
+
+  /// Get merged TBAA information for the purpose of memory transfer calls.
+  TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo destInfo,
+                                                TBAAAccessInfo srcInfo);
+};
+} // namespace clang::CIRGen
+namespace llvm {
+template <> struct DenseMapInfo<clang::CIRGen::TBAAAccessInfo> {
+  static clang::CIRGen::TBAAAccessInfo getEmptyKey() {
+    unsigned unsignedKey = DenseMapInfo<unsigned>::getEmptyKey();
+    return clang::CIRGen::TBAAAccessInfo(
+        static_cast<clang::CIRGen::TBAAAccessKind>(unsignedKey),
+        DenseMapInfo<cir::TBAAAttr>::getEmptyKey(),
+        DenseMapInfo<cir::TBAAAttr>::getEmptyKey(),
+        DenseMapInfo<uint64_t>::getEmptyKey(),
+        DenseMapInfo<uint64_t>::getEmptyKey());
+  }
+  static clang::CIRGen::TBAAAccessInfo getTombstoneKey() {
+    unsigned unsignedKey = DenseMapInfo<unsigned>::getTombstoneKey();
+    return clang::CIRGen::TBAAAccessInfo(
+        static_cast<clang::CIRGen::TBAAAccessKind>(unsignedKey),
+        DenseMapInfo<cir::TBAAAttr>::getTombstoneKey(),
+        DenseMapInfo<cir::TBAAAttr>::getTombstoneKey(),
+        DenseMapInfo<uint64_t>::getTombstoneKey(),
+        DenseMapInfo<uint64_t>::getTombstoneKey());
+  }
+  static unsigned getHashValue(const clang::CIRGen::TBAAAccessInfo &val) {
+    auto kindValue = static_cast<unsigned>(val.kind);
+    return DenseMapInfo<unsigned>::getHashValue(kindValue) ^
+           DenseMapInfo<cir::TBAAAttr>::getHashValue(val.baseType) ^
+           DenseMapInfo<cir::TBAAAttr>::getHashValue(val.accessType) ^
+           DenseMapInfo<uint64_t>::getHashValue(val.offset) ^
+           DenseMapInfo<uint64_t>::getHashValue(val.size);
+  }
+  static bool isEqual(const clang::CIRGen::TBAAAccessInfo &lhs,
+                      const clang::CIRGen::TBAAAccessInfo &rhs) {
+    return lhs == rhs;
+  }
+};
+} // namespace llvm
 #endif
diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h
index fc2f650eaed6..8dd16f6ce9e1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenValue.h
+++ b/clang/lib/CIR/CodeGen/CIRGenValue.h
@@ -268,26 +268,14 @@ class LValue {
   LValueBaseInfo getBaseInfo() const { return BaseInfo; }
   void setBaseInfo(LValueBaseInfo Info) { BaseInfo = Info; }

-  static LValue makeAddr(Address address, clang::QualType T,
-                         AlignmentSource Source = AlignmentSource::Type) {
-    LValue R;
-    R.LVType = Simple;
-    R.V = address.getPointer();
-    R.ElementType = address.getElementType();
-    R.Initialize(T, T.getQualifiers(), address.getAlignment(),
-                 LValueBaseInfo(Source), TBAAAccessInfo());
-    return R;
-  }
-
-  // FIXME: only have one of these static methods.
- static LValue makeAddr(Address address, clang::QualType T, - LValueBaseInfo LBI) { + static LValue makeAddr(Address address, clang::QualType T, LValueBaseInfo LBI, + TBAAAccessInfo tbaaInfo) { LValue R; R.LVType = Simple; R.V = address.getPointer(); R.ElementType = address.getElementType(); - R.Initialize(T, T.getQualifiers(), address.getAlignment(), LBI, - TBAAAccessInfo()); + R.Initialize(T, T.getQualifiers(), address.getAlignment(), LBI, tbaaInfo); return R; } @@ -307,6 +295,7 @@ class LValue { } TBAAAccessInfo getTBAAInfo() const { return tbaaInfo; } + void setTBAAInfo(TBAAAccessInfo info) { tbaaInfo = info; } const clang::Qualifiers &getQuals() const { return Quals; } clang::Qualifiers &getQuals() { return Quals; } diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 214553b66e83..ab49ffbdcb98 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -106,6 +106,10 @@ struct CIROpAsmDialectInterface : public OpAsmDialectInterface { os << dynCastInfoAttr.getAlias(); return AliasResult::FinalAlias; } + if (auto tbaaAttr = mlir::dyn_cast(attr)) { + os << tbaaAttr.getMnemonic(); + return AliasResult::OverridableAlias; + } return AliasResult::NoAlias; } diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp index e75b30b1c1c3..80963353a304 100644 --- a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Interfaces/DataLayoutInterfaces.h" @@ -52,8 +53,7 @@ void cir::AllocaOp::handleBlockArgument(const MemorySlot &slot, std::optional cir::AllocaOp::handlePromotionComplete(const MemorySlot &slot, - Value defaultValue, - OpBuilder &builder) { + Value defaultValue, OpBuilder &builder) { if (defaultValue && defaultValue.use_empty()) defaultValue.getDefiningOp()->erase(); this->erase(); @@ -150,7 +150,8 @@ DeletionKind cir::CopyOp::removeBlockingUses( const DataLayout &dataLayout) { if (loadsFrom(slot)) builder.create(getLoc(), reachingDefinition, getDst(), false, - mlir::IntegerAttr{}, cir::MemOrderAttr()); + mlir::IntegerAttr{}, cir::MemOrderAttr(), + mlir::ArrayAttr{}); return DeletionKind::Delete; } diff --git a/clang/test/CIR/CodeGen/tbaa.c b/clang/test/CIR/CodeGen/tbaa.c new file mode 100644 index 000000000000..43cdde47ecb7 --- /dev/null +++ b/clang/test/CIR/CodeGen/tbaa.c @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1 +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// CIR: #tbaa[[TBAA_NO:.*]] = #cir.tbaa +void f(int *a, float *b) { + // CIR: cir.scope + // CIR: %[[TMP1:.*]] = cir.load deref %{{.*}} : !cir.ptr>, !cir.ptr tbaa([#tbaa[[TBAA_NO]]]) + // CIR: %[[TMP2:.*]] = cir.load %[[TMP1]] : !cir.ptr, !s32i tbaa([#tbaa[[TBAA_NO]]]) + // CIR: cir.if + // CIR: %[[C2:.*]] = cir.const #cir.fp<2 + // CIR: %[[TMP3:.*]] = cir.load deref %[[ARG_b:.*]] : !cir.ptr>, !cir.ptr tbaa([#tbaa[[TBAA_NO]]]) + // CIR: cir.store %[[C2]], %[[TMP3]] : !cir.float, !cir.ptr tbaa([#tbaa[[TBAA_NO]]]) + // CIR: else + // CIR: %[[C3:.*]] = cir.const #cir.fp<3 + // CIR: %[[TMP4:.*]] = cir.load deref %[[ARG_b]] : !cir.ptr>, !cir.ptr tbaa([#tbaa[[TBAA_NO]]]) + // CIR: cir.store %[[C3]], %[[TMP4]] : !cir.float, !cir.ptr tbaa([#tbaa[[TBAA_NO]]]) + if (*a 
== 1) {
+    *b = 2.0f;
+  } else {
+    *b = 3.0f;
+  }
+}

From 70fed1b9257cc74f05445ece42b6b76c875a2f24 Mon Sep 17 00:00:00 2001
From: "Chibuoyim (Wilson) Ogbonna"
Date: Tue, 19 Nov 2024 20:58:00 +0300
Subject: [PATCH 5/7] [CIR][ABI][AArch64][Lowering] Support structures with
 padding (#1118)

The title describes the purpose of the PR: it adds initial support for
structures with padding to the call convention lowering for AArch64.

I have also added _initial support_ for the missing feature
[FinishLayout](https://github.com/llvm/clangir/blob/5c5d58402bebdb1e851fb055f746662d4e7eb586/clang/lib/AST/RecordLayoutBuilder.cpp#L786)
for records; the logic is taken from the original codegen.

Finally, I added a test for verification.
---
 clang/include/clang/CIR/MissingFeatures.h     |  3 --
 .../TargetLowering/LowerFunction.cpp          | 28 +++++++++++-
 .../TargetLowering/RecordLayoutBuilder.cpp    | 43 +++++++++++++++----
 .../AArch64/aarch64-cc-structs.c              | 31 ++++++++++++-
 4 files changed, 92 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index f6e7b34ad197..8f56f0726f8a 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -404,9 +404,6 @@ struct MissingFeatures {
   // specs. We should make it always present.
   static bool makeTripleAlwaysPresent() { return false; }

-  // This Itanium bit is currently being skipped in cir.
-  static bool itaniumRecordLayoutBuilderFinishLayout() { return false; }
-
   static bool mustProgress() { return false; }

   static bool skipTempCopy() { return false; }
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp
index 483ce026ee0e..06242e52383a 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp
@@ -347,6 +347,31 @@ mlir::Value emitAddressAtOffset(LowerFunction &LF, mlir::Value addr,
   return addr;
 }

+/// Creates a coerced value from \param src having a type of \param ty, which
+/// is a non-primitive type.
+mlir::Value createCoercedNonPrimitive(mlir::Value src, mlir::Type ty,
+                                      LowerFunction &LF) {
+  if (auto load = mlir::dyn_cast<cir::LoadOp>(src.getDefiningOp())) {
+    auto &bld = LF.getRewriter();
+    auto addr = load.getAddr();
+
+    auto oldAlloca = mlir::dyn_cast<cir::AllocaOp>(addr.getDefiningOp());
+    auto alloca = bld.create<cir::AllocaOp>(
+        src.getLoc(), bld.getType<cir::PointerType>(ty), ty,
+        /*name=*/llvm::StringRef(""), oldAlloca.getAlignmentAttr());
+
+    auto tySize = LF.LM.getDataLayout().getTypeStoreSize(ty);
+    createMemCpy(LF, alloca, addr, tySize.getFixedValue());
+
+    auto newLoad = bld.create<cir::LoadOp>(src.getLoc(), alloca.getResult());
+    bld.replaceAllOpUsesWith(load, newLoad);
+
+    return newLoad;
+  }
+
+  cir_cconv_unreachable("NYI");
+}
+
 /// After the calling convention is lowered, an ABI-agnostic type might have to
 /// be loaded back to its ABI-aware counterpart so it may be returned. If they
 /// differ, we have to do a coerced load. A coerced load, which means to load a
@@ -370,7 +395,8 @@ mlir::Value castReturnValue(mlir::Value Src, mlir::Type Ty, LowerFunction &LF) {

   auto intTy = mlir::dyn_cast<cir::IntType>(Ty);
   if (intTy && !intTy.isPrimitive())
-    cir_cconv_unreachable("non-primitive types NYI");
+    return createCoercedNonPrimitive(Src, Ty, LF);
+
   llvm::TypeSize DstSize = LF.LM.getDataLayout().getTypeAllocSize(Ty);

   // FIXME(cir): Do we need the EnterStructPointerForCoercedAccess routine here?
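(To make the new non-primitive coercion concrete: a sketch of the kind of padded record this patch handles, mirroring the `S_PAD` test added below; the layout assumes the usual AArch64 C rules.)

    #include <cstdint>
    // Illustration only; same shape as the S_PAD test in this patch.
    struct S_PAD {
      uint8_t  a; // byte 0
      uint16_t b; // bytes 2..3 (one padding byte after a)
      uint8_t  c; // byte 4
    };
    // finishLayout (added in the next diff) rounds the unpadded 5 bytes up to
    // the record alignment of 2, giving size 6; the AArch64 lowering then
    // returns the value coerced as an i48 (see the 6-byte cir.libc.memcpy in
    // the test).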
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/RecordLayoutBuilder.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/RecordLayoutBuilder.cpp index 627f3b048817..db2af4ac9177 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/RecordLayoutBuilder.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/RecordLayoutBuilder.cpp @@ -223,6 +223,9 @@ class ItaniumRecordLayoutBuilder { /// Initialize record layout for the given record decl. void initializeLayout(const Type Ty); + /// Finalize record layout. Adjust record size based on the alignment. + void finishLayout(const StructType D); + uint64_t getDataSizeInBits() const { return DataSize; } void setDataSize(clang::CharUnits NewSize) { @@ -243,8 +246,7 @@ void ItaniumRecordLayoutBuilder::layout(const StructType RT) { // FIXME(cir): Handle virtual-related layouts. cir_cconv_assert(!cir::MissingFeatures::getCXXRecordBases()); - cir_cconv_assert( - !cir::MissingFeatures::itaniumRecordLayoutBuilderFinishLayout()); + finishLayout(RT); } void ItaniumRecordLayoutBuilder::initializeLayout(const mlir::Type Ty) { @@ -478,6 +480,31 @@ void ItaniumRecordLayoutBuilder::layoutFields(const StructType D) { } } +void ItaniumRecordLayoutBuilder::finishLayout(const StructType D) { + // If we have any remaining field tail padding, include that in the overall + // size. + setSize(std::max(getSizeInBits(), (uint64_t)Context.toBits(PaddedFieldSize))); + + // Finally, round the size of the record up to the alignment of the + // record itself. + uint64_t unpaddedSize = getSizeInBits() - UnfilledBitsInLastUnit; + uint64_t unpackedSizeInBits = + llvm::alignTo(getSizeInBits(), Context.toBits(UnpackedAlignment)); + + uint64_t roundedSize = llvm::alignTo( + getSizeInBits(), + Context.toBits(!Context.getTargetInfo().defaultsToAIXPowerAlignment() + ? Alignment + : PreferredAlignment)); + + if (UseExternalLayout) { + cir_cconv_unreachable("NYI"); + } + + // Set the size to the final size. + setSize(roundedSize); +} + void ItaniumRecordLayoutBuilder::UpdateAlignment( clang::CharUnits NewAlignment, clang::CharUnits UnpackedNewAlignment, clang::CharUnits PreferredNewAlignment) { @@ -521,13 +548,13 @@ void ItaniumRecordLayoutBuilder::checkFieldPadding( // Warn if padding was introduced to the struct/class. 
   if (!IsUnion && Offset > UnpaddedOffset) {
-    unsigned PadSize = Offset - UnpaddedOffset;
-    // bool InBits = true;
-    if (PadSize % CharBitNum == 0) {
-      PadSize = PadSize / CharBitNum;
-      // InBits = false;
+    unsigned padSize = Offset - UnpaddedOffset;
+    bool inBits = true;
+    if (padSize % CharBitNum == 0) {
+      padSize = padSize / CharBitNum;
+      inBits = false;
     }
-    cir_cconv_assert(cir::MissingFeatures::bitFieldPaddingDiagnostics());
+    cir_cconv_assert(!cir::MissingFeatures::bitFieldPaddingDiagnostics());
   }
   if (isPacked && Offset != UnpackedOffset) {
     HasPackedField = true;
diff --git a/clang/test/CIR/CallConvLowering/AArch64/aarch64-cc-structs.c b/clang/test/CIR/CallConvLowering/AArch64/aarch64-cc-structs.c
index 93f87db39cfb..969d40842b75 100644
--- a/clang/test/CIR/CallConvLowering/AArch64/aarch64-cc-structs.c
+++ b/clang/test/CIR/CallConvLowering/AArch64/aarch64-cc-structs.c
@@ -204,4 +204,33 @@ GT_128 call_and_get_gt_128() {
 // LLVM:  %[[#V2:]] = alloca [2 x i64], i64 1, align 8
 // LLVM:  store [2 x i64] %[[#ARG]], ptr %[[#V2]], align 8
 // LLVM:  call void @llvm.memcpy.p0.p0.i64(ptr %[[#V1]], ptr %[[#V2]], i64 12, i1 false)
-void passS(S s) {}
\ No newline at end of file
+void passS(S s) {}
+
+typedef struct {
+  uint8_t a;
+  uint16_t b;
+  uint8_t c;
+} S_PAD;
+
+// CHECK: cir.func {{.*@ret_s_pad}}() -> !u48i
+// CHECK:   %[[#V0:]] = cir.alloca !ty_S_PAD, !cir.ptr<!ty_S_PAD>, ["__retval"] {alignment = 2 : i64}
+// CHECK:   %[[#V1:]] = cir.load %[[#V0]] : !cir.ptr<!ty_S_PAD>, !ty_S_PAD
+// CHECK:   %[[#V2:]] = cir.alloca !u48i, !cir.ptr<!u48i>, [""] {alignment = 2 : i64}
+// CHECK:   %[[#V3:]] = cir.cast(bitcast, %[[#V0]] : !cir.ptr<!ty_S_PAD>)
+// CHECK:   %[[#V4:]] = cir.cast(bitcast, %[[#V2:]] : !cir.ptr<!u48i>), !cir.ptr<!void>
+// CHECK:   %[[#V5:]] = cir.const #cir.int<6> : !u64i
+// CHECK:   cir.libc.memcpy %[[#V5]] bytes from %[[#V3]] to %[[#V4]] : !u64i, !cir.ptr<!void>
+// CHECK:   %[[#V6:]] = cir.load %[[#V2]] : !cir.ptr<!u48i>
+// CHECK:   cir.return %[[#V6]]
+
+// LLVM: i48 @ret_s_pad()
+// LLVM:  %[[#V1:]] = alloca %struct.S_PAD, i64 1, align 2
+// LLVM:  %[[#V2:]] = load %struct.S_PAD, ptr %[[#V1]], align 2
+// LLVM:  %[[#V3:]] = alloca i48, i64 1, align 2
+// LLVM:  call void @llvm.memcpy.p0.p0.i64(ptr %[[#V3]], ptr %[[#V1]], i64 6, i1 false)
+// LLVM:  %[[#V4:]] = load i48, ptr %[[#V3]]
+// LLVM:  ret i48 %[[#V4]]
+S_PAD ret_s_pad() {
+  S_PAD s;
+  return s;
+}

From bae7bd9e68764afb586040f6d63b3759e0b6d856 Mon Sep 17 00:00:00 2001
From: Congcong Cai
Date: Wed, 20 Nov 2024 12:49:38 +0800
Subject: [PATCH 6/7] [CIR] fix null dereference when verifying the symbol of
 `cir.get_global` (#1143)

`lookupNearestSymbolFrom` returns null when the referenced symbol does not
exist, so the result must be checked before it is classified.
---
 clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 4 ++--
 clang/test/CIR/IR/invalid.cir           | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index ab49ffbdcb98..78a8ed9c5c6f 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -2023,8 +2023,8 @@ LogicalResult
 cir::GetGlobalOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
   // Verify that the result type underlying pointer type matches the type of
   // the referenced cir.global or cir.func op.
-  auto op = symbolTable.lookupNearestSymbolFrom(*this, getNameAttr());
-  if (!(isa<GlobalOp>(op) || isa<FuncOp>(op)))
+  auto *op = symbolTable.lookupNearestSymbolFrom(*this, getNameAttr());
+  if (op == nullptr || !(isa<GlobalOp>(op) || isa<FuncOp>(op)))
     return emitOpError("'")
            << getName()
            << "' does not reference a valid cir.global or cir.func";
diff --git a/clang/test/CIR/IR/invalid.cir b/clang/test/CIR/IR/invalid.cir
index af516b2aaed6..01828fbe22b4 100644
--- a/clang/test/CIR/IR/invalid.cir
+++ b/clang/test/CIR/IR/invalid.cir
@@ -1103,6 +1103,15 @@ module {

 // -----

+!s8i = !cir.int<s, 8>
+cir.func @no_reference_global() {
+  // expected-error @below {{'cir.get_global' op 'str' does not reference a valid cir.global or cir.func}}
+  %0 = cir.get_global @str : !cir.ptr<!s8i>
+  cir.return
+}
+
+// -----
+
 // expected-error@+1 {{invalid underlying type for long double}}
 cir.func @bad_long_double(%arg0 : !cir.long_double) -> () {
   cir.return

From dbd3e03ecb97d3b0697711964bbce6dd30d88a52 Mon Sep 17 00:00:00 2001
From: Sirui Mu
Date: Fri, 22 Nov 2024 14:05:22 +0800
Subject: [PATCH 7/7] [CIR][NFC] Refactor code for populating LLVM conversion
 patterns (#1152)

The function `populateCIRToLLVMConversionPatterns` contains a spaghetti of
LLVM dialect conversion patterns, which makes merge conflicts very likely.
Besides, a few patterns are even registered more than once, possibly due to
careless resolution of merge conflicts.

This PR attempts to mitigate this problem: pattern names are now sorted in
alphabetical order, and each source line lists exactly one pattern name,
reducing the potential for merge conflicts.
---
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 135 ++++++++++++------
 1 file changed, 89 insertions(+), 46 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 035be9cca2da..208039b0d77f 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -3859,55 +3859,98 @@ void populateCIRToLLVMConversionPatterns(
       stringGlobalsMap, argStringGlobalsMap, argsVarMap,
       patterns.getContext());
   patterns.add<
-      CIRToLLVMCmpOpLowering, CIRToLLVMSelectOpLowering,
-      CIRToLLVMBitClrsbOpLowering, CIRToLLVMBitClzOpLowering,
-      CIRToLLVMBitCtzOpLowering, CIRToLLVMBitFfsOpLowering,
-      CIRToLLVMBitParityOpLowering, CIRToLLVMBitPopcountOpLowering,
-      CIRToLLVMAtomicCmpXchgLowering, CIRToLLVMAtomicXchgLowering,
-      CIRToLLVMAtomicFetchLowering, CIRToLLVMByteswapOpLowering,
-      CIRToLLVMRotateOpLowering, CIRToLLVMBrCondOpLowering,
-      CIRToLLVMPtrStrideOpLowering, CIRToLLVMCallOpLowering,
-      CIRToLLVMTryCallOpLowering, CIRToLLVMEhInflightOpLowering,
-      CIRToLLVMUnaryOpLowering, CIRToLLVMBinOpLowering,
-      CIRToLLVMBinOpOverflowOpLowering, CIRToLLVMShiftOpLowering,
-      CIRToLLVMLoadOpLowering, CIRToLLVMConstantOpLowering,
-      CIRToLLVMStoreOpLowering, CIRToLLVMFuncOpLowering,
-      CIRToLLVMCastOpLowering, CIRToLLVMGlobalOpLowering,
-      CIRToLLVMGetGlobalOpLowering, CIRToLLVMComplexCreateOpLowering,
-      CIRToLLVMComplexRealOpLowering, CIRToLLVMComplexImagOpLowering,
-      CIRToLLVMComplexRealPtrOpLowering, CIRToLLVMComplexImagPtrOpLowering,
-      CIRToLLVMVAStartOpLowering, CIRToLLVMVAEndOpLowering,
-      CIRToLLVMVACopyOpLowering, CIRToLLVMVAArgOpLowering,
-      CIRToLLVMBrOpLowering, CIRToLLVMGetMemberOpLowering,
-      CIRToLLVMGetRuntimeMemberOpLowering, CIRToLLVMSwitchFlatOpLowering,
-      CIRToLLVMPtrDiffOpLowering, CIRToLLVMCopyOpLowering,
-      CIRToLLVMMemCpyOpLowering, CIRToLLVMMemChrOpLowering,
-      CIRToLLVMAbsOpLowering,
-      CIRToLLVMVTableAddrPointOpLowering, CIRToLLVMVecCreateOpLowering,
-      CIRToLLVMVecCmpOpLowering, CIRToLLVMVecSplatOpLowering,
-      CIRToLLVMVecTernaryOpLowering, CIRToLLVMVecShuffleDynamicOpLowering,
-      CIRToLLVMVecShuffleOpLowering, CIRToLLVMStackSaveOpLowering,
-      CIRToLLVMUnreachableOpLowering, CIRToLLVMTrapOpLowering,
-      CIRToLLVMInlineAsmOpLowering, CIRToLLVMSetBitfieldOpLowering,
-      CIRToLLVMGetBitfieldOpLowering, CIRToLLVMPrefetchOpLowering,
-      CIRToLLVMObjSizeOpLowering, CIRToLLVMIsConstantOpLowering,
-      CIRToLLVMCmpThreeWayOpLowering, CIRToLLVMMemCpyOpLowering,
-      CIRToLLVMIsConstantOpLowering, CIRToLLVMCmpThreeWayOpLowering,
-      CIRToLLVMReturnAddrOpLowering, CIRToLLVMClearCacheOpLowering,
-      CIRToLLVMEhTypeIdOpLowering, CIRToLLVMCatchParamOpLowering,
-      CIRToLLVMResumeOpLowering, CIRToLLVMAllocExceptionOpLowering,
-      CIRToLLVMFreeExceptionOpLowering, CIRToLLVMThrowOpLowering,
-      CIRToLLVMLLVMIntrinsicCallOpLowering, CIRToLLVMAssumeOpLowering,
-      CIRToLLVMAssumeAlignedOpLowering, CIRToLLVMAssumeSepStorageOpLowering,
-      CIRToLLVMBaseClassAddrOpLowering, CIRToLLVMDerivedClassAddrOpLowering,
-      CIRToLLVMVTTAddrPointOpLowering, CIRToLLVMIsFPClassOpLowering,
-      CIRToLLVMAbsOpLowering, CIRToLLVMMemMoveOpLowering,
-      CIRToLLVMMemSetOpLowering, CIRToLLVMMemSetInlineOpLowering,
-      CIRToLLVMMemCpyInlineOpLowering, CIRToLLVMSignBitOpLowering,
-      CIRToLLVMPtrMaskOpLowering
+      // clang-format off
+      CIRToLLVMAbsOpLowering,
+      CIRToLLVMAllocExceptionOpLowering,
+      CIRToLLVMAssumeAlignedOpLowering,
+      CIRToLLVMAssumeOpLowering,
+      CIRToLLVMAssumeSepStorageOpLowering,
+      CIRToLLVMAtomicCmpXchgLowering,
+      CIRToLLVMAtomicFetchLowering,
+      CIRToLLVMAtomicXchgLowering,
+      CIRToLLVMBaseClassAddrOpLowering,
+      CIRToLLVMBinOpLowering,
+      CIRToLLVMBinOpOverflowOpLowering,
+      CIRToLLVMBitClrsbOpLowering,
+      CIRToLLVMBitClzOpLowering,
+      CIRToLLVMBitCtzOpLowering,
+      CIRToLLVMBitFfsOpLowering,
+      CIRToLLVMBitParityOpLowering,
+      CIRToLLVMBitPopcountOpLowering,
+      CIRToLLVMBrCondOpLowering,
+      CIRToLLVMBrOpLowering,
+      CIRToLLVMByteswapOpLowering,
+      CIRToLLVMCallOpLowering,
+      CIRToLLVMCastOpLowering,
+      CIRToLLVMCatchParamOpLowering,
+      CIRToLLVMClearCacheOpLowering,
+      CIRToLLVMCmpOpLowering,
+      CIRToLLVMCmpThreeWayOpLowering,
+      CIRToLLVMComplexCreateOpLowering,
+      CIRToLLVMComplexImagOpLowering,
+      CIRToLLVMComplexImagPtrOpLowering,
+      CIRToLLVMComplexRealOpLowering,
+      CIRToLLVMComplexRealPtrOpLowering,
+      CIRToLLVMConstantOpLowering,
+      CIRToLLVMCopyOpLowering,
+      CIRToLLVMDerivedClassAddrOpLowering,
+      CIRToLLVMEhInflightOpLowering,
+      CIRToLLVMEhTypeIdOpLowering,
+      CIRToLLVMExpectOpLowering,
+      CIRToLLVMFreeExceptionOpLowering,
+      CIRToLLVMFuncOpLowering,
+      CIRToLLVMGetBitfieldOpLowering,
+      CIRToLLVMGetGlobalOpLowering,
+      CIRToLLVMGetMemberOpLowering,
+      CIRToLLVMGetRuntimeMemberOpLowering,
+      CIRToLLVMGlobalOpLowering,
+      CIRToLLVMInlineAsmOpLowering,
+      CIRToLLVMIsConstantOpLowering,
+      CIRToLLVMIsFPClassOpLowering,
+      CIRToLLVMLLVMIntrinsicCallOpLowering,
+      CIRToLLVMLoadOpLowering,
+      CIRToLLVMMemChrOpLowering,
+      CIRToLLVMMemCpyInlineOpLowering,
+      CIRToLLVMMemCpyOpLowering,
+      CIRToLLVMMemMoveOpLowering,
+      CIRToLLVMMemSetInlineOpLowering,
+      CIRToLLVMMemSetOpLowering,
+      CIRToLLVMObjSizeOpLowering,
+      CIRToLLVMPrefetchOpLowering,
+      CIRToLLVMPtrDiffOpLowering,
+      CIRToLLVMPtrMaskOpLowering,
+      CIRToLLVMPtrStrideOpLowering,
+      CIRToLLVMResumeOpLowering,
+      CIRToLLVMReturnAddrOpLowering,
+      CIRToLLVMRotateOpLowering,
+      CIRToLLVMSelectOpLowering,
+      CIRToLLVMSetBitfieldOpLowering,
+      CIRToLLVMShiftOpLowering,
+      CIRToLLVMSignBitOpLowering,
+      CIRToLLVMStackSaveOpLowering,
+      CIRToLLVMStoreOpLowering,
+      CIRToLLVMSwitchFlatOpLowering,
+      CIRToLLVMThrowOpLowering,
+      CIRToLLVMTrapOpLowering,
+      CIRToLLVMTryCallOpLowering,
+      CIRToLLVMUnaryOpLowering,
+      CIRToLLVMUnreachableOpLowering,
+      CIRToLLVMVAArgOpLowering,
+      CIRToLLVMVACopyOpLowering,
+      CIRToLLVMVAEndOpLowering,
+      CIRToLLVMVAStartOpLowering,
+      CIRToLLVMVecCmpOpLowering,
+      CIRToLLVMVecCreateOpLowering,
+      CIRToLLVMVecShuffleDynamicOpLowering,
+      CIRToLLVMVecShuffleOpLowering,
+      CIRToLLVMVecSplatOpLowering,
+      CIRToLLVMVecTernaryOpLowering,
+      CIRToLLVMVTableAddrPointOpLowering,
+      CIRToLLVMVTTAddrPointOpLowering
 #define GET_BUILTIN_LOWERING_LIST
 #include "clang/CIR/Dialect/IR/CIRBuiltinsLowering.inc"
 #undef GET_BUILTIN_LOWERING_LIST
+      // clang-format on
       >(converter, patterns.getContext());
 }
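
Editor's note (not part of the patches above): the crash fixed in
[PATCH 6/7] has a common shape: the result of a symbol-table lookup is
fed straight into an `isa`-style type check, but the lookup can return
null. The sketch below is a minimal, standalone C++ analogue of the
fixed verifier. It deliberately avoids the real MLIR API; `Operation`,
`SymbolMap`, `lookupSymbol`, and `verifyGetGlobal` here are
hypothetical stand-ins, and only the null-check-before-inspection
pattern is taken from the patch itself.

#include <cassert>
#include <iostream>
#include <map>
#include <string>

// Hypothetical stand-in for an operation found in a symbol table.
struct Operation {
  enum class Kind { Global, Func, Other };
  Kind kind;
};

// Hypothetical stand-in for the symbol table; like
// lookupNearestSymbolFrom, lookupSymbol returns nullptr when the
// name is not bound.
using SymbolMap = std::map<std::string, Operation *>;

Operation *lookupSymbol(const SymbolMap &table, const std::string &name) {
  auto it = table.find(name);
  return it == table.end() ? nullptr : it->second;
}

// Mirrors the fixed verifier: test the lookup result for null *before*
// inspecting its kind, and fold both failure modes (missing symbol,
// wrong kind of symbol) into a single diagnostic. The pre-fix code
// skipped the null check and inspected `op` unconditionally.
bool verifyGetGlobal(const SymbolMap &table, const std::string &name,
                     std::string &error) {
  Operation *op = lookupSymbol(table, name);
  if (op == nullptr || !(op->kind == Operation::Kind::Global ||
                         op->kind == Operation::Kind::Func)) {
    error = "'" + name +
            "' does not reference a valid cir.global or cir.func";
    return false;
  }
  return true;
}

int main() {
  Operation global{Operation::Kind::Global};
  SymbolMap table{{"g", &global}};
  std::string error;

  assert(verifyGetGlobal(table, "g", error));    // valid reference
  assert(!verifyGetGlobal(table, "str", error)); // undefined: rejected,
                                                 // not crashed
  std::cout << error << "\n";
  return 0;
}

The regression test added to invalid.cir exercises exactly the
missing-symbol path: `@str` is never defined, so before the fix the
verifier dereferenced a null pointer instead of emitting the
"does not reference a valid cir.global or cir.func" diagnostic.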