From 8fbc640ffd5f8296d453649623877a6aa06b22c7 Mon Sep 17 00:00:00 2001
From: Guojin <guojinhe@meta.com>
Date: Fri, 15 Nov 2024 00:33:59 -0500
Subject: [PATCH]  [CIR][CIRGen][Builtin][Neon] Lower neon_splat_lane,
 neon_splat_laneq, neon_splatq_lane and neon_splatq_laneq (#1126)

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  21 +
 clang/test/CIR/CodeGen/AArch64/neon-misc.c    | 609 ++++++++++++++++++
 2 files changed, 630 insertions(+)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 4ff8908d347a..ba4c4a07943c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2188,6 +2188,14 @@ static int64_t getIntValueFromConstOp(mlir::Value val) {
       .getSExtValue();
 }
 
+/// Broadcast the constant lane `splatLane` of `splatVec` to `splatCnt` lanes.
+static mlir::Value emitNeonSplat(CIRGenBuilderTy &builder, mlir::Location loc,
+                                 mlir::Value splatVec, mlir::Value splatLane,
+                                 unsigned int splatCnt) {
+  llvm::SmallVector<int64_t> mask(splatCnt, getIntValueFromConstOp(splatLane));
+  return builder.createVecShuffle(loc, splatVec, mask);
+}
+
 /// Build a constant shift amount vector of `vecTy` to shift a vector
 /// Here `shitfVal` is a constant integer that will be splated into a
 /// a const vector of `vecTy` which is the return of this function
@@ -2341,6 +2349,19 @@ mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
   switch (builtinID) {
   default:
     break;
+  case NEON::BI__builtin_neon_splat_lane_v:
+  case NEON::BI__builtin_neon_splat_laneq_v:
+  case NEON::BI__builtin_neon_splatq_lane_v:
+  case NEON::BI__builtin_neon_splatq_laneq_v: {
+    uint64_t numElements = vTy.getSize();
+    if (builtinID == NEON::BI__builtin_neon_splatq_lane_v)
+      numElements = numElements << 1;
+    if (builtinID == NEON::BI__builtin_neon_splat_laneq_v)
+      numElements = numElements >> 1;
+    ops[0] = builder.createBitcast(ops[0], vTy);
+    return emitNeonSplat(builder, getLoc(e->getExprLoc()), ops[0], ops[1],
+                         numElements);
+  }
   case NEON::BI__builtin_neon_vmovl_v: {
     cir::VectorType dTy = builder.getExtendedOrTruncatedElementVectorType(
         vTy, false /* truncate */,
diff --git a/clang/test/CIR/CodeGen/AArch64/neon-misc.c b/clang/test/CIR/CodeGen/AArch64/neon-misc.c
index a798139de55f..4c516ad06f18 100644
--- a/clang/test/CIR/CodeGen/AArch64/neon-misc.c
+++ b/clang/test/CIR/CodeGen/AArch64/neon-misc.c
@@ -860,6 +860,7 @@ uint32x2_t test_vqmovn_u64(uint64x2_t a) {
   // LLVM:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[A]])
   // LLVM:   ret <2 x i32> [[VQMOVN_V1_I]]
 }
+
 float32x2_t test_vcvt_f32_s32(int32x2_t a) {
   return vcvt_f32_s32(a);
 
@@ -907,3 +908,611 @@ float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
   // LLVM:  [[VCVT_I:%.*]] = uitofp <4 x i32> [[a]] to <4 x float>
   // LLVM:  ret <4 x float> [[VCVT_I]]
 }
+
+int8x8_t test_splat_lane_s8(int8x8_t v) {
+  return (int8x8_t) __builtin_neon_splat_lane_v((int8x8_t)v, 7, 0);
+  // Type flag 0 => <8 x s8> operand; lane 7 fills all 8 result lanes.
+  // CIR-LABEL: test_splat_lane_s8
+  // CIR: [[VEC:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.vector<!s8i x 8>>, !cir.vector<!s8i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s8i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, 
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_splat_lane_s8(<8 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> [[V]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <8 x i8> [[RES]]
+}
+
+int16x4_t test_splat_lane_s16(int16x4_t v) {
+  return (int16x4_t) __builtin_neon_splat_lane_v((int8x8_t)v, 3, 1);
+  // Type flag 1 => <4 x s16> operand; lane 3 fills all 4 result lanes.
+  // CIR-LABEL: test_splat_lane_s16
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s16i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s16i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_splat_lane_s16(<4 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+int32x2_t test_splat_lane_s32(int32x2_t v) {
+  return (int32x2_t) __builtin_neon_splat_lane_v((int8x8_t)v, 1, 2);
+  // Type flag 2 => <2 x s32> operand; lane 1 fills both result lanes.
+  // CIR-LABEL: test_splat_lane_s32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s32i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s32i x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_splat_lane_s32(<2 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+  // LLVM: ret <2 x i32> [[RES]]
+}  
+
+int64x1_t test_splat_lane_s64(int64x1_t v) {
+  return (int64x1_t) __builtin_neon_splat_lane_v((int8x8_t)v, 0, 3);
+  // Type flag 3 => <1 x s64> operand; lane 0 fills the single result lane.
+  // CIR-LABEL: test_splat_lane_s64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s64i x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s64i x 1>) [#cir.int<0> : !s32i] : !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}@test_splat_lane_s64(<1 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+  // LLVM: ret <1 x i64> [[RES]]
+}
+
+uint8x8_t test_splat_lane_u8(uint8x8_t v) {
+  return (uint8x8_t) __builtin_neon_splat_lane_v((int8x8_t)v, 7, 16);
+  // Type flag 16 => <8 x u8> operand; lane 7 fills all 8 result lanes.
+  // CIR-LABEL: test_splat_lane_u8
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u8i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u8i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, 
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_splat_lane_u8(<8 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> [[V]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <8 x i8> [[RES]]
+}
+uint16x4_t test_splat_lane_u16(uint16x4_t v) {
+  return (uint16x4_t) __builtin_neon_splat_lane_v((int8x8_t)v, 3, 17);
+  // Type flag 17 => <4 x u16> operand; lane 3 fills all 4 result lanes.
+  // CIR-LABEL: test_splat_lane_u16
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u16i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u16i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_splat_lane_u16(<4 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+uint32x2_t test_splat_lane_u32(uint32x2_t v) {
+  return (uint32x2_t) __builtin_neon_splat_lane_v((int8x8_t)v, 1, 18);
+  // Type flag 18 => <2 x u32> operand; lane 1 fills both result lanes.
+  // CIR-LABEL: test_splat_lane_u32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u32i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u32i x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_splat_lane_u32(<2 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
+  // LLVM: ret <2 x i32> [[RES]]
+}
+
+uint64x1_t test_splat_lane_u64(uint64x1_t v) {
+  return (uint64x1_t) __builtin_neon_splat_lane_v((int8x8_t)v, 0, 19);
+  // Type flag 19 => <1 x u64> operand; lane 0 fills the single result lane.
+  // CIR-LABEL: test_splat_lane_u64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u64i x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u64i x 1>) [#cir.int<0> : !s32i] : !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}@test_splat_lane_u64(<1 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
+  // LLVM: ret <1 x i64> [[RES]]
+}
+
+float32x2_t test_splat_lane_f32(float32x2_t v) {
+  return (float32x2_t) __builtin_neon_splat_lane_v((int8x8_t)v, 1, 9);
+  // Type flag 9 => <2 x float> operand; lane 1 fills both result lanes.
+  // CIR-LABEL: test_splat_lane_f32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!cir.float x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!cir.float x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 2>
+
+  // LLVM: {{.*}}@test_splat_lane_f32(<2 x float>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
+  // LLVM: ret <2 x float> [[RES]]
+}
+
+float64x1_t test_splat_lane_f64(float64x1_t v) {
+  return (float64x1_t) __builtin_neon_splat_lane_v((int8x8_t)v, 0, 10);
+  // Type flag 10 => <1 x double> operand; lane 0 fills the single result lane.
+  // CIR-LABEL: test_splat_lane_f64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!cir.double x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!cir.double x 1>) [#cir.int<0> : !s32i] : !cir.vector<!cir.double x 1>
+
+  // LLVM: {{.*}}@test_splat_lane_f64(<1 x double>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <1 x i32> zeroinitializer
+  // LLVM: ret <1 x double> [[RES]]
+}
+
+int8x16_t test_splatq_lane_s8(int8x8_t v) {
+  return (int8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 7, 0);
+  // Type flag 0 => <8 x s8> operand; lane 7 fills all 16 result lanes.
+  // CIR-LABEL: test_splatq_lane_s8
+  // CIR: [[VEC:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.vector<!s8i x 8>>, !cir.vector<!s8i x 8>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s8i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, 
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}@test_splatq_lane_s8(<8 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> [[V]], 
+  // LLVM-SAME: <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <16 x i8> [[RES]]
+} 
+
+int16x8_t test_splatq_lane_s16(int16x4_t v) {
+  return (int16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 3, 1);
+  // Type flag 1 => <4 x s16> operand; lane 3 fills all 8 result lanes.
+  // CIR-LABEL: test_splatq_lane_s16
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s16i x 4>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s16i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME:  #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}@test_splatq_lane_s16(<4 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+int32x4_t test_splatq_lane_s32(int32x2_t v) {
+  return (int32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 1, 2);
+  // Type flag 2 => <2 x s32> operand; lane 1 fills all 4 result lanes.
+  // CIR-LABEL: test_splatq_lane_s32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s32i x 2>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s32i x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}@test_splatq_lane_s32(<2 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // LLVM: ret <4 x i32> [[RES]]
+}  
+
+int64x2_t test_splatq_lane_s64(int64x1_t v) {
+  return (int64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 0, 3);
+  // Type flag 3 => <1 x s64> operand; lane 0 fills both result lanes.
+  // CIR-LABEL: test_splatq_lane_s64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s64i x 1>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s64i x 1>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}@test_splatq_lane_s64(<1 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
+  // LLVM: ret <2 x i64> [[RES]]
+}
+
+uint8x16_t test_splatq_lane_u8(uint8x8_t v) {
+  return (uint8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 7, 16);
+  // Type flag 16 => <8 x u8> operand; lane 7 fills all 16 result lanes.
+  // CIR-LABEL: test_splatq_lane_u8
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u8i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u8i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, 
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_splatq_lane_u8(<8 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> [[V]], 
+  // LLVM-SAME: <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <16 x i8> [[RES]]
+}
+
+uint16x8_t test_splatq_lane_u16(uint16x4_t v) {
+  return (uint16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 3, 17);
+  // Type flag 17 => <4 x u16> operand; lane 3 fills all 8 result lanes.
+  // CIR-LABEL: test_splatq_lane_u16
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u16i x 4>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u16i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME:  #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_splatq_lane_u16(<4 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+uint32x4_t test_splatq_lane_u32(uint32x2_t v) {
+  return (uint32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 1, 18);
+  // Type flag 18 => <2 x u32> operand; lane 1 fills all 4 result lanes.
+  // CIR-LABEL: test_splatq_lane_u32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u32i x 2>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u32i x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_splatq_lane_u32(<2 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // LLVM: ret <4 x i32> [[RES]]
+}
+
+uint64x2_t test_splatq_lane_u64(uint64x1_t v) {
+  return (uint64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 0, 19);
+  // Type flag 19 => <1 x u64> operand; lane 0 fills both result lanes.
+  // CIR-LABEL: test_splatq_lane_u64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u64i x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u64i x 1>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}@test_splatq_lane_u64(<1 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
+  // LLVM: ret <2 x i64> [[RES]]
+}
+
+float32x4_t test_splatq_lane_f32(float32x2_t v) {
+  return (float32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 1, 9);
+  // Type flag 9 => <2 x float> operand; lane 1 fills all 4 result lanes.
+  // CIR-LABEL: test_splatq_lane_f32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!cir.float x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!cir.float x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 4>
+
+  // LLVM: {{.*}}@test_splatq_lane_f32(<2 x float>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // LLVM: ret <4 x float> [[RES]]
+}
+
+float64x2_t test_splatq_lane_f64(float64x1_t v) {
+  return (float64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 0, 10);
+  // Type flag 10 => <1 x double> operand; lane 0 fills both result lanes.
+  // CIR-LABEL: test_splatq_lane_f64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!cir.double x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!cir.double x 1>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.double x 2>
+
+  // LLVM: {{.*}}@test_splatq_lane_f64(<1 x double>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer
+  // LLVM: ret <2 x double> [[RES]]
+}
+
+int8x8_t test_splat_laneq_s8(int8x16_t v) {
+  return (int8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 15, 32);
+  // Type flag 32 => <16 x s8> operand; lane 15 fills all 8 result lanes.
+  // CIR-LABEL: test_splat_laneq_s8
+  // CIR: [[VEC:%.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!s8i x 16>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s8i x 16>)
+  // CIR-SAME: [#cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_splat_laneq_s8(<16 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> [[V]], 
+  // LLVM-SAME: <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  // LLVM: ret <8 x i8> [[RES]]
+}
+
+int16x4_t test_splat_laneq_s16(int16x8_t v) {
+  return (int16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 7, 33);
+  // Type flag 33 => <8 x s16> operand; lane 7 fills all 4 result lanes.
+  // CIR-LABEL: test_splat_laneq_s16
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s16i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s16i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_splat_laneq_s16(<8 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+int32x2_t test_splat_laneq_s32(int32x4_t v) {
+  return (int32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 3, 34);
+  // Type flag 34 => <4 x s32> operand; lane 3 fills both result lanes.
+  // CIR-LABEL: test_splat_laneq_s32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s32i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s32i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_splat_laneq_s32(<4 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
+  // LLVM: ret <2 x i32> [[RES]]
+}
+
+int64x1_t test_splat_laneq_s64(int64x2_t v) {
+  return (int64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 0, 35);
+  // Type flag 35 => <2 x s64> operand; lane 0 fills the single result lane.
+  // CIR-LABEL: test_splat_laneq_s64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s64i x 2>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s64i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}@test_splat_laneq_s64(<2 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <1 x i32> zeroinitializer
+  // LLVM: ret <1 x i64> [[RES]]
+}
+
+float32x2_t test_splat_laneq_f32(float32x4_t v) {
+  return (float32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 1, 41);
+  // Type flag 41 => <4 x float> operand; lane 1 fills both result lanes.
+  // CIR-LABEL: test_splat_laneq_f32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!cir.float x 4>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!cir.float x 4>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 2>
+
+  // LLVM: {{.*}}@test_splat_laneq_f32(<4 x float>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
+  // LLVM: ret <2 x float> [[RES]]
+}
+
+float64x1_t test_splat_laneq_f64(float64x2_t v) {
+  return (float64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 0, 42);
+  // Type flag 42 => <2 x double> operand; lane 0 fills the single result lane.
+  // CIR-LABEL: test_splat_laneq_f64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!cir.double x 2>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!cir.double x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!cir.double x 1>
+
+  // LLVM: {{.*}}@test_splat_laneq_f64(<2 x double>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <1 x i32> zeroinitializer
+  // LLVM: ret <1 x double> [[RES]]
+}
+
+uint8x8_t test_splat_laneq_u8(uint8x16_t v) {
+  return (uint8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 15, 48);
+  // Type flag 48 => <16 x u8> operand; lane 15 fills all 8 result lanes.
+  // CIR-LABEL: test_splat_laneq_u8
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u8i x 16>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u8i x 16>)
+  // CIR-SAME: [#cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_splat_laneq_u8(<16 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> [[V]], 
+  // LLVM-SAME: <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  // LLVM: ret <8 x i8> [[RES]]
+}
+
+uint16x4_t test_splat_laneq_u16(uint16x8_t v) {
+  return (uint16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 7, 49);
+  // Type flag 49 => <8 x u16> operand; lane 7 fills all 4 result lanes.
+  // CIR-LABEL: test_splat_laneq_u16
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u16i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u16i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_splat_laneq_u16(<8 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+uint32x2_t test_splat_laneq_u32(uint32x4_t v) {
+  return (uint32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 3, 50);
+  // Type flag 50 => <4 x u32> operand; lane 3 fills both result lanes.
+  // CIR-LABEL: test_splat_laneq_u32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u32i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_splat_laneq_u32(<4 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
+  // LLVM: ret <2 x i32> [[RES]]
+}
+
+uint64x1_t test_splat_laneq_u64(uint64x2_t v) {
+  return (uint64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 0, 51);
+  // Type flag 51 => <2 x u64> operand; lane 0 fills the single result lane.
+  // CIR-LABEL: test_splat_laneq_u64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u64i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u64i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}@test_splat_laneq_u64(<2 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <1 x i32> zeroinitializer
+  // LLVM: ret <1 x i64> [[RES]]
+}
+
+int8x16_t test_splatq_laneq_s8(int8x16_t v) {
+  return (int8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 15, 32);
+  // Type flag 32 => <16 x s8> operand; lane 15 fills all 16 result lanes.
+  // CIR-LABEL: test_splatq_laneq_s8
+  // CIR: [[VEC:%.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!s8i x 16>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s8i x 16>)
+  // CIR-SAME: [#cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}@test_splatq_laneq_s8(<16 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> [[V]], 
+  // LLVM-SAME: <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15,
+  // LLVM-SAME:  i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  // LLVM: ret <16 x i8> [[RES]]
+}
+
+int16x8_t test_splatq_laneq_s16(int16x8_t v) {
+  return (int16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 7, 33);
+  // Type flag 33 => <8 x s16> operand; lane 7 fills all 8 result lanes.
+  // CIR-LABEL: test_splatq_laneq_s16
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s16i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s16i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}@test_splatq_laneq_s16(<8 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+int32x4_t test_splatq_laneq_s32(int32x4_t v) {
+  return (int32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 3, 34);
+  // Type flag 34 => <4 x s32> operand; lane 3 fills all 4 result lanes.
+  // CIR-LABEL: test_splatq_laneq_s32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s32i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s32i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}@test_splatq_laneq_s32(<4 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x i32> [[RES]]
+}
+
+int64x2_t test_splatq_laneq_s64(int64x2_t v) {
+  return (int64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 0, 35);
+  // Type flag 35 => <2 x s64> operand; lane 0 fills both result lanes.
+  // CIR-LABEL: test_splatq_laneq_s64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s64i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!s64i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}@test_splatq_laneq_s64(<2 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> zeroinitializer
+  // LLVM: ret <2 x i64> [[RES]]
+}
+
+float32x4_t test_splatq_laneq_f32(float32x4_t v) {
+  return (float32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 3, 41);
+  // Type flag 41 => <4 x float> operand; lane 3 fills all 4 result lanes.
+  // CIR-LABEL: test_splatq_laneq_f32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!cir.float x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!cir.float x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.float x 4>
+
+  // LLVM: {{.*}}@test_splatq_laneq_f32(<4 x float>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x float> [[RES]]
+}
+
+float64x2_t test_splatq_laneq_f64(float64x2_t v) { // Splat lane 0 of a 2 x double vector into a 2 x double result.
+  return (float64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 0, 42); // lane index 0; 42 is the type-flag immediate (presumably the NeonTypeFlags value for a quad f64 vector -- confirm)
+
+  // CIR-LABEL: test_splatq_laneq_f64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!cir.double x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!cir.double x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.double x 2>
+
+  // LLVM: {{.*}}@test_splatq_laneq_f64(<2 x double>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> zeroinitializer
+  // LLVM: ret <2 x double> [[RES]]
+}
+
+uint8x16_t test_splatq_laneq_u8(uint8x16_t v) { // Splat lane 15 (the last lane) of a 16 x u8 vector into a 16 x u8 result.
+  return (uint8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 15, 48); // lane index 15; 48 is the type-flag immediate (presumably the NeonTypeFlags value for a quad u8 vector -- confirm)
+
+  // CIR-LABEL: test_splatq_laneq_u8
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u8i x 16>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u8i x 16>)
+  // CIR-SAME: [#cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!u8i x 16>
+
+  // Unlike the wider-element cases, the lowered-IR checks below expect the shuffle to read the argument directly, with no bitcast pair.
+  // LLVM: {{.*}}@test_splatq_laneq_u8(<16 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> [[V]], 
+  // LLVM-SAME: <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15,
+  // LLVM-SAME:  i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  // LLVM: ret <16 x i8> [[RES]]
+}
+
+uint16x8_t test_splatq_laneq_u16(uint16x8_t v) { // Splat lane 7 (the last lane) of an 8 x u16 vector into an 8 x u16 result.
+  return (uint16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 7, 49); // lane index 7; 49 is the type-flag immediate (presumably the NeonTypeFlags value for a quad u16 vector -- confirm)
+
+  // CIR-LABEL: test_splatq_laneq_u16
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u16i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u16i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_splatq_laneq_u16(<8 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+uint32x4_t test_splatq_laneq_u32(uint32x4_t v) { // Splat lane 3 (the last lane) of a 4 x u32 vector into a 4 x u32 result.
+  return (uint32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 3, 50); // lane index 3; 50 is the type-flag immediate (presumably the NeonTypeFlags value for a quad u32 vector -- confirm)
+
+  // CIR-LABEL: test_splatq_laneq_u32
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u32i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_splatq_laneq_u32(<4 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x i32> [[RES]]
+}
+
+uint64x2_t test_splatq_laneq_u64(uint64x2_t v) { // Splat lane 0 of a 2 x u64 vector into a 2 x u64 result.
+  return (uint64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 0, 51); // lane index 0; 51 is the type-flag immediate (presumably the NeonTypeFlags value for a quad u64 vector -- confirm)
+
+  // CIR-LABEL: test_splatq_laneq_u64
+  // CIR: [[VEC:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u64i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[VEC]] : !cir.vector<!u64i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}@test_splatq_laneq_u64(<2 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> zeroinitializer
+  // LLVM: ret <2 x i64> [[RES]]
+}