diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index e0512bbf5055..4f4151efcb3f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2966,7 +2966,7 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
           buildAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
     return V;
 
-  mlir::Type VTy = Ty;
+  mlir::cir::VectorType VTy = Ty;
   llvm::SmallVector<mlir::Value, 4> args;
   switch (BuiltinID) {
   default:
@@ -3399,7 +3399,11 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
   }
   case NEON::BI__builtin_neon_vld1_lane_v:
   case NEON::BI__builtin_neon_vld1q_lane_v: {
-    llvm_unreachable("NYI");
+    Ops[1] = builder.createBitcast(Ops[1], VTy);
+    Ops[0] = builder.createAlignedLoad(Ops[0].getLoc(), VTy.getEltType(),
+                                       Ops[0], PtrOp0.getAlignment());
+    return builder.create<mlir::cir::VecInsertOp>(getLoc(E->getExprLoc()),
+                                                  Ops[1], Ops[0], Ops[2]);
   }
   case NEON::BI__builtin_neon_vldap1_lane_s64:
   case NEON::BI__builtin_neon_vldap1q_lane_s64: {
diff --git a/clang/test/CIR/CodeGen/aarch64-neon-ldst.c b/clang/test/CIR/CodeGen/aarch64-neon-ldst.c
new file mode 100644
index 000000000000..9b6ed9ee479c
--- /dev/null
+++ b/clang/test/CIR/CodeGen/aarch64-neon-ldst.c
@@ -0,0 +1,376 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
+// RUN: -ffreestanding -emit-cir -target-feature +neon %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
+// RUN: -ffreestanding -emit-llvm -target-feature +neon %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+#include <arm_neon.h>
+
+int8x8_t test_vld1_lane_s8(int8_t const * ptr, int8x8_t src) {
+  return vld1_lane_s8(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vld1_lane_s8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(1) [[PTR]] : !cir.ptr, !s8i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1_lane_s8(ptr{{.*}}[[PTR:%.*]], <8 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <8 x i8> [[SRC]], ptr [[SRC_ADDR:%.*]], align 8
+// LLVM: [[SRC_VAL:%.*]] = load <8 x i8>, ptr [[SRC_ADDR]], align 8
+// LLVM: store <8 x i8> [[SRC_VAL]], ptr [[S1:%.*]], align 8
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <8 x i8>, ptr [[S1]], align 8
+// LLVM: [[INTRN_VAL:%.*]] = load i8, ptr [[PTR_VAL]], align 1
+// LLVM: {{.*}} = insertelement <8 x i8> [[INTRN_VEC]], i8 [[INTRN_VAL]], i32 7
+// LLVM: ret <8 x i8> {{.*}}
+
+int8x16_t test_vld1q_lane_s8(int8_t const * ptr, int8x16_t src) {
+  return vld1q_lane_s8(ptr, src, 15);
+}
+
+// CIR-LABEL: test_vld1q_lane_s8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(1) [[PTR]] : !cir.ptr, !s8i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1q_lane_s8(ptr{{.*}}[[PTR:%.*]], <16 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <16 x i8> [[SRC]], ptr [[SRC_ADDR:%.*]], align 16
+// LLVM: [[SRC_VAL:%.*]] = load <16 x i8>, ptr [[SRC_ADDR]], align 16
+// LLVM: store <16 x i8> [[SRC_VAL]], ptr [[S1:%.*]], align 16
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <16 x i8>, ptr [[S1]], align 16
+// LLVM: [[INTRN_VAL:%.*]] = load i8, ptr [[PTR_VAL]], align 1
+// LLVM: {{.*}} = insertelement <16 x i8> [[INTRN_VEC]], i8 [[INTRN_VAL]], i32 15
+// LLVM: ret <16 x i8> {{.*}}
+
+uint8x16_t test_vld1q_lane_u8(uint8_t const * ptr, uint8x16_t src) {
+  return vld1q_lane_u8(ptr, src, 15);
+}
+
+// CIR-LABEL: test_vld1q_lane_u8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(1) [[PTR]] : !cir.ptr, !u8i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1q_lane_u8(ptr{{.*}}[[PTR:%.*]], <16 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <16 x i8> [[SRC]], ptr [[SRC_ADDR:%.*]], align 16
+// LLVM: [[SRC_VAL:%.*]] = load <16 x i8>, ptr [[SRC_ADDR]], align 16
+// LLVM: store <16 x i8> [[SRC_VAL]], ptr [[S1:%.*]], align 16
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <16 x i8>, ptr [[S1]], align 16
+// LLVM: [[INTRN_VAL:%.*]] = load i8, ptr [[PTR_VAL]], align 1
+// LLVM: {{.*}} = insertelement <16 x i8> [[INTRN_VEC]], i8 [[INTRN_VAL]], i32 15
+// LLVM: ret <16 x i8> {{.*}}
+
+
+uint8x8_t test_vld1_lane_u8(uint8_t const * ptr, uint8x8_t src) {
+  return vld1_lane_u8(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vld1_lane_u8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(1) [[PTR]] : !cir.ptr, !u8i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1_lane_u8(ptr{{.*}}[[PTR:%.*]], <8 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <8 x i8> [[SRC]], ptr [[SRC_ADDR:%.*]], align 8
+// LLVM: [[SRC_VAL:%.*]] = load <8 x i8>, ptr [[SRC_ADDR]], align 8
+// LLVM: store <8 x i8> [[SRC_VAL]], ptr [[S1:%.*]], align 8
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <8 x i8>, ptr [[S1]], align 8
+// LLVM: [[INTRN_VAL:%.*]] = load i8, ptr [[PTR_VAL]], align 1
+// LLVM: {{.*}} = insertelement <8 x i8> [[INTRN_VEC]], i8 [[INTRN_VAL]], i32 7
+// LLVM: ret <8 x i8> {{.*}}
+
+
+int16x4_t test_vld1_lane_s16(int16_t const * ptr, int16x4_t src) {
+  return vld1_lane_s16(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vld1_lane_s16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(2) [[PTR]] : !cir.ptr, !s16i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1_lane_s16(ptr{{.*}}[[PTR:%.*]], <4 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <4 x i16> [[SRC]], ptr [[SRC_ADDR:%.*]], align 8
+// LLVM: [[SRC_VAL:%.*]] = load <4 x i16>, ptr [[SRC_ADDR]], align 8
+// LLVM: store <4 x i16> [[SRC_VAL]], ptr [[S1:%.*]], align 8
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <4 x i16>, ptr [[S1]], align 8
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <4 x i16> [[INTRN_VEC]] to <8 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <4 x i16>
+// LLVM: [[INTRN_VAL:%.*]] = load i16, ptr [[PTR_VAL]], align 2
+// LLVM: {{.*}} = insertelement <4 x i16> [[INTRN_VEC_CAST1]], i16 [[INTRN_VAL]], i32 3
+// LLVM: ret <4 x i16> {{.*}}
+
+uint16x4_t test_vld1_lane_u16(uint16_t const * ptr, uint16x4_t src) {
+  return vld1_lane_u16(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vld1_lane_u16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(2) [[PTR]] : !cir.ptr, !u16i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1_lane_u16(ptr{{.*}}[[PTR:%.*]], <4 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <4 x i16> [[SRC]], ptr [[SRC_ADDR:%.*]], align 8
+// LLVM: [[SRC_VAL:%.*]] = load <4 x i16>, ptr [[SRC_ADDR]], align 8
+// LLVM: store <4 x i16> [[SRC_VAL]], ptr [[S1:%.*]], align 8
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <4 x i16>, ptr [[S1]], align 8
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <4 x i16> [[INTRN_VEC]] to <8 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <4 x i16>
+// LLVM: [[INTRN_VAL:%.*]] = load i16, ptr [[PTR_VAL]], align 2
+// LLVM: {{.*}} = insertelement <4 x i16> [[INTRN_VEC_CAST1]], i16 [[INTRN_VAL]], i32 3
+// LLVM: ret <4 x i16> {{.*}}
+
+int16x8_t test_vld1q_lane_s16(int16_t const * ptr, int16x8_t src) {
+  return vld1q_lane_s16(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vld1q_lane_s16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(2) [[PTR]] : !cir.ptr, !s16i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1q_lane_s16(ptr{{.*}}[[PTR:%.*]], <8 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <8 x i16> [[SRC]], ptr [[SRC_ADDR:%.*]], align 16
+// LLVM: [[SRC_VAL:%.*]] = load <8 x i16>, ptr [[SRC_ADDR]], align 16
+// LLVM: store <8 x i16> [[SRC_VAL]], ptr [[S1:%.*]], align 16
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <8 x i16>, ptr [[S1]], align 16
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <8 x i16> [[INTRN_VEC]] to <16 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <8 x i16>
+// LLVM: [[INTRN_VAL:%.*]] = load i16, ptr [[PTR_VAL]], align 2
+// LLVM: {{.*}} = insertelement <8 x i16> [[INTRN_VEC_CAST1]], i16 [[INTRN_VAL]], i32 7
+// LLVM: ret <8 x i16> {{.*}}
+
+uint16x8_t test_vld1q_lane_u16(uint16_t const * ptr, uint16x8_t src) {
+  return vld1q_lane_u16(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vld1q_lane_u16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(2) [[PTR]] : !cir.ptr, !u16i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1q_lane_u16(ptr{{.*}}[[PTR:%.*]], <8 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <8 x i16> [[SRC]], ptr [[SRC_ADDR:%.*]], align 16
+// LLVM: [[SRC_VAL:%.*]] = load <8 x i16>, ptr [[SRC_ADDR]], align 16
+// LLVM: store <8 x i16> [[SRC_VAL]], ptr [[S1:%.*]], align 16
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <8 x i16>, ptr [[S1]], align 16
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <8 x i16> [[INTRN_VEC]] to <16 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <8 x i16>
+// LLVM: [[INTRN_VAL:%.*]] = load i16, ptr [[PTR_VAL]], align 2
+// LLVM: {{.*}} = insertelement <8 x i16> [[INTRN_VEC_CAST1]], i16 [[INTRN_VAL]], i32 7
+// LLVM: ret <8 x i16> {{.*}}
+
+
+
+
+int32x2_t test_vld1_lane_s32(int32_t const * ptr, int32x2_t src) {
+  return vld1_lane_s32(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vld1_lane_s32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(4) [[PTR]] : !cir.ptr, !s32i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1_lane_s32(ptr{{.*}}[[PTR:%.*]], <2 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <2 x i32> [[SRC]], ptr [[SRC_ADDR:%.*]], align 8
+// LLVM: [[SRC_VAL:%.*]] = load <2 x i32>, ptr [[SRC_ADDR]], align 8
+// LLVM: store <2 x i32> [[SRC_VAL]], ptr [[S1:%.*]], align 8
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <2 x i32>, ptr [[S1]], align 8
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <2 x i32> [[INTRN_VEC]] to <8 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <2 x i32>
+// LLVM: [[INTRN_VAL:%.*]] = load i32, ptr [[PTR_VAL]], align 4
+// LLVM: {{.*}} = insertelement <2 x i32> [[INTRN_VEC_CAST1]], i32 [[INTRN_VAL]], i32 1
+// LLVM: ret <2 x i32> {{.*}}
+
+uint32x2_t test_vld1_lane_u32(uint32_t const * ptr, uint32x2_t src) {
+  return vld1_lane_u32(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vld1_lane_u32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(4) [[PTR]] : !cir.ptr, !u32i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1_lane_u32(ptr{{.*}}[[PTR:%.*]], <2 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <2 x i32> [[SRC]], ptr [[SRC_ADDR:%.*]], align 8
+// LLVM: [[SRC_VAL:%.*]] = load <2 x i32>, ptr [[SRC_ADDR]], align 8
+// LLVM: store <2 x i32> [[SRC_VAL]], ptr [[S1:%.*]], align 8
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <2 x i32>, ptr [[S1]], align 8
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <2 x i32> [[INTRN_VEC]] to <8 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <2 x i32>
+// LLVM: [[INTRN_VAL:%.*]] = load i32, ptr [[PTR_VAL]], align 4
+// LLVM: {{.*}} = insertelement <2 x i32> [[INTRN_VEC_CAST1]], i32 [[INTRN_VAL]], i32 1
+// LLVM: ret <2 x i32> {{.*}}
+
+
+int32x4_t test_vld1q_lane_s32(int32_t const * ptr, int32x4_t src) {
+  return vld1q_lane_s32(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vld1q_lane_s32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(4) [[PTR]] : !cir.ptr, !s32i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1q_lane_s32(ptr{{.*}}[[PTR:%.*]], <4 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <4 x i32> [[SRC]], ptr [[SRC_ADDR:%.*]], align 16
+// LLVM: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC_ADDR]], align 16
+// LLVM: store <4 x i32> [[SRC_VAL]], ptr [[S1:%.*]], align 16
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <4 x i32>, ptr [[S1]], align 16
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <4 x i32> [[INTRN_VEC]] to <16 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <4 x i32>
+// LLVM: [[INTRN_VAL:%.*]] = load i32, ptr [[PTR_VAL]], align 4
+// LLVM: {{.*}} = insertelement <4 x i32> [[INTRN_VEC_CAST1]], i32 [[INTRN_VAL]], i32 3
+// LLVM: ret <4 x i32> {{.*}}
+
+
+uint32x4_t test_vld1q_lane_u32(uint32_t const * ptr, uint32x4_t src) {
+  return vld1q_lane_u32(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vld1q_lane_u32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(4) [[PTR]] : !cir.ptr, !u32i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1q_lane_u32(ptr{{.*}}[[PTR:%.*]], <4 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <4 x i32> [[SRC]], ptr [[SRC_ADDR:%.*]], align 16
+// LLVM: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC_ADDR]], align 16
+// LLVM: store <4 x i32> [[SRC_VAL]], ptr [[S1:%.*]], align 16
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <4 x i32>, ptr [[S1]], align 16
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <4 x i32> [[INTRN_VEC]] to <16 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <4 x i32>
+// LLVM: [[INTRN_VAL:%.*]] = load i32, ptr [[PTR_VAL]], align 4
+// LLVM: {{.*}} = insertelement <4 x i32> [[INTRN_VEC_CAST1]], i32 [[INTRN_VAL]], i32 3
+// LLVM: ret <4 x i32> {{.*}}
+
+int64x1_t test_vld1_lane_s64(int64_t const * ptr, int64x1_t src) {
+  return vld1_lane_s64(ptr, src, 0);
+}
+
+// CIR-LABEL: test_vld1_lane_s64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr, !s64i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1_lane_s64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <1 x i64> [[SRC]], ptr [[SRC_ADDR:%.*]], align 8
+// LLVM: [[SRC_VAL:%.*]] = load <1 x i64>, ptr [[SRC_ADDR]], align 8
+// LLVM: store <1 x i64> [[SRC_VAL]], ptr [[S1:%.*]], align 8
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <1 x i64>, ptr [[S1]], align 8
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <1 x i64> [[INTRN_VEC]] to <8 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <1 x i64>
+// LLVM: [[INTRN_VAL:%.*]] = load i64, ptr [[PTR_VAL]], align 8
+// LLVM: {{.*}} = insertelement <1 x i64> [[INTRN_VEC_CAST1]], i64 [[INTRN_VAL]], i32 0
+// LLVM: ret <1 x i64> {{.*}}
+
+uint64x1_t test_vld1_lane_u64(uint64_t const * ptr, uint64x1_t src) {
+  return vld1_lane_u64(ptr, src, 0);
+}
+
+// CIR-LABEL: test_vld1_lane_u64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr, !u64i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1_lane_u64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <1 x i64> [[SRC]], ptr [[SRC_ADDR:%.*]], align 8
+// LLVM: [[SRC_VAL:%.*]] = load <1 x i64>, ptr [[SRC_ADDR]], align 8
+// LLVM: store <1 x i64> [[SRC_VAL]], ptr [[S1:%.*]], align 8
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <1 x i64>, ptr [[S1]], align 8
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <1 x i64> [[INTRN_VEC]] to <8 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <1 x i64>
+// LLVM: [[INTRN_VAL:%.*]] = load i64, ptr [[PTR_VAL]], align 8
+// LLVM: {{.*}} = insertelement <1 x i64> [[INTRN_VEC_CAST1]], i64 [[INTRN_VAL]], i32 0
+// LLVM: ret <1 x i64> {{.*}}
+
+int64x2_t test_vld1q_lane_s64(int64_t const * ptr, int64x2_t src) {
+  return vld1q_lane_s64(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vld1q_lane_s64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr, !s64i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1q_lane_s64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <2 x i64> [[SRC]], ptr [[SRC_ADDR:%.*]], align 16
+// LLVM: [[SRC_VAL:%.*]] = load <2 x i64>, ptr [[SRC_ADDR]], align 16
+// LLVM: store <2 x i64> [[SRC_VAL]], ptr [[S1:%.*]], align 16
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <2 x i64>, ptr [[S1]], align 16
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <2 x i64> [[INTRN_VEC]] to <16 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <2 x i64>
+// LLVM: [[INTRN_VAL:%.*]] = load i64, ptr [[PTR_VAL]], align 8
+// LLVM: {{.*}} = insertelement <2 x i64> [[INTRN_VEC_CAST1]], i64 [[INTRN_VAL]], i32 1
+// LLVM: ret <2 x i64> {{.*}}
+
+uint64x2_t test_vld1q_lane_u64(uint64_t const * ptr, uint64x2_t src) {
+  return vld1q_lane_u64(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vld1q_lane_u64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr), !cir.ptr
+// CIR: [[VAL:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr, !u64i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: {{.*}}test_vld1q_lane_u64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: store ptr [[PTR]], ptr [[PTR_ADDR:%.*]], align 8
+// LLVM: store <2 x i64> [[SRC]], ptr [[SRC_ADDR:%.*]], align 16
+// LLVM: [[SRC_VAL:%.*]] = load <2 x i64>, ptr [[SRC_ADDR]], align 16
+// LLVM: store <2 x i64> [[SRC_VAL]], ptr [[S1:%.*]], align 16
+// LLVM: [[PTR_VAL:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// LLVM: [[INTRN_VEC:%.*]] = load <2 x i64>, ptr [[S1]], align 16
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = bitcast <2 x i64> [[INTRN_VEC]] to <16 x i8>
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <2 x i64>
+// LLVM: [[INTRN_VAL:%.*]] = load i64, ptr [[PTR_VAL]], align 8
+// LLVM: {{.*}} = insertelement <2 x i64> [[INTRN_VEC_CAST1]], i64 [[INTRN_VAL]], i32 1
+// LLVM: ret <2 x i64> {{.*}}