[LLVM][InstCombine][SVE] fcvtnt(a,all_active,b) != fcvtnt(undef,all_active,b) (llvm#110278)

The "narrowing top" convert instructions leave the bottom half of active
elements untouched and thus the first parameter of their associated
intrinsic remains live even when there are no inactive lanes.
paulwalker-arm authored Oct 1, 2024
1 parent 8a8e7f3 commit be9461c
Showing 2 changed files with 9 additions and 8 deletions.
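For context, here is a minimal IR sketch (not part of the commit; the function name is invented for illustration) of why the all-active fold is unsound for the narrowing-top converts: fcvtnt writes converted values only into the top (odd) elements of its result, while the bottom (even) elements are passed through from the first operand, regardless of the predicate.

; Hypothetical example: even with an all-active predicate, the even (bottom)
; 32-bit lanes of %out still come from %a, so rewriting %a to undef would
; drop live data.
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float>, <vscale x 2 x i1>, <vscale x 2 x double>)

define <vscale x 4 x float> @narrowing_top_keeps_bottom(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
  ; Odd 32-bit lanes of %out hold f64->f32 conversions of %b; even lanes are copied from %a.
  ret <vscale x 4 x float> %out
}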
9 changes: 5 additions & 4 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2166,11 +2166,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
 case Intrinsic::aarch64_sve_fcvt_f64f32:
 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
-case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
-case Intrinsic::aarch64_sve_fcvtnt_f16f32:
-case Intrinsic::aarch64_sve_fcvtnt_f32f64:
 case Intrinsic::aarch64_sve_fcvtx_f32f64:
-case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
 case Intrinsic::aarch64_sve_fcvtzs:
 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
@@ -2192,6 +2188,11 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
 case Intrinsic::aarch64_sve_ucvtf_f32i64:
 case Intrinsic::aarch64_sve_ucvtf_f64i32:
 return instCombineSVEAllOrNoActiveUnary(IC, II);
+case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
+case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+return instCombineSVENoActiveReplace(IC, II, true);
 case Intrinsic::aarch64_sve_st1_scatter:
 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
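The new cases above route the narrowing-top converts to instCombineSVENoActiveReplace instead. A hedged sketch of the fold that presumably remains available there (function name below is invented for illustration): with no active lanes the instruction writes nothing, so the whole call can be replaced by its passthrough operand.

; Hypothetical example: with an all-false predicate no lanes are written,
; so the call should simplify to its first (passthrough) operand %a.
declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float>, <vscale x 2 x i1>, <vscale x 2 x double>)

define <vscale x 4 x float> @no_active_lanes(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
  ; Expected to fold to: ret <vscale x 4 x float> %a
  ret <vscale x 4 x float> %out
}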
@@ -138,7 +138,7 @@ define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vs
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
 ; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[A]], <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
 ;
 %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -150,7 +150,7 @@ define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale
 ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
 ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
 ; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -162,7 +162,7 @@ define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscal
 ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtnt_f32_f64(
 ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
 ; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
 ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -186,7 +186,7 @@ define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vsca
 ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtxnt_f32_f64(
 ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
 ; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
 ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
