Skip to content

Commit 97aa332

Browse files
[AArch64] Implement intrinsics for SVE FAMIN/FAMAX
This patch implements the following intrinsics: * Floating-point absolute maximum (predicated) svfloat16_t svamax[_f16]_m(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_f16]_x(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_f16]_z(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_n_f16]_m(svbool_t, svfloat16_t, float16_t); svfloat16_t svamax[_n_f16]_x(svbool_t, svfloat16_t, float16_t); svfloat16_t svamax[_n_f16]_z(svbool_t, svfloat16_t, float16_t); * Floating-point absolute minimum (predicated) svfloat16_t svmin[_f16]_m(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_f16]_x(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_f16]_z(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_n_f16]_m(svbool_t, svfloat16_t, float16_t); svfloat16_t svmin[_n_f16]_x(svbool_t, svfloat16_t, float16_t); svfloat16_t svmin[_n_f16]_z(svbool_t, svfloat16_t, float16_t); All the intrinsics have also variants for `f32` and `f64`, and have the `__arm_streaming` attribute. (cf. ARM-software/acle#324)
1 parent 126d6f2 commit 97aa332

File tree

7 files changed

+1076
-3
lines changed

7 files changed

+1076
-3
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2418,4 +2418,9 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
24182418

24192419
def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
24202420
def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
2421-
}
2421+
}
2422+
2423+
let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in {
2424+
defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">;
2425+
defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">;
2426+
}

clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c

Lines changed: 775 additions & 0 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3785,3 +3785,10 @@ def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic
37853785
def int_aarch64_sme_mopa_nonwide : SME_OuterProduct_Intrinsic;
37863786
def int_aarch64_sme_mops_nonwide : SME_OuterProduct_Intrinsic;
37873787

3788+
// SVE2/SME2 - Floating point absolute maximum and minimum
3789+
3790+
def int_aarch64_sve_famax : AdvSIMD_Pred2VectorArg_Intrinsic;
3791+
def int_aarch64_sve_famax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
3792+
3793+
def int_aarch64_sve_famin : AdvSIMD_Pred2VectorArg_Intrinsic;
3794+
def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2735,6 +2735,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
27352735
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
27362736
MAKE_CASE(AArch64ISD::FMUL_PRED)
27372737
MAKE_CASE(AArch64ISD::FSUB_PRED)
2738+
MAKE_CASE(AArch64ISD::FAMAX_PRED)
2739+
MAKE_CASE(AArch64ISD::FAMIN_PRED)
27382740
MAKE_CASE(AArch64ISD::RDSVL)
27392741
MAKE_CASE(AArch64ISD::BIC)
27402742
MAKE_CASE(AArch64ISD::CBZ)
@@ -22132,6 +22134,12 @@ static SDValue performIntrinsicCombine(SDNode *N,
2213222134
AArch64CC::LAST_ACTIVE);
2213322135
case Intrinsic::aarch64_sve_whilelo:
2213422136
return tryCombineWhileLo(N, DCI, Subtarget);
22137+
case Intrinsic::aarch64_sve_famax_u:
22138+
return DAG.getNode(AArch64ISD::FAMAX_PRED, SDLoc(N), N->getValueType(0),
22139+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
22140+
case Intrinsic::aarch64_sve_famin_u:
22141+
return DAG.getNode(AArch64ISD::FAMIN_PRED, SDLoc(N), N->getValueType(0),
22142+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
2213522143
}
2213622144
return SDValue();
2213722145
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ enum NodeType : unsigned {
135135
UDIV_PRED,
136136
UMAX_PRED,
137137
UMIN_PRED,
138+
FAMAX_PRED,
139+
FAMIN_PRED,
138140

139141
// Unpredicated vector instructions
140142
BIC,

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,9 @@ def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3),
218218
return N->getFlags().hasAllowContract();
219219
}]>;
220220

221+
def AArch64famax_p : SDNode<"AArch64ISD::FAMAX_PRED", SDT_AArch64Arith>;
222+
def AArch64famin_p : SDNode<"AArch64ISD::FAMIN_PRED", SDT_AArch64Arith>;
223+
221224
def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
222225
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
223226
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
@@ -483,6 +486,8 @@ def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm,
483486
def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>;
484487
def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>;
485488
def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;
489+
def AArch64famax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famax, AArch64famax_p>;
490+
def AArch64famin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famin, AArch64famin_p>;
486491

487492
def AArch64fadd : PatFrags<(ops node:$op1, node:$op2),
488493
[(fadd node:$op1, node:$op2),
@@ -717,6 +722,11 @@ let Predicates = [HasSVEorSME] in {
717722
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
718723
} // End HasSVEorSME
719724

725+
let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
726+
defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64famax_p>;
727+
defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64famin_p>;
728+
}
729+
720730
let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
721731
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
722732
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
@@ -4184,8 +4194,8 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
41844194

41854195
let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
41864196
// FP8 Arithmetic - Predicated Group
4187-
defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "", null_frag, DestructiveOther>;
4188-
defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "", null_frag, DestructiveOther>;
4197+
defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", AArch64famin_m1, DestructiveBinaryComm>;
4198+
defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", AArch64famax_m1, DestructiveBinaryComm>;
41894199
} // End HasSVE2orSME2, HasFAMINMAX
41904200

41914201
let Predicates = [HasSSVE_FP8FMA] in {
Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mattr=+sve2 < %s | FileCheck %s
3+
; RUN: llc -mattr=+sme2 -force-streaming < %s | FileCheck %s
4+
5+
target triple = "aarch64-linux"
6+
7+
define <vscale x 8 x half> @famin_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
8+
; CHECK-LABEL: famin_f16:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
11+
; CHECK-NEXT: ret
12+
%r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
13+
ret <vscale x 8 x half> %r
14+
}
15+
16+
define <vscale x 4 x float> @famin_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
17+
; CHECK-LABEL: famin_f32:
18+
; CHECK: // %bb.0:
19+
; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
20+
; CHECK-NEXT: ret
21+
%r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
22+
ret <vscale x 4 x float> %r
23+
}
24+
25+
define <vscale x 2 x double> @famin_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
26+
; CHECK-LABEL: famin_f64:
27+
; CHECK: // %bb.0:
28+
; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
29+
; CHECK-NEXT: ret
30+
%r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
31+
ret <vscale x 2 x double> %r
32+
}
33+
34+
define <vscale x 8 x half> @famin_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
35+
; CHECK-LABEL: famin_u_f16:
36+
; CHECK: // %bb.0:
37+
; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
38+
; CHECK-NEXT: ret
39+
%r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
40+
ret <vscale x 8 x half> %r
41+
}
42+
43+
define <vscale x 4 x float> @famin_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
44+
; CHECK-LABEL: famin_u_f32:
45+
; CHECK: // %bb.0:
46+
; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
47+
; CHECK-NEXT: ret
48+
%r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
49+
ret <vscale x 4 x float> %r
50+
}
51+
52+
define <vscale x 2 x double> @famin_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
53+
; CHECK-LABEL: famin_u_f64:
54+
; CHECK: // %bb.0:
55+
; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
56+
; CHECK-NEXT: ret
57+
%r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
58+
ret <vscale x 2 x double> %r
59+
}
60+
61+
define <vscale x 8 x half> @famax_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
62+
; CHECK-LABEL: famax_f16:
63+
; CHECK: // %bb.0:
64+
; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
65+
; CHECK-NEXT: ret
66+
%r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
67+
ret <vscale x 8 x half> %r
68+
}
69+
70+
define <vscale x 4 x float> @famax_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
71+
; CHECK-LABEL: famax_f32:
72+
; CHECK: // %bb.0:
73+
; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
74+
; CHECK-NEXT: ret
75+
%r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
76+
ret <vscale x 4 x float> %r
77+
}
78+
79+
define <vscale x 2 x double> @famax_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
80+
; CHECK-LABEL: famax_f64:
81+
; CHECK: // %bb.0:
82+
; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
83+
; CHECK-NEXT: ret
84+
%r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
85+
ret <vscale x 2 x double> %r
86+
}
87+
88+
define <vscale x 8 x half> @famax_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
89+
; CHECK-LABEL: famax_u_f16:
90+
; CHECK: // %bb.0:
91+
; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
92+
; CHECK-NEXT: ret
93+
%r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
94+
ret <vscale x 8 x half> %r
95+
}
96+
97+
define <vscale x 4 x float> @famax_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
98+
; CHECK-LABEL: famax_u_f32:
99+
; CHECK: // %bb.0:
100+
; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
101+
; CHECK-NEXT: ret
102+
%r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
103+
ret <vscale x 4 x float> %r
104+
}
105+
106+
define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
107+
; CHECK-LABEL: famax_u_f64:
108+
; CHECK: // %bb.0:
109+
; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
110+
; CHECK-NEXT: ret
111+
%r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
112+
ret <vscale x 2 x double> %r
113+
}
114+
115+
define <vscale x 8 x half> @select_famin_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
116+
; CHECK-LABEL: select_famin_f16a:
117+
; CHECK: // %bb.0:
118+
; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
119+
; CHECK-NEXT: ret
120+
%all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
121+
%m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
122+
%r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
123+
ret <vscale x 8 x half> %r
124+
}
125+
126+
define <vscale x 8 x half> @select_famin_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
127+
; CHECK-LABEL: select_famin_f16b:
128+
; CHECK: // %bb.0:
129+
; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
130+
; CHECK-NEXT: ret
131+
%all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
132+
%m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
133+
%r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
134+
ret <vscale x 8 x half> %r
135+
}
136+
137+
define <vscale x 4 x float> @select_famin_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
138+
; CHECK-LABEL: select_famin_f32a:
139+
; CHECK: // %bb.0:
140+
; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
141+
; CHECK-NEXT: ret
142+
%all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
143+
%m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
144+
%r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
145+
ret <vscale x 4 x float> %r
146+
}
147+
148+
define <vscale x 4 x float> @select_famin_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
149+
; CHECK-LABEL: select_famin_f32b:
150+
; CHECK: // %bb.0:
151+
; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
152+
; CHECK-NEXT: ret
153+
%all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
154+
%m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
155+
%r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
156+
ret <vscale x 4 x float> %r
157+
}
158+
159+
define <vscale x 2 x double> @select_famin_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
160+
; CHECK-LABEL: select_famin_f64a:
161+
; CHECK: // %bb.0:
162+
; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
163+
; CHECK-NEXT: ret
164+
%all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
165+
%m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
166+
%r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
167+
ret <vscale x 2 x double> %r
168+
}
169+
170+
define <vscale x 2 x double> @select_famin_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
171+
; CHECK-LABEL: select_famin_f64b:
172+
; CHECK: // %bb.0:
173+
; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
174+
; CHECK-NEXT: ret
175+
%all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
176+
%m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
177+
%r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
178+
ret <vscale x 2 x double> %r
179+
}
180+
181+
182+
define <vscale x 8 x half> @select_famax_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
183+
; CHECK-LABEL: select_famax_f16a:
184+
; CHECK: // %bb.0:
185+
; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
186+
; CHECK-NEXT: ret
187+
%all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
188+
%m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
189+
%r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
190+
ret <vscale x 8 x half> %r
191+
}
192+
193+
define <vscale x 8 x half> @select_famax_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
194+
; CHECK-LABEL: select_famax_f16b:
195+
; CHECK: // %bb.0:
196+
; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
197+
; CHECK-NEXT: ret
198+
%all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
199+
%m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
200+
%r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
201+
ret <vscale x 8 x half> %r
202+
}
203+
204+
define <vscale x 4 x float> @select_famax_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
205+
; CHECK-LABEL: select_famax_f32a:
206+
; CHECK: // %bb.0:
207+
; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
208+
; CHECK-NEXT: ret
209+
%all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
210+
%m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
211+
%r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
212+
ret <vscale x 4 x float> %r
213+
}
214+
215+
define <vscale x 4 x float> @select_famax_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
216+
; CHECK-LABEL: select_famax_f32b:
217+
; CHECK: // %bb.0:
218+
; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
219+
; CHECK-NEXT: ret
220+
%all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
221+
%m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
222+
%r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
223+
ret <vscale x 4 x float> %r
224+
}
225+
226+
define <vscale x 2 x double> @select_famax_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
227+
; CHECK-LABEL: select_famax_f64a:
228+
; CHECK: // %bb.0:
229+
; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
230+
; CHECK-NEXT: ret
231+
%all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
232+
%m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
233+
%r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
234+
ret <vscale x 2 x double> %r
235+
}
236+
237+
define <vscale x 2 x double> @select_famax_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
238+
; CHECK-LABEL: select_famax_f64b:
239+
; CHECK: // %bb.0:
240+
; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
241+
; CHECK-NEXT: ret
242+
%all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
243+
%m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
244+
%r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
245+
ret <vscale x 2 x double> %r
246+
}
247+
248+
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
249+
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
250+
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
251+
252+
declare <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
253+
declare <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
254+
declare <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
255+
declare <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
256+
declare <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
257+
declare <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
258+
259+
declare <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
260+
declare <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
261+
declare <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
262+
declare <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
263+
declare <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
264+
declare <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
265+
266+
attributes #0 = { nounwind "target-features" = "+faminmax" }

0 commit comments

Comments
 (0)