-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AArch64] Implement intrinsics for SVE FAMIN/FAMAX #99042
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-clang Author: Momchil Velikov (momchil-velikov) ChangesThis patch implements the following intrinsics:
All the intrinsics have also variants for (cf. ARM-software/acle#324) Patch is 85.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/99042.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 94c093d891156..be27015ba42c3 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2385,3 +2385,8 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
}
+
+let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in {
+ defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">;
+ defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">;
+}
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
new file mode 100644
index 0000000000000..3cf7d99d606f3
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
@@ -0,0 +1,775 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+// RUN: %clang_cc1 -x c++ -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_mu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_f16_m(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_xu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_f16_x(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_zu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famin_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_mu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_n_f16_m(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_xu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_n_f16_x(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_zu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famin_n_f16_z(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_mu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_f32_m(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_xu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_f32_x(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_zu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famin_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_mu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_n_f32_m(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+ return SVE_A...
[truncated]
|
02d790e
to
7dcd5a7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A couple of suggestions but otherwise this looks good to me.
261e75c
to
628d5d4
Compare
Rebased. |
This patch implements the following intrinsics: * Floating-point absolute maximum (predicated) svfloat16_t svamax[_f16]_m(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_f16]_x(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_f16]_z(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_n_f16]_m(svbool_t, svfloat16_t, float16_t); svfloat16_t svamax[_n_f16]_x(svbool_t, svfloat16_t, float16_t); svfloat16_t svamax[_n_f16]_z(svbool_t, svfloat16_t, float16_t); * Floating-point absolute minimum (predicated) svfloat16_t svmin[_f16]_m(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_f16]_x(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_f16]_z(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_n_f16]_m(svbool_t, svfloat16_t, float16_t); svfloat16_t svmin[_n_f16]_x(svbool_t, svfloat16_t, float16_t); svfloat16_t svmin[_n_f16]_z(svbool_t, svfloat16_t, float16_t); All the intrinsics have also variants for `f32` and `f64`, and have the `__arm_streaming` attribute. (cf. ARM-software/acle#324)
628d5d4
to
562512c
Compare
This patch implements the following intrinsics:
Floating-point absolute maximum (predicated)
svfloat16_t svamax[_f16]_m(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_f16]_x(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_f16]_z(svbool_t, svfloat16_t, svfloat16_t);
svfloat16_t svamax[_n_f16]_m(svbool_t, svfloat16_t, float16_t); svfloat16_t svamax[_n_f16]_x(svbool_t, svfloat16_t, float16_t); svfloat16_t svamax[_n_f16]_z(svbool_t, svfloat16_t, float16_t);
Floating-point absolute minimum (predicated)
svfloat16_t svmin[_f16]_m(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_f16]_x(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_f16]_z(svbool_t, svfloat16_t, svfloat16_t);
svfloat16_t svmin[_n_f16]_m(svbool_t, svfloat16_t, float16_t); svfloat16_t svmin[_n_f16]_x(svbool_t, svfloat16_t, float16_t); svfloat16_t svmin[_n_f16]_z(svbool_t, svfloat16_t, float16_t);
All the intrinsics have also variants for
f32
andf64
, and have the__arm_streaming
attribute.(cf. ARM-software/acle#324)