-
Notifications
You must be signed in to change notification settings - Fork 13.8k
Clang: Add elementwise minnum/maxnum builtin functions #129207
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: YunQiang Su (wzssyqa) ChangesWith #112852, we claimed that llvm.minnum and llvm.maxnum should treat +0.0>-0.0, while libc doesn't require fmin(3)/fmax(3) for it. To make llvm.minnum/llvm.maxnum easy to use, we define the builtin functions for them, include All of them support _Float16, float, double, long double. Patch is 22.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129207.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 598ae171b1389..fd49180d1e58d 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -209,6 +209,18 @@ def FmaxF16F128 : Builtin, F16F128MathTemplate {
let Prototype = "T(T, T)";
}
+def MinNum : Builtin {
+ let Spellings = ["__builtin_minnum"];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, CustomTypeChecking, Constexpr];
+ let Prototype = "void(...)";
+}
+
+def MaxNum : Builtin {
+ let Spellings = ["__builtin_maxnum"];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, CustomTypeChecking, Constexpr];
+ let Prototype = "void(...)";
+}
+
def FminF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_fmin"];
let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
@@ -1298,6 +1310,18 @@ def ElementwiseMin : Builtin {
let Prototype = "void(...)";
}
+def ElementwiseMaxNum : Builtin {
+ let Spellings = ["__builtin_elementwise_maxnum"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
+def ElementwiseMinNum : Builtin {
+ let Spellings = ["__builtin_elementwise_minnum"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ElementwiseMaximum : Builtin {
let Spellings = ["__builtin_elementwise_maximum"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index ebdbc69384efb..41a0b41f331de 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2569,6 +2569,7 @@ class Sema final : public SemaBase {
ExprResult AtomicOpsOverloaded(ExprResult TheCallResult,
AtomicExpr::AtomicOp Op);
+ bool BuiltinMaxNumMinNumMath(CallExpr *TheCall);
/// \param FPOnly restricts the arguments to floating-point types.
bool BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly = false);
bool PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall);
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65fac01d58362..cfbb7b20c33c7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3220,6 +3220,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Intrinsic::minnum,
Intrinsic::experimental_constrained_minnum));
+ case Builtin::BI__builtin_maxnum:
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
+ *this, E, Intrinsic::maxnum,
+ Intrinsic::experimental_constrained_maxnum));
+
+ case Builtin::BI__builtin_minnum:
+ return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
+ *this, E, Intrinsic::minnum,
+ Intrinsic::experimental_constrained_minnum));
+
case Builtin::BIfmaximum_num:
case Builtin::BIfmaximum_numf:
case Builtin::BIfmaximum_numl:
@@ -4398,6 +4408,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}
+ case Builtin::BI__builtin_elementwise_maxnum: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maxnum, Op0,
+ Op1, nullptr, "elt.maxnum");
+ return RValue::get(Result);
+ }
+
+ case Builtin::BI__builtin_elementwise_minnum: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minnum, Op0,
+ Op1, nullptr, "elt.minnum");
+ return RValue::get(Result);
+ }
+
case Builtin::BI__builtin_elementwise_maximum: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f9926c6b4adab..d23b7368bd653 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2753,8 +2753,17 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
break;
}
+ case Builtin::BI__builtin_minnum:
+ case Builtin::BI__builtin_maxnum: {
+ if (BuiltinMaxNumMinNumMath(TheCall))
+ return ExprError();
+ break;
+ }
+
// These builtins restrict the element type to floating point
// types only, and take in two arguments.
+ case Builtin::BI__builtin_elementwise_minnum:
+ case Builtin::BI__builtin_elementwise_maxnum:
case Builtin::BI__builtin_elementwise_minimum:
case Builtin::BI__builtin_elementwise_maximum:
case Builtin::BI__builtin_elementwise_atan2:
@@ -15234,6 +15243,42 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) {
return false;
}
+bool Sema::BuiltinMaxNumMinNumMath(CallExpr *TheCall) {
+ if (checkArgCount(TheCall, 2))
+ return true;
+
+ ExprResult OrigArg0 = TheCall->getArg(0);
+ ExprResult OrigArg1 = TheCall->getArg(1);
+
+ // Do standard promotions between the two arguments, returning their common
+ // type.
+ QualType Res = UsualArithmeticConversions(
+ OrigArg0, OrigArg1, TheCall->getExprLoc(), ACK_Comparison);
+ if (OrigArg0.isInvalid() || OrigArg1.isInvalid())
+ return true;
+
+ // Make sure any conversions are pushed back into the call; this is
+ // type safe since unordered compare builtins are declared as "_Bool
+ // foo(...)".
+ TheCall->setArg(0, OrigArg0.get());
+ TheCall->setArg(1, OrigArg1.get());
+
+ if (!OrigArg0.get()->isTypeDependent() && OrigArg1.get()->isTypeDependent())
+ return true;
+
+ // If the common type isn't a real floating type, then the arguments were
+ // invalid for this operation.
+ if (Res.isNull() || !Res->isRealFloatingType())
+ return Diag(OrigArg0.get()->getBeginLoc(),
+ diag::err_typecheck_call_invalid_ordered_compare)
+ << OrigArg0.get()->getType() << OrigArg1.get()->getType()
+ << SourceRange(OrigArg0.get()->getBeginLoc(),
+ OrigArg1.get()->getEndLoc());
+
+ TheCall->setType(Res);
+ return false;
+}
+
bool Sema::BuiltinElementwiseMath(CallExpr *TheCall, bool FPOnly) {
if (auto Res = BuiltinVectorMath(TheCall, FPOnly); Res.has_value()) {
TheCall->setType(*Res);
diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c
new file mode 100644
index 0000000000000..3971ec644ffc5
--- /dev/null
+++ b/clang/test/CodeGen/builtin-maxnum-minnum.c
@@ -0,0 +1,269 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK
+
+typedef _Float16 half8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef double double2 __attribute__((ext_vector_type(2)));
+typedef long double ldouble2 __attribute__((ext_vector_type(2)));
+
+// CHECK-LABEL: define dso_local half @fmin16(
+// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store half [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.minnum.f16(half [[TMP0]], half [[TMP1]])
+// CHECK-NEXT: ret half [[TMP2]]
+//
+_Float16 fmin16(_Float16 a, _Float16 b) {
+ return __builtin_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local <8 x half> @pfmin16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[ELT_MINNUM]]
+//
+half8 pfmin16(half8 a, half8 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local float @fmin32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[TMP0]], float [[TMP1]])
+// CHECK-NEXT: ret float [[TMP2]]
+//
+float fmin32(float a, float b) {
+ return __builtin_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local <4 x float> @pfmin32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]]
+//
+float4 pfmin32(float4 a, float4 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local double @fmin64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[TMP0]], double [[TMP1]])
+// CHECK-NEXT: ret double [[TMP2]]
+//
+double fmin64(double a, double b) {
+ return __builtin_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local <2 x double> @pfmin64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: ret <2 x double> [[ELT_MINNUM]]
+//
+double2 pfmin64(double2 a, double2 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local x86_fp80 @fmin80(
+// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.minnum.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]])
+// CHECK-NEXT: ret x86_fp80 [[TMP2]]
+//
+long double fmin80(long double a, long double b) {
+ return __builtin_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local <2 x x86_fp80> @pfmin80(
+// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
+// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]]
+//
+ldouble2 pfmin80(ldouble2 a, ldouble2 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local half @fmax16(
+// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: store half [[B]], ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.maxnum.f16(half [[TMP0]], half [[TMP1]])
+// CHECK-NEXT: ret half [[TMP2]]
+//
+_Float16 fmax16(_Float16 a, _Float16 b) {
+ return __builtin_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local <8 x half> @pfmax16(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[ELT_MAXNUM]]
+//
+half8 pfmax16(half8 a, half8 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local float @fmax32(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: store float [[A]], ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: store float [[B]], ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[TMP0]], float [[TMP1]])
+// CHECK-NEXT: ret float [[TMP2]]
+//
+float fmax32(float a, float b) {
+ return __builtin_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local <4 x float> @pfmax32(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]]
+//
+float4 pfmax32(float4 a, float4 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local double @fmax64(
+// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: store double [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: store double [[B]], ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8, !tbaa [[TBAA9]]
+// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[TMP0]], double [[TMP1]])
+// CHECK-NEXT: ret double [[TMP2]]
+//
+double fmax64(double a, double b) {
+ return __builtin_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local <2 x double> @pfmax64(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA6]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: ret <2 x double> [[ELT_MAXNUM]]
+//
+double2 pfmax64(double2 a, double2 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local x86_fp80 @fmax80(
+// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: store x86_fp80 [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: store x86_fp80 [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[A_ADDR]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[B_ADDR]], align 16, !tbaa [[T...
[truncated]
|
Needs the documentation updates in clang language extensions with the other elementwise builtins, and a release note |
Documentation updated. |
Best to keep the note about the new builtins here. The PR mentioning the other changes is separate from this standalone addition |
Done |
ping |
With llvm#112852, we claimed that llvm.minnum and llvm.maxnum should treat +0.0>-0.0, while libc doesn't require fmin(3)/fmax(3) for it. To make llvm.minnum/llvm.maxnum easy to use, we define the builtin functions for them, include __builtin_minnum __builtin_elementwise_minnum __builtin_minnum __builtin_elementwise_minnum __builtin_minnum __builtin_elementwise_minnum __builtin_minnum __builtin_maxnum __builtin_elementwise_maxnum __builtin_maxnum __builtin_elementwise_maxnum __builtin_maxnum __builtin_elementwise_maxnum __builtin_maxnum All of them support _Float16, float, double, long double.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/81/builds/6292 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/17/builds/7273 Here is the relevant piece of the build log for the reference
|
With llvm#112852, we claimed that llvm.minnum and llvm.maxnum should treat +0.0>-0.0, while libc doesn't require fmin(3)/fmax(3) for it. To make llvm.minnum/llvm.maxnum easy to use, we define the builtin functions for them, include __builtin_elementwise_minnum __builtin_elementwise_maxnum All of them support _Float16, __bf16, float, double, long double.
With #112852, we claimed that llvm.minnum and llvm.maxnum should treat +0.0>-0.0, while libc doesn't require fmin(3)/fmax(3) for it.
To make llvm.minnum/llvm.maxnum easy to use, we define the builtin functions for them, include
__builtin_elementwise_minnum
__builtin_elementwise_maxnum
All of them support _Float16, __bf16, float, double, long double.