Skip to content

Commit 85d049a

Browse files
committed
Implement support for option 'fexcess-precision'.
Differential revision: https://reviews.llvm.org/D136176
1 parent abcdc26 commit 85d049a

File tree

15 files changed

+542
-28
lines changed

15 files changed

+542
-28
lines changed

clang/docs/UsersManual.rst

+41
Original file line numberDiff line numberDiff line change
@@ -1746,6 +1746,47 @@ floating point semantic models: precise (the default), strict, and fast.
17461746
has no effect because the optimizer is prohibited from making unsafe
17471747
transformations.
17481748

1749+
.. option:: -fexcess-precision=<value>
1750+
1751+
The C and C++ standards allow floating-point expressions to be computed as if
1752+
intermediate results had more precision (and/or a wider range) than the type
1753+
of the expression strictly allows. This is called excess precision
1754+
arithmetic.
1755+
Excess precision arithmetic can improve the accuracy of results (although not
1756+
always), and it can make computation significantly faster if the target lacks
1757+
direct hardware support for arithmetic in a particular type. However, it can
1758+
also undermine strict floating-point reproducibility.
1759+
1760+
Under the standards, assignments and explicit casts force the operand to be
1761+
converted to its formal type, discarding any excess precision. Because data
1762+
can only flow between statements via an assignment, this means that the use
1763+
of excess precision arithmetic is a reliable local property of a single
1764+
statement, and results do not change based on optimization. However, when
1765+
excess precision arithmetic is in use, Clang does not guarantee strict
1766+
reproducibility, and future compiler releases may recognize more
1767+
opportunities to use excess precision arithmetic, e.g. with floating-point
1768+
builtins.
1769+
1770+
Clang does not use excess precision arithmetic for most types or on most
1771+
targets. For example, even on pre-SSE X86 targets where ``float`` and
1772+
``double`` computations must be performed in the 80-bit X87 format, Clang
1773+
rounds all intermediate results correctly for their type. Clang currently
1774+
uses excess precision arithmetic by default only for the following types and
1775+
targets:
1776+
1777+
* ``_Float16`` on X86 targets without ``AVX512-FP16``.
1778+
1779+
The ``-fexcess-precision=<value>`` option can be used to control the use of
1780+
excess precision arithmetic. Valid values are:
1781+
1782+
* ``standard`` - The default. Allow the use of excess precision arithmetic
1783+
under the constraints of the C and C++ standards. Has no effect except on
1784+
the types and targets listed above.
1785+
* ``fast`` - Accepted for GCC compatibility, but currently treated as an
1786+
alias for ``standard``.
1787+
* ``16`` - Forces ``_Float16`` operations to be emitted without using excess
1788+
precision arithmetic.
1789+
17491790
.. _crtfastmath.o:
17501791

17511792
A note about ``crtfastmath.o``

clang/include/clang/AST/Type.h

+2
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,8 @@ class QualType {
763763
unsigned getLocalFastQualifiers() const { return Value.getInt(); }
764764
void setLocalFastQualifiers(unsigned Quals) { Value.setInt(Quals); }
765765

766+
bool UseExcessPrecision(const ASTContext &Ctx);
767+
766768
/// Retrieves a pointer to the underlying (unqualified) type.
767769
///
768770
/// This function requires that the type not be NULL. If the type might be

clang/include/clang/Basic/FPOptions.def

+1
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ OPTION(NoSignedZero, bool, 1, NoHonorInfs)
2525
OPTION(AllowReciprocal, bool, 1, NoSignedZero)
2626
OPTION(AllowApproxFunc, bool, 1, AllowReciprocal)
2727
OPTION(FPEvalMethod, LangOptions::FPEvalMethodKind, 2, AllowApproxFunc)
28+
OPTION(Float16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, FPEvalMethod)
2829
#undef OPTION

clang/include/clang/Basic/LangOptions.def

+1
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ COMPATIBLE_LANGOPT(ExpStrictFP, 1, false, "Enable experimental strict floating p
317317
BENIGN_LANGOPT(RoundingMath, 1, false, "Do not assume default floating-point rounding behavior")
318318
BENIGN_ENUM_LANGOPT(FPExceptionMode, FPExceptionModeKind, 2, FPE_Default, "FP Exception Behavior Mode type")
319319
BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "FP type used for floating point arithmetic")
320+
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for floating point arithmetic")
320321
LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
321322
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
322323
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")

clang/include/clang/Basic/LangOptions.h

+2
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@ class LangOptions : public LangOptionsBase {
295295
FEM_UnsetOnCommandLine = 3
296296
};
297297

298+
enum ExcessPrecisionKind { FPP_Standard, FPP_Fast, FPP_None };
299+
298300
/// Possible exception handling behavior.
299301
enum class ExceptionHandlingKind { None, SjLj, WinEH, DwarfCFI, Wasm };
300302

clang/include/clang/Basic/TargetInfo.h

-2
Original file line numberDiff line numberDiff line change
@@ -933,8 +933,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
933933
return true;
934934
}
935935

936-
virtual bool shouldEmitFloat16WithExcessPrecision() const { return false; }
937-
938936
/// Specify if mangling based on address space map should be used or
939937
/// not for language specific address spaces
940938
bool useAddressSpaceMapMangling() const {

clang/include/clang/Driver/Options.td

+16-2
Original file line numberDiff line numberDiff line change
@@ -1576,8 +1576,22 @@ def exception_model_EQ : Joined<["-"], "exception-model=">,
15761576
def fignore_exceptions : Flag<["-"], "fignore-exceptions">, Group<f_Group>, Flags<[CC1Option]>,
15771577
HelpText<"Enable support for ignoring exception handling constructs">,
15781578
MarshallingInfoFlag<LangOpts<"IgnoreExceptions">>;
1579-
def fexcess_precision_EQ : Joined<["-"], "fexcess-precision=">,
1580-
Group<clang_ignored_gcc_optimization_f_Group>;
1579+
def fexcess_precision_EQ : Joined<["-"], "fexcess-precision=">, Group<f_Group>,
1580+
HelpText<"Allows control over excess precision on targets where native "
1581+
"support for the precision types is not available. By default, excess "
1582+
"precision is used to calculate intermediate results following the "
1583+
"rules specified in ISO C99.">,
1584+
Values<"standard,fast,none">, NormalizedValuesScope<"LangOptions">,
1585+
NormalizedValues<["FPP_Standard", "FPP_Fast", "FPP_None"]>;
1586+
def ffloat16_excess_precision_EQ : Joined<["-"], "ffloat16-excess-precision=">,
1587+
Group<f_Group>, Flags<[CC1Option, NoDriverOption]>,
1588+
HelpText<"Allows control over excess precision on targets where native "
1589+
"support for Float16 precision types is not available. By default, excess "
1590+
"precision is used to calculate intermediate results following the "
1591+
"rules specified in ISO C99.">,
1592+
Values<"standard,fast,none">, NormalizedValuesScope<"LangOptions">,
1593+
NormalizedValues<["FPP_Standard", "FPP_Fast", "FPP_None"]>,
1594+
MarshallingInfoEnum<LangOpts<"Float16ExcessPrecision">, "FPP_Standard">;
15811595
def : Flag<["-"], "fexpensive-optimizations">, Group<clang_ignored_gcc_optimization_f_Group>;
15821596
def : Flag<["-"], "fno-expensive-optimizations">, Group<clang_ignored_gcc_optimization_f_Group>;
15831597
def fextdirs_EQ : Joined<["-"], "fextdirs=">, Group<f_Group>;

clang/lib/AST/Type.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -1483,6 +1483,25 @@ struct StripObjCKindOfTypeVisitor
14831483

14841484
} // namespace
14851485

1486+
bool QualType::UseExcessPrecision(const ASTContext &Ctx) {
1487+
const BuiltinType *BT = getTypePtr()->getAs<BuiltinType>();
1488+
if (BT) {
1489+
switch (BT->getKind()) {
1490+
case BuiltinType::Kind::Float16: {
1491+
const TargetInfo &TI = Ctx.getTargetInfo();
1492+
if (TI.hasFloat16Type() && !TI.hasLegalHalfType() &&
1493+
Ctx.getLangOpts().getFloat16ExcessPrecision() !=
1494+
Ctx.getLangOpts().ExcessPrecisionKind::FPP_None)
1495+
return true;
1496+
return false;
1497+
}
1498+
default:
1499+
return false;
1500+
}
1501+
}
1502+
return false;
1503+
}
1504+
14861505
/// Substitute the given type arguments for Objective-C type
14871506
/// parameters within the given type, recursively.
14881507
QualType QualType::substObjCTypeArgs(ASTContext &ctx,

clang/lib/Basic/Targets/X86.h

-4
Original file line numberDiff line numberDiff line change
@@ -302,10 +302,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
302302
return false;
303303
}
304304

305-
bool shouldEmitFloat16WithExcessPrecision() const override {
306-
return HasFloat16 && !hasLegalHalfType();
307-
}
308-
309305
void getTargetDefines(const LangOptions &Opts,
310306
MacroBuilder &Builder) const override;
311307

clang/lib/CodeGen/CGExprComplex.cpp

+6-11
Original file line numberDiff line numberDiff line change
@@ -275,18 +275,13 @@ class ComplexExprEmitter
275275
const BinOpInfo &Op);
276276

277277
QualType getPromotionType(QualType Ty) {
278-
if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) {
279-
if (Ty->isRealFloatingType()) {
280-
if (Ty->isFloat16Type())
281-
return CGF.getContext().FloatTy;
282-
} else {
283-
assert(Ty->isAnyComplexType() &&
284-
"Expecting to promote a complex type!");
285-
QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
286-
if (ElementType->isFloat16Type())
287-
return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
288-
}
278+
if (auto *CT = Ty->getAs<ComplexType>()) {
279+
QualType ElementType = CT->getElementType();
280+
if (ElementType.UseExcessPrecision(CGF.getContext()))
281+
return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
289282
}
283+
if (Ty.UseExcessPrecision(CGF.getContext()))
284+
return CGF.getContext().FloatTy;
290285
return QualType();
291286
}
292287

clang/lib/CodeGen/CGExprScalar.cpp

+6-8
Original file line numberDiff line numberDiff line change
@@ -814,15 +814,13 @@ class ScalarExprEmitter
814814
Value *(ScalarExprEmitter::*F)(const BinOpInfo &));
815815

816816
QualType getPromotionType(QualType Ty) {
817-
if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) {
818-
if (Ty->isAnyComplexType()) {
819-
QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
820-
if (ElementType->isFloat16Type())
821-
return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
822-
}
823-
if (Ty->isFloat16Type())
824-
return CGF.getContext().FloatTy;
817+
if (auto *CT = Ty->getAs<ComplexType>()) {
818+
QualType ElementType = CT->getElementType();
819+
if (ElementType.UseExcessPrecision(CGF.getContext()))
820+
return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
825821
}
822+
if (Ty.UseExcessPrecision(CGF.getContext()))
823+
return CGF.getContext().FloatTy;
826824
return QualType();
827825
}
828826

clang/lib/Driver/ToolChains/Clang.cpp

+26
Original file line numberDiff line numberDiff line change
@@ -2705,6 +2705,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
27052705
!JA.isOffloading(Action::OFK_HIP))
27062706
FPContract = "on";
27072707
bool StrictFPModel = false;
2708+
StringRef Float16ExcessPrecision = "";
27082709

27092710
if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
27102711
CmdArgs.push_back("-mlimit-float-precision");
@@ -2901,6 +2902,27 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
29012902
break;
29022903
}
29032904

2905+
case options::OPT_fexcess_precision_EQ: {
2906+
StringRef Val = A->getValue();
2907+
const llvm::Triple::ArchType Arch = TC.getArch();
2908+
if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
2909+
if (Val.equals("standard") || Val.equals("fast"))
2910+
Float16ExcessPrecision = Val;
2911+
// To make it GCC compatible, allow the value of "16" which
2912+
// means disable excess precision, the same meaning than clang's
2913+
// equivalent value "none".
2914+
else if (Val.equals("16"))
2915+
Float16ExcessPrecision = "none";
2916+
else
2917+
D.Diag(diag::err_drv_unsupported_option_argument)
2918+
<< A->getSpelling() << Val;
2919+
} else {
2920+
if (!(Val.equals("standard") || Val.equals("fast")))
2921+
D.Diag(diag::err_drv_unsupported_option_argument)
2922+
<< A->getSpelling() << Val;
2923+
}
2924+
break;
2925+
}
29042926
case options::OPT_ffinite_math_only:
29052927
HonorINFs = false;
29062928
HonorNaNs = false;
@@ -3071,6 +3093,10 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
30713093
if (!FPEvalMethod.empty())
30723094
CmdArgs.push_back(Args.MakeArgString("-ffp-eval-method=" + FPEvalMethod));
30733095

3096+
if (!Float16ExcessPrecision.empty())
3097+
CmdArgs.push_back(Args.MakeArgString("-ffloat16-excess-precision=" +
3098+
Float16ExcessPrecision));
3099+
30743100
ParseMRecip(D, Args, CmdArgs);
30753101

30763102
// -ffast-math enables the __FAST_MATH__ preprocessor macro, but check for the

0 commit comments

Comments
 (0)