Skip to content

[builtins] Support building the 128-bit float functions on ld80 platforms #68132

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,6 @@ set(BF16_SOURCES
truncsfbf2.c
)

# TODO: Several "tf" files (and divtc3.c, but not multc3.c) are in
# GENERIC_SOURCES instead of here.
set(GENERIC_TF_SOURCES
addtf3.c
comparetf2.c
Expand Down
21 changes: 12 additions & 9 deletions compiler-rt/lib/builtins/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,49 +137,54 @@ si_int __ucmpti2(tu_int a, tu_int b);
di_int __fixsfdi( float a);
di_int __fixdfdi( double a);
di_int __fixxfdi(long double a);
di_int __fixtfdi( tf_float a);

ti_int __fixsfti( float a);
ti_int __fixdfti( double a);
ti_int __fixxfti(long double a);
uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation
ti_int __fixtfti( tf_float a);

su_int __fixunssfsi( float a);
su_int __fixunsdfsi( double a);
su_int __fixunsxfsi(long double a);
su_int __fixunstfsi( tf_float a);

du_int __fixunssfdi( float a);
du_int __fixunsdfdi( double a);
du_int __fixunsxfdi(long double a);
du_int __fixunstfdi( tf_float a);

tu_int __fixunssfti( float a);
tu_int __fixunsdfti( double a);
tu_int __fixunsxfti(long double a);
uint64_t __fixunstfdi(long double input); // ppc only
tu_int __fixunstfti( tf_float a);

float __floatdisf(di_int a);
double __floatdidf(di_int a);
long double __floatdixf(di_int a);
long double __floatditf(int64_t a); // ppc only
tf_float __floatditf(int64_t a);

float __floattisf(ti_int a);
double __floattidf(ti_int a);
long double __floattixf(ti_int a);
tf_float __floattitf(ti_int a);

float __floatundisf(du_int a);
double __floatundidf(du_int a);
long double __floatundixf(du_int a);
long double __floatunditf(uint64_t a); // ppc only
tf_float __floatunditf(du_int a);

float __floatuntisf(tu_int a);
double __floatuntidf(tu_int a);
long double __floatuntixf(tu_int a);
tf_float __floatuntixf(tu_int a);

// Floating point raised to integer power

float __powisf2( float a, int b); // a ^ b
double __powidf2( double a, int b); // a ^ b
long double __powixf2(long double a, int b); // a ^ b
long double __powitf2(long double a, int b); // ppc only, a ^ b
tf_float __powitf2( tf_float a, int b); // a ^ b

// Complex arithmetic

Expand All @@ -189,17 +194,15 @@ long double __powitf2(long double a, int b); // ppc only, a ^ b
double _Complex __muldc3(double a, double b, double c, double d);
long double _Complex __mulxc3(long double a, long double b,
long double c, long double d);
long double _Complex __multc3(long double a, long double b,
long double c, long double d); // ppc only
tf_float _Complex __multc3(tf_float a, tf_float b, tf_float c, tf_float d);

// (a + ib) / (c + id)

float _Complex __divsc3( float a, float b, float c, float d);
double _Complex __divdc3(double a, double b, double c, double d);
long double _Complex __divxc3(long double a, long double b,
long double c, long double d);
long double _Complex __divtc3(long double a, long double b,
long double c, long double d); // ppc only
tf_float _Complex __divtc3(tf_float a, tf_float b, tf_float c, tf_float d);


// Runtime support
Expand Down
51 changes: 26 additions & 25 deletions compiler-rt/lib/builtins/divtc3.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,44 +12,45 @@

#define QUAD_PRECISION
#include "fp_lib.h"
#include "int_lib.h"
#include "int_math.h"

#if defined(CRT_HAS_TF_MODE)

// Returns: the quotient of (a + ib) / (c + id)

COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b,
long double __c, long double __d) {
COMPILER_RT_ABI Qcomplex __divtc3(fp_t __a, fp_t __b, fp_t __c, fp_t __d) {
int __ilogbw = 0;
long double __logbw =
__compiler_rt_logbl(__compiler_rt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
fp_t __logbw = __compiler_rt_logbtf(
__compiler_rt_fmaxtf(crt_fabstf(__c), crt_fabstf(__d)));
if (crt_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = __compiler_rt_scalbnl(__c, -__ilogbw);
__d = __compiler_rt_scalbnl(__d, -__ilogbw);
__c = __compiler_rt_scalbntf(__c, -__ilogbw);
__d = __compiler_rt_scalbntf(__d, -__ilogbw);
}
long double __denom = __c * __c + __d * __d;
Lcomplex z;
COMPLEX_REAL(z) =
__compiler_rt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEX_IMAGINARY(z) =
__compiler_rt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) {
fp_t __denom = __c * __c + __d * __d;
Qcomplex z;
COMPLEXTF_REAL(z) =
__compiler_rt_scalbntf((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEXTF_IMAGINARY(z) =
__compiler_rt_scalbntf((__b * __c - __a * __d) / __denom, -__ilogbw);
if (crt_isnan(COMPLEXTF_REAL(z)) && crt_isnan(COMPLEXTF_IMAGINARY(z))) {
if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
COMPLEXTF_REAL(z) = crt_copysigntf(CRT_INFINITY, __c) * __a;
COMPLEXTF_IMAGINARY(z) = crt_copysigntf(CRT_INFINITY, __c) * __b;
} else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
crt_isfinite(__d)) {
__a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
__b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
__a = crt_copysigntf(crt_isinf(__a) ? (fp_t)1.0 : (fp_t)0.0, __a);
__b = crt_copysigntf(crt_isinf(__b) ? (fp_t)1.0 : (fp_t)0.0, __b);
COMPLEXTF_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
COMPLEXTF_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
} else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) &&
crt_isfinite(__b)) {
__c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
__d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
__c = crt_copysigntf(crt_isinf(__c) ? (fp_t)1.0 : (fp_t)0.0, __c);
__d = crt_copysigntf(crt_isinf(__d) ? (fp_t)1.0 : (fp_t)0.0, __d);
COMPLEXTF_REAL(z) = 0.0 * (__a * __c + __b * __d);
COMPLEXTF_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
}
}
return z;
}

#endif
4 changes: 1 addition & 3 deletions compiler-rt/lib/builtins/extenddftf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#define DST_QUAD
#include "fp_extend_impl.inc"

COMPILER_RT_ABI fp_t __extenddftf2(double a) {
return __extendXfYf2__(a);
}
COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); }

#endif
4 changes: 1 addition & 3 deletions compiler-rt/lib/builtins/extendhftf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
#define DST_QUAD
#include "fp_extend_impl.inc"

COMPILER_RT_ABI long double __extendhftf2(_Float16 a) {
return __extendXfYf2__(a);
}
COMPILER_RT_ABI dst_t __extendhftf2(src_t a) { return __extendXfYf2__(a); }

#endif
4 changes: 1 addition & 3 deletions compiler-rt/lib/builtins/extendsftf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#define DST_QUAD
#include "fp_extend_impl.inc"

COMPILER_RT_ABI fp_t __extendsftf2(float a) {
return __extendXfYf2__(a);
}
COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); }

#endif
9 changes: 5 additions & 4 deletions compiler-rt/lib/builtins/extendxftf2.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@
// Assumption: long double is a IEEE 80 bit floating point type padded to 128
// bits.

// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) && \
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
#define QUAD_PRECISION
#include "fp_lib.h"

#if defined(CRT_HAS_TF_MODE) && __LDBL_MANT_DIG__ == 64 && defined(__x86_64__)
#define SRC_80
#define DST_QUAD
#include "fp_extend_impl.inc"

COMPILER_RT_ABI __float128 __extendxftf2(long double a) {
COMPILER_RT_ABI tf_float __extendxftf2(long double a) {
return __extendXfYf2__(a);
}

Expand Down
8 changes: 1 addition & 7 deletions compiler-rt/lib/builtins/fp_extend.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,7 @@ static const int dstSigFracBits = 52;
static const int dstExpBits = 11;

#elif defined DST_QUAD
// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
#if __LDBL_MANT_DIG__ == 113
typedef long double dst_t;
#elif defined(__x86_64__) && \
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
typedef __float128 dst_t;
#endif
typedef tf_float dst_t;
typedef __uint128_t dst_rep_t;
#define DST_REP_C (__uint128_t)
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
Expand Down
50 changes: 26 additions & 24 deletions compiler-rt/lib/builtins/fp_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,11 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);

#elif defined QUAD_PRECISION
#if __LDBL_MANT_DIG__ == 113 && defined(__SIZEOF_INT128__)
// TODO: Availability of the *tf functions should not depend on long double
// being IEEE 128, but instead on being able to use a 128-bit floating-point
// type, which includes __float128.
// Right now this (incorrectly) stops the builtins from being used for x86.
#define CRT_LDBL_128BIT
#define CRT_HAS_TF_MODE
#define TF_C(c) c##L
#if defined(CRT_HAS_TF_MODE)
typedef uint64_t half_rep_t;
typedef __uint128_t rep_t;
typedef __int128_t srep_t;
typedef long double fp_t;
typedef tf_float fp_t;
#define HALF_REP_C UINT64_C
#define REP_C (__uint128_t)
// Note: Since there is no explicit way to tell compiler the constant is a
Expand Down Expand Up @@ -207,13 +200,13 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
#undef Word_HiMask
#undef Word_LoMask
#undef Word_FullMask
#endif // __LDBL_MANT_DIG__ == 113 && __SIZEOF_INT128__
#endif // defined(CRT_HAS_TF_MODE)
#else
#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
#endif

#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \
defined(CRT_LDBL_128BIT)
(defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE))
#define typeWidth (sizeof(rep_t) * CHAR_BIT)
#define exponentBits (typeWidth - significandBits - 1)
#define maxExponent ((1 << exponentBits) - 1)
Expand Down Expand Up @@ -393,31 +386,40 @@ static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) {
#endif
}

#elif defined(QUAD_PRECISION)

#if defined(CRT_LDBL_128BIT)
static __inline fp_t __compiler_rt_logbl(fp_t x) {
#elif defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE)
// The generic implementation only works for ieee754 floating point. For other
// floating point types, continue to rely on the libm implementation for now.
#if defined(CRT_HAS_IEEE_TF)
static __inline tf_float __compiler_rt_logbtf(tf_float x) {
return __compiler_rt_logbX(x);
}
static __inline fp_t __compiler_rt_scalbnl(fp_t x, int y) {
static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y) {
return __compiler_rt_scalbnX(x, y);
}
static __inline fp_t __compiler_rt_fmaxl(fp_t x, fp_t y) {
static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) {
return __compiler_rt_fmaxX(x, y);
}
#else
// The generic implementation only works for ieee754 floating point. For other
// floating point types, continue to rely on the libm implementation for now.
static __inline long double __compiler_rt_logbl(long double x) {
#define __compiler_rt_logbl __compiler_rt_logbtf
#define __compiler_rt_scalbnl __compiler_rt_scalbntf
#define __compiler_rt_fmaxl __compiler_rt_fmaxtf
#define crt_fabstf crt_fabsf128
#define crt_copysigntf crt_copysignf128
#elif defined(CRT_LDBL_128BIT)
static __inline tf_float __compiler_rt_logbtf(tf_float x) {
return crt_logbl(x);
}
static __inline long double __compiler_rt_scalbnl(long double x, int y) {
static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y) {
return crt_scalbnl(x, y);
}
static __inline long double __compiler_rt_fmaxl(long double x, long double y) {
static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) {
return crt_fmaxl(x, y);
}
#endif // CRT_LDBL_128BIT
#define __compiler_rt_logbl crt_logbl
#define __compiler_rt_scalbnl crt_scalbnl
#define __compiler_rt_fmaxl crt_fmaxl
#else
#error Unsupported TF mode type
#endif

#endif // *_PRECISION

Expand Down
8 changes: 1 addition & 7 deletions compiler-rt/lib/builtins/fp_trunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,7 @@ static const int srcSigFracBits = 52;
static const int srcExpBits = 11;

#elif defined SRC_QUAD
// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
#if __LDBL_MANT_DIG__ == 113
typedef long double src_t;
#elif defined(__x86_64__) && \
(defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
typedef __float128 src_t;
#endif
typedef tf_float src_t;
typedef __uint128_t src_rep_t;
#define SRC_REP_C (__uint128_t)
static const int srcBits = sizeof(src_t) * CHAR_BIT;
Expand Down
10 changes: 10 additions & 0 deletions compiler-rt/lib/builtins/int_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@
#define crt_copysign(x, y) __builtin_copysign((x), (y))
#define crt_copysignf(x, y) __builtin_copysignf((x), (y))
#define crt_copysignl(x, y) __builtin_copysignl((x), (y))
#if __has_builtin(__builtin_copysignf128)
#define crt_copysignf128(x, y) __builtin_copysignf128((x), (y))
#elif __has_builtin(__builtin_copysignq) || (defined(__GNUC__) && __GNUC__ >= 7)
#define crt_copysignf128(x, y) __builtin_copysignq((x), (y))
#endif
#endif

#if defined(_MSC_VER) && !defined(__clang__)
Expand All @@ -75,6 +80,11 @@
#define crt_fabs(x) __builtin_fabs((x))
#define crt_fabsf(x) __builtin_fabsf((x))
#define crt_fabsl(x) __builtin_fabsl((x))
#if __has_builtin(__builtin_fabsf128)
#define crt_fabsf128(x) __builtin_fabsf128((x))
#elif __has_builtin(__builtin_fabsq) || (defined(__GNUC__) && __GNUC__ >= 7)
#define crt_fabsf128(x) __builtin_fabsq((x))
#endif
#endif

#if defined(_MSC_VER) && !defined(__clang__)
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/builtins/int_to_fp.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ typedef uint64_t dst_rep_t;
static const int dstSigBits = 52;

#elif defined DST_QUAD
typedef long double dst_t;
typedef tf_float dst_t;
typedef __uint128_t dst_rep_t;
#define DST_REP_C (__uint128_t)
static const int dstSigBits = 112;
Expand Down
Loading