@@ -2851,6 +2851,8 @@ dl_sum(double a, double b)
2851
2851
return (DoubleLength ) {x , y };
2852
2852
}
2853
2853
2854
+ #ifndef UNRELIABLE_FMA
2855
+
2854
2856
static DoubleLength
2855
2857
dl_mul (double x , double y )
2856
2858
{
@@ -2860,6 +2862,47 @@ dl_mul(double x, double y)
2860
2862
return (DoubleLength ) {z , zz };
2861
2863
}
2862
2864
2865
+ #else
2866
+
2867
+ /*
2868
+ The default implementation of dl_mul() depends on the C math library
2869
+ having an accurate fma() function as required by § 7.12.13.1 of the
2870
+ C99 standard.
2871
+
2872
+ The UNRELIABLE_FMA option is provided as a slower but accurate
2873
+ alternative for builds where the fma() function is found wanting.
2874
+ The speed penalty may be modest (17% slower on an Apple M1 Max),
2875
+ so don't hesitate to enable this build option.
2876
+
2877
+ The algorithms are from the T. J. Dekker paper:
2878
+ A Floating-Point Technique for Extending the Available Precision
2879
+ https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2880
+ */
2881
+
2882
+ static DoubleLength
2883
+ dl_split (double x ) {
2884
+ // Dekker (5.5) and (5.6).
2885
+ double t = x * 134217729.0 ; // Veltkamp constant = 2.0 ** 27 + 1
2886
+ double hi = t - (t - x );
2887
+ double lo = x - hi ;
2888
+ return (DoubleLength ) {hi , lo };
2889
+ }
2890
+
2891
+ static DoubleLength
2892
+ dl_mul (double x , double y )
2893
+ {
2894
+ // Dekker (5.12) and mul12()
2895
+ DoubleLength xx = dl_split (x );
2896
+ DoubleLength yy = dl_split (y );
2897
+ double p = xx .hi * yy .hi ;
2898
+ double q = xx .hi * yy .lo + xx .lo * yy .hi ;
2899
+ double z = p + q ;
2900
+ double zz = p - z + q + xx .lo * yy .lo ;
2901
+ return (DoubleLength ) {z , zz };
2902
+ }
2903
+
2904
+ #endif
2905
+
2863
2906
typedef struct { double hi ; double lo ; double tiny ; } TripleLength ;
2864
2907
2865
2908
static const TripleLength tl_zero = {0.0 , 0.0 , 0.0 };
0 commit comments