/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <fenv.h>
#include <float.h>
#include <math.h>

#include "math_private.h"

#ifdef USE_BUILTIN_FMA
double
fma(double x, double y, double z)
{
	return (__builtin_fma(x, y, z));
}
#else
/*
 * A struct dd represents a floating-point number with twice the precision
 * of a double.  We maintain the invariant that "hi" stores the 53 high-order
 * bits of the result.
 */
struct dd {
	double hi;
	double lo;
};

/*
 * Compute a+b exactly, returning the exact result in a struct dd.  We assume
 * that both a and b are finite, but make no assumptions about their relative
 * magnitudes.
 */
static inline struct dd
dd_add(double a, double b)
{
	struct dd ret;
	double s;

	ret.hi = a + b;
	s = ret.hi - a;
	ret.lo = (a - (ret.hi - s)) + (b - s);
	return (ret);
}
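
/*
 * A quick worked example of the two-sum above (illustrative only): with
 * a = 1.0 and b = 0x1p-60, the rounded sum is ret.hi = 1.0 and the computed
 * error term is ret.lo = 0x1p-60, so ret.hi + ret.lo recovers a + b exactly
 * even though the addition itself rounded b away.
 */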

/*
 * Compute a+b, with a small tweak:  The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *	J. Coonen.  An Implementation Guide to a Proposed Standard for
 *	Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
 */
static inline double
add_adjusted(double a, double b)
{
	struct dd sum;
	uint64_t hibits, lobits;

	sum = dd_add(a, b);
	if (sum.lo != 0) {
		EXTRACT_WORD64(hibits, sum.hi);
		if ((hibits & 1) == 0) {
			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
			EXTRACT_WORD64(lobits, sum.lo);
			hibits += 1 - ((hibits ^ lobits) >> 62);
			INSERT_WORD64(sum.hi, hibits);
		}
	}
	return (sum.hi);
}

/*
 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 */
static inline double
add_and_denormalize(double a, double b, int scale)
{
	struct dd sum;
	uint64_t hibits, lobits;
	int bits_lost;

	sum = dd_add(a, b);

	/*
	 * If we are losing at least two bits of accuracy to denormalization,
	 * then the first lost bit becomes a round bit, and we adjust the
	 * lowest bit of sum.hi to make it a sticky bit summarizing all the
	 * bits in sum.lo. With the sticky bit adjusted, the hardware will
	 * break any ties in the correct direction.
	 *
	 * If we are losing only one bit to denormalization, however, we must
	 * break the ties manually.
	 */
	if (sum.lo != 0) {
		EXTRACT_WORD64(hibits, sum.hi);
		bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
		if ((bits_lost != 1) ^ (int)(hibits & 1)) {
			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
			EXTRACT_WORD64(lobits, sum.lo);
			hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
			INSERT_WORD64(sum.hi, hibits);
		}
	}
	return (ldexp(sum.hi, scale));
}

/*
 * Compute a*b exactly, returning the exact result in a struct dd.  We assume
 * that both a and b are normalized, so no underflow or overflow will occur.
 * The current rounding mode must be round-to-nearest.
 */
static inline struct dd
dd_mul(double a, double b)
{
	static const double split = 0x1p27 + 1.0;
	struct dd ret;
	double ha, hb, la, lb, p, q;

	p = a * split;
	ha = a - p;
	ha += p;
	la = a - ha;

	p = b * split;
	hb = b - p;
	hb += p;
	lb = b - hb;

	p = ha * hb;
	q = ha * lb + la * hb;

	ret.hi = p + q;
	ret.lo = p - ret.hi + q + la * lb;
	return (ret);
}
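
/*
 * Note on the splitting above (illustrative only): multiplying by
 * split = 2^27 + 1 and cancelling is Dekker/Veltkamp splitting.  It breaks
 * each 53-bit significand into a high and a low half of roughly half the
 * width, so all four partial products (ha*hb, ha*lb, la*hb, la*lb) are
 * exactly representable in double precision and ret.hi + ret.lo equals
 * a * b without error.  For example, a = 1 + 0x1p-30 splits into ha = 1.0
 * and la = 0x1p-30.
 */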

/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *	Dekker, T.  A Floating-Point Technique for Extending the
 *	Available Precision.  Numer. Math. 18, 224-242 (1971).
 *
 * This algorithm is sensitive to the rounding precision.  FPUs such
 * as the i387 must be set in double-precision mode if variables are
 * to be stored in FP registers in order to avoid incorrect results.
 * This is the default on FreeBSD, but not on many other systems.
 *
 * Hardware instructions should be used on architectures that support it,
 * since this implementation will likely be several times slower.
 */
double
fma(double x, double y, double z)
{
	double xs, ys, zs, adj;
	struct dd xy, r;
	int oround;
	int ex, ey, ez;
	int spread;

	/*
	 * Handle special cases. The order of operations and the particular
	 * return values here are crucial in handling special cases involving
	 * infinities, NaNs, overflows, and signed zeroes correctly.
	 */
	if (x == 0.0 || y == 0.0)
		return (x * y + z);
	if (z == 0.0)
		return (x * y);
	if (!isfinite(x) || !isfinite(y))
		return (x * y + z);
	if (!isfinite(z))
		return (z);

	xs = frexp(x, &ex);
	ys = frexp(y, &ey);
	zs = frexp(z, &ez);
	oround = fegetround();
	spread = ex + ey - ez;

	/*
	 * If x * y and z are many orders of magnitude apart, the scaling
	 * will overflow, so we handle these cases specially.  Rounding
	 * modes other than FE_TONEAREST are painful.
	 */
	if (spread < -DBL_MANT_DIG) {
		feraiseexcept(FE_INEXACT);
		if (!isnormal(z))
			feraiseexcept(FE_UNDERFLOW);
		switch (oround) {
		case FE_TONEAREST:
			return (z);
		case FE_TOWARDZERO:
			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				return (z);
			else
				return (nextafter(z, 0));
		case FE_DOWNWARD:
			if (x > 0.0 ^ y < 0.0)
				return (z);
			else
				return (nextafter(z, -INFINITY));
		default:	/* FE_UPWARD */
			if (x > 0.0 ^ y < 0.0)
				return (nextafter(z, INFINITY));
			else
				return (z);
		}
	}
	if (spread <= DBL_MANT_DIG * 2)
		zs = ldexp(zs, -spread);
	else
		zs = copysign(DBL_MIN, zs);

	fesetround(FE_TONEAREST);
	/* work around clang bug 8100 */
	volatile double vxs = xs;

	/*
	 * Basic approach for round-to-nearest:
	 *
	 *     (xy.hi, xy.lo) = x * y		(exact)
	 *     (r.hi, r.lo)   = xy.hi + z	(exact)
	 *     adj = xy.lo + r.lo		(inexact; low bit is sticky)
	 *     result = r.hi + adj		(correctly rounded)
	 */
	xy = dd_mul(vxs, ys);
	r = dd_add(xy.hi, zs);

	spread = ex + ey;

	if (r.hi == 0.0) {
		/*
		 * When the addends cancel to 0, ensure that the result has
		 * the correct sign.
		 */
		fesetround(oround);
		volatile double vzs = zs; /* XXX gcc CSE bug workaround */
		return (xy.hi + vzs + ldexp(xy.lo, spread));
	}

	if (oround != FE_TONEAREST) {
		/*
		 * There is no need to worry about double rounding in directed
		 * rounding modes.
		 */
		fesetround(oround);
		/* work around clang bug 8100 */
		volatile double vrlo = r.lo;
		adj = vrlo + xy.lo;
		return (ldexp(r.hi + adj, spread));
	}

	adj = add_adjusted(r.lo, xy.lo);
	if (spread + ilogb(r.hi) > -1023)
		return (ldexp(r.hi + adj, spread));
	else
		return (add_and_denormalize(r.hi, adj, spread));
}
#endif /* !USE_BUILTIN_FMA */

#if (LDBL_MANT_DIG == 53)
__weak_reference(fma, fmal);
#endif
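
/*
 * Illustrative, standalone sketch of the single-rounding property (not part
 * of the library; the FMA_DEMO guard is hypothetical and exists only to keep
 * this example out of the build by default).  The product
 * (1 + 0x1p-52) * (1 - 0x1p-53) equals 1 + 0x1p-53 - 0x1p-105 exactly, which
 * rounds to 1.0 in double, so computing x*y first and then adding z = -1.0
 * cancels to zero, while fma() keeps the full product and returns the small
 * nonzero remainder.
 */
#ifdef FMA_DEMO
#include <stdio.h>

int
main(void)
{
	double x = 1.0 + 0x1p-52;
	double y = 1.0 - 0x1p-53;
	double z = -1.0;
	volatile double p = x * y;	/* volatile forces a separate rounding */

	printf("x*y then +z: %a\n", p + z);		/* prints 0x0p+0 */
	printf("fma(x, y, z): %a\n", fma(x, y, z));	/* prints a nonzero value */
	return (0);
}
#endif /* FMA_DEMO */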