/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <fenv.h>
#include <float.h>
#include <math.h>

#include "math_private.h"

/*
 * A struct dd represents a floating-point number with twice the precision
 * of a double. We maintain the invariant that "hi" stores the 53 high-order
 * bits of the result.
 */
struct dd {
        double hi;
        double lo;
};

/*
 * Compute a+b exactly, returning the exact result in a struct dd. We assume
 * that both a and b are finite, but make no assumptions about their relative
 * magnitudes.
 */
static inline struct dd
dd_add(double a, double b)
{
        struct dd ret;
        double s;

        ret.hi = a + b;
        s = ret.hi - a;
        ret.lo = (a - (ret.hi - s)) + (b - s);
        return (ret);
}

/*
 * Compute a+b, with a small tweak: The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding. This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent. For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *      J. Coonen. An Implementation Guide to a Proposed Standard for
 *      Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
 */
static inline double
add_adjusted(double a, double b)
{
        struct dd sum;
        uint64_t hibits, lobits;

        sum = dd_add(a, b);
        if (sum.lo != 0) {
                EXTRACT_WORD64(hibits, sum.hi);
                if ((hibits & 1) == 0) {
                        /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
                        EXTRACT_WORD64(lobits, sum.lo);
                        hibits += 1 - ((hibits ^ lobits) >> 62);
                        INSERT_WORD64(sum.hi, hibits);
                }
        }
        return (sum.hi);
}
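
/*
 * For example, add_adjusted(1.0, 0x1p-70) returns 0x1.0000000000001p0:
 * a plain addition would round to 1.0 and silently discard the 2^-70
 * term, but the adjustment above records it in the (otherwise even)
 * low-order bit, so a later round-to-nearest-even tie against a number
 * with a larger exponent is broken in the correct direction.
 */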

/*
 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 */
static inline double
add_and_denormalize(double a, double b, int scale)
{
        struct dd sum;
        uint64_t hibits, lobits;
        int bits_lost;

        sum = dd_add(a, b);

        /*
         * If we are losing at least two bits of accuracy to denormalization,
         * then the first lost bit becomes a round bit, and we adjust the
         * lowest bit of sum.hi to make it a sticky bit summarizing all the
         * bits in sum.lo. With the sticky bit adjusted, the hardware will
         * break any ties in the correct direction.
         *
         * If we are losing only one bit to denormalization, however, we must
         * break the ties manually.
         */
        if (sum.lo != 0) {
                EXTRACT_WORD64(hibits, sum.hi);
                bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
                if ((bits_lost != 1) ^ (int)(hibits & 1)) {
                        /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
                        EXTRACT_WORD64(lobits, sum.lo);
                        hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
                        INSERT_WORD64(sum.hi, hibits);
                }
        }
        return (ldexp(sum.hi, scale));
}

/*
 * Compute a*b exactly, returning the exact result in a struct dd. We assume
 * that both a and b are normalized, so no underflow or overflow will occur.
 * The current rounding mode must be round-to-nearest.
 */
static inline struct dd
dd_mul(double a, double b)
{
        static const double split = 0x1p27 + 1.0;
        struct dd ret;
        double ha, hb, la, lb, p, q;

        p = a * split;
        ha = a - p;
        ha += p;
        la = a - ha;

        p = b * split;
        hb = b - p;
        hb += p;
        lb = b - hb;

        p = ha * hb;
        q = ha * lb + la * hb;

        ret.hi = p + q;
        ret.lo = p - ret.hi + q + la * lb;
        return (ret);
}
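
/*
 * For instance, dd_mul(1.0 + 0x1p-30, 1.0 - 0x1p-30) returns hi = 1.0
 * and lo = -0x1p-60: the exact product 1 - 2^-60 does not fit in a
 * double, so the part rounded away in hi is recovered exactly in lo.
 */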

/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *      Dekker, T. A Floating-Point Technique for Extending the
 *      Available Precision. Numer. Math. 18, 224-242 (1971).
 *
 * This algorithm is sensitive to the rounding precision. FPUs such
 * as the i387 must be set in double-precision mode if variables are
 * to be stored in FP registers in order to avoid incorrect results.
 * This is the default on FreeBSD, but not on many other systems.
 *
 * Hardware instructions should be used on architectures that support
 * fused multiply-add, since this implementation will likely be several
 * times slower.
 */
double
fma(double x, double y, double z)
{
        double xs, ys, zs, adj;
        struct dd xy, r;
        int oround;
        int ex, ey, ez;
        int spread;

        /*
         * Handle special cases. The order of operations and the particular
         * return values here are crucial in handling special cases involving
         * infinities, NaNs, overflows, and signed zeroes correctly.
         */
        if (x == 0.0 || y == 0.0)
                return (x * y + z);
        if (z == 0.0)
                return (x * y);
        if (!isfinite(x) || !isfinite(y))
                return (x * y + z);
        if (!isfinite(z))
                return (z);

        xs = frexp(x, &ex);
        ys = frexp(y, &ey);
        zs = frexp(z, &ez);
        oround = fegetround();
        spread = ex + ey - ez;

        /*
         * If x * y and z are many orders of magnitude apart, the scaling
         * will overflow, so we handle these cases specially. Rounding
         * modes other than FE_TONEAREST are painful.
         */
        if (spread < -DBL_MANT_DIG) {
                feraiseexcept(FE_INEXACT);
                if (!isnormal(z))
                        feraiseexcept(FE_UNDERFLOW);
                switch (oround) {
                case FE_TONEAREST:
                        return (z);
                case FE_TOWARDZERO:
                        if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
                                return (z);
                        else
                                return (nextafter(z, 0));
                case FE_DOWNWARD:
                        if (x > 0.0 ^ y < 0.0)
                                return (z);
                        else
                                return (nextafter(z, -INFINITY));
                default:        /* FE_UPWARD */
                        if (x > 0.0 ^ y < 0.0)
                                return (nextafter(z, INFINITY));
                        else
                                return (z);
                }
        }
        if (spread <= DBL_MANT_DIG * 2)
                zs = ldexp(zs, -spread);
        else
                zs = copysign(DBL_MIN, zs);

        fesetround(FE_TONEAREST);
        /* work around clang bug 8100 */
        volatile double vxs = xs;

        /*
         * Basic approach for round-to-nearest:
         *
         *     (xy.hi, xy.lo) = x * y           (exact)
         *     (r.hi,  r.lo)  = xy.hi + z       (exact)
         *     adj = xy.lo + r.lo               (inexact; low bit is sticky)
         *     result = r.hi + adj              (correctly rounded)
         */
        xy = dd_mul(vxs, ys);
        r = dd_add(xy.hi, zs);

        spread = ex + ey;

        if (r.hi == 0.0) {
                /*
                 * When the addends cancel to 0, ensure that the result has
                 * the correct sign.
                 */
                fesetround(oround);
                volatile double vzs = zs; /* XXX gcc CSE bug workaround */
                return (xy.hi + vzs + ldexp(xy.lo, spread));
        }

        if (oround != FE_TONEAREST) {
                /*
                 * There is no need to worry about double rounding in directed
                 * rounding modes.
                 */
                fesetround(oround);
                /* work around clang bug 8100 */
                volatile double vrlo = r.lo;
                adj = vrlo + xy.lo;
                return (ldexp(r.hi + adj, spread));
        }

        adj = add_adjusted(r.lo, xy.lo);
        if (spread + ilogb(r.hi) > -1023)
                return (ldexp(r.hi + adj, spread));
        else
                return (add_and_denormalize(r.hi, adj, spread));
}

#if (LDBL_MANT_DIG == 53)
__weak_reference(fma, fmal);
#endif
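
/*
 * Illustrative sketch (not part of the original source): a tiny driver
 * showing the single-rounding guarantee provided above. With
 * a = 1 + 2^-52, the exact product a*a = 1 + 2^-51 + 2^-104 does not
 * fit in a double, so the plain expression a * a + c loses the 2^-104
 * term while fma() keeps it. The FMA_EXAMPLE guard is hypothetical and
 * off by default; building it assumes the usual msun environment that
 * already compiles this file.
 */
#ifdef FMA_EXAMPLE
#include <stdio.h>

int
main(void)
{
        double a = 1.0 + 0x1p-52;       /* smallest double greater than 1.0 */
        double c = -(1.0 + 0x1p-51);    /* the negated, rounded value of a * a */

        /* May print 0x1p-104 instead if the compiler contracts it to an fma. */
        printf("a * a + c    = %a\n", a * a + c);       /* expected: 0x0p+0 */
        printf("fma(a, a, c) = %a\n", fma(a, a, c));    /* expected: 0x1p-104 */
        return (0);
}
#endif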