1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <fenv.h> 33 #include <float.h> 34 #include <math.h> 35 36 #include "fpmath.h" 37 38 /* 39 * A struct dd represents a floating-point number with twice the precision 40 * of a long double. We maintain the invariant that "hi" stores the high-order 41 * bits of the result. 42 */ 43 struct dd { 44 long double hi; 45 long double lo; 46 }; 47 48 /* 49 * Compute a+b exactly, returning the exact result in a struct dd. We assume 50 * that both a and b are finite, but make no assumptions about their relative 51 * magnitudes. 52 */ 53 static inline struct dd 54 dd_add(long double a, long double b) 55 { 56 struct dd ret; 57 long double s; 58 59 ret.hi = a + b; 60 s = ret.hi - a; 61 ret.lo = (a - (ret.hi - s)) + (b - s); 62 return (ret); 63 } 64 65 /* 66 * Compute a+b, with a small tweak: The least significant bit of the 67 * result is adjusted into a sticky bit summarizing all the bits that 68 * were lost to rounding. This adjustment negates the effects of double 69 * rounding when the result is added to another number with a higher 70 * exponent. For an explanation of round and sticky bits, see any reference 71 * on FPU design, e.g., 72 * 73 * J. Coonen. An Implementation Guide to a Proposed Standard for 74 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. 75 */ 76 static inline long double 77 add_adjusted(long double a, long double b) 78 { 79 struct dd sum; 80 union IEEEl2bits u; 81 82 sum = dd_add(a, b); 83 if (sum.lo != 0) { 84 u.e = sum.hi; 85 if ((u.bits.manl & 1) == 0) 86 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 87 } 88 return (sum.hi); 89 } 90 91 /* 92 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed 93 * that the result will be subnormal, and care is taken to ensure that 94 * double rounding does not occur. 95 */ 96 static inline long double 97 add_and_denormalize(long double a, long double b, int scale) 98 { 99 struct dd sum; 100 int bits_lost; 101 union IEEEl2bits u; 102 103 sum = dd_add(a, b); 104 105 /* 106 * If we are losing at least two bits of accuracy to denormalization, 107 * then the first lost bit becomes a round bit, and we adjust the 108 * lowest bit of sum.hi to make it a sticky bit summarizing all the 109 * bits in sum.lo. With the sticky bit adjusted, the hardware will 110 * break any ties in the correct direction. 111 * 112 * If we are losing only one bit to denormalization, however, we must 113 * break the ties manually. 114 */ 115 if (sum.lo != 0) { 116 u.e = sum.hi; 117 bits_lost = -u.bits.exp - scale + 1; 118 if ((bits_lost != 1) ^ (int)(u.bits.manl & 1)) 119 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 120 } 121 return (ldexp(sum.hi, scale)); 122 } 123 124 /* 125 * Compute a*b exactly, returning the exact result in a struct dd. We assume 126 * that both a and b are normalized, so no underflow or overflow will occur. 127 * The current rounding mode must be round-to-nearest. 128 */ 129 static inline struct dd 130 dd_mul(long double a, long double b) 131 { 132 #if LDBL_MANT_DIG == 64 133 static const long double split = 0x1p32L + 1.0; 134 #elif LDBL_MANT_DIG == 113 135 static const long double split = 0x1p57L + 1.0; 136 #endif 137 struct dd ret; 138 long double ha, hb, la, lb, p, q; 139 140 p = a * split; 141 ha = a - p; 142 ha += p; 143 la = a - ha; 144 145 p = b * split; 146 hb = b - p; 147 hb += p; 148 lb = b - hb; 149 150 p = ha * hb; 151 q = ha * lb + la * hb; 152 153 ret.hi = p + q; 154 ret.lo = p - ret.hi + q + la * lb; 155 return (ret); 156 } 157 158 /* 159 * Fused multiply-add: Compute x * y + z with a single rounding error. 160 * 161 * We use scaling to avoid overflow/underflow, along with the 162 * canonical precision-doubling technique adapted from: 163 * 164 * Dekker, T. A Floating-Point Technique for Extending the 165 * Available Precision. Numer. Math. 18, 224-242 (1971). 166 */ 167 long double 168 fmal(long double x, long double y, long double z) 169 { 170 long double xs, ys, zs, adj; 171 struct dd xy, r; 172 int oround; 173 int ex, ey, ez; 174 int spread; 175 176 /* 177 * Handle special cases. The order of operations and the particular 178 * return values here are crucial in handling special cases involving 179 * infinities, NaNs, overflows, and signed zeroes correctly. 180 */ 181 if (x == 0.0 || y == 0.0) 182 return (x * y + z); 183 if (z == 0.0) 184 return (x * y); 185 if (!isfinite(x) || !isfinite(y)) 186 return (x * y + z); 187 if (!isfinite(z)) 188 return (z); 189 190 xs = frexpl(x, &ex); 191 ys = frexpl(y, &ey); 192 zs = frexpl(z, &ez); 193 oround = fegetround(); 194 spread = ex + ey - ez; 195 196 /* 197 * If x * y and z are many orders of magnitude apart, the scaling 198 * will overflow, so we handle these cases specially. Rounding 199 * modes other than FE_TONEAREST are painful. 200 */ 201 if (spread < -LDBL_MANT_DIG) { 202 feraiseexcept(FE_INEXACT); 203 if (!isnormal(z)) 204 feraiseexcept(FE_UNDERFLOW); 205 switch (oround) { 206 case FE_TONEAREST: 207 return (z); 208 case FE_TOWARDZERO: 209 if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 210 return (z); 211 else 212 return (nextafterl(z, 0)); 213 case FE_DOWNWARD: 214 if (x > 0.0 ^ y < 0.0) 215 return (z); 216 else 217 return (nextafterl(z, -INFINITY)); 218 default: /* FE_UPWARD */ 219 if (x > 0.0 ^ y < 0.0) 220 return (nextafterl(z, INFINITY)); 221 else 222 return (z); 223 } 224 } 225 if (spread <= LDBL_MANT_DIG * 2) 226 zs = ldexpl(zs, -spread); 227 else 228 zs = copysignl(LDBL_MIN, zs); 229 230 fesetround(FE_TONEAREST); 231 /* work around clang bug 8100 */ 232 volatile long double vxs = xs; 233 234 /* 235 * Basic approach for round-to-nearest: 236 * 237 * (xy.hi, xy.lo) = x * y (exact) 238 * (r.hi, r.lo) = xy.hi + z (exact) 239 * adj = xy.lo + r.lo (inexact; low bit is sticky) 240 * result = r.hi + adj (correctly rounded) 241 */ 242 xy = dd_mul(vxs, ys); 243 r = dd_add(xy.hi, zs); 244 245 spread = ex + ey; 246 247 if (r.hi == 0.0) { 248 /* 249 * When the addends cancel to 0, ensure that the result has 250 * the correct sign. 251 */ 252 fesetround(oround); 253 volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ 254 return (xy.hi + vzs + ldexpl(xy.lo, spread)); 255 } 256 257 if (oround != FE_TONEAREST) { 258 /* 259 * There is no need to worry about double rounding in directed 260 * rounding modes. 261 */ 262 fesetround(oround); 263 /* work around clang bug 8100 */ 264 volatile long double vrlo = r.lo; 265 adj = vrlo + xy.lo; 266 return (ldexpl(r.hi + adj, spread)); 267 } 268 269 adj = add_adjusted(r.lo, xy.lo); 270 if (spread + ilogbl(r.hi) > -16383) 271 return (ldexpl(r.hi + adj, spread)); 272 else 273 return (add_and_denormalize(r.hi, adj, spread)); 274 } 275