1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include <fenv.h> 31 #include <float.h> 32 #include <math.h> 33 34 #include "fpmath.h" 35 36 /* 37 * A struct dd represents a floating-point number with twice the precision 38 * of a long double. We maintain the invariant that "hi" stores the high-order 39 * bits of the result. 40 */ 41 struct dd { 42 long double hi; 43 long double lo; 44 }; 45 46 /* 47 * Compute a+b exactly, returning the exact result in a struct dd. We assume 48 * that both a and b are finite, but make no assumptions about their relative 49 * magnitudes. 50 */ 51 static inline struct dd 52 dd_add(long double a, long double b) 53 { 54 struct dd ret; 55 long double s; 56 57 ret.hi = a + b; 58 s = ret.hi - a; 59 ret.lo = (a - (ret.hi - s)) + (b - s); 60 return (ret); 61 } 62 63 /* 64 * Compute a+b, with a small tweak: The least significant bit of the 65 * result is adjusted into a sticky bit summarizing all the bits that 66 * were lost to rounding. This adjustment negates the effects of double 67 * rounding when the result is added to another number with a higher 68 * exponent. For an explanation of round and sticky bits, see any reference 69 * on FPU design, e.g., 70 * 71 * J. Coonen. An Implementation Guide to a Proposed Standard for 72 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. 73 */ 74 static inline long double 75 add_adjusted(long double a, long double b) 76 { 77 struct dd sum; 78 union IEEEl2bits u; 79 80 sum = dd_add(a, b); 81 if (sum.lo != 0) { 82 u.e = sum.hi; 83 if ((u.bits.manl & 1) == 0) 84 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 85 } 86 return (sum.hi); 87 } 88 89 /* 90 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed 91 * that the result will be subnormal, and care is taken to ensure that 92 * double rounding does not occur. 93 */ 94 static inline long double 95 add_and_denormalize(long double a, long double b, int scale) 96 { 97 struct dd sum; 98 int bits_lost; 99 union IEEEl2bits u; 100 101 sum = dd_add(a, b); 102 103 /* 104 * If we are losing at least two bits of accuracy to denormalization, 105 * then the first lost bit becomes a round bit, and we adjust the 106 * lowest bit of sum.hi to make it a sticky bit summarizing all the 107 * bits in sum.lo. With the sticky bit adjusted, the hardware will 108 * break any ties in the correct direction. 109 * 110 * If we are losing only one bit to denormalization, however, we must 111 * break the ties manually. 112 */ 113 if (sum.lo != 0) { 114 u.e = sum.hi; 115 bits_lost = -u.bits.exp - scale + 1; 116 if ((bits_lost != 1) ^ (int)(u.bits.manl & 1)) 117 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 118 } 119 return (ldexp(sum.hi, scale)); 120 } 121 122 /* 123 * Compute a*b exactly, returning the exact result in a struct dd. We assume 124 * that both a and b are normalized, so no underflow or overflow will occur. 125 * The current rounding mode must be round-to-nearest. 126 */ 127 static inline struct dd 128 dd_mul(long double a, long double b) 129 { 130 #if LDBL_MANT_DIG == 64 131 static const long double split = 0x1p32L + 1.0; 132 #elif LDBL_MANT_DIG == 113 133 static const long double split = 0x1p57L + 1.0; 134 #endif 135 struct dd ret; 136 long double ha, hb, la, lb, p, q; 137 138 p = a * split; 139 ha = a - p; 140 ha += p; 141 la = a - ha; 142 143 p = b * split; 144 hb = b - p; 145 hb += p; 146 lb = b - hb; 147 148 p = ha * hb; 149 q = ha * lb + la * hb; 150 151 ret.hi = p + q; 152 ret.lo = p - ret.hi + q + la * lb; 153 return (ret); 154 } 155 156 /* 157 * Fused multiply-add: Compute x * y + z with a single rounding error. 158 * 159 * We use scaling to avoid overflow/underflow, along with the 160 * canonical precision-doubling technique adapted from: 161 * 162 * Dekker, T. A Floating-Point Technique for Extending the 163 * Available Precision. Numer. Math. 18, 224-242 (1971). 164 */ 165 long double 166 fmal(long double x, long double y, long double z) 167 { 168 long double xs, ys, zs, adj; 169 struct dd xy, r; 170 int oround; 171 int ex, ey, ez; 172 int spread; 173 174 /* 175 * Handle special cases. The order of operations and the particular 176 * return values here are crucial in handling special cases involving 177 * infinities, NaNs, overflows, and signed zeroes correctly. 178 */ 179 if (x == 0.0 || y == 0.0) 180 return (x * y + z); 181 if (z == 0.0) 182 return (x * y); 183 if (!isfinite(x) || !isfinite(y)) 184 return (x * y + z); 185 if (!isfinite(z)) 186 return (z); 187 188 xs = frexpl(x, &ex); 189 ys = frexpl(y, &ey); 190 zs = frexpl(z, &ez); 191 oround = fegetround(); 192 spread = ex + ey - ez; 193 194 /* 195 * If x * y and z are many orders of magnitude apart, the scaling 196 * will overflow, so we handle these cases specially. Rounding 197 * modes other than FE_TONEAREST are painful. 198 */ 199 if (spread < -LDBL_MANT_DIG) { 200 feraiseexcept(FE_INEXACT); 201 if (!isnormal(z)) 202 feraiseexcept(FE_UNDERFLOW); 203 switch (oround) { 204 case FE_TONEAREST: 205 return (z); 206 case FE_TOWARDZERO: 207 if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 208 return (z); 209 else 210 return (nextafterl(z, 0)); 211 case FE_DOWNWARD: 212 if (x > 0.0 ^ y < 0.0) 213 return (z); 214 else 215 return (nextafterl(z, -INFINITY)); 216 default: /* FE_UPWARD */ 217 if (x > 0.0 ^ y < 0.0) 218 return (nextafterl(z, INFINITY)); 219 else 220 return (z); 221 } 222 } 223 if (spread <= LDBL_MANT_DIG * 2) 224 zs = ldexpl(zs, -spread); 225 else 226 zs = copysignl(LDBL_MIN, zs); 227 228 fesetround(FE_TONEAREST); 229 /* work around clang bug 8100 */ 230 volatile long double vxs = xs; 231 232 /* 233 * Basic approach for round-to-nearest: 234 * 235 * (xy.hi, xy.lo) = x * y (exact) 236 * (r.hi, r.lo) = xy.hi + z (exact) 237 * adj = xy.lo + r.lo (inexact; low bit is sticky) 238 * result = r.hi + adj (correctly rounded) 239 */ 240 xy = dd_mul(vxs, ys); 241 r = dd_add(xy.hi, zs); 242 243 spread = ex + ey; 244 245 if (r.hi == 0.0) { 246 /* 247 * When the addends cancel to 0, ensure that the result has 248 * the correct sign. 249 */ 250 fesetround(oround); 251 volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ 252 return (xy.hi + vzs + ldexpl(xy.lo, spread)); 253 } 254 255 if (oround != FE_TONEAREST) { 256 /* 257 * There is no need to worry about double rounding in directed 258 * rounding modes. 259 */ 260 fesetround(oround); 261 /* work around clang bug 8100 */ 262 volatile long double vrlo = r.lo; 263 adj = vrlo + xy.lo; 264 return (ldexpl(r.hi + adj, spread)); 265 } 266 267 adj = add_adjusted(r.lo, xy.lo); 268 if (spread + ilogbl(r.hi) > -16383) 269 return (ldexpl(r.hi + adj, spread)); 270 else 271 return (add_and_denormalize(r.hi, adj, spread)); 272 } 273