1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <fenv.h> 30 #include <float.h> 31 #include <math.h> 32 33 #include "fpmath.h" 34 35 /* 36 * A struct dd represents a floating-point number with twice the precision 37 * of a long double. We maintain the invariant that "hi" stores the high-order 38 * bits of the result. 39 */ 40 struct dd { 41 long double hi; 42 long double lo; 43 }; 44 45 /* 46 * Compute a+b exactly, returning the exact result in a struct dd. We assume 47 * that both a and b are finite, but make no assumptions about their relative 48 * magnitudes. 49 */ 50 static inline struct dd 51 dd_add(long double a, long double b) 52 { 53 struct dd ret; 54 long double s; 55 56 ret.hi = a + b; 57 s = ret.hi - a; 58 ret.lo = (a - (ret.hi - s)) + (b - s); 59 return (ret); 60 } 61 62 /* 63 * Compute a+b, with a small tweak: The least significant bit of the 64 * result is adjusted into a sticky bit summarizing all the bits that 65 * were lost to rounding. This adjustment negates the effects of double 66 * rounding when the result is added to another number with a higher 67 * exponent. For an explanation of round and sticky bits, see any reference 68 * on FPU design, e.g., 69 * 70 * J. Coonen. An Implementation Guide to a Proposed Standard for 71 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. 72 */ 73 static inline long double 74 add_adjusted(long double a, long double b) 75 { 76 struct dd sum; 77 union IEEEl2bits u; 78 79 sum = dd_add(a, b); 80 if (sum.lo != 0) { 81 u.e = sum.hi; 82 if ((u.bits.manl & 1) == 0) 83 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 84 } 85 return (sum.hi); 86 } 87 88 /* 89 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed 90 * that the result will be subnormal, and care is taken to ensure that 91 * double rounding does not occur. 92 */ 93 static inline long double 94 add_and_denormalize(long double a, long double b, int scale) 95 { 96 struct dd sum; 97 int bits_lost; 98 union IEEEl2bits u; 99 100 sum = dd_add(a, b); 101 102 /* 103 * If we are losing at least two bits of accuracy to denormalization, 104 * then the first lost bit becomes a round bit, and we adjust the 105 * lowest bit of sum.hi to make it a sticky bit summarizing all the 106 * bits in sum.lo. With the sticky bit adjusted, the hardware will 107 * break any ties in the correct direction. 108 * 109 * If we are losing only one bit to denormalization, however, we must 110 * break the ties manually. 111 */ 112 if (sum.lo != 0) { 113 u.e = sum.hi; 114 bits_lost = -u.bits.exp - scale + 1; 115 if ((bits_lost != 1) ^ (int)(u.bits.manl & 1)) 116 sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 117 } 118 return (ldexp(sum.hi, scale)); 119 } 120 121 /* 122 * Compute a*b exactly, returning the exact result in a struct dd. We assume 123 * that both a and b are normalized, so no underflow or overflow will occur. 124 * The current rounding mode must be round-to-nearest. 125 */ 126 static inline struct dd 127 dd_mul(long double a, long double b) 128 { 129 #if LDBL_MANT_DIG == 64 130 static const long double split = 0x1p32L + 1.0; 131 #elif LDBL_MANT_DIG == 113 132 static const long double split = 0x1p57L + 1.0; 133 #endif 134 struct dd ret; 135 long double ha, hb, la, lb, p, q; 136 137 p = a * split; 138 ha = a - p; 139 ha += p; 140 la = a - ha; 141 142 p = b * split; 143 hb = b - p; 144 hb += p; 145 lb = b - hb; 146 147 p = ha * hb; 148 q = ha * lb + la * hb; 149 150 ret.hi = p + q; 151 ret.lo = p - ret.hi + q + la * lb; 152 return (ret); 153 } 154 155 /* 156 * Fused multiply-add: Compute x * y + z with a single rounding error. 157 * 158 * We use scaling to avoid overflow/underflow, along with the 159 * canonical precision-doubling technique adapted from: 160 * 161 * Dekker, T. A Floating-Point Technique for Extending the 162 * Available Precision. Numer. Math. 18, 224-242 (1971). 163 */ 164 long double 165 fmal(long double x, long double y, long double z) 166 { 167 long double xs, ys, zs, adj; 168 struct dd xy, r; 169 int oround; 170 int ex, ey, ez; 171 int spread; 172 173 /* 174 * Handle special cases. The order of operations and the particular 175 * return values here are crucial in handling special cases involving 176 * infinities, NaNs, overflows, and signed zeroes correctly. 177 */ 178 if (x == 0.0 || y == 0.0) 179 return (x * y + z); 180 if (z == 0.0) 181 return (x * y); 182 if (!isfinite(x) || !isfinite(y)) 183 return (x * y + z); 184 if (!isfinite(z)) 185 return (z); 186 187 xs = frexpl(x, &ex); 188 ys = frexpl(y, &ey); 189 zs = frexpl(z, &ez); 190 oround = fegetround(); 191 spread = ex + ey - ez; 192 193 /* 194 * If x * y and z are many orders of magnitude apart, the scaling 195 * will overflow, so we handle these cases specially. Rounding 196 * modes other than FE_TONEAREST are painful. 197 */ 198 if (spread < -LDBL_MANT_DIG) { 199 feraiseexcept(FE_INEXACT); 200 if (!isnormal(z)) 201 feraiseexcept(FE_UNDERFLOW); 202 switch (oround) { 203 case FE_TONEAREST: 204 return (z); 205 case FE_TOWARDZERO: 206 if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0)) 207 return (z); 208 else 209 return (nextafterl(z, 0)); 210 case FE_DOWNWARD: 211 if ((x > 0.0) ^ (y < 0.0)) 212 return (z); 213 else 214 return (nextafterl(z, -INFINITY)); 215 default: /* FE_UPWARD */ 216 if ((x > 0.0) ^ (y < 0.0)) 217 return (nextafterl(z, INFINITY)); 218 else 219 return (z); 220 } 221 } 222 if (spread <= LDBL_MANT_DIG * 2) 223 zs = ldexpl(zs, -spread); 224 else 225 zs = copysignl(LDBL_MIN, zs); 226 227 fesetround(FE_TONEAREST); 228 /* work around clang issue #8472 */ 229 volatile long double vxs = xs; 230 231 /* 232 * Basic approach for round-to-nearest: 233 * 234 * (xy.hi, xy.lo) = x * y (exact) 235 * (r.hi, r.lo) = xy.hi + z (exact) 236 * adj = xy.lo + r.lo (inexact; low bit is sticky) 237 * result = r.hi + adj (correctly rounded) 238 */ 239 xy = dd_mul(vxs, ys); 240 r = dd_add(xy.hi, zs); 241 242 spread = ex + ey; 243 244 if (r.hi == 0.0) { 245 /* 246 * When the addends cancel to 0, ensure that the result has 247 * the correct sign. 248 */ 249 fesetround(oround); 250 volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ 251 return (xy.hi + vzs + ldexpl(xy.lo, spread)); 252 } 253 254 if (oround != FE_TONEAREST) { 255 /* 256 * There is no need to worry about double rounding in directed 257 * rounding modes. 258 */ 259 fesetround(oround); 260 /* work around clang issue #8472 */ 261 volatile long double vrlo = r.lo; 262 adj = vrlo + xy.lo; 263 return (ldexpl(r.hi + adj, spread)); 264 } 265 266 adj = add_adjusted(r.lo, xy.lo); 267 if (spread + ilogbl(r.hi) > -16383) 268 return (ldexpl(r.hi + adj, spread)); 269 else 270 return (add_and_denormalize(r.hi, adj, spread)); 271 } 272