/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <fenv.h>
#include <float.h>
#include <math.h>

#include "math_private.h"

#ifdef USE_BUILTIN_FMA
double
fma(double x, double y, double z)
{
        return (__builtin_fma(x, y, z));
}
#else
/*
 * A struct dd represents a floating-point number with twice the precision
 * of a double.  We maintain the invariant that "hi" stores the 53 high-order
 * bits of the result.
 */
struct dd {
        double hi;
        double lo;
};
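
/*
 * For example, the value 2^53 + 1 is too wide for a single double, but it
 * can be held exactly as the pair { .hi = 0x1p53, .lo = 1.0 }.
 */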

/*
 * Compute a+b exactly, returning the exact result in a struct dd.  We assume
 * that both a and b are finite, but make no assumptions about their relative
 * magnitudes.
 */
static inline struct dd
dd_add(double a, double b)
{
        struct dd ret;
        double s;

        ret.hi = a + b;
        s = ret.hi - a;
        ret.lo = (a - (ret.hi - s)) + (b - s);
        return (ret);
}
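
/*
 * For example, dd_add(1.0, 0x1p-60) returns hi = 1.0 (the correctly rounded
 * sum) and lo = 0x1p-60 (the rounding error), so hi + lo recovers the exact
 * sum 1 + 2^-60.
 */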

/*
 * Compute a+b, with a small tweak:  The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding.  This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent.  For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen.  An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
 */
static inline double
add_adjusted(double a, double b)
{
        struct dd sum;
        uint64_t hibits, lobits;

        sum = dd_add(a, b);
        if (sum.lo != 0) {
                EXTRACT_WORD64(hibits, sum.hi);
                if ((hibits & 1) == 0) {
                        /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
                        EXTRACT_WORD64(lobits, sum.lo);
                        hibits += 1 - ((hibits ^ lobits) >> 62);
                        INSERT_WORD64(sum.hi, hibits);
                }
        }
        return (sum.hi);
}

/*
 * Compute ldexp(a+b, scale) with a single rounding error.  It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 */
static inline double
add_and_denormalize(double a, double b, int scale)
{
        struct dd sum;
        uint64_t hibits, lobits;
        int bits_lost;

        sum = dd_add(a, b);

        /*
         * If we are losing at least two bits of accuracy to denormalization,
         * then the first lost bit becomes a round bit, and we adjust the
         * lowest bit of sum.hi to make it a sticky bit summarizing all the
         * bits in sum.lo.  With the sticky bit adjusted, the hardware will
         * break any ties in the correct direction.
         *
         * If we are losing only one bit to denormalization, however, we must
         * break the ties manually.
         */
        if (sum.lo != 0) {
                EXTRACT_WORD64(hibits, sum.hi);
                bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
                if ((bits_lost != 1) ^ (int)(hibits & 1)) {
                        /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
                        EXTRACT_WORD64(lobits, sum.lo);
                        hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
                        INSERT_WORD64(sum.hi, hibits);
                }
        }
        return (ldexp(sum.hi, scale));
}

/*
 * Compute a*b exactly, returning the exact result in a struct dd.  We assume
 * that both a and b are normalized, so no underflow or overflow will occur.
 * The current rounding mode must be round-to-nearest.
 */
static inline struct dd
dd_mul(double a, double b)
{
        static const double split = 0x1p27 + 1.0;
        struct dd ret;
        double ha, hb, la, lb, p, q;

        p = a * split;
        ha = a - p;
        ha += p;
        la = a - ha;

        p = b * split;
        hb = b - p;
        hb += p;
        lb = b - hb;

        p = ha * hb;
        q = ha * lb + la * hb;

        ret.hi = p + q;
        ret.lo = p - ret.hi + q + la * lb;
        return (ret);
}
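
/*
 * For example, with a = b = 1.0 + 0x1p-30, the exact product 1 + 2^-29 +
 * 2^-60 needs more than 53 bits; dd_mul(a, b) returns it as hi = 1.0 +
 * 0x1p-29 and lo = 0x1p-60.
 */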

/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *     Dekker, T.  A Floating-Point Technique for Extending the
 *     Available Precision.  Numer. Math. 18, 224-242 (1971).
 *
 * This algorithm is sensitive to the rounding precision.  FPUs such
 * as the i387 must be set in double-precision mode if variables are
 * to be stored in FP registers in order to avoid incorrect results.
 * This is the default on FreeBSD, but not on many other systems.
 *
 * Hardware instructions should be used on architectures that support
 * fused multiply-add, since this implementation will likely be several
 * times slower.
 */
double
fma(double x, double y, double z)
{
        double xs, ys, zs, adj;
        struct dd xy, r;
        int oround;
        int ex, ey, ez;
        int spread;

        /*
         * Handle special cases.  The order of operations and the particular
         * return values here are crucial in handling special cases involving
         * infinities, NaNs, overflows, and signed zeroes correctly.
         */
        if (x == 0.0 || y == 0.0)
                return (x * y + z);
        if (z == 0.0)
                return (x * y);
        if (!isfinite(x) || !isfinite(y))
                return (x * y + z);
        if (!isfinite(z))
                return (z);

        xs = frexp(x, &ex);
        ys = frexp(y, &ey);
        zs = frexp(z, &ez);
        oround = fegetround();
        spread = ex + ey - ez;

        /*
         * If x * y and z are many orders of magnitude apart, the scaling
         * will overflow, so we handle these cases specially.  Rounding
         * modes other than FE_TONEAREST are painful.
         */
        if (spread < -DBL_MANT_DIG) {
                feraiseexcept(FE_INEXACT);
                if (!isnormal(z))
                        feraiseexcept(FE_UNDERFLOW);
                switch (oround) {
                case FE_TONEAREST:
                        return (z);
                case FE_TOWARDZERO:
                        if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0))
                                return (z);
                        else
                                return (nextafter(z, 0));
                case FE_DOWNWARD:
                        if ((x > 0.0) ^ (y < 0.0))
                                return (z);
                        else
                                return (nextafter(z, -INFINITY));
                default:        /* FE_UPWARD */
                        if ((x > 0.0) ^ (y < 0.0))
                                return (nextafter(z, INFINITY));
                        else
                                return (z);
                }
        }
        if (spread <= DBL_MANT_DIG * 2)
                zs = ldexp(zs, -spread);
        else
                zs = copysign(DBL_MIN, zs);
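        /*
         * In the last case (spread > DBL_MANT_DIG * 2), z lies entirely
         * below the low-order bits of the double-double product of x and y,
         * so it can only influence the result as a sticky bit; any
         * sufficiently small nonzero value with the sign of z (here DBL_MIN)
         * yields the same correctly rounded result.
         */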

        fesetround(FE_TONEAREST);
        /* work around clang issue #8472 */
        volatile double vxs = xs;

        /*
         * Basic approach for round-to-nearest:
         *
         *     (xy.hi, xy.lo) = x * y           (exact)
         *     (r.hi, r.lo)   = xy.hi + z       (exact)
         *     adj = xy.lo + r.lo               (inexact; low bit is sticky)
         *     result = r.hi + adj              (correctly rounded)
         */
        xy = dd_mul(vxs, ys);
        r = dd_add(xy.hi, zs);

        spread = ex + ey;

        if (r.hi == 0.0 && xy.lo == 0) {
                /*
                 * When the addends cancel to 0, ensure that the result has
                 * the correct sign.
                 */
                fesetround(oround);
                volatile double vzs = zs; /* XXX gcc CSE bug workaround */
                return (xy.hi + vzs);
        }

        if (oround != FE_TONEAREST) {
                /*
                 * There is no need to worry about double rounding in directed
                 * rounding modes.
                 */
                fesetround(oround);
                /* work around clang issue #8472 */
                volatile double vrlo = r.lo;
                adj = vrlo + xy.lo;
                return (ldexp(r.hi + adj, spread));
        }

        adj = add_adjusted(r.lo, xy.lo);
        if (spread + ilogb(r.hi) > -1023)
                return (ldexp(r.hi + adj, spread));
        else
                return (add_and_denormalize(r.hi, adj, spread));
}
#endif /* !USE_BUILTIN_FMA */

#if (LDBL_MANT_DIG == 53)
__weak_reference(fma, fmal);
#endif
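
/*
 * Usage example (illustrative only):
 *
 *      double x = 0x1p27 + 1.0;        // 2^27 + 1
 *      double y = 0x1p27 - 1.0;        // 2^27 - 1
 *      double z = -0x1p54;             // -(2^54)
 *      double p = x * y;               // 2^54 - 1 rounds to 2^54
 *      double naive = p + z;           // 0: the low-order 1 was lost
 *      double fused = fma(x, y, z);    // exactly -1, the true value of
 *                                      // (2^54 - 1) - 2^54
 *
 * (This assumes the compiler does not itself contract the plain
 * multiply-add into an fma instruction.)
 */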