165e60ab1SDavid Schultz /*- 265e60ab1SDavid Schultz * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> 365e60ab1SDavid Schultz * All rights reserved. 465e60ab1SDavid Schultz * 565e60ab1SDavid Schultz * Redistribution and use in source and binary forms, with or without 665e60ab1SDavid Schultz * modification, are permitted provided that the following conditions 765e60ab1SDavid Schultz * are met: 865e60ab1SDavid Schultz * 1. Redistributions of source code must retain the above copyright 965e60ab1SDavid Schultz * notice, this list of conditions and the following disclaimer. 1065e60ab1SDavid Schultz * 2. Redistributions in binary form must reproduce the above copyright 1165e60ab1SDavid Schultz * notice, this list of conditions and the following disclaimer in the 1265e60ab1SDavid Schultz * documentation and/or other materials provided with the distribution. 1365e60ab1SDavid Schultz * 1465e60ab1SDavid Schultz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1565e60ab1SDavid Schultz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1665e60ab1SDavid Schultz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1765e60ab1SDavid Schultz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1865e60ab1SDavid Schultz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1965e60ab1SDavid Schultz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2065e60ab1SDavid Schultz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2165e60ab1SDavid Schultz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2265e60ab1SDavid Schultz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2365e60ab1SDavid Schultz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2465e60ab1SDavid Schultz * SUCH DAMAGE. 2565e60ab1SDavid Schultz */ 2665e60ab1SDavid Schultz 2765e60ab1SDavid Schultz #include <sys/cdefs.h> 2865e60ab1SDavid Schultz __FBSDID("$FreeBSD$"); 2965e60ab1SDavid Schultz 3065e60ab1SDavid Schultz #include <fenv.h> 3165e60ab1SDavid Schultz #include <float.h> 3265e60ab1SDavid Schultz #include <math.h> 3365e60ab1SDavid Schultz 3465e60ab1SDavid Schultz /* 3565e60ab1SDavid Schultz * Fused multiply-add: Compute x * y + z with a single rounding error. 3665e60ab1SDavid Schultz * 3765e60ab1SDavid Schultz * We use scaling to avoid overflow/underflow, along with the 3865e60ab1SDavid Schultz * canonical precision-doubling technique adapted from: 3965e60ab1SDavid Schultz * 4065e60ab1SDavid Schultz * Dekker, T. A Floating-Point Technique for Extending the 4165e60ab1SDavid Schultz * Available Precision. Numer. Math. 18, 224-242 (1971). 4265e60ab1SDavid Schultz */ 4365e60ab1SDavid Schultz long double 4465e60ab1SDavid Schultz fmal(long double x, long double y, long double z) 4565e60ab1SDavid Schultz { 4665e60ab1SDavid Schultz #if LDBL_MANT_DIG == 64 4765e60ab1SDavid Schultz static const long double split = 0x1p32L + 1.0; 4865e60ab1SDavid Schultz #elif LDBL_MANT_DIG == 113 4965e60ab1SDavid Schultz static const long double split = 0x1p57L + 1.0; 5065e60ab1SDavid Schultz #endif 5165e60ab1SDavid Schultz long double xs, ys, zs; 5265e60ab1SDavid Schultz long double c, cc, hx, hy, p, q, tx, ty; 5365e60ab1SDavid Schultz long double r, rr, s; 5465e60ab1SDavid Schultz int oround; 5565e60ab1SDavid Schultz int ex, ey, ez; 5665e60ab1SDavid Schultz int spread; 5765e60ab1SDavid Schultz 5865e60ab1SDavid Schultz if (z == 0.0) 5965e60ab1SDavid Schultz return (x * y); 6065e60ab1SDavid Schultz if (x == 0.0 || y == 0.0) 6165e60ab1SDavid Schultz return (x * y + z); 6265e60ab1SDavid Schultz 6365e60ab1SDavid Schultz /* Results of frexp() are undefined for these cases. */ 6465e60ab1SDavid Schultz if (!isfinite(x) || !isfinite(y) || !isfinite(z)) 6565e60ab1SDavid Schultz return (x * y + z); 6665e60ab1SDavid Schultz 6765e60ab1SDavid Schultz xs = frexpl(x, &ex); 6865e60ab1SDavid Schultz ys = frexpl(y, &ey); 6965e60ab1SDavid Schultz zs = frexpl(z, &ez); 7065e60ab1SDavid Schultz oround = fegetround(); 7165e60ab1SDavid Schultz spread = ex + ey - ez; 7265e60ab1SDavid Schultz 7365e60ab1SDavid Schultz /* 7465e60ab1SDavid Schultz * If x * y and z are many orders of magnitude apart, the scaling 7565e60ab1SDavid Schultz * will overflow, so we handle these cases specially. Rounding 7665e60ab1SDavid Schultz * modes other than FE_TONEAREST are painful. 7765e60ab1SDavid Schultz */ 7865e60ab1SDavid Schultz if (spread > LDBL_MANT_DIG * 2) { 7965e60ab1SDavid Schultz fenv_t env; 8065e60ab1SDavid Schultz feraiseexcept(FE_INEXACT); 8165e60ab1SDavid Schultz switch(oround) { 8265e60ab1SDavid Schultz case FE_TONEAREST: 8365e60ab1SDavid Schultz return (x * y); 8465e60ab1SDavid Schultz case FE_TOWARDZERO: 8565e60ab1SDavid Schultz if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 8665e60ab1SDavid Schultz return (x * y); 8765e60ab1SDavid Schultz feholdexcept(&env); 8865e60ab1SDavid Schultz r = x * y; 8965e60ab1SDavid Schultz if (!fetestexcept(FE_INEXACT)) 9065e60ab1SDavid Schultz r = nextafterl(r, 0); 9165e60ab1SDavid Schultz feupdateenv(&env); 9265e60ab1SDavid Schultz return (r); 9365e60ab1SDavid Schultz case FE_DOWNWARD: 9465e60ab1SDavid Schultz if (z > 0.0) 9565e60ab1SDavid Schultz return (x * y); 9665e60ab1SDavid Schultz feholdexcept(&env); 9765e60ab1SDavid Schultz r = x * y; 9865e60ab1SDavid Schultz if (!fetestexcept(FE_INEXACT)) 9965e60ab1SDavid Schultz r = nextafterl(r, -INFINITY); 10065e60ab1SDavid Schultz feupdateenv(&env); 10165e60ab1SDavid Schultz return (r); 10265e60ab1SDavid Schultz default: /* FE_UPWARD */ 10365e60ab1SDavid Schultz if (z < 0.0) 10465e60ab1SDavid Schultz return (x * y); 10565e60ab1SDavid Schultz feholdexcept(&env); 10665e60ab1SDavid Schultz r = x * y; 10765e60ab1SDavid Schultz if (!fetestexcept(FE_INEXACT)) 10865e60ab1SDavid Schultz r = nextafterl(r, INFINITY); 10965e60ab1SDavid Schultz feupdateenv(&env); 11065e60ab1SDavid Schultz return (r); 11165e60ab1SDavid Schultz } 11265e60ab1SDavid Schultz } 11365e60ab1SDavid Schultz if (spread < -LDBL_MANT_DIG) { 11465e60ab1SDavid Schultz feraiseexcept(FE_INEXACT); 11565e60ab1SDavid Schultz if (!isnormal(z)) 11665e60ab1SDavid Schultz feraiseexcept(FE_UNDERFLOW); 11765e60ab1SDavid Schultz switch (oround) { 11865e60ab1SDavid Schultz case FE_TONEAREST: 11965e60ab1SDavid Schultz return (z); 12065e60ab1SDavid Schultz case FE_TOWARDZERO: 12165e60ab1SDavid Schultz if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 12265e60ab1SDavid Schultz return (z); 12365e60ab1SDavid Schultz else 12465e60ab1SDavid Schultz return (nextafterl(z, 0)); 12565e60ab1SDavid Schultz case FE_DOWNWARD: 12665e60ab1SDavid Schultz if (x > 0.0 ^ y < 0.0) 12765e60ab1SDavid Schultz return (z); 12865e60ab1SDavid Schultz else 12965e60ab1SDavid Schultz return (nextafterl(z, -INFINITY)); 13065e60ab1SDavid Schultz default: /* FE_UPWARD */ 13165e60ab1SDavid Schultz if (x > 0.0 ^ y < 0.0) 13265e60ab1SDavid Schultz return (nextafterl(z, INFINITY)); 13365e60ab1SDavid Schultz else 13465e60ab1SDavid Schultz return (z); 13565e60ab1SDavid Schultz } 13665e60ab1SDavid Schultz } 13765e60ab1SDavid Schultz 13865e60ab1SDavid Schultz /* 13965e60ab1SDavid Schultz * Use Dekker's algorithm to perform the multiplication and 14065e60ab1SDavid Schultz * subsequent addition in twice the machine precision. 14165e60ab1SDavid Schultz * Arrange so that x * y = c + cc, and x * y + z = r + rr. 14265e60ab1SDavid Schultz */ 14365e60ab1SDavid Schultz fesetround(FE_TONEAREST); 14465e60ab1SDavid Schultz 14565e60ab1SDavid Schultz p = xs * split; 14665e60ab1SDavid Schultz hx = xs - p; 14765e60ab1SDavid Schultz hx += p; 14865e60ab1SDavid Schultz tx = xs - hx; 14965e60ab1SDavid Schultz 15065e60ab1SDavid Schultz p = ys * split; 15165e60ab1SDavid Schultz hy = ys - p; 15265e60ab1SDavid Schultz hy += p; 15365e60ab1SDavid Schultz ty = ys - hy; 15465e60ab1SDavid Schultz 15565e60ab1SDavid Schultz p = hx * hy; 15665e60ab1SDavid Schultz q = hx * ty + tx * hy; 15765e60ab1SDavid Schultz c = p + q; 15865e60ab1SDavid Schultz cc = p - c + q + tx * ty; 15965e60ab1SDavid Schultz 16065e60ab1SDavid Schultz zs = ldexpl(zs, -spread); 16165e60ab1SDavid Schultz r = c + zs; 16265e60ab1SDavid Schultz s = r - c; 16365e60ab1SDavid Schultz rr = (c - (r - s)) + (zs - s) + cc; 16465e60ab1SDavid Schultz 1652c243582SDavid Schultz spread = ex + ey; 1662c243582SDavid Schultz if (spread + ilogbl(r) > -16383) { 16765e60ab1SDavid Schultz fesetround(oround); 1682c243582SDavid Schultz r = r + rr; 1692c243582SDavid Schultz } else { 1702c243582SDavid Schultz /* 1712c243582SDavid Schultz * The result is subnormal, so we round before scaling to 1722c243582SDavid Schultz * avoid double rounding. 1732c243582SDavid Schultz */ 1742c243582SDavid Schultz p = ldexpl(copysignl(0x1p-16382L, r), -spread); 1752c243582SDavid Schultz c = r + p; 1762c243582SDavid Schultz s = c - r; 1772c243582SDavid Schultz cc = (r - (c - s)) + (p - s) + rr; 1782c243582SDavid Schultz fesetround(oround); 1792c243582SDavid Schultz r = (c + cc) - p; 1802c243582SDavid Schultz } 1812c243582SDavid Schultz return (ldexpl(r, spread)); 18265e60ab1SDavid Schultz } 183