xref: /freebsd/lib/msun/src/s_fmal.c (revision 2c2435825abe9bc9670f13b32ca9fe22ff55d52f)
165e60ab1SDavid Schultz /*-
265e60ab1SDavid Schultz  * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
365e60ab1SDavid Schultz  * All rights reserved.
465e60ab1SDavid Schultz  *
565e60ab1SDavid Schultz  * Redistribution and use in source and binary forms, with or without
665e60ab1SDavid Schultz  * modification, are permitted provided that the following conditions
765e60ab1SDavid Schultz  * are met:
865e60ab1SDavid Schultz  * 1. Redistributions of source code must retain the above copyright
965e60ab1SDavid Schultz  *    notice, this list of conditions and the following disclaimer.
1065e60ab1SDavid Schultz  * 2. Redistributions in binary form must reproduce the above copyright
1165e60ab1SDavid Schultz  *    notice, this list of conditions and the following disclaimer in the
1265e60ab1SDavid Schultz  *    documentation and/or other materials provided with the distribution.
1365e60ab1SDavid Schultz  *
1465e60ab1SDavid Schultz  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1565e60ab1SDavid Schultz  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1665e60ab1SDavid Schultz  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1765e60ab1SDavid Schultz  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1865e60ab1SDavid Schultz  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1965e60ab1SDavid Schultz  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2065e60ab1SDavid Schultz  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2165e60ab1SDavid Schultz  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2265e60ab1SDavid Schultz  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2365e60ab1SDavid Schultz  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2465e60ab1SDavid Schultz  * SUCH DAMAGE.
2565e60ab1SDavid Schultz  */
2665e60ab1SDavid Schultz 
2765e60ab1SDavid Schultz #include <sys/cdefs.h>
2865e60ab1SDavid Schultz __FBSDID("$FreeBSD$");
2965e60ab1SDavid Schultz 
3065e60ab1SDavid Schultz #include <fenv.h>
3165e60ab1SDavid Schultz #include <float.h>
3265e60ab1SDavid Schultz #include <math.h>
3365e60ab1SDavid Schultz 
3465e60ab1SDavid Schultz /*
3565e60ab1SDavid Schultz  * Fused multiply-add: Compute x * y + z with a single rounding error.
3665e60ab1SDavid Schultz  *
3765e60ab1SDavid Schultz  * We use scaling to avoid overflow/underflow, along with the
3865e60ab1SDavid Schultz  * canonical precision-doubling technique adapted from:
3965e60ab1SDavid Schultz  *
4065e60ab1SDavid Schultz  *	Dekker, T.  A Floating-Point Technique for Extending the
4165e60ab1SDavid Schultz  *	Available Precision.  Numer. Math. 18, 224-242 (1971).
4265e60ab1SDavid Schultz  */
4365e60ab1SDavid Schultz long double
4465e60ab1SDavid Schultz fmal(long double x, long double y, long double z)
4565e60ab1SDavid Schultz {
4665e60ab1SDavid Schultz #if LDBL_MANT_DIG == 64
4765e60ab1SDavid Schultz 	static const long double split = 0x1p32L + 1.0;
4865e60ab1SDavid Schultz #elif LDBL_MANT_DIG == 113
4965e60ab1SDavid Schultz 	static const long double split = 0x1p57L + 1.0;
5065e60ab1SDavid Schultz #endif
5165e60ab1SDavid Schultz 	long double xs, ys, zs;
5265e60ab1SDavid Schultz 	long double c, cc, hx, hy, p, q, tx, ty;
5365e60ab1SDavid Schultz 	long double r, rr, s;
5465e60ab1SDavid Schultz 	int oround;
5565e60ab1SDavid Schultz 	int ex, ey, ez;
5665e60ab1SDavid Schultz 	int spread;
5765e60ab1SDavid Schultz 
5865e60ab1SDavid Schultz 	if (z == 0.0)
5965e60ab1SDavid Schultz 		return (x * y);
6065e60ab1SDavid Schultz 	if (x == 0.0 || y == 0.0)
6165e60ab1SDavid Schultz 		return (x * y + z);
6265e60ab1SDavid Schultz 
6365e60ab1SDavid Schultz 	/* Results of frexp() are undefined for these cases. */
6465e60ab1SDavid Schultz 	if (!isfinite(x) || !isfinite(y) || !isfinite(z))
6565e60ab1SDavid Schultz 		return (x * y + z);
6665e60ab1SDavid Schultz 
6765e60ab1SDavid Schultz 	xs = frexpl(x, &ex);
6865e60ab1SDavid Schultz 	ys = frexpl(y, &ey);
6965e60ab1SDavid Schultz 	zs = frexpl(z, &ez);
7065e60ab1SDavid Schultz 	oround = fegetround();
7165e60ab1SDavid Schultz 	spread = ex + ey - ez;
7265e60ab1SDavid Schultz 
7365e60ab1SDavid Schultz 	/*
7465e60ab1SDavid Schultz 	 * If x * y and z are many orders of magnitude apart, the scaling
7565e60ab1SDavid Schultz 	 * will overflow, so we handle these cases specially.  Rounding
7665e60ab1SDavid Schultz 	 * modes other than FE_TONEAREST are painful.
7765e60ab1SDavid Schultz 	 */
7865e60ab1SDavid Schultz 	if (spread > LDBL_MANT_DIG * 2) {
7965e60ab1SDavid Schultz 		fenv_t env;
8065e60ab1SDavid Schultz 		feraiseexcept(FE_INEXACT);
8165e60ab1SDavid Schultz 		switch(oround) {
8265e60ab1SDavid Schultz 		case FE_TONEAREST:
8365e60ab1SDavid Schultz 			return (x * y);
8465e60ab1SDavid Schultz 		case FE_TOWARDZERO:
8565e60ab1SDavid Schultz 			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
8665e60ab1SDavid Schultz 				return (x * y);
8765e60ab1SDavid Schultz 			feholdexcept(&env);
8865e60ab1SDavid Schultz 			r = x * y;
8965e60ab1SDavid Schultz 			if (!fetestexcept(FE_INEXACT))
9065e60ab1SDavid Schultz 				r = nextafterl(r, 0);
9165e60ab1SDavid Schultz 			feupdateenv(&env);
9265e60ab1SDavid Schultz 			return (r);
9365e60ab1SDavid Schultz 		case FE_DOWNWARD:
9465e60ab1SDavid Schultz 			if (z > 0.0)
9565e60ab1SDavid Schultz 				return (x * y);
9665e60ab1SDavid Schultz 			feholdexcept(&env);
9765e60ab1SDavid Schultz 			r = x * y;
9865e60ab1SDavid Schultz 			if (!fetestexcept(FE_INEXACT))
9965e60ab1SDavid Schultz 				r = nextafterl(r, -INFINITY);
10065e60ab1SDavid Schultz 			feupdateenv(&env);
10165e60ab1SDavid Schultz 			return (r);
10265e60ab1SDavid Schultz 		default:	/* FE_UPWARD */
10365e60ab1SDavid Schultz 			if (z < 0.0)
10465e60ab1SDavid Schultz 				return (x * y);
10565e60ab1SDavid Schultz 			feholdexcept(&env);
10665e60ab1SDavid Schultz 			r = x * y;
10765e60ab1SDavid Schultz 			if (!fetestexcept(FE_INEXACT))
10865e60ab1SDavid Schultz 				r = nextafterl(r, INFINITY);
10965e60ab1SDavid Schultz 			feupdateenv(&env);
11065e60ab1SDavid Schultz 			return (r);
11165e60ab1SDavid Schultz 		}
11265e60ab1SDavid Schultz 	}
11365e60ab1SDavid Schultz 	if (spread < -LDBL_MANT_DIG) {
11465e60ab1SDavid Schultz 		feraiseexcept(FE_INEXACT);
11565e60ab1SDavid Schultz 		if (!isnormal(z))
11665e60ab1SDavid Schultz 			feraiseexcept(FE_UNDERFLOW);
11765e60ab1SDavid Schultz 		switch (oround) {
11865e60ab1SDavid Schultz 		case FE_TONEAREST:
11965e60ab1SDavid Schultz 			return (z);
12065e60ab1SDavid Schultz 		case FE_TOWARDZERO:
12165e60ab1SDavid Schultz 			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
12265e60ab1SDavid Schultz 				return (z);
12365e60ab1SDavid Schultz 			else
12465e60ab1SDavid Schultz 				return (nextafterl(z, 0));
12565e60ab1SDavid Schultz 		case FE_DOWNWARD:
12665e60ab1SDavid Schultz 			if (x > 0.0 ^ y < 0.0)
12765e60ab1SDavid Schultz 				return (z);
12865e60ab1SDavid Schultz 			else
12965e60ab1SDavid Schultz 				return (nextafterl(z, -INFINITY));
13065e60ab1SDavid Schultz 		default:	/* FE_UPWARD */
13165e60ab1SDavid Schultz 			if (x > 0.0 ^ y < 0.0)
13265e60ab1SDavid Schultz 				return (nextafterl(z, INFINITY));
13365e60ab1SDavid Schultz 			else
13465e60ab1SDavid Schultz 				return (z);
13565e60ab1SDavid Schultz 		}
13665e60ab1SDavid Schultz 	}
13765e60ab1SDavid Schultz 
13865e60ab1SDavid Schultz 	/*
13965e60ab1SDavid Schultz 	 * Use Dekker's algorithm to perform the multiplication and
14065e60ab1SDavid Schultz 	 * subsequent addition in twice the machine precision.
14165e60ab1SDavid Schultz 	 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
14265e60ab1SDavid Schultz 	 */
14365e60ab1SDavid Schultz 	fesetround(FE_TONEAREST);
14465e60ab1SDavid Schultz 
14565e60ab1SDavid Schultz 	p = xs * split;
14665e60ab1SDavid Schultz 	hx = xs - p;
14765e60ab1SDavid Schultz 	hx += p;
14865e60ab1SDavid Schultz 	tx = xs - hx;
14965e60ab1SDavid Schultz 
15065e60ab1SDavid Schultz 	p = ys * split;
15165e60ab1SDavid Schultz 	hy = ys - p;
15265e60ab1SDavid Schultz 	hy += p;
15365e60ab1SDavid Schultz 	ty = ys - hy;
15465e60ab1SDavid Schultz 
15565e60ab1SDavid Schultz 	p = hx * hy;
15665e60ab1SDavid Schultz 	q = hx * ty + tx * hy;
15765e60ab1SDavid Schultz 	c = p + q;
15865e60ab1SDavid Schultz 	cc = p - c + q + tx * ty;
15965e60ab1SDavid Schultz 
16065e60ab1SDavid Schultz 	zs = ldexpl(zs, -spread);
16165e60ab1SDavid Schultz 	r = c + zs;
16265e60ab1SDavid Schultz 	s = r - c;
16365e60ab1SDavid Schultz 	rr = (c - (r - s)) + (zs - s) + cc;
16465e60ab1SDavid Schultz 
1652c243582SDavid Schultz 	spread = ex + ey;
1662c243582SDavid Schultz 	if (spread + ilogbl(r) > -16383) {
16765e60ab1SDavid Schultz 		fesetround(oround);
1682c243582SDavid Schultz 		r = r + rr;
1692c243582SDavid Schultz 	} else {
1702c243582SDavid Schultz 		/*
1712c243582SDavid Schultz 		 * The result is subnormal, so we round before scaling to
1722c243582SDavid Schultz 		 * avoid double rounding.
1732c243582SDavid Schultz 		 */
1742c243582SDavid Schultz 		p = ldexpl(copysignl(0x1p-16382L, r), -spread);
1752c243582SDavid Schultz 		c = r + p;
1762c243582SDavid Schultz 		s = c - r;
1772c243582SDavid Schultz 		cc = (r - (c - s)) + (p - s) + rr;
1782c243582SDavid Schultz 		fesetround(oround);
1792c243582SDavid Schultz 		r = (c + cc) - p;
1802c243582SDavid Schultz 	}
1812c243582SDavid Schultz 	return (ldexpl(r, spread));
18265e60ab1SDavid Schultz }
183