xref: /freebsd/lib/msun/src/s_rsqrtl.c (revision 3085fc9d97bd83785ba3ba43e0378d7d67987d1f)
1*3085fc9dSSteve Kargl /*-
2*3085fc9dSSteve Kargl  * Copyright (c) 2026 Steven G. Kargl
3*3085fc9dSSteve Kargl  * All rights reserved.
4*3085fc9dSSteve Kargl  *
5*3085fc9dSSteve Kargl  * Redistribution and use in source and binary forms, with or without
6*3085fc9dSSteve Kargl  * modification, are permitted provided that the following conditions
7*3085fc9dSSteve Kargl  * are met:
8*3085fc9dSSteve Kargl  * 1. Redistributions of source code must retain the above copyright
9*3085fc9dSSteve Kargl  *    notice unmodified, this list of conditions, and the following
10*3085fc9dSSteve Kargl  *    disclaimer.
11*3085fc9dSSteve Kargl  * 2. Redistributions in binary form must reproduce the above copyright
12*3085fc9dSSteve Kargl  *    notice, this list of conditions and the following disclaimer in the
13*3085fc9dSSteve Kargl  *    documentation and/or other materials provided with the distribution.
14*3085fc9dSSteve Kargl  *
15*3085fc9dSSteve Kargl  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16*3085fc9dSSteve Kargl  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17*3085fc9dSSteve Kargl  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18*3085fc9dSSteve Kargl  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19*3085fc9dSSteve Kargl  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20*3085fc9dSSteve Kargl  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21*3085fc9dSSteve Kargl  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22*3085fc9dSSteve Kargl  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23*3085fc9dSSteve Kargl  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24*3085fc9dSSteve Kargl  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*3085fc9dSSteve Kargl  */
26*3085fc9dSSteve Kargl 
27*3085fc9dSSteve Kargl /**
28*3085fc9dSSteve Kargl  * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).
29*3085fc9dSSteve Kargl  *
30*3085fc9dSSteve Kargl  * First, filter out special cases:
31*3085fc9dSSteve Kargl  *
32*3085fc9dSSteve Kargl  *   1. rsqrt(+-0) = +-inf, and raise FE_DIVBYZERO exception.
33*3085fc9dSSteve Kargl  *   2. rsqrt(nan) = NaN.
34*3085fc9dSSteve Kargl  *   3. rsqrt(+inf) returns +0.
35*3085fc9dSSteve Kargl  *   2. rsqrt(x<0) = NaN, and raises FE_INVALID.
36*3085fc9dSSteve Kargl  *
37*3085fc9dSSteve Kargl  * If x is a subnormal, scale x into the normal range by x*0x1pN; while
38*3085fc9dSSteve Kargl  * recording the exponent of the scale factor N.  Split the possibly
39*3085fc9dSSteve Kargl  * scaled x into f*2^n with f in [0.5,1).  Set m=n or m=n-N (subnormal).
40*3085fc9dSSteve Kargl  * If n is odd, then set f = f/2 and increase n to n+1.  Thus, f is
41*3085fc9dSSteve Kargl  * in [0.25,1) with n even.
42*3085fc9dSSteve Kargl  *
43*3085fc9dSSteve Kargl  * An initial estimate of y = rqrt[f](x) is 1 / sqrt[f](x).  Exhaustive
44*3085fc9dSSteve Kargl  * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in
45*3085fc9dSSteve Kargl  * [0,1000] gave a max ULP of 1.24 for rsqrt().  The value of y is then
46*3085fc9dSSteve Kargl  * used with one iteration of Goldschmidt's algorithm:
47*3085fc9dSSteve Kargl  *
48*3085fc9dSSteve Kargl  *	z = x * y
49*3085fc9dSSteve Kargl  *	h = y / 2
50*3085fc9dSSteve Kargl  *	r = 0.5 - h * z
51*3085fc9dSSteve Kargl  *	y = h * r + h
52*3085fc9dSSteve Kargl  *
53*3085fc9dSSteve Kargl  * A factor of 2 appears missing in the above, but it is included in the
54*3085fc9dSSteve Kargl  * exponent m.
55*3085fc9dSSteve Kargl  */
56*3085fc9dSSteve Kargl #include <fenv.h>
57*3085fc9dSSteve Kargl #include <float.h>
58*3085fc9dSSteve Kargl #include "math.h"
59*3085fc9dSSteve Kargl #include "math_private.h"
60*3085fc9dSSteve Kargl #include "fpmath.h"
61*3085fc9dSSteve Kargl 
62*3085fc9dSSteve Kargl #pragma STDC FENV_ACCESS ON
63*3085fc9dSSteve Kargl 
64*3085fc9dSSteve Kargl #if LDBL_MANT_DIG == 64
65*3085fc9dSSteve Kargl 
66*3085fc9dSSteve Kargl #ifdef _CC
67*3085fc9dSSteve Kargl #undef _CC
68*3085fc9dSSteve Kargl #endif
69*3085fc9dSSteve Kargl #define _CC (0x1p32L + 1)
70*3085fc9dSSteve Kargl 
71*3085fc9dSSteve Kargl long double
rsqrtl(long double x)72*3085fc9dSSteve Kargl rsqrtl(long double x)
73*3085fc9dSSteve Kargl {
74*3085fc9dSSteve Kargl 	volatile static const double vzero = 0;
75*3085fc9dSSteve Kargl 	static const double half = 0.5;
76*3085fc9dSSteve Kargl 	uint32_t ux;
77*3085fc9dSSteve Kargl 	int m, rnd;
78*3085fc9dSSteve Kargl 	long double h, ph, pl, rh, rl, y, zh, zl;
79*3085fc9dSSteve Kargl 	union IEEEl2bits u;
80*3085fc9dSSteve Kargl 
81*3085fc9dSSteve Kargl 	u.e = x;
82*3085fc9dSSteve Kargl 	ux = (u.bits.manl | u.bits.manh);
83*3085fc9dSSteve Kargl 
84*3085fc9dSSteve Kargl 	/* x = +-0.  Raise exception. */
85*3085fc9dSSteve Kargl 	if ((u.bits.exp | ux) == 0)
86*3085fc9dSSteve Kargl 	    return (1 / x);
87*3085fc9dSSteve Kargl 
88*3085fc9dSSteve Kargl 	/* x is NaN or x is +-inf. */
89*3085fc9dSSteve Kargl 	if (u.bits.exp == 0x7fff)
90*3085fc9dSSteve Kargl 	    return (ux ? (x + x) : (u.bits.sign ? vzero / vzero : 0));
91*3085fc9dSSteve Kargl 
92*3085fc9dSSteve Kargl 	/* x < 0.  Raise exception. */
93*3085fc9dSSteve Kargl 	if (u.bits.sign)
94*3085fc9dSSteve Kargl 	    return (vzero / vzero);
95*3085fc9dSSteve Kargl 
96*3085fc9dSSteve Kargl 	/*
97*3085fc9dSSteve Kargl 	 * If x is subnormal, then scale it into the normal range.
98*3085fc9dSSteve Kargl 	 * Split x into significand and exponent, x = f * 2^m, with
99*3085fc9dSSteve Kargl 	 * f in [0.5,1) and m a biased exponent.
100*3085fc9dSSteve Kargl 	 */
101*3085fc9dSSteve Kargl 	ENTERI();
102*3085fc9dSSteve Kargl 
103*3085fc9dSSteve Kargl 	if (u.bits.exp == 0) {		/* Subnormal */
104*3085fc9dSSteve Kargl 	    u.e *= 0x1p512;
105*3085fc9dSSteve Kargl 	    m = u.bits.exp - 0x41fe;
106*3085fc9dSSteve Kargl 	} else {
107*3085fc9dSSteve Kargl 	    m = u.bits.exp - 0x3ffe;
108*3085fc9dSSteve Kargl 	}
109*3085fc9dSSteve Kargl 	u.bits.exp = 0x3ffe;
110*3085fc9dSSteve Kargl 
111*3085fc9dSSteve Kargl 	/* m is odd.  Put x into [0.25,5) and increase m. */
112*3085fc9dSSteve Kargl 	if (m & 1) {
113*3085fc9dSSteve Kargl 	    u.e /= 2;
114*3085fc9dSSteve Kargl 	    m += 1;
115*3085fc9dSSteve Kargl 	}
116*3085fc9dSSteve Kargl 	m = -(m >> 1);			/* Prepare for 2^(-m/2). */
117*3085fc9dSSteve Kargl 
118*3085fc9dSSteve Kargl 	y = 1 / sqrt((double)u.e);	/* ~52-bit estimate. */
119*3085fc9dSSteve Kargl 	y -= y * (u.e * y * y - 1) / 2;	/* ~63-bit estimate. */
120*3085fc9dSSteve Kargl 
121*3085fc9dSSteve Kargl 	h = y / 2;
122*3085fc9dSSteve Kargl 
123*3085fc9dSSteve Kargl 	_MUL(u.e, y, zh, zl);
124*3085fc9dSSteve Kargl 	_XMUL(zh, zl, h, 0, ph, pl);
125*3085fc9dSSteve Kargl 	_XADD(-ph, -pl, half, 0, rh, rl);
126*3085fc9dSSteve Kargl 	y = rh * h + h;
127*3085fc9dSSteve Kargl 
128*3085fc9dSSteve Kargl 	u.e = 1;
129*3085fc9dSSteve Kargl 	u.xbits.expsign = 0x3fff + m + 1;
130*3085fc9dSSteve Kargl 	RETURNI(y * u.e);
131*3085fc9dSSteve Kargl }
132*3085fc9dSSteve Kargl 
133*3085fc9dSSteve Kargl #else
134*3085fc9dSSteve Kargl 
135*3085fc9dSSteve Kargl #ifdef _CC
136*3085fc9dSSteve Kargl #undef _CC
137*3085fc9dSSteve Kargl #endif
138*3085fc9dSSteve Kargl #define _CC (0x1p57L + 1)
139*3085fc9dSSteve Kargl 
140*3085fc9dSSteve Kargl long double
rsqrtl(long double x)141*3085fc9dSSteve Kargl rsqrtl(long double x)
142*3085fc9dSSteve Kargl {
143*3085fc9dSSteve Kargl 	volatile static const double vzero = 0;
144*3085fc9dSSteve Kargl 	int hx, m, rnd;
145*3085fc9dSSteve Kargl 	long double y;
146*3085fc9dSSteve Kargl 
147*3085fc9dSSteve Kargl 	/* x = +-0.  Raise exception. */
148*3085fc9dSSteve Kargl 	if (x == 0)
149*3085fc9dSSteve Kargl 	    return (1 / x);
150*3085fc9dSSteve Kargl 
151*3085fc9dSSteve Kargl 	/* x is NaN. */
152*3085fc9dSSteve Kargl 	if (isnan(x))
153*3085fc9dSSteve Kargl 	    return (x + x);
154*3085fc9dSSteve Kargl 
155*3085fc9dSSteve Kargl 	/* x is +-inf. */
156*3085fc9dSSteve Kargl 	if (isinf(x))
157*3085fc9dSSteve Kargl 	    return (x > 0 ? 0 : vzero / vzero);
158*3085fc9dSSteve Kargl 
159*3085fc9dSSteve Kargl 	/* x < 0.  Raise exception. */
160*3085fc9dSSteve Kargl 	if (x < 0)
161*3085fc9dSSteve Kargl 	    return (vzero / vzero);
162*3085fc9dSSteve Kargl 
163*3085fc9dSSteve Kargl 	/*
164*3085fc9dSSteve Kargl 	 * If x is subnormal, then scale it into the normal range.
165*3085fc9dSSteve Kargl 	 * Split x into significand and exponent, x = f * 2^m, with
166*3085fc9dSSteve Kargl 	 * f in [0.5,1) and m a biased exponent.
167*3085fc9dSSteve Kargl 	 */
168*3085fc9dSSteve Kargl 	m = 0;
169*3085fc9dSSteve Kargl 	if (!isnormal(x)) {
170*3085fc9dSSteve Kargl 	    x *= 0x1p114L;
171*3085fc9dSSteve Kargl 	    m = -114;
172*3085fc9dSSteve Kargl 	}
173*3085fc9dSSteve Kargl 	x = frexpl(x, &hx);
174*3085fc9dSSteve Kargl 	m += hx;
175*3085fc9dSSteve Kargl 
176*3085fc9dSSteve Kargl 	/* m is odd.  Put x into [0.25,5) and increase m. */
177*3085fc9dSSteve Kargl 	if (m & 1) {
178*3085fc9dSSteve Kargl 	    x /= 2;
179*3085fc9dSSteve Kargl 	    m += 1;
180*3085fc9dSSteve Kargl 	}
181*3085fc9dSSteve Kargl 	m = -(m >> 1);			/* Prepare for 2^(-m/2). */
182*3085fc9dSSteve Kargl 
183*3085fc9dSSteve Kargl 	y = 1 / sqrt((double)x);	/* ~52-bit estimate. */
184*3085fc9dSSteve Kargl 	y -= y * (x * y * y - 1) / 2;	/* ~104-bit estimate. */
185*3085fc9dSSteve Kargl 
186*3085fc9dSSteve Kargl 	static const double half = 0.5;
187*3085fc9dSSteve Kargl 	long double h, ph, pl, rh, rl, zh, zl;
188*3085fc9dSSteve Kargl 
189*3085fc9dSSteve Kargl 	h = y / 2;
190*3085fc9dSSteve Kargl 
191*3085fc9dSSteve Kargl 	rnd = fegetround();
192*3085fc9dSSteve Kargl 	fesetround(FE_TOWARDZERO);
193*3085fc9dSSteve Kargl 	_MUL(x, y, zh, zl);
194*3085fc9dSSteve Kargl 	_XMUL(zh, zl, -h, 0, ph, pl);
195*3085fc9dSSteve Kargl 	fesetround(rnd);
196*3085fc9dSSteve Kargl 
197*3085fc9dSSteve Kargl 	_XADD(ph, pl, half, 0, rh, rl);
198*3085fc9dSSteve Kargl 	y = rh * h + h;
199*3085fc9dSSteve Kargl 	m++;
200*3085fc9dSSteve Kargl 
201*3085fc9dSSteve Kargl 	RETURNI(ldexpl(y, m));
202*3085fc9dSSteve Kargl }
203*3085fc9dSSteve Kargl #endif
204