1 /*-
2 * Copyright (c) 2026 Steven G. Kargl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
10 * disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /**
28 * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).
29 *
30 * First, filter out special cases:
31 *
32 * 1. rsqrt(+-0) = +-inf, and raise FE_DIVBYZERO exception.
33 * 2. rsqrt(nan) = NaN.
34 * 3. rsqrt(+inf) returns +0.
35 * 2. rsqrt(x<0) = NaN, and raises FE_INVALID.
36 *
37 * If x is a subnormal, scale x into the normal range by x*0x1pN; while
38 * recording the exponent of the scale factor N. Split the possibly
39 * scaled x into f*2^n with f in [0.5,1). Set m=n or m=n-N (subnormal).
40 * If n is odd, then set f = f/2 and increase n to n+1. Thus, f is
41 * in [0.25,1) with n even.
42 *
43 * An initial estimate of y = rqrt[f](x) is 1 / sqrt[f](x). Exhaustive
44 * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in
45 * [0,1000] gave a max ULP of 1.24 for rsqrt(). The value of y is then
46 * used with one iteration of Goldschmidt's algorithm:
47 *
48 * z = x * y
49 * h = y / 2
50 * r = 0.5 - h * z
51 * y = h * r + h
52 *
53 * A factor of 2 appears missing in the above, but it is included in the
54 * exponent m.
55 */
56 #include <fenv.h>
57 #include <float.h>
58 #include "math.h"
59 #include "math_private.h"
60 #include "fpmath.h"
61
62 #pragma STDC FENV_ACCESS ON
63
64 #if LDBL_MANT_DIG == 64
65
66 #ifdef _CC
67 #undef _CC
68 #endif
69 #define _CC (0x1p32L + 1)
70
71 long double
rsqrtl(long double x)72 rsqrtl(long double x)
73 {
74 volatile static const double vzero = 0;
75 static const double half = 0.5;
76 uint32_t ux;
77 int m, rnd;
78 long double h, ph, pl, rh, rl, y, zh, zl;
79 union IEEEl2bits u;
80
81 u.e = x;
82 ux = (u.bits.manl | u.bits.manh);
83
84 /* x = +-0. Raise exception. */
85 if ((u.bits.exp | ux) == 0)
86 return (1 / x);
87
88 /* x is NaN or x is +-inf. */
89 if (u.bits.exp == 0x7fff)
90 return (ux ? (x + x) : (u.bits.sign ? vzero / vzero : 0));
91
92 /* x < 0. Raise exception. */
93 if (u.bits.sign)
94 return (vzero / vzero);
95
96 /*
97 * If x is subnormal, then scale it into the normal range.
98 * Split x into significand and exponent, x = f * 2^m, with
99 * f in [0.5,1) and m a biased exponent.
100 */
101 ENTERI();
102
103 if (u.bits.exp == 0) { /* Subnormal */
104 u.e *= 0x1p512;
105 m = u.bits.exp - 0x41fe;
106 } else {
107 m = u.bits.exp - 0x3ffe;
108 }
109 u.bits.exp = 0x3ffe;
110
111 /* m is odd. Put x into [0.25,5) and increase m. */
112 if (m & 1) {
113 u.e /= 2;
114 m += 1;
115 }
116 m = -(m >> 1); /* Prepare for 2^(-m/2). */
117
118 y = 1 / sqrt((double)u.e); /* ~52-bit estimate. */
119 y -= y * (u.e * y * y - 1) / 2; /* ~63-bit estimate. */
120
121 h = y / 2;
122
123 _MUL(u.e, y, zh, zl);
124 _XMUL(zh, zl, h, 0, ph, pl);
125 _XADD(-ph, -pl, half, 0, rh, rl);
126 y = rh * h + h;
127
128 u.e = 1;
129 u.xbits.expsign = 0x3fff + m + 1;
130 RETURNI(y * u.e);
131 }
132
133 #else
134
135 #ifdef _CC
136 #undef _CC
137 #endif
138 #define _CC (0x1p57L + 1)
139
140 long double
rsqrtl(long double x)141 rsqrtl(long double x)
142 {
143 volatile static const double vzero = 0;
144 int hx, m, rnd;
145 long double y;
146
147 /* x = +-0. Raise exception. */
148 if (x == 0)
149 return (1 / x);
150
151 /* x is NaN. */
152 if (isnan(x))
153 return (x + x);
154
155 /* x is +-inf. */
156 if (isinf(x))
157 return (x > 0 ? 0 : vzero / vzero);
158
159 /* x < 0. Raise exception. */
160 if (x < 0)
161 return (vzero / vzero);
162
163 /*
164 * If x is subnormal, then scale it into the normal range.
165 * Split x into significand and exponent, x = f * 2^m, with
166 * f in [0.5,1) and m a biased exponent.
167 */
168 m = 0;
169 if (!isnormal(x)) {
170 x *= 0x1p114L;
171 m = -114;
172 }
173 x = frexpl(x, &hx);
174 m += hx;
175
176 /* m is odd. Put x into [0.25,5) and increase m. */
177 if (m & 1) {
178 x /= 2;
179 m += 1;
180 }
181 m = -(m >> 1); /* Prepare for 2^(-m/2). */
182
183 y = 1 / sqrt((double)x); /* ~52-bit estimate. */
184 y -= y * (x * y * y - 1) / 2; /* ~104-bit estimate. */
185
186 static const double half = 0.5;
187 long double h, ph, pl, rh, rl, zh, zl;
188
189 h = y / 2;
190
191 rnd = fegetround();
192 fesetround(FE_TOWARDZERO);
193 _MUL(x, y, zh, zl);
194 _XMUL(zh, zl, -h, 0, ph, pl);
195 fesetround(rnd);
196
197 _XADD(ph, pl, half, 0, rh, rl);
198 y = rh * h + h;
199 m++;
200
201 RETURNI(ldexpl(y, m));
202 }
203 #endif
204