xref: /freebsd/lib/msun/src/s_rsqrtf.c (revision c3f6dcea199289329c1d3b91b69e5a4821fc3dff)
1 /*-
2  * Copyright (c) 2026 Steven G. Kargl
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /**
28  * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).
29  *
30  * First, filter out special cases:
31  *
32  *   1. rsqrt(+-0) = +-inf, and raise FE_DIVBYZERO exception.
33  *   2. rsqrt(nan) = NaN.
34  *   3. rsqrt(+inf) returns +0.
35  *   2. rsqrt(x<0) = NaN, and raises FE_INVALID.
36  *
37  * If x is a subnormal, scale x into the normal range by x*0x1pN; while
38  * recording the exponent of the scale factor N.  Split the possibly
39  * scaled x into f*2^n with f in [0.5,1).  Set m=n or m=n-N (subnormal).
40  * If n is odd, then set f = f/2 and increase n to n+1.  Thus, f is
41  * in [0.25,1) with n even.
42  *
43  * An initial estimate of y = rqrt[f](x) is 1 / sqrt[f](x).  Exhaustive
44  * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in
45  * [0,1000] gave a max ULP of 1.24 for rsqrt().  The value of y is then
46  * used with one iteration of Goldschmidt's algorithm:
47  *
48  *	z = x * y
49  *	h = y / 2
50  *	r = 0.5 - h * z
51  *	y = h * r + h
52  *
53  * A factor of 2 appears missing in the above, but it is included in the
54  * exponent m.
55  */
56 #include <fenv.h>
57 #include <float.h>
58 #include "math.h"
59 #include "math_private.h"
60 
61 #pragma STDC FENV_ACCESS ON
62 
63 #ifdef _CC
64 #undef _CC
65 #endif
66 #define _CC (0x1p12F + 1)
67 
68 float
rsqrtf(float x)69 rsqrtf(float x)
70 {
71 	volatile static const float vzero = 0;
72 	static const float half = 0.5;
73 	uint32_t ix, ux;
74 	int m, rnd;
75 	float h, ph, pl, rh, rl, y, zh, zl;
76 
77 	GET_FLOAT_WORD(ix, x);
78 	ux = ix & 0x7fffffff;
79 
80 	/* x = +-0.  Raise exception. */
81 	if (ux == 0)
82 	    return (1 / x);
83 
84 	/* x is NaN. */
85 	if (ux > 0x7f800000)
86 	    return (x + x);
87 
88 	/* x is +-inf. */
89 	if (ux == 0x7f800000)
90 	    return (ix & 0x80000000 ? vzero / vzero : 0.F);
91 
92 	/* x < 0.  Raise exception. */
93 	if (ix & 0x80000000)
94 	    return (vzero / vzero);
95 
96 	/*
97 	 * If x is subnormal, then scale it into the normal range.
98 	 * Split x into significand and exponent, x = f * 2^m, with
99 	 * f in [0.5,1) and m a biased exponent.
100 	 */
101 	m = 0;
102 	if (ux < 0x00800000) {		/* Subnormal */
103 	    x *= 0x1p25f;
104 	    GET_FLOAT_WORD(ix, x);
105 	    m = -25;
106 	}
107 	m += (ix >> 23) - 126;		/* Unbiased exponent */
108 	ix = (ix & 0x007fffff) | 0x3f000000;
109 	SET_FLOAT_WORD(x, ix);		/* x is in [0.5,1). */
110 
111 	/* m is odd.  Put x into [0.25,0.5) and increase m. */
112 	if (m & 1) {
113 	    x /= 2;
114 	    m += 1;
115 	}
116 	m = -(m >> 1);		/* Prepare for 2^(-m/2). */
117 
118 	/*
119 	 * Exhaustive testing of rsqrtf(x) = 1 / sqrtf(x) with x in
120 	 * [0x1p-127,0x1p126] shows the this approximation gives a
121 	 * 22- to 23-bit estimate of rsqrt(f).  This is equivalent to
122 	 * a max ulp of ~1.49.
123 	 */
124 	y = 1 / sqrtf(x);
125 	h = y / 2;
126 
127 	/*
128 	 * For values of x with representations of the forms 0x1.fffffcpN
129 	 * or 0x1.13e07pN with N an odd integer, the computed rsqrtf() is
130 	 * not correctly rounded in round-to-nearest without fiddling with
131 	 * the rounding mode and/or bits of x.
132 	 */
133 	GET_FLOAT_WORD(ix, y);
134 	if ((ix & 0x000fffff) == 1) {			/* 0x1.fffffcpN */
135 	    rnd = fegetround();
136 	    fesetround(FE_DOWNWARD);
137 	    _MUL(x, y, zh, zl);
138 	    _XMUL(zh, zl, h, 0, ph, pl);
139 	    fesetround(rnd);
140             _XADD(-ph, -pl, half, 0, rh, rl);
141             y = h + h * rh;
142 	} else if((ix & 0x00ffffff) == 0x00ae6055) {	/* 0x1.13e07pN */
143 	    GET_FLOAT_WORD(ix, x);
144 	    SET_FLOAT_WORD(x, ix - 1);
145 	    rnd = fegetround();
146 	    fesetround(FE_DOWNWARD);
147 	    _MUL(x, y, zh, zl);
148 	    _XMUL(zh, zl, h, 0, ph, pl);
149             _XADD(-ph, -pl, half, 0, rh, rl);
150             y = h + h * rh;
151 	    fesetround(rnd);
152 	} else {
153 	    _MUL(x, y, zh, zl);
154 	    _XMUL(zh, zl, h, 0, ph, pl);
155 	    _XADD(-ph, -pl, half, 0, rh, rl);
156 	    y = h * rh + h;
157 	}
158 
159 	ix = (uint32_t)(m + 128) << 23;
160 	SET_FLOAT_WORD(x, ix);
161 	return (y *= x);
162 }
163