xref: /illumos-gate/usr/src/lib/libc/sparc/fp/_Q_sqrt.c (revision 45744051679350ee063cdc366b66bee5223a11ea)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include "quad.h"
28 
29 static const double C[] = {
30 	0.0,
31 	0.5,
32 	1.0,
33 	68719476736.0,
34 	536870912.0,
35 	48.0,
36 	16.0,
37 	1.52587890625000000000e-05,
38 	2.86102294921875000000e-06,
39 	5.96046447753906250000e-08,
40 	3.72529029846191406250e-09,
41 	1.70530256582424044609e-13,
42 	7.10542735760100185871e-15,
43 	8.67361737988403547206e-19,
44 	2.16840434497100886801e-19,
45 	1.27054942088145050860e-21,
46 	1.21169035041947413311e-27,
47 	9.62964972193617926528e-35,
48 	4.70197740328915003187e-38
49 };
50 
51 #define	zero		C[0]
52 #define	half		C[1]
53 #define	one		C[2]
54 #define	two36		C[3]
55 #define	two29		C[4]
56 #define	three2p4	C[5]
57 #define	two4		C[6]
58 #define	twom16		C[7]
59 #define	three2m20	C[8]
60 #define	twom24		C[9]
61 #define	twom28		C[10]
62 #define	three2m44	C[11]
63 #define	twom47		C[12]
64 #define	twom60		C[13]
65 #define	twom62		C[14]
66 #define	three2m71	C[15]
67 #define	three2m91	C[16]
68 #define	twom113		C[17]
69 #define	twom124		C[18]
70 
71 static const unsigned
72 	fsr_re = 0x00000000u,
73 	fsr_rn = 0xc0000000u;
74 
75 #ifdef __sparcv9
76 
77 /*
78  * _Qp_sqrt(pz, x) sets *pz = sqrt(*x).
79  */
80 void
81 _Qp_sqrt(union longdouble *pz, const union longdouble *x)
82 
83 #else
84 
85 /*
86  * _Q_sqrt(x) returns sqrt(*x).
87  */
88 union longdouble
89 _Q_sqrt(const union longdouble *x)
90 
91 #endif	/* __sparcv9 */
92 
93 {
94 	union longdouble	z;
95 	union xdouble		u;
96 	double			c, d, rr, r[2], tt[3], xx[4], zz[5];
97 	unsigned int		xm, fsr, lx, wx[3];
98 	unsigned int		msw, frac2, frac3, frac4, rm;
99 	int			ex, ez;
100 
101 	if (QUAD_ISZERO(*x)) {
102 		Z = *x;
103 		QUAD_RETURN(Z);
104 	}
105 
106 	xm = x->l.msw;
107 
108 	__quad_getfsrp(&fsr);
109 
110 	/* handle nan and inf cases */
111 	if ((xm & 0x7fffffff) >= 0x7fff0000) {
112 		if ((x->l.msw & 0xffff) | x->l.frac2 | x->l.frac3 |
113 		    x->l.frac4) {
114 			if (!(x->l.msw & 0x8000)) {
115 				/* snan, signal invalid */
116 				if (fsr & FSR_NVM) {
117 					__quad_fsqrtq(x, &Z);
118 				} else {
119 					Z = *x;
120 					Z.l.msw |= 0x8000;
121 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
122 					    FSR_NVC;
123 					__quad_setfsrp(&fsr);
124 				}
125 				QUAD_RETURN(Z);
126 			}
127 			Z = *x;
128 			QUAD_RETURN(Z);
129 		}
130 		if (x->l.msw & 0x80000000) {
131 			/* sqrt(-inf), signal invalid */
132 			if (fsr & FSR_NVM) {
133 				__quad_fsqrtq(x, &Z);
134 			} else {
135 				Z.l.msw = 0x7fffffff;
136 				Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
137 				fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
138 				__quad_setfsrp(&fsr);
139 			}
140 			QUAD_RETURN(Z);
141 		}
142 		/* sqrt(inf), return inf */
143 		Z = *x;
144 		QUAD_RETURN(Z);
145 	}
146 
147 	/* handle negative numbers */
148 	if (xm & 0x80000000) {
149 		if (fsr & FSR_NVM) {
150 			__quad_fsqrtq(x, &Z);
151 		} else {
152 			Z.l.msw = 0x7fffffff;
153 			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
154 			fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
155 			__quad_setfsrp(&fsr);
156 		}
157 		QUAD_RETURN(Z);
158 	}
159 
160 	/* now x is finite, positive */
161 	__quad_setfsrp((unsigned *)&fsr_re);
162 
163 	/* get the normalized significand and exponent */
164 	ex = (int)(xm >> 16);
165 	lx = xm & 0xffff;
166 	if (ex) {
167 		lx |= 0x10000;
168 		wx[0] = x->l.frac2;
169 		wx[1] = x->l.frac3;
170 		wx[2] = x->l.frac4;
171 	} else {
172 		if (lx | (x->l.frac2 & 0xfffe0000)) {
173 			wx[0] = x->l.frac2;
174 			wx[1] = x->l.frac3;
175 			wx[2] = x->l.frac4;
176 			ex = 1;
177 		} else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
178 			lx = x->l.frac2;
179 			wx[0] = x->l.frac3;
180 			wx[1] = x->l.frac4;
181 			wx[2] = 0;
182 			ex = -31;
183 		} else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
184 			lx = x->l.frac3;
185 			wx[0] = x->l.frac4;
186 			wx[1] = wx[2] = 0;
187 			ex = -63;
188 		} else {
189 			lx = x->l.frac4;
190 			wx[0] = wx[1] = wx[2] = 0;
191 			ex = -95;
192 		}
193 		while ((lx & 0x10000) == 0) {
194 			lx = (lx << 1) | (wx[0] >> 31);
195 			wx[0] = (wx[0] << 1) | (wx[1] >> 31);
196 			wx[1] = (wx[1] << 1) | (wx[2] >> 31);
197 			wx[2] <<= 1;
198 			ex--;
199 		}
200 	}
201 	ez = ex - 0x3fff;
202 	if (ez & 1) {
203 		/* make exponent even */
204 		lx = (lx << 1) | (wx[0] >> 31);
205 		wx[0] = (wx[0] << 1) | (wx[1] >> 31);
206 		wx[1] = (wx[1] << 1) | (wx[2] >> 31);
207 		wx[2] <<= 1;
208 		ez--;
209 	}
210 
211 	/* extract the significands into doubles */
212 	c = twom16;
213 	xx[0] = (double)((int)lx) * c;
214 
215 	c *= twom24;
216 	xx[0] += (double)((int)(wx[0] >> 8)) * c;
217 
218 	c *= twom24;
219 	xx[1] = (double)((int)(((wx[0] << 16) | (wx[1] >> 16)) &
220 	    0xffffff)) * c;
221 
222 	c *= twom24;
223 	xx[2] = (double)((int)(((wx[1] << 8) | (wx[2] >> 24)) &
224 	    0xffffff)) * c;
225 
226 	c *= twom24;
227 	xx[3] = (double)((int)(wx[2] & 0xffffff)) * c;
228 
229 	/* approximate the divisor for the Newton iteration */
230 	c = xx[0] + xx[1];
231 	c = __quad_dp_sqrt(&c);
232 	rr = half / c;
233 
234 	/* compute the first five "digits" of the square root */
235 	zz[0] = (c + two29) - two29;
236 	tt[0] = zz[0] + zz[0];
237 	r[0] = (xx[0] - zz[0] * zz[0]) + xx[1];
238 
239 	zz[1] = (rr * (r[0] + xx[2]) + three2p4) - three2p4;
240 	tt[1] = zz[1] + zz[1];
241 	r[0] -= tt[0] * zz[1];
242 	r[1] = xx[2] - zz[1] * zz[1];
243 	c = (r[1] + three2m20) - three2m20;
244 	r[0] += c;
245 	r[1] = (r[1] - c) + xx[3];
246 
247 	zz[2] = (rr * (r[0] + r[1]) + three2m20) - three2m20;
248 	tt[2] = zz[2] + zz[2];
249 	r[0] -= tt[0] * zz[2];
250 	r[1] -= tt[1] * zz[2];
251 	c = (r[1] + three2m44) - three2m44;
252 	r[0] += c;
253 	r[1] = (r[1] - c) - zz[2] * zz[2];
254 
255 	zz[3] = (rr * (r[0] + r[1]) + three2m44) - three2m44;
256 	r[0] = ((r[0] - tt[0] * zz[3]) + r[1]) - tt[1] * zz[3];
257 	r[1] = -tt[2] * zz[3];
258 	c = (r[1] + three2m91) - three2m91;
259 	r[0] += c;
260 	r[1] = (r[1] - c) - zz[3] * zz[3];
261 
262 	zz[4] = (rr * (r[0] + r[1]) + three2m71) - three2m71;
263 
264 	/* reduce to three doubles, making sure zz[1] is positive */
265 	zz[0] += zz[1] - twom47;
266 	zz[1] = twom47 + zz[2] + zz[3];
267 	zz[2] = zz[4];
268 
269 	/* if the third term might lie on a rounding boundary, perturb it */
270 	if (zz[2] == (twom62 + zz[2]) - twom62) {
271 		/* here we just need to get the sign of the remainder */
272 		c = (((((r[0] - tt[0] * zz[4]) - tt[1] * zz[4]) + r[1])
273 		    - tt[2] * zz[4]) - (zz[3] + zz[3]) * zz[4]) - zz[4] * zz[4];
274 		if (c < zero)
275 			zz[2] -= twom124;
276 		else if (c > zero)
277 			zz[2] += twom124;
278 	}
279 
280 	/*
281 	 * propagate carries/borrows, using round-to-negative-infinity mode
282 	 * to make all terms nonnegative (note that we can't encounter a
283 	 * borrow so large that the roundoff is unrepresentable because
284 	 * we took care to make zz[1] positive above)
285 	 */
286 	__quad_setfsrp(&fsr_rn);
287 	c = zz[1] + zz[2];
288 	zz[2] += (zz[1] - c);
289 	zz[1] = c;
290 	c = zz[0] + zz[1];
291 	zz[1] += (zz[0] - c);
292 	zz[0] = c;
293 
294 	/* adjust exponent and strip off integer bit */
295 	ez = (ez >> 1) + 0x3fff;
296 	zz[0] -= one;
297 
298 	/* the first 48 bits of fraction come from zz[0] */
299 	u.d = d = two36 + zz[0];
300 	msw = u.l.lo;
301 	zz[0] -= (d - two36);
302 
303 	u.d = d = two4 + zz[0];
304 	frac2 = u.l.lo;
305 	zz[0] -= (d - two4);
306 
307 	/* the next 32 come from zz[0] and zz[1] */
308 	u.d = d = twom28 + (zz[0] + zz[1]);
309 	frac3 = u.l.lo;
310 	zz[0] -= (d - twom28);
311 
312 	/* condense the remaining fraction; errors here won't matter */
313 	c = zz[0] + zz[1];
314 	zz[1] = ((zz[0] - c) + zz[1]) + zz[2];
315 	zz[0] = c;
316 
317 	/* get the last word of fraction */
318 	u.d = d = twom60 + (zz[0] + zz[1]);
319 	frac4 = u.l.lo;
320 	zz[0] -= (d - twom60);
321 
322 	/* keep track of what's left for rounding; note that the error */
323 	/* in computing c will be non-negative due to rounding mode */
324 	c = zz[0] + zz[1];
325 
326 	/* get the rounding mode */
327 	rm = fsr >> 30;
328 
329 	/* round and raise exceptions */
330 	fsr &= ~FSR_CEXC;
331 	if (c != zero) {
332 		fsr |= FSR_NXC;
333 
334 		/* decide whether to round the fraction up */
335 		if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 ||
336 		    (c == twom113 && ((frac4 & 1) || (c - zz[0] != zz[1])))))) {
337 			/* round up and renormalize if necessary */
338 			if (++frac4 == 0)
339 				if (++frac3 == 0)
340 					if (++frac2 == 0)
341 						if (++msw == 0x10000) {
342 							msw = 0;
343 							ez++;
344 						}
345 		}
346 	}
347 
348 	/* stow the result */
349 	z.l.msw = (ez << 16) | msw;
350 	z.l.frac2 = frac2;
351 	z.l.frac3 = frac3;
352 	z.l.frac4 = frac4;
353 
354 	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
355 		__quad_setfsrp(&fsr);
356 		__quad_fsqrtq(x, &Z);
357 	} else {
358 		Z = z;
359 		fsr |= (fsr & 0x1f) << 5;
360 		__quad_setfsrp(&fsr);
361 	}
362 	QUAD_RETURN(Z);
363 }
364