1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include "quad.h"
30
31 static const double C[] = {
32 0.0,
33 0.5,
34 1.0,
35 68719476736.0,
36 536870912.0,
37 48.0,
38 16.0,
39 1.52587890625000000000e-05,
40 2.86102294921875000000e-06,
41 5.96046447753906250000e-08,
42 3.72529029846191406250e-09,
43 1.70530256582424044609e-13,
44 7.10542735760100185871e-15,
45 8.67361737988403547206e-19,
46 2.16840434497100886801e-19,
47 1.27054942088145050860e-21,
48 1.21169035041947413311e-27,
49 9.62964972193617926528e-35,
50 4.70197740328915003187e-38
51 };
52
53 #define zero C[0]
54 #define half C[1]
55 #define one C[2]
56 #define two36 C[3]
57 #define two29 C[4]
58 #define three2p4 C[5]
59 #define two4 C[6]
60 #define twom16 C[7]
61 #define three2m20 C[8]
62 #define twom24 C[9]
63 #define twom28 C[10]
64 #define three2m44 C[11]
65 #define twom47 C[12]
66 #define twom60 C[13]
67 #define twom62 C[14]
68 #define three2m71 C[15]
69 #define three2m91 C[16]
70 #define twom113 C[17]
71 #define twom124 C[18]
72
73 static const unsigned
74 fsr_re = 0x00000000u,
75 fsr_rn = 0xc0000000u;
76
77 #ifdef __sparcv9
78
79 /*
80 * _Qp_sqrt(pz, x) sets *pz = sqrt(*x).
81 */
82 void
_Qp_sqrt(union longdouble * pz,const union longdouble * x)83 _Qp_sqrt(union longdouble *pz, const union longdouble *x)
84
85 #else
86
87 /*
88 * _Q_sqrt(x) returns sqrt(*x).
89 */
90 union longdouble
91 _Q_sqrt(const union longdouble *x)
92
93 #endif /* __sparcv9 */
94
95 {
96 union longdouble z;
97 union xdouble u;
98 double c, d, rr, r[2], tt[3], xx[4], zz[5];
99 unsigned int xm, fsr, lx, wx[3];
100 unsigned int msw, frac2, frac3, frac4, rm;
101 int ex, ez;
102
103 if (QUAD_ISZERO(*x)) {
104 Z = *x;
105 QUAD_RETURN(Z);
106 }
107
108 xm = x->l.msw;
109
110 __quad_getfsrp(&fsr);
111
112 /* handle nan and inf cases */
113 if ((xm & 0x7fffffff) >= 0x7fff0000) {
114 if ((x->l.msw & 0xffff) | x->l.frac2 | x->l.frac3 |
115 x->l.frac4) {
116 if (!(x->l.msw & 0x8000)) {
117 /* snan, signal invalid */
118 if (fsr & FSR_NVM) {
119 __quad_fsqrtq(x, &Z);
120 } else {
121 Z = *x;
122 Z.l.msw |= 0x8000;
123 fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
124 FSR_NVC;
125 __quad_setfsrp(&fsr);
126 }
127 QUAD_RETURN(Z);
128 }
129 Z = *x;
130 QUAD_RETURN(Z);
131 }
132 if (x->l.msw & 0x80000000) {
133 /* sqrt(-inf), signal invalid */
134 if (fsr & FSR_NVM) {
135 __quad_fsqrtq(x, &Z);
136 } else {
137 Z.l.msw = 0x7fffffff;
138 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
139 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
140 __quad_setfsrp(&fsr);
141 }
142 QUAD_RETURN(Z);
143 }
144 /* sqrt(inf), return inf */
145 Z = *x;
146 QUAD_RETURN(Z);
147 }
148
149 /* handle negative numbers */
150 if (xm & 0x80000000) {
151 if (fsr & FSR_NVM) {
152 __quad_fsqrtq(x, &Z);
153 } else {
154 Z.l.msw = 0x7fffffff;
155 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
156 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
157 __quad_setfsrp(&fsr);
158 }
159 QUAD_RETURN(Z);
160 }
161
162 /* now x is finite, positive */
163 __quad_setfsrp((unsigned *)&fsr_re);
164
165 /* get the normalized significand and exponent */
166 ex = (int)(xm >> 16);
167 lx = xm & 0xffff;
168 if (ex) {
169 lx |= 0x10000;
170 wx[0] = x->l.frac2;
171 wx[1] = x->l.frac3;
172 wx[2] = x->l.frac4;
173 } else {
174 if (lx | (x->l.frac2 & 0xfffe0000)) {
175 wx[0] = x->l.frac2;
176 wx[1] = x->l.frac3;
177 wx[2] = x->l.frac4;
178 ex = 1;
179 } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
180 lx = x->l.frac2;
181 wx[0] = x->l.frac3;
182 wx[1] = x->l.frac4;
183 wx[2] = 0;
184 ex = -31;
185 } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
186 lx = x->l.frac3;
187 wx[0] = x->l.frac4;
188 wx[1] = wx[2] = 0;
189 ex = -63;
190 } else {
191 lx = x->l.frac4;
192 wx[0] = wx[1] = wx[2] = 0;
193 ex = -95;
194 }
195 while ((lx & 0x10000) == 0) {
196 lx = (lx << 1) | (wx[0] >> 31);
197 wx[0] = (wx[0] << 1) | (wx[1] >> 31);
198 wx[1] = (wx[1] << 1) | (wx[2] >> 31);
199 wx[2] <<= 1;
200 ex--;
201 }
202 }
203 ez = ex - 0x3fff;
204 if (ez & 1) {
205 /* make exponent even */
206 lx = (lx << 1) | (wx[0] >> 31);
207 wx[0] = (wx[0] << 1) | (wx[1] >> 31);
208 wx[1] = (wx[1] << 1) | (wx[2] >> 31);
209 wx[2] <<= 1;
210 ez--;
211 }
212
213 /* extract the significands into doubles */
214 c = twom16;
215 xx[0] = (double)((int)lx) * c;
216
217 c *= twom24;
218 xx[0] += (double)((int)(wx[0] >> 8)) * c;
219
220 c *= twom24;
221 xx[1] = (double)((int)(((wx[0] << 16) | (wx[1] >> 16)) &
222 0xffffff)) * c;
223
224 c *= twom24;
225 xx[2] = (double)((int)(((wx[1] << 8) | (wx[2] >> 24)) &
226 0xffffff)) * c;
227
228 c *= twom24;
229 xx[3] = (double)((int)(wx[2] & 0xffffff)) * c;
230
231 /* approximate the divisor for the Newton iteration */
232 c = xx[0] + xx[1];
233 c = __quad_dp_sqrt(&c);
234 rr = half / c;
235
236 /* compute the first five "digits" of the square root */
237 zz[0] = (c + two29) - two29;
238 tt[0] = zz[0] + zz[0];
239 r[0] = (xx[0] - zz[0] * zz[0]) + xx[1];
240
241 zz[1] = (rr * (r[0] + xx[2]) + three2p4) - three2p4;
242 tt[1] = zz[1] + zz[1];
243 r[0] -= tt[0] * zz[1];
244 r[1] = xx[2] - zz[1] * zz[1];
245 c = (r[1] + three2m20) - three2m20;
246 r[0] += c;
247 r[1] = (r[1] - c) + xx[3];
248
249 zz[2] = (rr * (r[0] + r[1]) + three2m20) - three2m20;
250 tt[2] = zz[2] + zz[2];
251 r[0] -= tt[0] * zz[2];
252 r[1] -= tt[1] * zz[2];
253 c = (r[1] + three2m44) - three2m44;
254 r[0] += c;
255 r[1] = (r[1] - c) - zz[2] * zz[2];
256
257 zz[3] = (rr * (r[0] + r[1]) + three2m44) - three2m44;
258 r[0] = ((r[0] - tt[0] * zz[3]) + r[1]) - tt[1] * zz[3];
259 r[1] = -tt[2] * zz[3];
260 c = (r[1] + three2m91) - three2m91;
261 r[0] += c;
262 r[1] = (r[1] - c) - zz[3] * zz[3];
263
264 zz[4] = (rr * (r[0] + r[1]) + three2m71) - three2m71;
265
266 /* reduce to three doubles, making sure zz[1] is positive */
267 zz[0] += zz[1] - twom47;
268 zz[1] = twom47 + zz[2] + zz[3];
269 zz[2] = zz[4];
270
271 /* if the third term might lie on a rounding boundary, perturb it */
272 if (zz[2] == (twom62 + zz[2]) - twom62) {
273 /* here we just need to get the sign of the remainder */
274 c = (((((r[0] - tt[0] * zz[4]) - tt[1] * zz[4]) + r[1])
275 - tt[2] * zz[4]) - (zz[3] + zz[3]) * zz[4]) - zz[4] * zz[4];
276 if (c < zero)
277 zz[2] -= twom124;
278 else if (c > zero)
279 zz[2] += twom124;
280 }
281
282 /*
283 * propagate carries/borrows, using round-to-negative-infinity mode
284 * to make all terms nonnegative (note that we can't encounter a
285 * borrow so large that the roundoff is unrepresentable because
286 * we took care to make zz[1] positive above)
287 */
288 __quad_setfsrp(&fsr_rn);
289 c = zz[1] + zz[2];
290 zz[2] += (zz[1] - c);
291 zz[1] = c;
292 c = zz[0] + zz[1];
293 zz[1] += (zz[0] - c);
294 zz[0] = c;
295
296 /* adjust exponent and strip off integer bit */
297 ez = (ez >> 1) + 0x3fff;
298 zz[0] -= one;
299
300 /* the first 48 bits of fraction come from zz[0] */
301 u.d = d = two36 + zz[0];
302 msw = u.l.lo;
303 zz[0] -= (d - two36);
304
305 u.d = d = two4 + zz[0];
306 frac2 = u.l.lo;
307 zz[0] -= (d - two4);
308
309 /* the next 32 come from zz[0] and zz[1] */
310 u.d = d = twom28 + (zz[0] + zz[1]);
311 frac3 = u.l.lo;
312 zz[0] -= (d - twom28);
313
314 /* condense the remaining fraction; errors here won't matter */
315 c = zz[0] + zz[1];
316 zz[1] = ((zz[0] - c) + zz[1]) + zz[2];
317 zz[0] = c;
318
319 /* get the last word of fraction */
320 u.d = d = twom60 + (zz[0] + zz[1]);
321 frac4 = u.l.lo;
322 zz[0] -= (d - twom60);
323
324 /* keep track of what's left for rounding; note that the error */
325 /* in computing c will be non-negative due to rounding mode */
326 c = zz[0] + zz[1];
327
328 /* get the rounding mode */
329 rm = fsr >> 30;
330
331 /* round and raise exceptions */
332 fsr &= ~FSR_CEXC;
333 if (c != zero) {
334 fsr |= FSR_NXC;
335
336 /* decide whether to round the fraction up */
337 if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 ||
338 (c == twom113 && ((frac4 & 1) || (c - zz[0] != zz[1])))))) {
339 /* round up and renormalize if necessary */
340 if (++frac4 == 0)
341 if (++frac3 == 0)
342 if (++frac2 == 0)
343 if (++msw == 0x10000) {
344 msw = 0;
345 ez++;
346 }
347 }
348 }
349
350 /* stow the result */
351 z.l.msw = (ez << 16) | msw;
352 z.l.frac2 = frac2;
353 z.l.frac3 = frac3;
354 z.l.frac4 = frac4;
355
356 if ((fsr & FSR_CEXC) & (fsr >> 23)) {
357 __quad_setfsrp(&fsr);
358 __quad_fsqrtq(x, &Z);
359 } else {
360 Z = z;
361 fsr |= (fsr & 0x1f) << 5;
362 __quad_setfsrp(&fsr);
363 }
364 QUAD_RETURN(Z);
365 }
366