1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "quad.h" 30 31 static const double C[] = { 32 0.0, 33 0.5, 34 1.0, 35 68719476736.0, 36 536870912.0, 37 48.0, 38 16.0, 39 1.52587890625000000000e-05, 40 2.86102294921875000000e-06, 41 5.96046447753906250000e-08, 42 3.72529029846191406250e-09, 43 1.70530256582424044609e-13, 44 7.10542735760100185871e-15, 45 8.67361737988403547206e-19, 46 2.16840434497100886801e-19, 47 1.27054942088145050860e-21, 48 1.21169035041947413311e-27, 49 9.62964972193617926528e-35, 50 4.70197740328915003187e-38 51 }; 52 53 #define zero C[0] 54 #define half C[1] 55 #define one C[2] 56 #define two36 C[3] 57 #define two29 C[4] 58 #define three2p4 C[5] 59 #define two4 C[6] 60 #define twom16 C[7] 61 #define three2m20 C[8] 62 #define twom24 C[9] 63 #define twom28 C[10] 64 #define three2m44 C[11] 65 #define twom47 C[12] 66 #define twom60 C[13] 67 #define twom62 C[14] 68 #define three2m71 C[15] 69 #define three2m91 C[16] 70 #define twom113 C[17] 71 #define twom124 C[18] 72 73 static const unsigned 74 fsr_re = 0x00000000u, 75 fsr_rn = 0xc0000000u; 76 77 #ifdef __sparcv9 78 79 /* 80 * _Qp_sqrt(pz, x) sets *pz = sqrt(*x). 81 */ 82 void 83 _Qp_sqrt(union longdouble *pz, const union longdouble *x) 84 85 #else 86 87 /* 88 * _Q_sqrt(x) returns sqrt(*x). 89 */ 90 union longdouble 91 _Q_sqrt(const union longdouble *x) 92 93 #endif /* __sparcv9 */ 94 95 { 96 union longdouble z; 97 union xdouble u; 98 double c, d, rr, r[2], tt[3], xx[4], zz[5]; 99 unsigned int xm, fsr, lx, wx[3]; 100 unsigned int msw, frac2, frac3, frac4, rm; 101 int ex, ez; 102 103 if (QUAD_ISZERO(*x)) { 104 Z = *x; 105 QUAD_RETURN(Z); 106 } 107 108 xm = x->l.msw; 109 110 __quad_getfsrp(&fsr); 111 112 /* handle nan and inf cases */ 113 if ((xm & 0x7fffffff) >= 0x7fff0000) { 114 if ((x->l.msw & 0xffff) | x->l.frac2 | x->l.frac3 | 115 x->l.frac4) { 116 if (!(x->l.msw & 0x8000)) { 117 /* snan, signal invalid */ 118 if (fsr & FSR_NVM) { 119 __quad_fsqrtq(x, &Z); 120 } else { 121 Z = *x; 122 Z.l.msw |= 0x8000; 123 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 124 FSR_NVC; 125 __quad_setfsrp(&fsr); 126 } 127 QUAD_RETURN(Z); 128 } 129 Z = *x; 130 QUAD_RETURN(Z); 131 } 132 if (x->l.msw & 0x80000000) { 133 /* sqrt(-inf), signal invalid */ 134 if (fsr & FSR_NVM) { 135 __quad_fsqrtq(x, &Z); 136 } else { 137 Z.l.msw = 0x7fffffff; 138 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff; 139 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC; 140 __quad_setfsrp(&fsr); 141 } 142 QUAD_RETURN(Z); 143 } 144 /* sqrt(inf), return inf */ 145 Z = *x; 146 QUAD_RETURN(Z); 147 } 148 149 /* handle negative numbers */ 150 if (xm & 0x80000000) { 151 if (fsr & FSR_NVM) { 152 __quad_fsqrtq(x, &Z); 153 } else { 154 Z.l.msw = 0x7fffffff; 155 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff; 156 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC; 157 __quad_setfsrp(&fsr); 158 } 159 QUAD_RETURN(Z); 160 } 161 162 /* now x is finite, positive */ 163 __quad_setfsrp((unsigned *)&fsr_re); 164 165 /* get the normalized significand and exponent */ 166 ex = (int)(xm >> 16); 167 lx = xm & 0xffff; 168 if (ex) { 169 lx |= 0x10000; 170 wx[0] = x->l.frac2; 171 wx[1] = x->l.frac3; 172 wx[2] = x->l.frac4; 173 } else { 174 if (lx | (x->l.frac2 & 0xfffe0000)) { 175 wx[0] = x->l.frac2; 176 wx[1] = x->l.frac3; 177 wx[2] = x->l.frac4; 178 ex = 1; 179 } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) { 180 lx = x->l.frac2; 181 wx[0] = x->l.frac3; 182 wx[1] = x->l.frac4; 183 wx[2] = 0; 184 ex = -31; 185 } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) { 186 lx = x->l.frac3; 187 wx[0] = x->l.frac4; 188 wx[1] = wx[2] = 0; 189 ex = -63; 190 } else { 191 lx = x->l.frac4; 192 wx[0] = wx[1] = wx[2] = 0; 193 ex = -95; 194 } 195 while ((lx & 0x10000) == 0) { 196 lx = (lx << 1) | (wx[0] >> 31); 197 wx[0] = (wx[0] << 1) | (wx[1] >> 31); 198 wx[1] = (wx[1] << 1) | (wx[2] >> 31); 199 wx[2] <<= 1; 200 ex--; 201 } 202 } 203 ez = ex - 0x3fff; 204 if (ez & 1) { 205 /* make exponent even */ 206 lx = (lx << 1) | (wx[0] >> 31); 207 wx[0] = (wx[0] << 1) | (wx[1] >> 31); 208 wx[1] = (wx[1] << 1) | (wx[2] >> 31); 209 wx[2] <<= 1; 210 ez--; 211 } 212 213 /* extract the significands into doubles */ 214 c = twom16; 215 xx[0] = (double)((int)lx) * c; 216 217 c *= twom24; 218 xx[0] += (double)((int)(wx[0] >> 8)) * c; 219 220 c *= twom24; 221 xx[1] = (double)((int)(((wx[0] << 16) | (wx[1] >> 16)) & 222 0xffffff)) * c; 223 224 c *= twom24; 225 xx[2] = (double)((int)(((wx[1] << 8) | (wx[2] >> 24)) & 226 0xffffff)) * c; 227 228 c *= twom24; 229 xx[3] = (double)((int)(wx[2] & 0xffffff)) * c; 230 231 /* approximate the divisor for the Newton iteration */ 232 c = xx[0] + xx[1]; 233 c = __quad_dp_sqrt(&c); 234 rr = half / c; 235 236 /* compute the first five "digits" of the square root */ 237 zz[0] = (c + two29) - two29; 238 tt[0] = zz[0] + zz[0]; 239 r[0] = (xx[0] - zz[0] * zz[0]) + xx[1]; 240 241 zz[1] = (rr * (r[0] + xx[2]) + three2p4) - three2p4; 242 tt[1] = zz[1] + zz[1]; 243 r[0] -= tt[0] * zz[1]; 244 r[1] = xx[2] - zz[1] * zz[1]; 245 c = (r[1] + three2m20) - three2m20; 246 r[0] += c; 247 r[1] = (r[1] - c) + xx[3]; 248 249 zz[2] = (rr * (r[0] + r[1]) + three2m20) - three2m20; 250 tt[2] = zz[2] + zz[2]; 251 r[0] -= tt[0] * zz[2]; 252 r[1] -= tt[1] * zz[2]; 253 c = (r[1] + three2m44) - three2m44; 254 r[0] += c; 255 r[1] = (r[1] - c) - zz[2] * zz[2]; 256 257 zz[3] = (rr * (r[0] + r[1]) + three2m44) - three2m44; 258 r[0] = ((r[0] - tt[0] * zz[3]) + r[1]) - tt[1] * zz[3]; 259 r[1] = -tt[2] * zz[3]; 260 c = (r[1] + three2m91) - three2m91; 261 r[0] += c; 262 r[1] = (r[1] - c) - zz[3] * zz[3]; 263 264 zz[4] = (rr * (r[0] + r[1]) + three2m71) - three2m71; 265 266 /* reduce to three doubles, making sure zz[1] is positive */ 267 zz[0] += zz[1] - twom47; 268 zz[1] = twom47 + zz[2] + zz[3]; 269 zz[2] = zz[4]; 270 271 /* if the third term might lie on a rounding boundary, perturb it */ 272 if (zz[2] == (twom62 + zz[2]) - twom62) { 273 /* here we just need to get the sign of the remainder */ 274 c = (((((r[0] - tt[0] * zz[4]) - tt[1] * zz[4]) + r[1]) 275 - tt[2] * zz[4]) - (zz[3] + zz[3]) * zz[4]) - zz[4] * zz[4]; 276 if (c < zero) 277 zz[2] -= twom124; 278 else if (c > zero) 279 zz[2] += twom124; 280 } 281 282 /* 283 * propagate carries/borrows, using round-to-negative-infinity mode 284 * to make all terms nonnegative (note that we can't encounter a 285 * borrow so large that the roundoff is unrepresentable because 286 * we took care to make zz[1] positive above) 287 */ 288 __quad_setfsrp(&fsr_rn); 289 c = zz[1] + zz[2]; 290 zz[2] += (zz[1] - c); 291 zz[1] = c; 292 c = zz[0] + zz[1]; 293 zz[1] += (zz[0] - c); 294 zz[0] = c; 295 296 /* adjust exponent and strip off integer bit */ 297 ez = (ez >> 1) + 0x3fff; 298 zz[0] -= one; 299 300 /* the first 48 bits of fraction come from zz[0] */ 301 u.d = d = two36 + zz[0]; 302 msw = u.l.lo; 303 zz[0] -= (d - two36); 304 305 u.d = d = two4 + zz[0]; 306 frac2 = u.l.lo; 307 zz[0] -= (d - two4); 308 309 /* the next 32 come from zz[0] and zz[1] */ 310 u.d = d = twom28 + (zz[0] + zz[1]); 311 frac3 = u.l.lo; 312 zz[0] -= (d - twom28); 313 314 /* condense the remaining fraction; errors here won't matter */ 315 c = zz[0] + zz[1]; 316 zz[1] = ((zz[0] - c) + zz[1]) + zz[2]; 317 zz[0] = c; 318 319 /* get the last word of fraction */ 320 u.d = d = twom60 + (zz[0] + zz[1]); 321 frac4 = u.l.lo; 322 zz[0] -= (d - twom60); 323 324 /* keep track of what's left for rounding; note that the error */ 325 /* in computing c will be non-negative due to rounding mode */ 326 c = zz[0] + zz[1]; 327 328 /* get the rounding mode */ 329 rm = fsr >> 30; 330 331 /* round and raise exceptions */ 332 fsr &= ~FSR_CEXC; 333 if (c != zero) { 334 fsr |= FSR_NXC; 335 336 /* decide whether to round the fraction up */ 337 if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 || 338 (c == twom113 && ((frac4 & 1) || (c - zz[0] != zz[1])))))) { 339 /* round up and renormalize if necessary */ 340 if (++frac4 == 0) 341 if (++frac3 == 0) 342 if (++frac2 == 0) 343 if (++msw == 0x10000) { 344 msw = 0; 345 ez++; 346 } 347 } 348 } 349 350 /* stow the result */ 351 z.l.msw = (ez << 16) | msw; 352 z.l.frac2 = frac2; 353 z.l.frac3 = frac3; 354 z.l.frac4 = frac4; 355 356 if ((fsr & FSR_CEXC) & (fsr >> 23)) { 357 __quad_setfsrp(&fsr); 358 __quad_fsqrtq(x, &Z); 359 } else { 360 Z = z; 361 fsr |= (fsr & 0x1f) << 5; 362 __quad_setfsrp(&fsr); 363 } 364 QUAD_RETURN(Z); 365 } 366