1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "quad.h" 30 31 static const double C[] = { 32 0.0, 33 1.0, 34 68719476736.0, 35 402653184.0, 36 24.0, 37 16.0, 38 3.66210937500000000000e-04, 39 1.52587890625000000000e-05, 40 1.43051147460937500000e-06, 41 5.96046447753906250000e-08, 42 3.72529029846191406250e-09, 43 2.18278728425502777100e-11, 44 8.52651282912120223045e-14, 45 3.55271367880050092936e-15, 46 1.30104260698260532081e-18, 47 8.67361737988403547206e-19, 48 2.16840434497100886801e-19, 49 3.17637355220362627151e-22, 50 7.75481824268463445192e-26, 51 4.62223186652936604733e-33, 52 9.62964972193617926528e-35, 53 4.70197740328915003187e-38, 54 2.75506488473973634680e-40, 55 }; 56 57 #define zero C[0] 58 #define one C[1] 59 #define two36 C[2] 60 #define three2p27 C[3] 61 #define three2p3 C[4] 62 #define two4 C[5] 63 #define three2m13 C[6] 64 #define twom16 C[7] 65 #define three2m21 C[8] 66 #define twom24 C[9] 67 #define twom28 C[10] 68 #define three2m37 C[11] 69 #define three2m45 C[12] 70 #define twom48 C[13] 71 #define three2m61 C[14] 72 #define twom60 C[15] 73 #define twom62 C[16] 74 #define three2m73 C[17] 75 #define three2m85 C[18] 76 #define three2m109 C[19] 77 #define twom113 C[20] 78 #define twom124 C[21] 79 #define three2m133 C[22] 80 81 static const unsigned int 82 fsr_re = 0x00000000u, 83 fsr_rn = 0xc0000000u; 84 85 #ifdef __sparcv9 86 87 /* 88 * _Qp_div(pz, x, y) sets *pz = *x / *y. 89 */ 90 void 91 _Qp_div(union longdouble *pz, const union longdouble *x, 92 const union longdouble *y) 93 94 #else 95 96 /* 97 * _Q_div(x, y) returns *x / *y. 98 */ 99 union longdouble 100 _Q_div(const union longdouble *x, const union longdouble *y) 101 102 #endif /* __sparcv9 */ 103 104 { 105 union longdouble z; 106 union xdouble u; 107 double c, d, ry, xx[4], yy[5], zz[5]; 108 unsigned int xm, ym, fsr, lx, ly, wx[3], wy[3]; 109 unsigned int msw, frac2, frac3, frac4, rm; 110 int ibit, ex, ey, ez, sign; 111 112 xm = x->l.msw & 0x7fffffff; 113 ym = y->l.msw & 0x7fffffff; 114 sign = (x->l.msw ^ y->l.msw) & ~0x7fffffff; 115 116 __quad_getfsrp(&fsr); 117 118 /* handle nan and inf cases */ 119 if (xm >= 0x7fff0000 || ym >= 0x7fff0000) { 120 /* handle nan cases according to V9 app. B */ 121 if (QUAD_ISNAN(*y)) { 122 if (!(y->l.msw & 0x8000)) { 123 /* snan, signal invalid */ 124 if (fsr & FSR_NVM) { 125 __quad_fdivq(x, y, &Z); 126 } else { 127 Z = *y; 128 Z.l.msw |= 0x8000; 129 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 130 FSR_NVC; 131 __quad_setfsrp(&fsr); 132 } 133 QUAD_RETURN(Z); 134 } else if (QUAD_ISNAN(*x) && !(x->l.msw & 0x8000)) { 135 /* snan, signal invalid */ 136 if (fsr & FSR_NVM) { 137 __quad_fdivq(x, y, &Z); 138 } else { 139 Z = *x; 140 Z.l.msw |= 0x8000; 141 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 142 FSR_NVC; 143 __quad_setfsrp(&fsr); 144 } 145 QUAD_RETURN(Z); 146 } 147 Z = *y; 148 QUAD_RETURN(Z); 149 } 150 if (QUAD_ISNAN(*x)) { 151 if (!(x->l.msw & 0x8000)) { 152 /* snan, signal invalid */ 153 if (fsr & FSR_NVM) { 154 __quad_fdivq(x, y, &Z); 155 } else { 156 Z = *x; 157 Z.l.msw |= 0x8000; 158 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 159 FSR_NVC; 160 __quad_setfsrp(&fsr); 161 } 162 QUAD_RETURN(Z); 163 } 164 Z = *x; 165 QUAD_RETURN(Z); 166 } 167 if (xm == 0x7fff0000) { 168 /* x is inf */ 169 if (ym == 0x7fff0000) { 170 /* inf / inf, signal invalid */ 171 if (fsr & FSR_NVM) { 172 __quad_fdivq(x, y, &Z); 173 } else { 174 Z.l.msw = 0x7fffffff; 175 Z.l.frac2 = Z.l.frac3 = 176 Z.l.frac4 = 0xffffffff; 177 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 178 FSR_NVC; 179 __quad_setfsrp(&fsr); 180 } 181 QUAD_RETURN(Z); 182 } 183 /* inf / finite, return inf */ 184 Z.l.msw = sign | 0x7fff0000; 185 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 186 QUAD_RETURN(Z); 187 } 188 /* y is inf */ 189 Z.l.msw = sign; 190 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 191 QUAD_RETURN(Z); 192 } 193 194 /* handle zero cases */ 195 if (xm == 0 || ym == 0) { 196 if (QUAD_ISZERO(*x)) { 197 if (QUAD_ISZERO(*y)) { 198 /* zero / zero, signal invalid */ 199 if (fsr & FSR_NVM) { 200 __quad_fdivq(x, y, &Z); 201 } else { 202 Z.l.msw = 0x7fffffff; 203 Z.l.frac2 = Z.l.frac3 = 204 Z.l.frac4 = 0xffffffff; 205 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 206 FSR_NVC; 207 __quad_setfsrp(&fsr); 208 } 209 QUAD_RETURN(Z); 210 } 211 /* zero / nonzero, return zero */ 212 Z.l.msw = sign; 213 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 214 QUAD_RETURN(Z); 215 } 216 if (QUAD_ISZERO(*y)) { 217 /* nonzero / zero, signal zero divide */ 218 if (fsr & FSR_DZM) { 219 __quad_fdivq(x, y, &Z); 220 } else { 221 Z.l.msw = sign | 0x7fff0000; 222 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 223 fsr = (fsr & ~FSR_CEXC) | FSR_DZA | FSR_DZC; 224 __quad_setfsrp(&fsr); 225 } 226 QUAD_RETURN(Z); 227 } 228 } 229 230 /* now x and y are finite, nonzero */ 231 __quad_setfsrp(&fsr_re); 232 233 /* get their normalized significands and exponents */ 234 ex = (int)(xm >> 16); 235 lx = xm & 0xffff; 236 if (ex) { 237 lx |= 0x10000; 238 wx[0] = x->l.frac2; 239 wx[1] = x->l.frac3; 240 wx[2] = x->l.frac4; 241 } else { 242 if (lx | (x->l.frac2 & 0xfffe0000)) { 243 wx[0] = x->l.frac2; 244 wx[1] = x->l.frac3; 245 wx[2] = x->l.frac4; 246 ex = 1; 247 } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) { 248 lx = x->l.frac2; 249 wx[0] = x->l.frac3; 250 wx[1] = x->l.frac4; 251 wx[2] = 0; 252 ex = -31; 253 } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) { 254 lx = x->l.frac3; 255 wx[0] = x->l.frac4; 256 wx[1] = wx[2] = 0; 257 ex = -63; 258 } else { 259 lx = x->l.frac4; 260 wx[0] = wx[1] = wx[2] = 0; 261 ex = -95; 262 } 263 while ((lx & 0x10000) == 0) { 264 lx = (lx << 1) | (wx[0] >> 31); 265 wx[0] = (wx[0] << 1) | (wx[1] >> 31); 266 wx[1] = (wx[1] << 1) | (wx[2] >> 31); 267 wx[2] <<= 1; 268 ex--; 269 } 270 } 271 ez = ex; 272 273 ey = (int)(ym >> 16); 274 ly = ym & 0xffff; 275 if (ey) { 276 ly |= 0x10000; 277 wy[0] = y->l.frac2; 278 wy[1] = y->l.frac3; 279 wy[2] = y->l.frac4; 280 } else { 281 if (ly | (y->l.frac2 & 0xfffe0000)) { 282 wy[0] = y->l.frac2; 283 wy[1] = y->l.frac3; 284 wy[2] = y->l.frac4; 285 ey = 1; 286 } else if (y->l.frac2 | (y->l.frac3 & 0xfffe0000)) { 287 ly = y->l.frac2; 288 wy[0] = y->l.frac3; 289 wy[1] = y->l.frac4; 290 wy[2] = 0; 291 ey = -31; 292 } else if (y->l.frac3 | (y->l.frac4 & 0xfffe0000)) { 293 ly = y->l.frac3; 294 wy[0] = y->l.frac4; 295 wy[1] = wy[2] = 0; 296 ey = -63; 297 } else { 298 ly = y->l.frac4; 299 wy[0] = wy[1] = wy[2] = 0; 300 ey = -95; 301 } 302 while ((ly & 0x10000) == 0) { 303 ly = (ly << 1) | (wy[0] >> 31); 304 wy[0] = (wy[0] << 1) | (wy[1] >> 31); 305 wy[1] = (wy[1] << 1) | (wy[2] >> 31); 306 wy[2] <<= 1; 307 ey--; 308 } 309 } 310 ez -= ey - 0x3fff; 311 312 /* extract the significands into doubles */ 313 c = twom16; 314 xx[0] = (double)((int)lx) * c; 315 yy[0] = (double)((int)ly) * c; 316 317 c *= twom24; 318 xx[0] += (double)((int)(wx[0] >> 8)) * c; 319 yy[1] = (double)((int)(wy[0] >> 8)) * c; 320 321 c *= twom24; 322 xx[1] = (double)((int)(((wx[0] << 16) | 323 (wx[1] >> 16)) & 0xffffff)) * c; 324 yy[2] = (double)((int)(((wy[0] << 16) | 325 (wy[1] >> 16)) & 0xffffff)) * c; 326 327 c *= twom24; 328 xx[2] = (double)((int)(((wx[1] << 8) | 329 (wx[2] >> 24)) & 0xffffff)) * c; 330 yy[3] = (double)((int)(((wy[1] << 8) | 331 (wy[2] >> 24)) & 0xffffff)) * c; 332 333 c *= twom24; 334 xx[3] = (double)((int)(wx[2] & 0xffffff)) * c; 335 yy[4] = (double)((int)(wy[2] & 0xffffff)) * c; 336 337 /* approximate the reciprocal of y */ 338 ry = one / ((yy[0] + yy[1]) + yy[2]); 339 340 /* compute the first five "digits" of the quotient */ 341 zz[0] = (ry * (xx[0] + xx[1]) + three2p27) - three2p27; 342 xx[0] = ((xx[0] - zz[0] * yy[0]) - zz[0] * yy[1]) + xx[1]; 343 d = zz[0] * yy[2]; 344 c = (d + three2m13) - three2m13; 345 xx[0] -= c; 346 xx[1] = xx[2] - (d - c); 347 d = zz[0] * yy[3]; 348 c = (d + three2m37) - three2m37; 349 xx[1] -= c; 350 xx[2] = xx[3] - (d - c); 351 d = zz[0] * yy[4]; 352 c = (d + three2m61) - three2m61; 353 xx[2] -= c; 354 xx[3] = c - d; 355 356 zz[1] = (ry * (xx[0] + xx[1]) + three2p3) - three2p3; 357 xx[0] = ((xx[0] - zz[1] * yy[0]) - zz[1] * yy[1]) + xx[1]; 358 d = zz[1] * yy[2]; 359 c = (d + three2m37) - three2m37; 360 xx[0] -= c; 361 xx[1] = xx[2] - (d - c); 362 d = zz[1] * yy[3]; 363 c = (d + three2m61) - three2m61; 364 xx[1] -= c; 365 xx[2] = xx[3] - (d - c); 366 d = zz[1] * yy[4]; 367 c = (d + three2m85) - three2m85; 368 xx[2] -= c; 369 xx[3] = c - d; 370 371 zz[2] = (ry * (xx[0] + xx[1]) + three2m21) - three2m21; 372 xx[0] = ((xx[0] - zz[2] * yy[0]) - zz[2] * yy[1]) + xx[1]; 373 d = zz[2] * yy[2]; 374 c = (d + three2m61) - three2m61; 375 xx[0] -= c; 376 xx[1] = xx[2] - (d - c); 377 d = zz[2] * yy[3]; 378 c = (d + three2m85) - three2m85; 379 xx[1] -= c; 380 xx[2] = xx[3] - (d - c); 381 d = zz[2] * yy[4]; 382 c = (d + three2m109) - three2m109; 383 xx[2] -= c; 384 xx[3] = c - d; 385 386 zz[3] = (ry * (xx[0] + xx[1]) + three2m45) - three2m45; 387 xx[0] = ((xx[0] - zz[3] * yy[0]) - zz[3] * yy[1]) + xx[1]; 388 d = zz[3] * yy[2]; 389 c = (d + three2m85) - three2m85; 390 xx[0] -= c; 391 xx[1] = xx[2] - (d - c); 392 d = zz[3] * yy[3]; 393 c = (d + three2m109) - three2m109; 394 xx[1] -= c; 395 xx[2] = xx[3] - (d - c); 396 d = zz[3] * yy[4]; 397 c = (d + three2m133) - three2m133; 398 xx[2] -= c; 399 xx[3] = c - d; 400 401 zz[4] = (ry * (xx[0] + xx[1]) + three2m73) - three2m73; 402 403 /* reduce to three doubles, making sure zz[1] is positive */ 404 zz[0] += zz[1] - twom48; 405 zz[1] = twom48 + zz[2] + zz[3]; 406 zz[2] = zz[4]; 407 408 /* if the third term might lie on a rounding boundary, perturb it */ 409 if (zz[2] == (twom62 + zz[2]) - twom62) { 410 /* here we just need to get the sign of the remainder */ 411 c = (((((xx[0] - zz[4] * yy[0]) - zz[4] * yy[1]) + xx[1]) + 412 (xx[2] - zz[4] * yy[2])) + (xx[3] - zz[4] * yy[3])) 413 - zz[4] * yy[4]; 414 if (c < zero) 415 zz[2] -= twom124; 416 else if (c > zero) 417 zz[2] += twom124; 418 } 419 420 /* 421 * propagate carries/borrows, using round-to-negative-infinity mode 422 * to make all terms nonnegative (note that we can't encounter a 423 * borrow so large that the roundoff is unrepresentable because 424 * we took care to make zz[1] positive above) 425 */ 426 __quad_setfsrp(&fsr_rn); 427 c = zz[1] + zz[2]; 428 zz[2] += (zz[1] - c); 429 zz[1] = c; 430 c = zz[0] + zz[1]; 431 zz[1] += (zz[0] - c); 432 zz[0] = c; 433 434 /* check for borrow */ 435 if (c < one) { 436 /* postnormalize */ 437 zz[0] += zz[0]; 438 zz[1] += zz[1]; 439 zz[2] += zz[2]; 440 ez--; 441 } 442 443 /* if exponent > 0 strip off integer bit, else denormalize */ 444 if (ez > 0) { 445 ibit = 1; 446 zz[0] -= one; 447 } else { 448 ibit = 0; 449 if (ez > -128) 450 u.l.hi = (unsigned int)(0x3fe + ez) << 20; 451 else 452 u.l.hi = 0x37e00000; 453 u.l.lo = 0; 454 zz[0] *= u.d; 455 zz[1] *= u.d; 456 zz[2] *= u.d; 457 ez = 0; 458 } 459 460 /* the first 48 bits of fraction come from zz[0] */ 461 u.d = d = two36 + zz[0]; 462 msw = u.l.lo; 463 zz[0] -= (d - two36); 464 465 u.d = d = two4 + zz[0]; 466 frac2 = u.l.lo; 467 zz[0] -= (d - two4); 468 469 /* the next 32 come from zz[0] and zz[1] */ 470 u.d = d = twom28 + (zz[0] + zz[1]); 471 frac3 = u.l.lo; 472 zz[0] -= (d - twom28); 473 474 /* condense the remaining fraction; errors here won't matter */ 475 c = zz[0] + zz[1]; 476 zz[1] = ((zz[0] - c) + zz[1]) + zz[2]; 477 zz[0] = c; 478 479 /* get the last word of fraction */ 480 u.d = d = twom60 + (zz[0] + zz[1]); 481 frac4 = u.l.lo; 482 zz[0] -= (d - twom60); 483 484 /* keep track of what's left for rounding; note that the error */ 485 /* in computing c will be non-negative due to rounding mode */ 486 c = zz[0] + zz[1]; 487 488 /* get the rounding mode, fudging directed rounding modes */ 489 /* as though the result were positive */ 490 rm = fsr >> 30; 491 if (sign) 492 rm ^= (rm >> 1); 493 494 /* round and raise exceptions */ 495 fsr &= ~FSR_CEXC; 496 if (c != zero) { 497 fsr |= FSR_NXC; 498 499 /* decide whether to round the fraction up */ 500 if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 || 501 (c == twom113 && ((frac4 & 1) || (c - zz[0] != 502 zz[1])))))) { 503 /* round up and renormalize if necessary */ 504 if (++frac4 == 0) 505 if (++frac3 == 0) 506 if (++frac2 == 0) 507 if (++msw == 0x10000) { 508 msw = 0; 509 ez++; 510 } 511 } 512 } 513 514 /* check for under/overflow */ 515 if (ez >= 0x7fff) { 516 if (rm == FSR_RN || rm == FSR_RP) { 517 z.l.msw = sign | 0x7fff0000; 518 z.l.frac2 = z.l.frac3 = z.l.frac4 = 0; 519 } else { 520 z.l.msw = sign | 0x7ffeffff; 521 z.l.frac2 = z.l.frac3 = z.l.frac4 = 0xffffffff; 522 } 523 fsr |= FSR_OFC | FSR_NXC; 524 } else { 525 z.l.msw = sign | (ez << 16) | msw; 526 z.l.frac2 = frac2; 527 z.l.frac3 = frac3; 528 z.l.frac4 = frac4; 529 530 /* !ibit => exact result was tiny before rounding, */ 531 /* t nonzero => result delivered is inexact */ 532 if (!ibit) { 533 if (c != zero) 534 fsr |= FSR_UFC | FSR_NXC; 535 else if (fsr & FSR_UFM) 536 fsr |= FSR_UFC; 537 } 538 } 539 540 if ((fsr & FSR_CEXC) & (fsr >> 23)) { 541 __quad_setfsrp(&fsr); 542 __quad_fdivq(x, y, &Z); 543 } else { 544 Z = z; 545 fsr |= (fsr & 0x1f) << 5; 546 __quad_setfsrp(&fsr); 547 } 548 QUAD_RETURN(Z); 549 } 550