/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* make fmal a weak alias for the __fmal defined below */
#pragma weak fmal = __fmal

#include "libm.h"
#include "fma.h"
#include "fenv_inlines.h"

#if defined(__sparc)

/*
 * Double precision constants spelled out as bit patterns.  The first
 * word of each pair is the high-order word (sign, exponent, and top 20
 * fraction bits) because this variant is big-endian.
 */
static const union {
	unsigned i[2];
	double d;
} C[] = {
	{ 0x3fe00000u, 0 },	/* half:    2^-1 */
	{ 0x40000000u, 0 },	/* two:     2^1 */
	{ 0x3ef00000u, 0 },	/* twom16:  2^-16 */
	{ 0x3e700000u, 0 },	/* twom24:  2^-24 */
	{ 0x41300000u, 0 },	/* two20:   2^20 */
	{ 0x3e300000u, 0 },	/* twom28:  2^-28 */
	{ 0x3b300000u, 0 },	/* twom76:  2^-76 */
	{ 0x38300000u, 0 },	/* twom124: 2^-124 */
	{ 0x42300000u, 0 },	/* two36:   2^36 */
	{ 0x3df00000u, 0 },	/* twom32:  2^-32 */
	{ 0x7fe00000u, 0 },	/* huge:    2^1023 */
	{ 0x00100000u, 0 },	/* tiny:    2^-1022 */
	{ 0x00100001u, 0 },	/* tiny2:   2^-1022 * (1 + 2^-20) */
	{ 0, 0 },		/* zero */
	{ 0x7ff00000u, 0 },	/* inf */
	{ 0x7ff00001u, 0 }	/* snan: nan with the top fraction bit clear */
};

#define	half	C[0].d
#define	two	C[1].d
#define	twom16	C[2].d
#define	twom24	C[3].d
#define	two20	C[4].d
#define	twom28	C[5].d
#define	twom76	C[6].d
#define	twom124	C[7].d
#define	two36	C[8].d
#define	twom32	C[9].d
#define	huge	C[10].d
#define	tiny	C[11].d
#define	tiny2	C[12].d
#define	zero	C[13].d
#define	inf	C[14].d
#define	snan	C[15].d

/*
 * fsr image installed while the significands are multiplied: only the
 * top two bits (the field this file later reads as fsr >> 30) are set,
 * every other bit is zero
 */
static const unsigned int fsr_rm = 0xc0000000u;

/*
 * fmal for SPARC: 128-bit quad precision, big-endian
 *
 * Returns x * y + z.
 *
 * Each argument is viewed as four 32-bit words; word 0 holds the sign
 * (bit 31), the 15-bit exponent (bits 30-16), and the top 16 fraction
 * bits.  Zero, infinite, and nan operands are disposed of first.  For
 * finite nonzero operands the two significands are cut into 17+24+24+
 * 24+24-bit "digits", multiplied digit by digit in double arithmetic,
 * and unpacked into the eight words xy0-xy7.  The product is then
 * aligned with z, added or subtracted with integer arithmetic, and
 * rounded using the rounding direction found in the fsr on entry.
 *
 * The fsr is read with __fenv_getfsr32 and written with
 * __fenv_setfsr32.  Exceptions that must actually be raised are
 * produced by arithmetic on the volatile variable dummy.
 */
long double
__fmal(long double x, long double y, long double z) {
	union {
		unsigned int i[4];
		long double q;
	} xx, yy, zz;
	union {
		unsigned int i[2];
		double d;
	} u;
	double dx[5], dy[5], dxy[9], c, s;
	unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
	unsigned int z0, z1, z2, z3, z4, z5, z6, z7;
	unsigned int rm, sticky;
	unsigned int fsr;
	int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
	int cx, cy, cz;
	volatile double dummy;

	/* extract the high order words of the arguments (sign removed) */
	xx.q = x;
	yy.q = y;
	zz.q = z;
	hx = xx.i[0] & ~0x80000000;
	hy = yy.i[0] & ~0x80000000;
	hz = zz.i[0] & ~0x80000000;

	/*
	 * distinguish zero, finite nonzero, infinite, and quiet nan
	 * arguments; raise invalid and return for signaling nans
	 *
	 * cx, cy, cz: 0 = zero, 1 = finite nonzero (subnormals included),
	 * 2 = inf, 3 = quiet nan
	 */
	if (hx >= 0x7fff0000) {
		if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) {
			if (!(hx & 0x8000)) {
				/* signaling nan, raise invalid */
				dummy = snan;
				dummy += snan;
				/* return the argument with bit 0x8000 set */
				xx.i[0] |= 0x8000;
				return (xx.q);
			}
			cx = 3;	/* quiet nan */
		} else
			cx = 2;	/* inf */
	} else if (hx == 0) {
		cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
				/* subnormal or zero */
	} else
		cx = 1;	/* finite nonzero */

	/* same classification for y */
	if (hy >= 0x7fff0000) {
		if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
			if (!(hy & 0x8000)) {
				dummy = snan;
				dummy += snan;
				yy.i[0] |= 0x8000;
				return (yy.q);
			}
			cy = 3;
		} else
			cy = 2;
	} else if (hy == 0) {
		cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
	} else
		cy = 1;

	/* same classification for z */
	if (hz >= 0x7fff0000) {
		if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
			if (!(hz & 0x8000)) {
				dummy = snan;
				dummy += snan;
				zz.i[0] |= 0x8000;
				return (zz.q);
			}
			cz = 3;
		} else
			cz = 2;
	} else if (hz == 0) {
		cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
	} else
		cz = 1;

	/* get the fsr and clear current exceptions */
	__fenv_getfsr32(&fsr);
	fsr &= ~FSR_CEXC;

	/*
	 * handle all other zero, inf, and nan cases
	 *
	 * The paths below that raise invalid through dummy return without
	 * writing fsr back; every other path stores the saved fsr first.
	 */
	if (cx != 1 || cy != 1 || cz != 1) {
		/* if x or y is a quiet nan, return it */
		if (cx == 3) {
			__fenv_setfsr32(&fsr);
			return (x);
		}
		if (cy == 3) {
			__fenv_setfsr32(&fsr);
			return (y);
		}

		/* if x*y is 0*inf, raise invalid and return the default nan */
		if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
			dummy = zero;
			dummy *= inf;
			zz.i[0] = 0x7fffffff;
			zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
			return (zz.q);
		}

		/* if z is a quiet nan, return it */
		if (cz == 3) {
			__fenv_setfsr32(&fsr);
			return (z);
		}

		/*
		 * now none of x, y, or z is nan; handle cases where x or y
		 * is inf
		 */
		if (cx == 2 || cy == 2) {
			/*
			 * if z is also inf, either we have inf-inf or
			 * the result is the same as z depending on signs
			 */
			if (cz == 2) {
				/* negative when sign(x*y) differs from sign(z) */
				if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
					dummy = inf;
					dummy -= inf;
					zz.i[0] = 0x7fffffff;
					zz.i[1] = zz.i[2] = zz.i[3] =
					    0xffffffff;
					return (zz.q);
				}
				__fenv_setfsr32(&fsr);
				return (z);
			}

			/* otherwise the result is inf with appropriate sign */
			zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
			    0x7fff0000;
			zz.i[1] = zz.i[2] = zz.i[3] = 0;
			__fenv_setfsr32(&fsr);
			return (zz.q);
		}

		/* if z is inf, return it */
		if (cz == 2) {
			__fenv_setfsr32(&fsr);
			return (z);
		}

		/*
		 * now x, y, and z are all finite; handle cases where x or y
		 * is zero
		 */
		if (cx == 0 || cy == 0) {
			/* either we have 0-0 or the result is the same as z */
			if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
			    0) {
				/*
				 * zeros of opposite sign: the sign of the
				 * result depends on the rounding direction
				 * (zz.i[1..3] are already zero)
				 */
				zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
				    0;
				__fenv_setfsr32(&fsr);
				return (zz.q);
			}
			__fenv_setfsr32(&fsr);
			return (z);
		}

		/* if we get here, x and y are nonzero finite, z must be zero */
		return (x * y);
	}

	/*
	 * now x, y, and z are all finite and nonzero; set round-to-
	 * negative-infinity mode
	 */
	__fenv_setfsr32(&fsr_rm);

	/*
	 * get the signs and exponents and normalize the significands
	 * of x and y
	 *
	 * On exit hx (hy) holds the leading 17 significand bits with the
	 * integer bit at 0x10000.  A subnormal is first moved up by whole
	 * words (ex starts at 1, -31, -63, or -95 accordingly) and then
	 * shifted left one bit at a time until the integer bit is set.
	 */
	sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
	ex = hx >> 16;
	hx &= 0xffff;
	if (!ex) {
		if (hx | (xx.i[1] & 0xfffe0000)) {
			ex = 1;
		} else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
			hx = xx.i[1];
			xx.i[1] = xx.i[2];
			xx.i[2] = xx.i[3];
			xx.i[3] = 0;
			ex = -31;
		} else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
			hx = xx.i[2];
			xx.i[1] = xx.i[3];
			xx.i[2] = xx.i[3] = 0;
			ex = -63;
		} else {
			hx = xx.i[3];
			xx.i[1] = xx.i[2] = xx.i[3] = 0;
			ex = -95;
		}
		while ((hx & 0x10000) == 0) {
			hx = (hx << 1) | (xx.i[1] >> 31);
			xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
			xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
			xx.i[3] <<= 1;
			ex--;
		}
	} else
		hx |= 0x10000;	/* normal number: supply the integer bit */
	ey = hy >> 16;
	hy &= 0xffff;
	if (!ey) {
		if (hy | (yy.i[1] & 0xfffe0000)) {
			ey = 1;
		} else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
			hy = yy.i[1];
			yy.i[1] = yy.i[2];
			yy.i[2] = yy.i[3];
			yy.i[3] = 0;
			ey = -31;
		} else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
			hy = yy.i[2];
			yy.i[1] = yy.i[3];
			yy.i[2] = yy.i[3] = 0;
			ey = -63;
		} else {
			hy = yy.i[3];
			yy.i[1] = yy.i[2] = yy.i[3] = 0;
			ey = -95;
		}
		while ((hy & 0x10000) == 0) {
			hy = (hy << 1) | (yy.i[1] >> 31);
			yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
			yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
			yy.i[3] <<= 1;
			ey--;
		}
	} else
		hy |= 0x10000;
	/* biased exponent of the product (0x3fff is subtracted once) */
	exy = ex + ey - 0x3fff;

	/*
	 * convert the significands of x and y to doubles
	 *
	 * digit 0 is the 17-bit leading part scaled by 2^-16; digits 1-4
	 * are successive 24-bit fields, each scaled 2^-24 below the
	 * previous one
	 */
	c = twom16;
	dx[0] = (double) ((int) hx) * c;
	dy[0] = (double) ((int) hy) * c;

	c *= twom24;
	dx[1] = (double) ((int) (xx.i[1] >> 8)) * c;
	dy[1] = (double) ((int) (yy.i[1] >> 8)) * c;

	c *= twom24;
	dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) &
	    0xffffff)) * c;
	dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) &
	    0xffffff)) * c;

	c *= twom24;
	dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) &
	    0xffffff)) * c;
	dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) &
	    0xffffff)) * c;

	c *= twom24;
	dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c;
	dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c;

	/* form the "digits" of the product */
	dxy[0] = dx[0] * dy[0];
	dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
	dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
	dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] +
	    dx[3] * dy[0];
	dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] +
	    dx[3] * dy[1] + dx[4] * dy[0];
	dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] +
	    dx[4] * dy[1];
	dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
	dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
	dxy[8] = dx[4] * dy[4];

	/*
	 * split odd-numbered terms and combine into even-numbered terms
	 *
	 * Adding and then subtracting a power of two leaves in c the
	 * high-order part of the odd term; c goes into the even term
	 * above it and what is left of the odd term goes into the even
	 * term below it.
	 */
	c = (dxy[1] + two20) - two20;
	dxy[0] += c;
	dxy[1] -= c;
	c = (dxy[3] + twom28) - twom28;
	dxy[2] += c + dxy[1];
	dxy[3] -= c;
	c = (dxy[5] + twom76) - twom76;
	dxy[4] += c + dxy[3];
	dxy[5] -= c;
	c = (dxy[7] + twom124) - twom124;
	dxy[6] += c + dxy[5];
	dxy[8] += (dxy[7] - c);

	/*
	 * propagate carries, adjusting the exponent if need be
	 *
	 * the odd-numbered slots are reused here for running sums from
	 * the least significant term upward; dxy[1] ends up as the sum
	 * of all terms and decides whether the product reached 2
	 */
	dxy[7] = dxy[6] + dxy[8];
	dxy[5] = dxy[4] + dxy[7];
	dxy[3] = dxy[2] + dxy[5];
	dxy[1] = dxy[0] + dxy[3];
	if (dxy[1] >= two) {
		dxy[0] *= half;
		dxy[1] *= half;
		dxy[2] *= half;
		dxy[3] *= half;
		dxy[4] *= half;
		dxy[5] *= half;
		dxy[6] *= half;
		dxy[7] *= half;
		dxy[8] *= half;
		exy++;
	}

	/*
	 * extract the significand of x*y
	 *
	 * Adding s positions the next 32 wanted bits in the low-order
	 * word of the double, which is read back through u.i[1]; s is
	 * scaled down by 2^-32 for each following word.  Subtracting s
	 * again recovers the part just extracted so that it can be
	 * removed from the running sums.
	 */
	s = two36;
	u.d = c = dxy[1] + s;
	xy0 = u.i[1];
	c -= s;
	dxy[1] -= c;
	dxy[0] -= c;

	s *= twom32;
	u.d = c = dxy[1] + s;
	xy1 = u.i[1];
	c -= s;
	dxy[2] += (dxy[0] - c);
	dxy[3] = dxy[2] + dxy[5];

	s *= twom32;
	u.d = c = dxy[3] + s;
	xy2 = u.i[1];
	c -= s;
	dxy[4] += (dxy[2] - c);
	dxy[5] = dxy[4] + dxy[7];

	s *= twom32;
	u.d = c = dxy[5] + s;
	xy3 = u.i[1];
	c -= s;
	dxy[4] -= c;
	dxy[5] = dxy[4] + dxy[7];

	s *= twom32;
	u.d = c = dxy[5] + s;
	xy4 = u.i[1];
	c -= s;
	dxy[6] += (dxy[4] - c);
	dxy[7] = dxy[6] + dxy[8];

	s *= twom32;
	u.d = c = dxy[7] + s;
	xy5 = u.i[1];
	c -= s;
	dxy[8] += (dxy[6] - c);

	s *= twom32;
	u.d = c = dxy[8] + s;
	xy6 = u.i[1];
	c -= s;
	dxy[8] -= c;

	s *= twom32;
	u.d = c = dxy[8] + s;
	xy7 = u.i[1];

	/*
	 * extract the sign, exponent, and significand of z (normalized
	 * the same way as x and y above, but into z0-z3 rather than in
	 * place)
	 */
	sz = zz.i[0] & 0x80000000;
	ez = hz >> 16;
	z0 = hz & 0xffff;
	if (!ez) {
		if (z0 | (zz.i[1] & 0xfffe0000)) {
			z1 = zz.i[1];
			z2 = zz.i[2];
			z3 = zz.i[3];
			ez = 1;
		} else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
			z0 = zz.i[1];
			z1 = zz.i[2];
			z2 = zz.i[3];
			z3 = 0;
			ez = -31;
		} else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
			z0 = zz.i[2];
			z1 = zz.i[3];
			z2 = z3 = 0;
			ez = -63;
		} else {
			z0 = zz.i[3];
			z1 = z2 = z3 = 0;
			ez = -95;
		}
		while ((z0 & 0x10000) == 0) {
			z0 = (z0 << 1) | (z1 >> 31);
			z1 = (z1 << 1) | (z2 >> 31);
			z2 = (z2 << 1) | (z3 >> 31);
			z3 <<= 1;
			ez--;
		}
	} else {
		z0 |= 0x10000;
		z1 = zz.i[1];
		z2 = zz.i[2];
		z3 = zz.i[3];
	}
	z4 = z5 = z6 = z7 = 0;

	/*
	 * now x*y is represented by sxy, exy, and xy[0-7], and z is
	 * represented likewise; swap if need be so |xy| <= |z|
	 */
	if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
	    (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 ||
	    (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
		e = sxy; sxy = sz; sz = e;
		e = exy; exy = ez; ez = e;
		e = xy0; xy0 = z0; z0 = e;
		e = xy1; xy1 = z1; z1 = e;
		e = xy2; xy2 = z2; z2 = e;
		e = xy3; xy3 = z3; z3 = e;
		/* z4-z7 were zero, so the low words simply move across */
		z4 = xy4; xy4 = 0;
		z5 = xy5; xy5 = 0;
		z6 = xy6; xy6 = 0;
		z7 = xy7; xy7 = 0;
	}

	/*
	 * shift the significand of xy keeping a sticky bit
	 *
	 * Each branch handles a 32-bit range of shift distances: whole
	 * words are moved first and the remaining 0-31 bits are shifted.
	 * Left shifts are written (w << 1) << (k - e) so that the shift
	 * count never reaches 32.  Any nonzero bits shifted out are
	 * recorded in the low bit of xy7.
	 */
	e = ez - exy;
	if (e > 236) {
		/* xy lies entirely below the kept bits: only sticky remains */
		xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
		xy7 = 1;
	} else if (e >= 224) {
		sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 |
		    ((xy0 << 1) << (255 - e));
		xy7 = xy0 >> (e - 224);
		if (sticky)
			xy7 |= 1;
		xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
	} else if (e >= 192) {
		sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 |
		    ((xy1 << 1) << (223 - e));
		xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));
		if (sticky)
			xy7 |= 1;
		xy6 = xy0 >> (e - 192);
		xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
	} else if (e >= 160) {
		sticky = xy7 | xy6 | xy5 | xy4 | xy3 |
		    ((xy2 << 1) << (191 - e));
		xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));
		if (sticky)
			xy7 |= 1;
		xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
		xy5 = xy0 >> (e - 160);
		xy0 = xy1 = xy2 = xy3 = xy4 = 0;
	} else if (e >= 128) {
		sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
		xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));
		if (sticky)
			xy7 |= 1;
		xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
		xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
		xy4 = xy0 >> (e - 128);
		xy0 = xy1 = xy2 = xy3 = 0;
	} else if (e >= 96) {
		sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
		xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));
		if (sticky)
			xy7 |= 1;
		xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
		xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
		xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
		xy3 = xy0 >> (e - 96);
		xy0 = xy1 = xy2 = 0;
	} else if (e >= 64) {
		sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
		xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));
		if (sticky)
			xy7 |= 1;
		xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
		xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
		xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
		xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
		xy2 = xy0 >> (e - 64);
		xy0 = xy1 = 0;
	} else if (e >= 32) {
		sticky = xy7 | ((xy6 << 1) << (63 - e));
		xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));
		if (sticky)
			xy7 |= 1;
		xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
		xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
		xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
		xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
		xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
		xy1 = xy0 >> (e - 32);
		xy0 = 0;
	} else if (e) {
		sticky = (xy7 << 1) << (31 - e);
		xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));
		if (sticky)
			xy7 |= 1;
		xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
		xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
		xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
		xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
		xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
		xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
		xy0 >>= e;
	}

	/*
	 * if this is a magnitude subtract, negate the significand of xy
	 * (two's complement across the eight words: complement each word,
	 * then add one at the bottom and ripple the carry upward)
	 */
	if (sxy ^ sz) {
		xy0 = ~xy0;
		xy1 = ~xy1;
		xy2 = ~xy2;
		xy3 = ~xy3;
		xy4 = ~xy4;
		xy5 = ~xy5;
		xy6 = ~xy6;
		xy7 = -xy7;
		if (xy7 == 0)
			if (++xy6 == 0)
				if (++xy5 == 0)
					if (++xy4 == 0)
						if (++xy3 == 0)
							if (++xy2 == 0)
								if (++xy1 == 0)
									xy0++;
	}

	/*
	 * add, propagating carries
	 *
	 * e is reused as the carry flag; after adding a carry-in the
	 * carry-out test is <= instead of < because the sum can then
	 * equal the addend when the word wraps
	 */
	z7 += xy7;
	e = (z7 < xy7);
	z6 += xy6;
	if (e) {
		z6++;
		e = (z6 <= xy6);
	} else
		e = (z6 < xy6);
	z5 += xy5;
	if (e) {
		z5++;
		e = (z5 <= xy5);
	} else
		e = (z5 < xy5);
	z4 += xy4;
	if (e) {
		z4++;
		e = (z4 <= xy4);
	} else
		e = (z4 < xy4);
	z3 += xy3;
	if (e) {
		z3++;
		e = (z3 <= xy3);
	} else
		e = (z3 < xy3);
	z2 += xy2;
	if (e) {
		z2++;
		e = (z2 <= xy2);
	} else
		e = (z2 < xy2);
	z1 += xy1;
	if (e) {
		z1++;
		e = (z1 <= xy1);
	} else
		e = (z1 < xy1);
	z0 += xy0;
	if (e)
		z0++;

	/*
	 * postnormalize and collect rounding information into z4
	 *
	 * afterwards z0-z3 hold the result significand and z4 holds the
	 * bits below it, with its low bit serving as the sticky bit
	 */
	if (ez < 1) {
		/* result is tiny; shift right until exponent is within range */
		e = 1 - ez;
		if (e > 116) {
			z4 = 1; /* result can't be exactly zero */
			z0 = z1 = z2 = z3 = 0;
		} else if (e >= 96) {
			sticky = z7 | z6 | z5 | z4 | z3 | z2 |
			    ((z1 << 1) << (127 - e));
			z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));
			if (sticky)
				z4 |= 1;
			z3 = z0 >> (e - 96);
			z0 = z1 = z2 = 0;
		} else if (e >= 64) {
			sticky = z7 | z6 | z5 | z4 | z3 |
			    ((z2 << 1) << (95 - e));
			z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));
			if (sticky)
				z4 |= 1;
			z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
			z2 = z0 >> (e - 64);
			z0 = z1 = 0;
		} else if (e >= 32) {
			sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
			z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));
			if (sticky)
				z4 |= 1;
			z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
			z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
			z1 = z0 >> (e - 32);
			z0 = 0;
		} else {
			/* << binds tighter than |, so only z4 is shifted */
			sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
			z4 = (z4 >> e) | ((z3 << 1) << (31 - e));
			if (sticky)
				z4 |= 1;
			z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
			z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
			z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
			z0 >>= e;
		}
		ez = 1;
	} else if (z0 >= 0x20000) {
		/* carry out; shift right by one */
		sticky = (z4 & 1) | z5 | z6 | z7;
		z4 = (z4 >> 1) | (z3 << 31);
		if (sticky)
			z4 |= 1;
		z3 = (z3 >> 1) | (z2 << 31);
		z2 = (z2 >> 1) | (z1 << 31);
		z1 = (z1 >> 1) | (z0 << 31);
		z0 >>= 1;
		ez++;
	} else {
		if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7)
		    != 0) {
			/*
			 * borrow/cancellation; shift left as much as
			 * exponent allows
			 */
			/* first by whole words ... */
			while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
				z0 = z1;
				z1 = z2;
				z2 = z3;
				z3 = z4;
				z4 = z5;
				z5 = z6;
				z6 = z7;
				z7 = 0;
				ez -= 32;
			}
			/* ... then bit by bit */
			while (z0 < 0x10000 && ez > 1) {
				z0 = (z0 << 1) | (z1 >> 31);
				z1 = (z1 << 1) | (z2 >> 31);
				z2 = (z2 << 1) | (z3 >> 31);
				z3 = (z3 << 1) | (z4 >> 31);
				z4 = (z4 << 1) | (z5 >> 31);
				z5 = (z5 << 1) | (z6 >> 31);
				z6 = (z6 << 1) | (z7 >> 31);
				z7 <<= 1;
				ez--;
			}
		}
		if (z5 | z6 | z7)
			z4 |= 1;
	}

	/* get the rounding mode (from the fsr saved on entry) */
	rm = fsr >> 30;

	/* strip off the integer bit, if there is one */
	ibit = z0 & 0x10000;
	if (ibit)
		z0 -= 0x10000;
	else {
		/* no integer bit: subnormal or zero, stored exponent is 0 */
		ez = 0;
		if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */
			zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
			zz.i[1] = zz.i[2] = zz.i[3] = 0;
			__fenv_setfsr32(&fsr);
			return (zz.q);
		}
	}

	/*
	 * flip the sense of directed roundings if the result is negative;
	 * the logic below applies to a positive result
	 * (rm ^= rm >> 1 leaves 0 and 1 alone and exchanges 2 and 3)
	 */
	if (sz)
		rm ^= rm >> 1;

	/* round and raise exceptions */
	if (z4) {
		fsr |= FSR_NXC;

		/*
		 * decide whether to round the fraction up: always when
		 * rounding upward; when rounding to nearest, if the
		 * discarded part exceeds half an ulp or is exactly half
		 * and the kept fraction is odd
		 */
		if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u ||
		    (z4 == 0x80000000u && (z3 & 1))))) {
			/* round up and renormalize if necessary */
			if (++z3 == 0)
				if (++z2 == 0)
					if (++z1 == 0)
						if (++z0 == 0x10000) {
							z0 = 0;
							ez++;
						}
		}
	}

	/* check for under/overflow */
	if (ez >= 0x7fff) {
		if (rm == FSR_RN || rm == FSR_RP) {
			/* deliver infinity */
			zz.i[0] = sz | 0x7fff0000;
			zz.i[1] = zz.i[2] = zz.i[3] = 0;
		} else {
			/* deliver the largest finite magnitude */
			zz.i[0] = sz | 0x7ffeffff;
			zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
		}
		fsr |= FSR_OFC | FSR_NXC;
	} else {
		zz.i[0] = sz | (ez << 16) | z0;
		zz.i[1] = z1;
		zz.i[2] = z2;
		zz.i[3] = z3;

		/*
		 * !ibit => exact result was tiny before rounding,
		 * z4 nonzero => result delivered is inexact
		 */
		if (!ibit) {
			if (z4)
				fsr |= FSR_UFC | FSR_NXC;
			else if (fsr & FSR_UFM)
				fsr |= FSR_UFC;
		}
	}

	/*
	 * restore the fsr and emulate exceptions as needed
	 *
	 * fsr >> 23 lines bits 23-27 up with the current-exception bits
	 * (presumably the trap-enable field -- confirm against the FSR_*
	 * definitions).  If a raised exception has its bit set there, the
	 * fsr is restored and the exception is produced by a real
	 * floating-point operation; otherwise the low five bits are ORed
	 * into bits 5-9 (presumably the accrued-exception field) before
	 * the fsr is stored.
	 */
	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
		__fenv_setfsr32(&fsr);
		if (fsr & FSR_OFC) {
			dummy = huge;
			dummy *= huge;
		} else if (fsr & FSR_UFC) {
			dummy = tiny;
			if (fsr & FSR_NXC)
				dummy *= tiny;
			else
				dummy -= tiny2;
		} else {
			dummy = huge;
			dummy += tiny;
		}
	} else {
		fsr |= (fsr & 0x1f) << 5;
		__fenv_setfsr32(&fsr);
	}
	return (zz.q);
}

#elif defined(__x86)

/*
 * Double precision constants spelled out as bit patterns.  The second
 * word of each pair is the high-order word (sign, exponent, and top 20
 * fraction bits) because this variant is little-endian.
 */
static const union {
	unsigned i[2];
	double d;
} C[] = {
	{ 0, 0x3fe00000u },	/* half:   2^-1 */
	{ 0, 0x40000000u },	/* two:    2^1 */
	{ 0, 0x3df00000u },	/* twom32: 2^-32 */
	{ 0, 0x3bf00000u },	/* twom64: 2^-64 */
	{ 0, 0x41f00000u },	/* two32:  2^32 */
	{ 0, 0x43e00000u },	/* two63:  2^63 */
	{ 0, 0x7fe00000u },	/* huge:   2^1023 */
	{ 0, 0x00100000u },	/* tiny:   2^-1022 */
	{ 0, 0x00100001u }	/* tiny2:  2^-1022 * (1 + 2^-20) */
};

#define	half	C[0].d
#define	two	C[1].d
#define	twom32	C[2].d
#define	twom64	C[3].d
#define	two32	C[4].d
#define	two63	C[5].d
#define	huge	C[6].d
#define	tiny	C[7].d
#define	tiny2	C[8].d

/*
 * number of 32-bit words in the union laid over a long double
 * (presumably matching sizeof (long double) on each target -- confirm)
 */
#if defined(__amd64)
#define	NI	4
#else
#define	NI	3
#endif

/*
 * fmal for x86: 80-bit extended double precision, little-endian
 *
 * Returns x * y + z.
 *
 * Each argument is viewed as 32-bit words: words 0 and 1 are the low
 * and high halves of the 64-bit significand, and the low 16 bits of
 * word 2 hold the sign (0x8000) and the 15-bit exponent.  Operands that
 * are zero, infinite, or nan are handed to ordinary arithmetic.
 * Otherwise the significands are multiplied to 128 bits in long double
 * arithmetic with the control word set to truncate at 64-bit precision,
 * the product is aligned with z, added or subtracted in integer
 * arithmetic, and rounded using the rounding direction found in the
 * control word on entry.
 *
 * The control and status words are read with __fenv_getcwsw and
 * written with __fenv_setcwsw.  Overflow, underflow, and inexact are
 * raised at the end by arithmetic on the volatile variable dummy.
 */
long double
__fmal(long double x, long double y, long double z) {
	union {
		unsigned i[NI];
		long double e;
	} xx, yy, zz;
	long double xhi, yhi, xlo, ylo, t;
	unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
	unsigned oldcwsw, cwsw, rm, sticky, carry;
	int ex, ey, ez, exy, sxy, sz, e, tinyafter;
	volatile double dummy;

	/* extract the exponents of the arguments */
	xx.e = x;
	yy.e = y;
	zz.e = z;
	ex = xx.i[2] & 0x7fff;
	ey = yy.i[2] & 0x7fff;
	ez = zz.i[2] & 0x7fff;

	/* dispense with inf, nan, and zero cases */
	if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
	    (ey | yy.i[1] | yy.i[0]) == 0)	/* x or y is inf, nan, or 0 */
		return (x * y + z);

	if (ez == 0x7fff)	/* z is inf or nan */
		return (x + z);	/* avoid spurious under/overflow in x * y */

	if ((ez | zz.i[1] | zz.i[0]) == 0)	/* z is zero */
		/*
		 * x * y isn't zero but could underflow to zero,
		 * so don't add z, lest we perturb the sign
		 */
		return (x * y);

	/*
	 * now x, y, and z are all finite and nonzero; extract signs and
	 * normalize the significands (this will raise the denormal operand
	 * exception if need be)
	 *
	 * an operand with a zero exponent field is scaled by 2^63 and
	 * the 63 is taken back out of its exponent
	 */
	sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;
	sz = zz.i[2] & 0x8000;
	if (!ex) {
		xx.e = x * two63;
		ex = (xx.i[2] & 0x7fff) - 63;
	}
	if (!ey) {
		yy.e = y * two63;
		ey = (yy.i[2] & 0x7fff) - 63;
	}
	if (!ez) {
		zz.e = z * two63;
		ez = (zz.i[2] & 0x7fff) - 63;
	}

	/*
	 * save the control and status words, mask all exceptions, and
	 * set rounding to 64-bit precision and toward-zero
	 */
	__fenv_getcwsw(&oldcwsw);
	cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
	__fenv_setcwsw(&cwsw);

	/*
	 * multiply x*y to 128 bits
	 *
	 * The exponent fields are overwritten with 0x3fff (sign cleared),
	 * so x and y become their significands scaled into [1, 2).  Each
	 * is split into a high part (xhi, yhi) and the remainder (xlo,
	 * ylo); x then receives the truncated product and y what the
	 * partial products leave over -- presumably the exact low-order
	 * part of the product; confirm against the rounding setup above.
	 */
	exy = ex + ey - 0x3fff;
	xx.i[2] = 0x3fff;
	yy.i[2] = 0x3fff;
	x = xx.e;
	y = yy.e;
	xhi = ((x + twom32) + two32) - two32;
	yhi = ((y + twom32) + two32) - two32;
	xlo = x - xhi;
	ylo = y - yhi;
	x *= y;
	y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
	if (x >= two) {
		/* product reached 2: halve it and bump the exponent */
		x *= half;
		y *= half;
		exy++;
	}

	/*
	 * extract the significands
	 *
	 * xy0:xy1 come straight from the significand of x; xy2 and xy3
	 * are read from the low significand word after adding 2^-32 and
	 * 2^-64 respectively to the low-order part y
	 */
	xx.e = x;
	xy0 = xx.i[1];
	xy1 = xx.i[0];
	yy.e = t = y + twom32;
	xy2 = yy.i[0];
	yy.e = (y - (t - twom32)) + twom64;
	xy3 = yy.i[0];
	xy4 = 0;
	z0 = zz.i[1];
	z1 = zz.i[0];
	z2 = z3 = z4 = 0;

	/*
	 * now x*y is represented by sxy, exy, and xy[0-4], and z is
	 * represented likewise; swap if need be so |xy| <= |z|
	 */
	if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
	    (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
		e = sxy; sxy = sz; sz = e;
		e = exy; exy = ez; ez = e;
		e = xy0; xy0 = z0; z0 = e;
		e = xy1; xy1 = z1; z1 = e;
		/* z2 and z3 were zero, so the low words simply move across */
		z2 = xy2; xy2 = 0;
		z3 = xy3; xy3 = 0;
	}

	/*
	 * shift the significand of xy keeping a sticky bit
	 *
	 * Each branch handles a 32-bit range of shift distances: whole
	 * words are moved first and the remaining 0-31 bits are shifted.
	 * Left shifts are written (w << 1) << (k - e) so that the shift
	 * count never reaches 32.  Bits shifted out of xy3 land in xy4;
	 * anything shifted out below xy4 is recorded in its low bit.
	 */
	e = ez - exy;
	if (e > 130) {
		/* xy lies entirely below the kept bits: only sticky remains */
		xy0 = xy1 = xy2 = xy3 = 0;
		xy4 = 1;
	} else if (e >= 128) {
		sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
		xy4 = xy0 >> (e - 128);
		if (sticky)
			xy4 |= 1;
		xy0 = xy1 = xy2 = xy3 = 0;
	} else if (e >= 96) {
		sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
		xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
		if (sticky)
			xy4 |= 1;
		xy3 = xy0 >> (e - 96);
		xy0 = xy1 = xy2 = 0;
	} else if (e >= 64) {
		sticky = xy3 | ((xy2 << 1) << (95 - e));
		xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
		if (sticky)
			xy4 |= 1;
		xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
		xy2 = xy0 >> (e - 64);
		xy0 = xy1 = 0;
	} else if (e >= 32) {
		sticky = (xy3 << 1) << (63 - e);
		xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
		if (sticky)
			xy4 |= 1;
		xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
		xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
		xy1 = xy0 >> (e - 32);
		xy0 = 0;
	} else if (e) {
		/* xy4 was zero, so nothing is lost and no sticky is needed */
		xy4 = (xy3 << 1) << (31 - e);
		xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
		xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
		xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
		xy0 >>= e;
	}

	/*
	 * if this is a magnitude subtract, negate the significand of xy
	 * (two's complement across the five words: complement each word,
	 * then add one at the bottom and ripple the carry upward)
	 */
	if (sxy ^ sz) {
		xy0 = ~xy0;
		xy1 = ~xy1;
		xy2 = ~xy2;
		xy3 = ~xy3;
		xy4 = -xy4;
		if (xy4 == 0)
			if (++xy3 == 0)
				if (++xy2 == 0)
					if (++xy1 == 0)
						xy0++;
	}

	/*
	 * add, propagating carries
	 *
	 * after adding a carry-in the carry-out test is <= instead of <
	 * because the sum can then equal the addend when the word wraps
	 */
	z4 += xy4;
	carry = (z4 < xy4);
	z3 += xy3;
	if (carry) {
		z3++;
		carry = (z3 <= xy3);
	} else
		carry = (z3 < xy3);
	z2 += xy2;
	if (carry) {
		z2++;
		carry = (z2 <= xy2);
	} else
		carry = (z2 < xy2);
	z1 += xy1;
	if (carry) {
		z1++;
		carry = (z1 <= xy1);
	} else
		carry = (z1 < xy1);
	z0 += xy0;
	if (carry) {
		z0++;
		carry = (z0 <= xy0);
	} else
		carry = (z0 < xy0);

	/* for a magnitude subtract, ignore the last carry out */
	if (sxy ^ sz)
		carry = 0;

	/*
	 * postnormalize and collect rounding information into z2
	 *
	 * afterwards z0:z1 hold the result significand and z2 holds the
	 * bits below it, with its low bit serving as the sticky bit; in
	 * the right shifts, carry acts as a 65th bit above z0
	 */
	if (ez < 1) {
		/* result is tiny; shift right until exponent is within range */
		e = 1 - ez;
		if (e > 67) {
			z2 = 1;	/* result can't be exactly zero */
			z0 = z1 = 0;
		} else if (e >= 64) {
			sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
			z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));
			if (sticky)
				z2 |= 1;
			z1 = carry >> (e - 64);
			z0 = 0;
		} else if (e >= 32) {
			sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
			z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
			if (sticky)
				z2 |= 1;
			z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
			z0 = carry >> (e - 32);
		} else {
			/* << binds tighter than |, so only z2 is shifted */
			sticky = z4 | z3 | (z2 << 1) << (31 - e);
			z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
			if (sticky)
				z2 |= 1;
			z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
			z0 = (z0 >> e) | ((carry << 1) << (31 - e));
		}
		ez = 1;
	} else if (carry) {
		/* carry out; shift right by one */
		sticky = (z2 & 1) | z3 | z4;
		z2 = (z2 >> 1) | (z1 << 31);
		if (sticky)
			z2 |= 1;
		z1 = (z1 >> 1) | (z0 << 31);
		z0 = (z0 >> 1) | 0x80000000;
		ez++;
	} else {
		if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
			/*
			 * borrow/cancellation; shift left as much as
			 * exponent allows
			 */
			/* first by whole words ... */
			while (!z0 && ez >= 33) {
				z0 = z1;
				z1 = z2;
				z2 = z3;
				z3 = z4;
				z4 = 0;
				ez -= 32;
			}
			/* ... then bit by bit */
			while (z0 < 0x80000000u && ez > 1) {
				z0 = (z0 << 1) | (z1 >> 31);
				z1 = (z1 << 1) | (z2 >> 31);
				z2 = (z2 << 1) | (z3 >> 31);
				z3 = (z3 << 1) | (z4 >> 31);
				z4 <<= 1;
				ez--;
			}
		}
		if (z3 | z4)
			z2 |= 1;
	}

	/* get the rounding mode (from the control word saved on entry) */
	rm = oldcwsw & 0x0c000000;

	/* adjust exponent if result is subnormal */
	tinyafter = 0;
	if (!(z0 & 0x80000000)) {
		/* top significand bit clear: stored exponent is 0 */
		ez = 0;
		tinyafter = 1;
		if (!(z0 | z1 | z2)) { /* exact zero */
			zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
			zz.i[1] = zz.i[0] = 0;
			__fenv_setcwsw(&oldcwsw);
			return (zz.e);
		}
	}

	/*
	 * flip the sense of directed roundings if the result is negative;
	 * the logic below applies to a positive result
	 */
	if (sz && (rm == FCW_RM || rm == FCW_RP))
		rm = (FCW_RM + FCW_RP) - rm;

	/*
	 * round: up always when rounding upward; when rounding to
	 * nearest, if the discarded part exceeds half an ulp or is
	 * exactly half and the kept significand is odd
	 */
	if (z2) {
		if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u ||
		    (z2 == 0x80000000u && (z1 & 1))))) {
			/* round up and renormalize if necessary */
			if (++z1 == 0) {
				if (++z0 == 0) {
					/* significand wrapped: becomes 1.0 */
					z0 = 0x80000000;
					ez++;
				} else if (z0 == 0x80000000) {
					/* rounded up to smallest normal */
					ez = 1;
					if ((rm == FCW_RP && z2 >
					    0x80000000u) || (rm == FCW_RN &&
					    z2 >= 0xc0000000u))
						/*
						 * would have rounded up to
						 * smallest normal even with
						 * unbounded range
						 */
						tinyafter = 0;
				}
			}
		}
	}

	/*
	 * restore the control and status words, check for over/underflow
	 *
	 * the control word is restored first, so the operations on dummy
	 * below run under the caller's settings
	 */
	__fenv_setcwsw(&oldcwsw);
	if (ez >= 0x7fff) {
		if (rm == FCW_RN || rm == FCW_RP) {
			/* deliver infinity */
			zz.i[2] = sz | 0x7fff;
			zz.i[1] = 0x80000000;
			zz.i[0] = 0;
		} else {
			/* deliver the largest finite magnitude */
			zz.i[2] = sz | 0x7ffe;
			zz.i[1] = 0xffffffff;
			zz.i[0] = 0xffffffff;
		}
		/* huge * huge overflows in double */
		dummy = huge;
		dummy *= huge;
	} else {
		zz.i[2] = sz | ez;
		zz.i[1] = z0;
		zz.i[0] = z1;

		/*
		 * tinyafter => result rounded w/ unbounded range would be tiny,
		 * z2 nonzero => result delivered is inexact
		 */
		if (tinyafter) {
			dummy = tiny;
			if (z2)
				/* tiny * tiny underflows inexactly */
				dummy *= tiny;
			else
				/* tiny - tiny2 is an exact subnormal */
				dummy -= tiny2;
		} else if (z2) {
			/* huge + tiny is inexact only */
			dummy = huge;
			dummy += tiny;
		}
	}

	return (zz.e);
}

#else
#error Unknown architecture
#endif