1 /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */ 2 3 /* 4 * This version hacked for use with gcc -msoft-float by bjh21. 5 * (Mostly a case of #ifdefing out things GCC doesn't need or provides 6 * itself). 7 */ 8 9 /* 10 * Things you may want to define: 11 * 12 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with 13 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them 14 * properly renamed. 15 */ 16 17 /* 18 * This differs from the standard bits32/softfloat.c in that float64 19 * is defined to be a 64-bit integer rather than a structure. The 20 * structure is float64s, with translation between the two going via 21 * float64u. 22 */ 23 24 /* 25 =============================================================================== 26 27 This C source file is part of the SoftFloat IEC/IEEE Floating-Point 28 Arithmetic Package, Release 2a. 29 30 Written by John R. Hauser. This work was made possible in part by the 31 International Computer Science Institute, located at Suite 600, 1947 Center 32 Street, Berkeley, California 94704. Funding was partially provided by the 33 National Science Foundation under grant MIP-9311980. The original version 34 of this code was written as part of a project to build a fixed-point vector 35 processor in collaboration with the University of California at Berkeley, 36 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 37 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 38 arithmetic/SoftFloat.html'. 39 40 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 41 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 42 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 43 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 44 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 45 46 Derivative works are acceptable, even for commercial purposes, so long as 47 (1) they include prominent notice that the work is derivative, and (2) they 48 include prominent notice akin to these four paragraphs for those parts of 49 this code that are retained. 50 51 =============================================================================== 52 */ 53 54 #include <sys/cdefs.h> 55 __FBSDID("$FreeBSD$"); 56 57 #ifdef SOFTFLOAT_FOR_GCC 58 #include "softfloat-for-gcc.h" 59 #endif 60 61 #include "milieu.h" 62 #include "softfloat.h" 63 64 /* 65 * Conversions between floats as stored in memory and floats as 66 * SoftFloat uses them 67 */ 68 #ifndef FLOAT64_DEMANGLE 69 #define FLOAT64_DEMANGLE(a) (a) 70 #endif 71 #ifndef FLOAT64_MANGLE 72 #define FLOAT64_MANGLE(a) (a) 73 #endif 74 75 /* 76 ------------------------------------------------------------------------------- 77 Floating-point rounding mode and exception flags. 78 ------------------------------------------------------------------------------- 79 */ 80 int float_rounding_mode = float_round_nearest_even; 81 int float_exception_flags = 0; 82 83 /* 84 ------------------------------------------------------------------------------- 85 Primitive arithmetic functions, including multi-word arithmetic, and 86 division and square root approximations. (Can be specialized to target if 87 desired.) 88 ------------------------------------------------------------------------------- 89 */ 90 #include "softfloat-macros" 91 92 /* 93 ------------------------------------------------------------------------------- 94 Functions and definitions to determine: (1) whether tininess for underflow 95 is detected before or after rounding by default, (2) what (if anything) 96 happens when exceptions are raised, (3) how signaling NaNs are distinguished 97 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs 98 are propagated from function inputs to output. These details are target- 99 specific. 100 ------------------------------------------------------------------------------- 101 */ 102 #include "softfloat-specialize" 103 104 /* 105 ------------------------------------------------------------------------------- 106 Returns the fraction bits of the single-precision floating-point value `a'. 107 ------------------------------------------------------------------------------- 108 */ 109 INLINE bits32 extractFloat32Frac( float32 a ) 110 { 111 112 return a & 0x007FFFFF; 113 114 } 115 116 /* 117 ------------------------------------------------------------------------------- 118 Returns the exponent bits of the single-precision floating-point value `a'. 119 ------------------------------------------------------------------------------- 120 */ 121 INLINE int16 extractFloat32Exp( float32 a ) 122 { 123 124 return ( a>>23 ) & 0xFF; 125 126 } 127 128 /* 129 ------------------------------------------------------------------------------- 130 Returns the sign bit of the single-precision floating-point value `a'. 131 ------------------------------------------------------------------------------- 132 */ 133 INLINE flag extractFloat32Sign( float32 a ) 134 { 135 136 return a>>31; 137 138 } 139 140 /* 141 ------------------------------------------------------------------------------- 142 Normalizes the subnormal single-precision floating-point value represented 143 by the denormalized significand `aSig'. The normalized exponent and 144 significand are stored at the locations pointed to by `zExpPtr' and 145 `zSigPtr', respectively. 146 ------------------------------------------------------------------------------- 147 */ 148 static void 149 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 150 { 151 int8 shiftCount; 152 153 shiftCount = countLeadingZeros32( aSig ) - 8; 154 *zSigPtr = aSig<<shiftCount; 155 *zExpPtr = 1 - shiftCount; 156 157 } 158 159 /* 160 ------------------------------------------------------------------------------- 161 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 162 single-precision floating-point value, returning the result. After being 163 shifted into the proper positions, the three fields are simply added 164 together to form the result. This means that any integer portion of `zSig' 165 will be added into the exponent. Since a properly normalized significand 166 will have an integer portion equal to 1, the `zExp' input should be 1 less 167 than the desired result exponent whenever `zSig' is a complete, normalized 168 significand. 169 ------------------------------------------------------------------------------- 170 */ 171 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 172 { 173 174 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 175 176 } 177 178 /* 179 ------------------------------------------------------------------------------- 180 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 181 and significand `zSig', and returns the proper single-precision floating- 182 point value corresponding to the abstract input. Ordinarily, the abstract 183 value is simply rounded and packed into the single-precision format, with 184 the inexact exception raised if the abstract input cannot be represented 185 exactly. However, if the abstract value is too large, the overflow and 186 inexact exceptions are raised and an infinity or maximal finite value is 187 returned. If the abstract value is too small, the input value is rounded to 188 a subnormal number, and the underflow and inexact exceptions are raised if 189 the abstract input cannot be represented exactly as a subnormal single- 190 precision floating-point number. 191 The input significand `zSig' has its binary point between bits 30 192 and 29, which is 7 bits to the left of the usual location. This shifted 193 significand must be normalized or smaller. If `zSig' is not normalized, 194 `zExp' must be 0; in that case, the result returned is a subnormal number, 195 and it must not require rounding. In the usual case that `zSig' is 196 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 197 The handling of underflow and overflow follows the IEC/IEEE Standard for 198 Binary Floating-Point Arithmetic. 199 ------------------------------------------------------------------------------- 200 */ 201 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 202 { 203 int8 roundingMode; 204 flag roundNearestEven; 205 int8 roundIncrement, roundBits; 206 flag isTiny; 207 208 roundingMode = float_rounding_mode; 209 roundNearestEven = roundingMode == float_round_nearest_even; 210 roundIncrement = 0x40; 211 if ( ! roundNearestEven ) { 212 if ( roundingMode == float_round_to_zero ) { 213 roundIncrement = 0; 214 } 215 else { 216 roundIncrement = 0x7F; 217 if ( zSign ) { 218 if ( roundingMode == float_round_up ) roundIncrement = 0; 219 } 220 else { 221 if ( roundingMode == float_round_down ) roundIncrement = 0; 222 } 223 } 224 } 225 roundBits = zSig & 0x7F; 226 if ( 0xFD <= (bits16) zExp ) { 227 if ( ( 0xFD < zExp ) 228 || ( ( zExp == 0xFD ) 229 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 230 ) { 231 float_raise( float_flag_overflow | float_flag_inexact ); 232 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 233 } 234 if ( zExp < 0 ) { 235 isTiny = 236 ( float_detect_tininess == float_tininess_before_rounding ) 237 || ( zExp < -1 ) 238 || ( zSig + roundIncrement < 0x80000000 ); 239 shift32RightJamming( zSig, - zExp, &zSig ); 240 zExp = 0; 241 roundBits = zSig & 0x7F; 242 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 243 } 244 } 245 if ( roundBits ) float_exception_flags |= float_flag_inexact; 246 zSig = ( zSig + roundIncrement )>>7; 247 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 248 if ( zSig == 0 ) zExp = 0; 249 return packFloat32( zSign, zExp, zSig ); 250 251 } 252 253 /* 254 ------------------------------------------------------------------------------- 255 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 256 and significand `zSig', and returns the proper single-precision floating- 257 point value corresponding to the abstract input. This routine is just like 258 `roundAndPackFloat32' except that `zSig' does not have to be normalized. 259 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 260 floating-point exponent. 261 ------------------------------------------------------------------------------- 262 */ 263 static float32 264 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 265 { 266 int8 shiftCount; 267 268 shiftCount = countLeadingZeros32( zSig ) - 1; 269 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 270 271 } 272 273 /* 274 ------------------------------------------------------------------------------- 275 Returns the least-significant 32 fraction bits of the double-precision 276 floating-point value `a'. 277 ------------------------------------------------------------------------------- 278 */ 279 INLINE bits32 extractFloat64Frac1( float64 a ) 280 { 281 282 return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF ); 283 284 } 285 286 /* 287 ------------------------------------------------------------------------------- 288 Returns the most-significant 20 fraction bits of the double-precision 289 floating-point value `a'. 290 ------------------------------------------------------------------------------- 291 */ 292 INLINE bits32 extractFloat64Frac0( float64 a ) 293 { 294 295 return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF; 296 297 } 298 299 /* 300 ------------------------------------------------------------------------------- 301 Returns the exponent bits of the double-precision floating-point value `a'. 302 ------------------------------------------------------------------------------- 303 */ 304 INLINE int16 extractFloat64Exp( float64 a ) 305 { 306 307 return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF; 308 309 } 310 311 /* 312 ------------------------------------------------------------------------------- 313 Returns the sign bit of the double-precision floating-point value `a'. 314 ------------------------------------------------------------------------------- 315 */ 316 INLINE flag extractFloat64Sign( float64 a ) 317 { 318 319 return FLOAT64_DEMANGLE(a)>>63; 320 321 } 322 323 /* 324 ------------------------------------------------------------------------------- 325 Normalizes the subnormal double-precision floating-point value represented 326 by the denormalized significand formed by the concatenation of `aSig0' and 327 `aSig1'. The normalized exponent is stored at the location pointed to by 328 `zExpPtr'. The most significant 21 bits of the normalized significand are 329 stored at the location pointed to by `zSig0Ptr', and the least significant 330 32 bits of the normalized significand are stored at the location pointed to 331 by `zSig1Ptr'. 332 ------------------------------------------------------------------------------- 333 */ 334 static void 335 normalizeFloat64Subnormal( 336 bits32 aSig0, 337 bits32 aSig1, 338 int16 *zExpPtr, 339 bits32 *zSig0Ptr, 340 bits32 *zSig1Ptr 341 ) 342 { 343 int8 shiftCount; 344 345 if ( aSig0 == 0 ) { 346 shiftCount = countLeadingZeros32( aSig1 ) - 11; 347 if ( shiftCount < 0 ) { 348 *zSig0Ptr = aSig1>>( - shiftCount ); 349 *zSig1Ptr = aSig1<<( shiftCount & 31 ); 350 } 351 else { 352 *zSig0Ptr = aSig1<<shiftCount; 353 *zSig1Ptr = 0; 354 } 355 *zExpPtr = - shiftCount - 31; 356 } 357 else { 358 shiftCount = countLeadingZeros32( aSig0 ) - 11; 359 shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 360 *zExpPtr = 1 - shiftCount; 361 } 362 363 } 364 365 /* 366 ------------------------------------------------------------------------------- 367 Packs the sign `zSign', the exponent `zExp', and the significand formed by 368 the concatenation of `zSig0' and `zSig1' into a double-precision floating- 369 point value, returning the result. After being shifted into the proper 370 positions, the three fields `zSign', `zExp', and `zSig0' are simply added 371 together to form the most significant 32 bits of the result. This means 372 that any integer portion of `zSig0' will be added into the exponent. Since 373 a properly normalized significand will have an integer portion equal to 1, 374 the `zExp' input should be 1 less than the desired result exponent whenever 375 `zSig0' and `zSig1' concatenated form a complete, normalized significand. 376 ------------------------------------------------------------------------------- 377 */ 378 INLINE float64 379 packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 380 { 381 382 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) + 383 ( ( (bits64) zExp )<<52 ) + 384 ( ( (bits64) zSig0 )<<32 ) + zSig1 ); 385 386 387 } 388 389 /* 390 ------------------------------------------------------------------------------- 391 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 392 and extended significand formed by the concatenation of `zSig0', `zSig1', 393 and `zSig2', and returns the proper double-precision floating-point value 394 corresponding to the abstract input. Ordinarily, the abstract value is 395 simply rounded and packed into the double-precision format, with the inexact 396 exception raised if the abstract input cannot be represented exactly. 397 However, if the abstract value is too large, the overflow and inexact 398 exceptions are raised and an infinity or maximal finite value is returned. 399 If the abstract value is too small, the input value is rounded to a 400 subnormal number, and the underflow and inexact exceptions are raised if the 401 abstract input cannot be represented exactly as a subnormal double-precision 402 floating-point number. 403 The input significand must be normalized or smaller. If the input 404 significand is not normalized, `zExp' must be 0; in that case, the result 405 returned is a subnormal number, and it must not require rounding. In the 406 usual case that the input significand is normalized, `zExp' must be 1 less 407 than the ``true'' floating-point exponent. The handling of underflow and 408 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 409 ------------------------------------------------------------------------------- 410 */ 411 static float64 412 roundAndPackFloat64( 413 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 ) 414 { 415 int8 roundingMode; 416 flag roundNearestEven, increment, isTiny; 417 418 roundingMode = float_rounding_mode; 419 roundNearestEven = ( roundingMode == float_round_nearest_even ); 420 increment = ( (sbits32) zSig2 < 0 ); 421 if ( ! roundNearestEven ) { 422 if ( roundingMode == float_round_to_zero ) { 423 increment = 0; 424 } 425 else { 426 if ( zSign ) { 427 increment = ( roundingMode == float_round_down ) && zSig2; 428 } 429 else { 430 increment = ( roundingMode == float_round_up ) && zSig2; 431 } 432 } 433 } 434 if ( 0x7FD <= (bits16) zExp ) { 435 if ( ( 0x7FD < zExp ) 436 || ( ( zExp == 0x7FD ) 437 && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 ) 438 && increment 439 ) 440 ) { 441 float_raise( float_flag_overflow | float_flag_inexact ); 442 if ( ( roundingMode == float_round_to_zero ) 443 || ( zSign && ( roundingMode == float_round_up ) ) 444 || ( ! zSign && ( roundingMode == float_round_down ) ) 445 ) { 446 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF ); 447 } 448 return packFloat64( zSign, 0x7FF, 0, 0 ); 449 } 450 if ( zExp < 0 ) { 451 isTiny = 452 ( float_detect_tininess == float_tininess_before_rounding ) 453 || ( zExp < -1 ) 454 || ! increment 455 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF ); 456 shift64ExtraRightJamming( 457 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 458 zExp = 0; 459 if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 460 if ( roundNearestEven ) { 461 increment = ( (sbits32) zSig2 < 0 ); 462 } 463 else { 464 if ( zSign ) { 465 increment = ( roundingMode == float_round_down ) && zSig2; 466 } 467 else { 468 increment = ( roundingMode == float_round_up ) && zSig2; 469 } 470 } 471 } 472 } 473 if ( zSig2 ) float_exception_flags |= float_flag_inexact; 474 if ( increment ) { 475 add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 476 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 477 } 478 else { 479 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 480 } 481 return packFloat64( zSign, zExp, zSig0, zSig1 ); 482 483 } 484 485 /* 486 ------------------------------------------------------------------------------- 487 Takes an abstract floating-point value having sign `zSign', exponent `zExp', 488 and significand formed by the concatenation of `zSig0' and `zSig1', and 489 returns the proper double-precision floating-point value corresponding 490 to the abstract input. This routine is just like `roundAndPackFloat64' 491 except that the input significand has fewer bits and does not have to be 492 normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 493 point exponent. 494 ------------------------------------------------------------------------------- 495 */ 496 static float64 497 normalizeRoundAndPackFloat64( 498 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) 499 { 500 int8 shiftCount; 501 bits32 zSig2; 502 503 if ( zSig0 == 0 ) { 504 zSig0 = zSig1; 505 zSig1 = 0; 506 zExp -= 32; 507 } 508 shiftCount = countLeadingZeros32( zSig0 ) - 11; 509 if ( 0 <= shiftCount ) { 510 zSig2 = 0; 511 shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 512 } 513 else { 514 shift64ExtraRightJamming( 515 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 516 } 517 zExp -= shiftCount; 518 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 519 520 } 521 522 /* 523 ------------------------------------------------------------------------------- 524 Returns the result of converting the 32-bit two's complement integer `a' to 525 the single-precision floating-point format. The conversion is performed 526 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 527 ------------------------------------------------------------------------------- 528 */ 529 float32 int32_to_float32( int32 a ) 530 { 531 flag zSign; 532 533 if ( a == 0 ) return 0; 534 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 535 zSign = ( a < 0 ); 536 return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); 537 538 } 539 540 /* 541 ------------------------------------------------------------------------------- 542 Returns the result of converting the 32-bit two's complement integer `a' to 543 the double-precision floating-point format. The conversion is performed 544 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 545 ------------------------------------------------------------------------------- 546 */ 547 float64 int32_to_float64( int32 a ) 548 { 549 flag zSign; 550 bits32 absA; 551 int8 shiftCount; 552 bits32 zSig0, zSig1; 553 554 if ( a == 0 ) return packFloat64( 0, 0, 0, 0 ); 555 zSign = ( a < 0 ); 556 absA = zSign ? - a : a; 557 shiftCount = countLeadingZeros32( absA ) - 11; 558 if ( 0 <= shiftCount ) { 559 zSig0 = absA<<shiftCount; 560 zSig1 = 0; 561 } 562 else { 563 shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 ); 564 } 565 return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 ); 566 567 } 568 569 #ifndef SOFTFLOAT_FOR_GCC 570 /* 571 ------------------------------------------------------------------------------- 572 Returns the result of converting the single-precision floating-point value 573 `a' to the 32-bit two's complement integer format. The conversion is 574 performed according to the IEC/IEEE Standard for Binary Floating-Point 575 Arithmetic---which means in particular that the conversion is rounded 576 according to the current rounding mode. If `a' is a NaN, the largest 577 positive integer is returned. Otherwise, if the conversion overflows, the 578 largest integer with the same sign as `a' is returned. 579 ------------------------------------------------------------------------------- 580 */ 581 int32 float32_to_int32( float32 a ) 582 { 583 flag aSign; 584 int16 aExp, shiftCount; 585 bits32 aSig, aSigExtra; 586 int32 z; 587 int8 roundingMode; 588 589 aSig = extractFloat32Frac( a ); 590 aExp = extractFloat32Exp( a ); 591 aSign = extractFloat32Sign( a ); 592 shiftCount = aExp - 0x96; 593 if ( 0 <= shiftCount ) { 594 if ( 0x9E <= aExp ) { 595 if ( a != 0xCF000000 ) { 596 float_raise( float_flag_invalid ); 597 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 598 return 0x7FFFFFFF; 599 } 600 } 601 return (sbits32) 0x80000000; 602 } 603 z = ( aSig | 0x00800000 )<<shiftCount; 604 if ( aSign ) z = - z; 605 } 606 else { 607 if ( aExp < 0x7E ) { 608 aSigExtra = aExp | aSig; 609 z = 0; 610 } 611 else { 612 aSig |= 0x00800000; 613 aSigExtra = aSig<<( shiftCount & 31 ); 614 z = aSig>>( - shiftCount ); 615 } 616 if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 617 roundingMode = float_rounding_mode; 618 if ( roundingMode == float_round_nearest_even ) { 619 if ( (sbits32) aSigExtra < 0 ) { 620 ++z; 621 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1; 622 } 623 if ( aSign ) z = - z; 624 } 625 else { 626 aSigExtra = ( aSigExtra != 0 ); 627 if ( aSign ) { 628 z += ( roundingMode == float_round_down ) & aSigExtra; 629 z = - z; 630 } 631 else { 632 z += ( roundingMode == float_round_up ) & aSigExtra; 633 } 634 } 635 } 636 return z; 637 638 } 639 #endif 640 641 /* 642 ------------------------------------------------------------------------------- 643 Returns the result of converting the single-precision floating-point value 644 `a' to the 32-bit two's complement integer format. The conversion is 645 performed according to the IEC/IEEE Standard for Binary Floating-Point 646 Arithmetic, except that the conversion is always rounded toward zero. 647 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 648 the conversion overflows, the largest integer with the same sign as `a' is 649 returned. 650 ------------------------------------------------------------------------------- 651 */ 652 int32 float32_to_int32_round_to_zero( float32 a ) 653 { 654 flag aSign; 655 int16 aExp, shiftCount; 656 bits32 aSig; 657 int32 z; 658 659 aSig = extractFloat32Frac( a ); 660 aExp = extractFloat32Exp( a ); 661 aSign = extractFloat32Sign( a ); 662 shiftCount = aExp - 0x9E; 663 if ( 0 <= shiftCount ) { 664 if ( a != 0xCF000000 ) { 665 float_raise( float_flag_invalid ); 666 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 667 } 668 return (sbits32) 0x80000000; 669 } 670 else if ( aExp <= 0x7E ) { 671 if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 672 return 0; 673 } 674 aSig = ( aSig | 0x00800000 )<<8; 675 z = aSig>>( - shiftCount ); 676 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 677 float_exception_flags |= float_flag_inexact; 678 } 679 if ( aSign ) z = - z; 680 return z; 681 682 } 683 684 /* 685 ------------------------------------------------------------------------------- 686 Returns the result of converting the single-precision floating-point value 687 `a' to the double-precision floating-point format. The conversion is 688 performed according to the IEC/IEEE Standard for Binary Floating-Point 689 Arithmetic. 690 ------------------------------------------------------------------------------- 691 */ 692 float64 float32_to_float64( float32 a ) 693 { 694 flag aSign; 695 int16 aExp; 696 bits32 aSig, zSig0, zSig1; 697 698 aSig = extractFloat32Frac( a ); 699 aExp = extractFloat32Exp( a ); 700 aSign = extractFloat32Sign( a ); 701 if ( aExp == 0xFF ) { 702 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 703 return packFloat64( aSign, 0x7FF, 0, 0 ); 704 } 705 if ( aExp == 0 ) { 706 if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 ); 707 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 708 --aExp; 709 } 710 shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); 711 return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); 712 713 } 714 715 #ifndef SOFTFLOAT_FOR_GCC 716 /* 717 ------------------------------------------------------------------------------- 718 Rounds the single-precision floating-point value `a' to an integer, 719 and returns the result as a single-precision floating-point value. The 720 operation is performed according to the IEC/IEEE Standard for Binary 721 Floating-Point Arithmetic. 722 ------------------------------------------------------------------------------- 723 */ 724 float32 float32_round_to_int( float32 a ) 725 { 726 flag aSign; 727 int16 aExp; 728 bits32 lastBitMask, roundBitsMask; 729 int8 roundingMode; 730 float32 z; 731 732 aExp = extractFloat32Exp( a ); 733 if ( 0x96 <= aExp ) { 734 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 735 return propagateFloat32NaN( a, a ); 736 } 737 return a; 738 } 739 if ( aExp <= 0x7E ) { 740 if ( (bits32) ( a<<1 ) == 0 ) return a; 741 float_exception_flags |= float_flag_inexact; 742 aSign = extractFloat32Sign( a ); 743 switch ( float_rounding_mode ) { 744 case float_round_nearest_even: 745 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 746 return packFloat32( aSign, 0x7F, 0 ); 747 } 748 break; 749 case float_round_to_zero: 750 break; 751 case float_round_down: 752 return aSign ? 0xBF800000 : 0; 753 case float_round_up: 754 return aSign ? 0x80000000 : 0x3F800000; 755 } 756 return packFloat32( aSign, 0, 0 ); 757 } 758 lastBitMask = 1; 759 lastBitMask <<= 0x96 - aExp; 760 roundBitsMask = lastBitMask - 1; 761 z = a; 762 roundingMode = float_rounding_mode; 763 if ( roundingMode == float_round_nearest_even ) { 764 z += lastBitMask>>1; 765 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 766 } 767 else if ( roundingMode != float_round_to_zero ) { 768 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { 769 z += roundBitsMask; 770 } 771 } 772 z &= ~ roundBitsMask; 773 if ( z != a ) float_exception_flags |= float_flag_inexact; 774 return z; 775 776 } 777 #endif 778 779 /* 780 ------------------------------------------------------------------------------- 781 Returns the result of adding the absolute values of the single-precision 782 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 783 before being returned. `zSign' is ignored if the result is a NaN. 784 The addition is performed according to the IEC/IEEE Standard for Binary 785 Floating-Point Arithmetic. 786 ------------------------------------------------------------------------------- 787 */ 788 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) 789 { 790 int16 aExp, bExp, zExp; 791 bits32 aSig, bSig, zSig; 792 int16 expDiff; 793 794 aSig = extractFloat32Frac( a ); 795 aExp = extractFloat32Exp( a ); 796 bSig = extractFloat32Frac( b ); 797 bExp = extractFloat32Exp( b ); 798 expDiff = aExp - bExp; 799 aSig <<= 6; 800 bSig <<= 6; 801 if ( 0 < expDiff ) { 802 if ( aExp == 0xFF ) { 803 if ( aSig ) return propagateFloat32NaN( a, b ); 804 return a; 805 } 806 if ( bExp == 0 ) { 807 --expDiff; 808 } 809 else { 810 bSig |= 0x20000000; 811 } 812 shift32RightJamming( bSig, expDiff, &bSig ); 813 zExp = aExp; 814 } 815 else if ( expDiff < 0 ) { 816 if ( bExp == 0xFF ) { 817 if ( bSig ) return propagateFloat32NaN( a, b ); 818 return packFloat32( zSign, 0xFF, 0 ); 819 } 820 if ( aExp == 0 ) { 821 ++expDiff; 822 } 823 else { 824 aSig |= 0x20000000; 825 } 826 shift32RightJamming( aSig, - expDiff, &aSig ); 827 zExp = bExp; 828 } 829 else { 830 if ( aExp == 0xFF ) { 831 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 832 return a; 833 } 834 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); 835 zSig = 0x40000000 + aSig + bSig; 836 zExp = aExp; 837 goto roundAndPack; 838 } 839 aSig |= 0x20000000; 840 zSig = ( aSig + bSig )<<1; 841 --zExp; 842 if ( (sbits32) zSig < 0 ) { 843 zSig = aSig + bSig; 844 ++zExp; 845 } 846 roundAndPack: 847 return roundAndPackFloat32( zSign, zExp, zSig ); 848 849 } 850 851 /* 852 ------------------------------------------------------------------------------- 853 Returns the result of subtracting the absolute values of the single- 854 precision floating-point values `a' and `b'. If `zSign' is 1, the 855 difference is negated before being returned. `zSign' is ignored if the 856 result is a NaN. The subtraction is performed according to the IEC/IEEE 857 Standard for Binary Floating-Point Arithmetic. 858 ------------------------------------------------------------------------------- 859 */ 860 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) 861 { 862 int16 aExp, bExp, zExp; 863 bits32 aSig, bSig, zSig; 864 int16 expDiff; 865 866 aSig = extractFloat32Frac( a ); 867 aExp = extractFloat32Exp( a ); 868 bSig = extractFloat32Frac( b ); 869 bExp = extractFloat32Exp( b ); 870 expDiff = aExp - bExp; 871 aSig <<= 7; 872 bSig <<= 7; 873 if ( 0 < expDiff ) goto aExpBigger; 874 if ( expDiff < 0 ) goto bExpBigger; 875 if ( aExp == 0xFF ) { 876 if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 877 float_raise( float_flag_invalid ); 878 return float32_default_nan; 879 } 880 if ( aExp == 0 ) { 881 aExp = 1; 882 bExp = 1; 883 } 884 if ( bSig < aSig ) goto aBigger; 885 if ( aSig < bSig ) goto bBigger; 886 return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); 887 bExpBigger: 888 if ( bExp == 0xFF ) { 889 if ( bSig ) return propagateFloat32NaN( a, b ); 890 return packFloat32( zSign ^ 1, 0xFF, 0 ); 891 } 892 if ( aExp == 0 ) { 893 ++expDiff; 894 } 895 else { 896 aSig |= 0x40000000; 897 } 898 shift32RightJamming( aSig, - expDiff, &aSig ); 899 bSig |= 0x40000000; 900 bBigger: 901 zSig = bSig - aSig; 902 zExp = bExp; 903 zSign ^= 1; 904 goto normalizeRoundAndPack; 905 aExpBigger: 906 if ( aExp == 0xFF ) { 907 if ( aSig ) return propagateFloat32NaN( a, b ); 908 return a; 909 } 910 if ( bExp == 0 ) { 911 --expDiff; 912 } 913 else { 914 bSig |= 0x40000000; 915 } 916 shift32RightJamming( bSig, expDiff, &bSig ); 917 aSig |= 0x40000000; 918 aBigger: 919 zSig = aSig - bSig; 920 zExp = aExp; 921 normalizeRoundAndPack: 922 --zExp; 923 return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); 924 925 } 926 927 /* 928 ------------------------------------------------------------------------------- 929 Returns the result of adding the single-precision floating-point values `a' 930 and `b'. The operation is performed according to the IEC/IEEE Standard for 931 Binary Floating-Point Arithmetic. 932 ------------------------------------------------------------------------------- 933 */ 934 float32 float32_add( float32 a, float32 b ) 935 { 936 flag aSign, bSign; 937 938 aSign = extractFloat32Sign( a ); 939 bSign = extractFloat32Sign( b ); 940 if ( aSign == bSign ) { 941 return addFloat32Sigs( a, b, aSign ); 942 } 943 else { 944 return subFloat32Sigs( a, b, aSign ); 945 } 946 947 } 948 949 /* 950 ------------------------------------------------------------------------------- 951 Returns the result of subtracting the single-precision floating-point values 952 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 953 for Binary Floating-Point Arithmetic. 954 ------------------------------------------------------------------------------- 955 */ 956 float32 float32_sub( float32 a, float32 b ) 957 { 958 flag aSign, bSign; 959 960 aSign = extractFloat32Sign( a ); 961 bSign = extractFloat32Sign( b ); 962 if ( aSign == bSign ) { 963 return subFloat32Sigs( a, b, aSign ); 964 } 965 else { 966 return addFloat32Sigs( a, b, aSign ); 967 } 968 969 } 970 971 /* 972 ------------------------------------------------------------------------------- 973 Returns the result of multiplying the single-precision floating-point values 974 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 975 for Binary Floating-Point Arithmetic. 976 ------------------------------------------------------------------------------- 977 */ 978 float32 float32_mul( float32 a, float32 b ) 979 { 980 flag aSign, bSign, zSign; 981 int16 aExp, bExp, zExp; 982 bits32 aSig, bSig, zSig0, zSig1; 983 984 aSig = extractFloat32Frac( a ); 985 aExp = extractFloat32Exp( a ); 986 aSign = extractFloat32Sign( a ); 987 bSig = extractFloat32Frac( b ); 988 bExp = extractFloat32Exp( b ); 989 bSign = extractFloat32Sign( b ); 990 zSign = aSign ^ bSign; 991 if ( aExp == 0xFF ) { 992 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 993 return propagateFloat32NaN( a, b ); 994 } 995 if ( ( bExp | bSig ) == 0 ) { 996 float_raise( float_flag_invalid ); 997 return float32_default_nan; 998 } 999 return packFloat32( zSign, 0xFF, 0 ); 1000 } 1001 if ( bExp == 0xFF ) { 1002 if ( bSig ) return propagateFloat32NaN( a, b ); 1003 if ( ( aExp | aSig ) == 0 ) { 1004 float_raise( float_flag_invalid ); 1005 return float32_default_nan; 1006 } 1007 return packFloat32( zSign, 0xFF, 0 ); 1008 } 1009 if ( aExp == 0 ) { 1010 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1011 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1012 } 1013 if ( bExp == 0 ) { 1014 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); 1015 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1016 } 1017 zExp = aExp + bExp - 0x7F; 1018 aSig = ( aSig | 0x00800000 )<<7; 1019 bSig = ( bSig | 0x00800000 )<<8; 1020 mul32To64( aSig, bSig, &zSig0, &zSig1 ); 1021 zSig0 |= ( zSig1 != 0 ); 1022 if ( 0 <= (sbits32) ( zSig0<<1 ) ) { 1023 zSig0 <<= 1; 1024 --zExp; 1025 } 1026 return roundAndPackFloat32( zSign, zExp, zSig0 ); 1027 1028 } 1029 1030 /* 1031 ------------------------------------------------------------------------------- 1032 Returns the result of dividing the single-precision floating-point value `a' 1033 by the corresponding value `b'. The operation is performed according to the 1034 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1035 ------------------------------------------------------------------------------- 1036 */ 1037 float32 float32_div( float32 a, float32 b ) 1038 { 1039 flag aSign, bSign, zSign; 1040 int16 aExp, bExp, zExp; 1041 bits32 aSig, bSig, zSig, rem0, rem1, term0, term1; 1042 1043 aSig = extractFloat32Frac( a ); 1044 aExp = extractFloat32Exp( a ); 1045 aSign = extractFloat32Sign( a ); 1046 bSig = extractFloat32Frac( b ); 1047 bExp = extractFloat32Exp( b ); 1048 bSign = extractFloat32Sign( b ); 1049 zSign = aSign ^ bSign; 1050 if ( aExp == 0xFF ) { 1051 if ( aSig ) return propagateFloat32NaN( a, b ); 1052 if ( bExp == 0xFF ) { 1053 if ( bSig ) return propagateFloat32NaN( a, b ); 1054 float_raise( float_flag_invalid ); 1055 return float32_default_nan; 1056 } 1057 return packFloat32( zSign, 0xFF, 0 ); 1058 } 1059 if ( bExp == 0xFF ) { 1060 if ( bSig ) return propagateFloat32NaN( a, b ); 1061 return packFloat32( zSign, 0, 0 ); 1062 } 1063 if ( bExp == 0 ) { 1064 if ( bSig == 0 ) { 1065 if ( ( aExp | aSig ) == 0 ) { 1066 float_raise( float_flag_invalid ); 1067 return float32_default_nan; 1068 } 1069 float_raise( float_flag_divbyzero ); 1070 return packFloat32( zSign, 0xFF, 0 ); 1071 } 1072 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1073 } 1074 if ( aExp == 0 ) { 1075 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1076 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1077 } 1078 zExp = aExp - bExp + 0x7D; 1079 aSig = ( aSig | 0x00800000 )<<7; 1080 bSig = ( bSig | 0x00800000 )<<8; 1081 if ( bSig <= ( aSig + aSig ) ) { 1082 aSig >>= 1; 1083 ++zExp; 1084 } 1085 zSig = estimateDiv64To32( aSig, 0, bSig ); 1086 if ( ( zSig & 0x3F ) <= 2 ) { 1087 mul32To64( bSig, zSig, &term0, &term1 ); 1088 sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1089 while ( (sbits32) rem0 < 0 ) { 1090 --zSig; 1091 add64( rem0, rem1, 0, bSig, &rem0, &rem1 ); 1092 } 1093 zSig |= ( rem1 != 0 ); 1094 } 1095 return roundAndPackFloat32( zSign, zExp, zSig ); 1096 1097 } 1098 1099 #ifndef SOFTFLOAT_FOR_GCC 1100 /* 1101 ------------------------------------------------------------------------------- 1102 Returns the remainder of the single-precision floating-point value `a' 1103 with respect to the corresponding value `b'. The operation is performed 1104 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1105 ------------------------------------------------------------------------------- 1106 */ 1107 float32 float32_rem( float32 a, float32 b ) 1108 { 1109 flag aSign, bSign, zSign; 1110 int16 aExp, bExp, expDiff; 1111 bits32 aSig, bSig, q, allZero, alternateASig; 1112 sbits32 sigMean; 1113 1114 aSig = extractFloat32Frac( a ); 1115 aExp = extractFloat32Exp( a ); 1116 aSign = extractFloat32Sign( a ); 1117 bSig = extractFloat32Frac( b ); 1118 bExp = extractFloat32Exp( b ); 1119 bSign = extractFloat32Sign( b ); 1120 if ( aExp == 0xFF ) { 1121 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1122 return propagateFloat32NaN( a, b ); 1123 } 1124 float_raise( float_flag_invalid ); 1125 return float32_default_nan; 1126 } 1127 if ( bExp == 0xFF ) { 1128 if ( bSig ) return propagateFloat32NaN( a, b ); 1129 return a; 1130 } 1131 if ( bExp == 0 ) { 1132 if ( bSig == 0 ) { 1133 float_raise( float_flag_invalid ); 1134 return float32_default_nan; 1135 } 1136 normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1137 } 1138 if ( aExp == 0 ) { 1139 if ( aSig == 0 ) return a; 1140 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1141 } 1142 expDiff = aExp - bExp; 1143 aSig = ( aSig | 0x00800000 )<<8; 1144 bSig = ( bSig | 0x00800000 )<<8; 1145 if ( expDiff < 0 ) { 1146 if ( expDiff < -1 ) return a; 1147 aSig >>= 1; 1148 } 1149 q = ( bSig <= aSig ); 1150 if ( q ) aSig -= bSig; 1151 expDiff -= 32; 1152 while ( 0 < expDiff ) { 1153 q = estimateDiv64To32( aSig, 0, bSig ); 1154 q = ( 2 < q ) ? q - 2 : 0; 1155 aSig = - ( ( bSig>>2 ) * q ); 1156 expDiff -= 30; 1157 } 1158 expDiff += 32; 1159 if ( 0 < expDiff ) { 1160 q = estimateDiv64To32( aSig, 0, bSig ); 1161 q = ( 2 < q ) ? q - 2 : 0; 1162 q >>= 32 - expDiff; 1163 bSig >>= 2; 1164 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 1165 } 1166 else { 1167 aSig >>= 2; 1168 bSig >>= 2; 1169 } 1170 do { 1171 alternateASig = aSig; 1172 ++q; 1173 aSig -= bSig; 1174 } while ( 0 <= (sbits32) aSig ); 1175 sigMean = aSig + alternateASig; 1176 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 1177 aSig = alternateASig; 1178 } 1179 zSign = ( (sbits32) aSig < 0 ); 1180 if ( zSign ) aSig = - aSig; 1181 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); 1182 1183 } 1184 #endif 1185 1186 #ifndef SOFTFLOAT_FOR_GCC 1187 /* 1188 ------------------------------------------------------------------------------- 1189 Returns the square root of the single-precision floating-point value `a'. 1190 The operation is performed according to the IEC/IEEE Standard for Binary 1191 Floating-Point Arithmetic. 1192 ------------------------------------------------------------------------------- 1193 */ 1194 float32 float32_sqrt( float32 a ) 1195 { 1196 flag aSign; 1197 int16 aExp, zExp; 1198 bits32 aSig, zSig, rem0, rem1, term0, term1; 1199 1200 aSig = extractFloat32Frac( a ); 1201 aExp = extractFloat32Exp( a ); 1202 aSign = extractFloat32Sign( a ); 1203 if ( aExp == 0xFF ) { 1204 if ( aSig ) return propagateFloat32NaN( a, 0 ); 1205 if ( ! aSign ) return a; 1206 float_raise( float_flag_invalid ); 1207 return float32_default_nan; 1208 } 1209 if ( aSign ) { 1210 if ( ( aExp | aSig ) == 0 ) return a; 1211 float_raise( float_flag_invalid ); 1212 return float32_default_nan; 1213 } 1214 if ( aExp == 0 ) { 1215 if ( aSig == 0 ) return 0; 1216 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1217 } 1218 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; 1219 aSig = ( aSig | 0x00800000 )<<8; 1220 zSig = estimateSqrt32( aExp, aSig ) + 2; 1221 if ( ( zSig & 0x7F ) <= 5 ) { 1222 if ( zSig < 2 ) { 1223 zSig = 0x7FFFFFFF; 1224 goto roundAndPack; 1225 } 1226 else { 1227 aSig >>= aExp & 1; 1228 mul32To64( zSig, zSig, &term0, &term1 ); 1229 sub64( aSig, 0, term0, term1, &rem0, &rem1 ); 1230 while ( (sbits32) rem0 < 0 ) { 1231 --zSig; 1232 shortShift64Left( 0, zSig, 1, &term0, &term1 ); 1233 term1 |= 1; 1234 add64( rem0, rem1, term0, term1, &rem0, &rem1 ); 1235 } 1236 zSig |= ( ( rem0 | rem1 ) != 0 ); 1237 } 1238 } 1239 shift32RightJamming( zSig, 1, &zSig ); 1240 roundAndPack: 1241 return roundAndPackFloat32( 0, zExp, zSig ); 1242 1243 } 1244 #endif 1245 1246 /* 1247 ------------------------------------------------------------------------------- 1248 Returns 1 if the single-precision floating-point value `a' is equal to 1249 the corresponding value `b', and 0 otherwise. The comparison is performed 1250 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1251 ------------------------------------------------------------------------------- 1252 */ 1253 flag float32_eq( float32 a, float32 b ) 1254 { 1255 1256 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1257 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1258 ) { 1259 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1260 float_raise( float_flag_invalid ); 1261 } 1262 return 0; 1263 } 1264 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1265 1266 } 1267 1268 /* 1269 ------------------------------------------------------------------------------- 1270 Returns 1 if the single-precision floating-point value `a' is less than 1271 or equal to the corresponding value `b', and 0 otherwise. The comparison 1272 is performed according to the IEC/IEEE Standard for Binary Floating-Point 1273 Arithmetic. 1274 ------------------------------------------------------------------------------- 1275 */ 1276 flag float32_le( float32 a, float32 b ) 1277 { 1278 flag aSign, bSign; 1279 1280 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1281 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1282 ) { 1283 float_raise( float_flag_invalid ); 1284 return 0; 1285 } 1286 aSign = extractFloat32Sign( a ); 1287 bSign = extractFloat32Sign( b ); 1288 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1289 return ( a == b ) || ( aSign ^ ( a < b ) ); 1290 1291 } 1292 1293 /* 1294 ------------------------------------------------------------------------------- 1295 Returns 1 if the single-precision floating-point value `a' is less than 1296 the corresponding value `b', and 0 otherwise. The comparison is performed 1297 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1298 ------------------------------------------------------------------------------- 1299 */ 1300 flag float32_lt( float32 a, float32 b ) 1301 { 1302 flag aSign, bSign; 1303 1304 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1305 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1306 ) { 1307 float_raise( float_flag_invalid ); 1308 return 0; 1309 } 1310 aSign = extractFloat32Sign( a ); 1311 bSign = extractFloat32Sign( b ); 1312 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1313 return ( a != b ) && ( aSign ^ ( a < b ) ); 1314 1315 } 1316 1317 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1318 /* 1319 ------------------------------------------------------------------------------- 1320 Returns 1 if the single-precision floating-point value `a' is equal to 1321 the corresponding value `b', and 0 otherwise. The invalid exception is 1322 raised if either operand is a NaN. Otherwise, the comparison is performed 1323 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1324 ------------------------------------------------------------------------------- 1325 */ 1326 flag float32_eq_signaling( float32 a, float32 b ) 1327 { 1328 1329 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1330 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1331 ) { 1332 float_raise( float_flag_invalid ); 1333 return 0; 1334 } 1335 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1336 1337 } 1338 1339 /* 1340 ------------------------------------------------------------------------------- 1341 Returns 1 if the single-precision floating-point value `a' is less than or 1342 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 1343 cause an exception. Otherwise, the comparison is performed according to the 1344 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1345 ------------------------------------------------------------------------------- 1346 */ 1347 flag float32_le_quiet( float32 a, float32 b ) 1348 { 1349 flag aSign, bSign; 1350 int16 aExp, bExp; 1351 1352 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1353 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1354 ) { 1355 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1356 float_raise( float_flag_invalid ); 1357 } 1358 return 0; 1359 } 1360 aSign = extractFloat32Sign( a ); 1361 bSign = extractFloat32Sign( b ); 1362 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 1363 return ( a == b ) || ( aSign ^ ( a < b ) ); 1364 1365 } 1366 1367 /* 1368 ------------------------------------------------------------------------------- 1369 Returns 1 if the single-precision floating-point value `a' is less than 1370 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 1371 exception. Otherwise, the comparison is performed according to the IEC/IEEE 1372 Standard for Binary Floating-Point Arithmetic. 1373 ------------------------------------------------------------------------------- 1374 */ 1375 flag float32_lt_quiet( float32 a, float32 b ) 1376 { 1377 flag aSign, bSign; 1378 1379 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 1380 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 1381 ) { 1382 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 1383 float_raise( float_flag_invalid ); 1384 } 1385 return 0; 1386 } 1387 aSign = extractFloat32Sign( a ); 1388 bSign = extractFloat32Sign( b ); 1389 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 1390 return ( a != b ) && ( aSign ^ ( a < b ) ); 1391 1392 } 1393 #endif /* !SOFTFLOAT_FOR_GCC */ 1394 1395 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1396 /* 1397 ------------------------------------------------------------------------------- 1398 Returns the result of converting the double-precision floating-point value 1399 `a' to the 32-bit two's complement integer format. The conversion is 1400 performed according to the IEC/IEEE Standard for Binary Floating-Point 1401 Arithmetic---which means in particular that the conversion is rounded 1402 according to the current rounding mode. If `a' is a NaN, the largest 1403 positive integer is returned. Otherwise, if the conversion overflows, the 1404 largest integer with the same sign as `a' is returned. 1405 ------------------------------------------------------------------------------- 1406 */ 1407 int32 float64_to_int32( float64 a ) 1408 { 1409 flag aSign; 1410 int16 aExp, shiftCount; 1411 bits32 aSig0, aSig1, absZ, aSigExtra; 1412 int32 z; 1413 int8 roundingMode; 1414 1415 aSig1 = extractFloat64Frac1( a ); 1416 aSig0 = extractFloat64Frac0( a ); 1417 aExp = extractFloat64Exp( a ); 1418 aSign = extractFloat64Sign( a ); 1419 shiftCount = aExp - 0x413; 1420 if ( 0 <= shiftCount ) { 1421 if ( 0x41E < aExp ) { 1422 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1423 goto invalid; 1424 } 1425 shortShift64Left( 1426 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1427 if ( 0x80000000 < absZ ) goto invalid; 1428 } 1429 else { 1430 aSig1 = ( aSig1 != 0 ); 1431 if ( aExp < 0x3FE ) { 1432 aSigExtra = aExp | aSig0 | aSig1; 1433 absZ = 0; 1434 } 1435 else { 1436 aSig0 |= 0x00100000; 1437 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1438 absZ = aSig0>>( - shiftCount ); 1439 } 1440 } 1441 roundingMode = float_rounding_mode; 1442 if ( roundingMode == float_round_nearest_even ) { 1443 if ( (sbits32) aSigExtra < 0 ) { 1444 ++absZ; 1445 if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1; 1446 } 1447 z = aSign ? - absZ : absZ; 1448 } 1449 else { 1450 aSigExtra = ( aSigExtra != 0 ); 1451 if ( aSign ) { 1452 z = - ( absZ 1453 + ( ( roundingMode == float_round_down ) & aSigExtra ) ); 1454 } 1455 else { 1456 z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra ); 1457 } 1458 } 1459 if ( ( aSign ^ ( z < 0 ) ) && z ) { 1460 invalid: 1461 float_raise( float_flag_invalid ); 1462 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1463 } 1464 if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 1465 return z; 1466 1467 } 1468 #endif /* !SOFTFLOAT_FOR_GCC */ 1469 1470 /* 1471 ------------------------------------------------------------------------------- 1472 Returns the result of converting the double-precision floating-point value 1473 `a' to the 32-bit two's complement integer format. The conversion is 1474 performed according to the IEC/IEEE Standard for Binary Floating-Point 1475 Arithmetic, except that the conversion is always rounded toward zero. 1476 If `a' is a NaN, the largest positive integer is returned. Otherwise, if 1477 the conversion overflows, the largest integer with the same sign as `a' is 1478 returned. 1479 ------------------------------------------------------------------------------- 1480 */ 1481 int32 float64_to_int32_round_to_zero( float64 a ) 1482 { 1483 flag aSign; 1484 int16 aExp, shiftCount; 1485 bits32 aSig0, aSig1, absZ, aSigExtra; 1486 int32 z; 1487 1488 aSig1 = extractFloat64Frac1( a ); 1489 aSig0 = extractFloat64Frac0( a ); 1490 aExp = extractFloat64Exp( a ); 1491 aSign = extractFloat64Sign( a ); 1492 shiftCount = aExp - 0x413; 1493 if ( 0 <= shiftCount ) { 1494 if ( 0x41E < aExp ) { 1495 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; 1496 goto invalid; 1497 } 1498 shortShift64Left( 1499 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); 1500 } 1501 else { 1502 if ( aExp < 0x3FF ) { 1503 if ( aExp | aSig0 | aSig1 ) { 1504 float_exception_flags |= float_flag_inexact; 1505 } 1506 return 0; 1507 } 1508 aSig0 |= 0x00100000; 1509 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; 1510 absZ = aSig0>>( - shiftCount ); 1511 } 1512 z = aSign ? - absZ : absZ; 1513 if ( ( aSign ^ ( z < 0 ) ) && z ) { 1514 invalid: 1515 float_raise( float_flag_invalid ); 1516 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 1517 } 1518 if ( aSigExtra ) float_exception_flags |= float_flag_inexact; 1519 return z; 1520 1521 } 1522 1523 /* 1524 ------------------------------------------------------------------------------- 1525 Returns the result of converting the double-precision floating-point value 1526 `a' to the single-precision floating-point format. The conversion is 1527 performed according to the IEC/IEEE Standard for Binary Floating-Point 1528 Arithmetic. 1529 ------------------------------------------------------------------------------- 1530 */ 1531 float32 float64_to_float32( float64 a ) 1532 { 1533 flag aSign; 1534 int16 aExp; 1535 bits32 aSig0, aSig1, zSig; 1536 bits32 allZero; 1537 1538 aSig1 = extractFloat64Frac1( a ); 1539 aSig0 = extractFloat64Frac0( a ); 1540 aExp = extractFloat64Exp( a ); 1541 aSign = extractFloat64Sign( a ); 1542 if ( aExp == 0x7FF ) { 1543 if ( aSig0 | aSig1 ) { 1544 return commonNaNToFloat32( float64ToCommonNaN( a ) ); 1545 } 1546 return packFloat32( aSign, 0xFF, 0 ); 1547 } 1548 shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig ); 1549 if ( aExp ) zSig |= 0x40000000; 1550 return roundAndPackFloat32( aSign, aExp - 0x381, zSig ); 1551 1552 } 1553 1554 #ifndef SOFTFLOAT_FOR_GCC 1555 /* 1556 ------------------------------------------------------------------------------- 1557 Rounds the double-precision floating-point value `a' to an integer, 1558 and returns the result as a double-precision floating-point value. The 1559 operation is performed according to the IEC/IEEE Standard for Binary 1560 Floating-Point Arithmetic. 1561 ------------------------------------------------------------------------------- 1562 */ 1563 float64 float64_round_to_int( float64 a ) 1564 { 1565 flag aSign; 1566 int16 aExp; 1567 bits32 lastBitMask, roundBitsMask; 1568 int8 roundingMode; 1569 float64 z; 1570 1571 aExp = extractFloat64Exp( a ); 1572 if ( 0x413 <= aExp ) { 1573 if ( 0x433 <= aExp ) { 1574 if ( ( aExp == 0x7FF ) 1575 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { 1576 return propagateFloat64NaN( a, a ); 1577 } 1578 return a; 1579 } 1580 lastBitMask = 1; 1581 lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; 1582 roundBitsMask = lastBitMask - 1; 1583 z = a; 1584 roundingMode = float_rounding_mode; 1585 if ( roundingMode == float_round_nearest_even ) { 1586 if ( lastBitMask ) { 1587 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); 1588 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 1589 } 1590 else { 1591 if ( (sbits32) z.low < 0 ) { 1592 ++z.high; 1593 if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1; 1594 } 1595 } 1596 } 1597 else if ( roundingMode != float_round_to_zero ) { 1598 if ( extractFloat64Sign( z ) 1599 ^ ( roundingMode == float_round_up ) ) { 1600 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); 1601 } 1602 } 1603 z.low &= ~ roundBitsMask; 1604 } 1605 else { 1606 if ( aExp <= 0x3FE ) { 1607 if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; 1608 float_exception_flags |= float_flag_inexact; 1609 aSign = extractFloat64Sign( a ); 1610 switch ( float_rounding_mode ) { 1611 case float_round_nearest_even: 1612 if ( ( aExp == 0x3FE ) 1613 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) 1614 ) { 1615 return packFloat64( aSign, 0x3FF, 0, 0 ); 1616 } 1617 break; 1618 case float_round_down: 1619 return 1620 aSign ? packFloat64( 1, 0x3FF, 0, 0 ) 1621 : packFloat64( 0, 0, 0, 0 ); 1622 case float_round_up: 1623 return 1624 aSign ? packFloat64( 1, 0, 0, 0 ) 1625 : packFloat64( 0, 0x3FF, 0, 0 ); 1626 } 1627 return packFloat64( aSign, 0, 0, 0 ); 1628 } 1629 lastBitMask = 1; 1630 lastBitMask <<= 0x413 - aExp; 1631 roundBitsMask = lastBitMask - 1; 1632 z.low = 0; 1633 z.high = a.high; 1634 roundingMode = float_rounding_mode; 1635 if ( roundingMode == float_round_nearest_even ) { 1636 z.high += lastBitMask>>1; 1637 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { 1638 z.high &= ~ lastBitMask; 1639 } 1640 } 1641 else if ( roundingMode != float_round_to_zero ) { 1642 if ( extractFloat64Sign( z ) 1643 ^ ( roundingMode == float_round_up ) ) { 1644 z.high |= ( a.low != 0 ); 1645 z.high += roundBitsMask; 1646 } 1647 } 1648 z.high &= ~ roundBitsMask; 1649 } 1650 if ( ( z.low != a.low ) || ( z.high != a.high ) ) { 1651 float_exception_flags |= float_flag_inexact; 1652 } 1653 return z; 1654 1655 } 1656 #endif 1657 1658 /* 1659 ------------------------------------------------------------------------------- 1660 Returns the result of adding the absolute values of the double-precision 1661 floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 1662 before being returned. `zSign' is ignored if the result is a NaN. 1663 The addition is performed according to the IEC/IEEE Standard for Binary 1664 Floating-Point Arithmetic. 1665 ------------------------------------------------------------------------------- 1666 */ 1667 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) 1668 { 1669 int16 aExp, bExp, zExp; 1670 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1671 int16 expDiff; 1672 1673 aSig1 = extractFloat64Frac1( a ); 1674 aSig0 = extractFloat64Frac0( a ); 1675 aExp = extractFloat64Exp( a ); 1676 bSig1 = extractFloat64Frac1( b ); 1677 bSig0 = extractFloat64Frac0( b ); 1678 bExp = extractFloat64Exp( b ); 1679 expDiff = aExp - bExp; 1680 if ( 0 < expDiff ) { 1681 if ( aExp == 0x7FF ) { 1682 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1683 return a; 1684 } 1685 if ( bExp == 0 ) { 1686 --expDiff; 1687 } 1688 else { 1689 bSig0 |= 0x00100000; 1690 } 1691 shift64ExtraRightJamming( 1692 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 1693 zExp = aExp; 1694 } 1695 else if ( expDiff < 0 ) { 1696 if ( bExp == 0x7FF ) { 1697 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1698 return packFloat64( zSign, 0x7FF, 0, 0 ); 1699 } 1700 if ( aExp == 0 ) { 1701 ++expDiff; 1702 } 1703 else { 1704 aSig0 |= 0x00100000; 1705 } 1706 shift64ExtraRightJamming( 1707 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 1708 zExp = bExp; 1709 } 1710 else { 1711 if ( aExp == 0x7FF ) { 1712 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1713 return propagateFloat64NaN( a, b ); 1714 } 1715 return a; 1716 } 1717 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1718 if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); 1719 zSig2 = 0; 1720 zSig0 |= 0x00200000; 1721 zExp = aExp; 1722 goto shiftRight1; 1723 } 1724 aSig0 |= 0x00100000; 1725 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1726 --zExp; 1727 if ( zSig0 < 0x00200000 ) goto roundAndPack; 1728 ++zExp; 1729 shiftRight1: 1730 shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1731 roundAndPack: 1732 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1733 1734 } 1735 1736 /* 1737 ------------------------------------------------------------------------------- 1738 Returns the result of subtracting the absolute values of the double- 1739 precision floating-point values `a' and `b'. If `zSign' is 1, the 1740 difference is negated before being returned. `zSign' is ignored if the 1741 result is a NaN. The subtraction is performed according to the IEC/IEEE 1742 Standard for Binary Floating-Point Arithmetic. 1743 ------------------------------------------------------------------------------- 1744 */ 1745 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) 1746 { 1747 int16 aExp, bExp, zExp; 1748 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 1749 int16 expDiff; 1750 1751 aSig1 = extractFloat64Frac1( a ); 1752 aSig0 = extractFloat64Frac0( a ); 1753 aExp = extractFloat64Exp( a ); 1754 bSig1 = extractFloat64Frac1( b ); 1755 bSig0 = extractFloat64Frac0( b ); 1756 bExp = extractFloat64Exp( b ); 1757 expDiff = aExp - bExp; 1758 shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 ); 1759 shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 ); 1760 if ( 0 < expDiff ) goto aExpBigger; 1761 if ( expDiff < 0 ) goto bExpBigger; 1762 if ( aExp == 0x7FF ) { 1763 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 1764 return propagateFloat64NaN( a, b ); 1765 } 1766 float_raise( float_flag_invalid ); 1767 return float64_default_nan; 1768 } 1769 if ( aExp == 0 ) { 1770 aExp = 1; 1771 bExp = 1; 1772 } 1773 if ( bSig0 < aSig0 ) goto aBigger; 1774 if ( aSig0 < bSig0 ) goto bBigger; 1775 if ( bSig1 < aSig1 ) goto aBigger; 1776 if ( aSig1 < bSig1 ) goto bBigger; 1777 return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); 1778 bExpBigger: 1779 if ( bExp == 0x7FF ) { 1780 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1781 return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); 1782 } 1783 if ( aExp == 0 ) { 1784 ++expDiff; 1785 } 1786 else { 1787 aSig0 |= 0x40000000; 1788 } 1789 shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 1790 bSig0 |= 0x40000000; 1791 bBigger: 1792 sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1793 zExp = bExp; 1794 zSign ^= 1; 1795 goto normalizeRoundAndPack; 1796 aExpBigger: 1797 if ( aExp == 0x7FF ) { 1798 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1799 return a; 1800 } 1801 if ( bExp == 0 ) { 1802 --expDiff; 1803 } 1804 else { 1805 bSig0 |= 0x40000000; 1806 } 1807 shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 1808 aSig0 |= 0x40000000; 1809 aBigger: 1810 sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 1811 zExp = aExp; 1812 normalizeRoundAndPack: 1813 --zExp; 1814 return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 ); 1815 1816 } 1817 1818 /* 1819 ------------------------------------------------------------------------------- 1820 Returns the result of adding the double-precision floating-point values `a' 1821 and `b'. The operation is performed according to the IEC/IEEE Standard for 1822 Binary Floating-Point Arithmetic. 1823 ------------------------------------------------------------------------------- 1824 */ 1825 float64 float64_add( float64 a, float64 b ) 1826 { 1827 flag aSign, bSign; 1828 1829 aSign = extractFloat64Sign( a ); 1830 bSign = extractFloat64Sign( b ); 1831 if ( aSign == bSign ) { 1832 return addFloat64Sigs( a, b, aSign ); 1833 } 1834 else { 1835 return subFloat64Sigs( a, b, aSign ); 1836 } 1837 1838 } 1839 1840 /* 1841 ------------------------------------------------------------------------------- 1842 Returns the result of subtracting the double-precision floating-point values 1843 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1844 for Binary Floating-Point Arithmetic. 1845 ------------------------------------------------------------------------------- 1846 */ 1847 float64 float64_sub( float64 a, float64 b ) 1848 { 1849 flag aSign, bSign; 1850 1851 aSign = extractFloat64Sign( a ); 1852 bSign = extractFloat64Sign( b ); 1853 if ( aSign == bSign ) { 1854 return subFloat64Sigs( a, b, aSign ); 1855 } 1856 else { 1857 return addFloat64Sigs( a, b, aSign ); 1858 } 1859 1860 } 1861 1862 /* 1863 ------------------------------------------------------------------------------- 1864 Returns the result of multiplying the double-precision floating-point values 1865 `a' and `b'. The operation is performed according to the IEC/IEEE Standard 1866 for Binary Floating-Point Arithmetic. 1867 ------------------------------------------------------------------------------- 1868 */ 1869 float64 float64_mul( float64 a, float64 b ) 1870 { 1871 flag aSign, bSign, zSign; 1872 int16 aExp, bExp, zExp; 1873 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; 1874 1875 aSig1 = extractFloat64Frac1( a ); 1876 aSig0 = extractFloat64Frac0( a ); 1877 aExp = extractFloat64Exp( a ); 1878 aSign = extractFloat64Sign( a ); 1879 bSig1 = extractFloat64Frac1( b ); 1880 bSig0 = extractFloat64Frac0( b ); 1881 bExp = extractFloat64Exp( b ); 1882 bSign = extractFloat64Sign( b ); 1883 zSign = aSign ^ bSign; 1884 if ( aExp == 0x7FF ) { 1885 if ( ( aSig0 | aSig1 ) 1886 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 1887 return propagateFloat64NaN( a, b ); 1888 } 1889 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; 1890 return packFloat64( zSign, 0x7FF, 0, 0 ); 1891 } 1892 if ( bExp == 0x7FF ) { 1893 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1894 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1895 invalid: 1896 float_raise( float_flag_invalid ); 1897 return float64_default_nan; 1898 } 1899 return packFloat64( zSign, 0x7FF, 0, 0 ); 1900 } 1901 if ( aExp == 0 ) { 1902 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1903 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1904 } 1905 if ( bExp == 0 ) { 1906 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1907 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1908 } 1909 zExp = aExp + bExp - 0x400; 1910 aSig0 |= 0x00100000; 1911 shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 ); 1912 mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); 1913 add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); 1914 zSig2 |= ( zSig3 != 0 ); 1915 if ( 0x00200000 <= zSig0 ) { 1916 shift64ExtraRightJamming( 1917 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 1918 ++zExp; 1919 } 1920 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 1921 1922 } 1923 1924 /* 1925 ------------------------------------------------------------------------------- 1926 Returns the result of dividing the double-precision floating-point value `a' 1927 by the corresponding value `b'. The operation is performed according to the 1928 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1929 ------------------------------------------------------------------------------- 1930 */ 1931 float64 float64_div( float64 a, float64 b ) 1932 { 1933 flag aSign, bSign, zSign; 1934 int16 aExp, bExp, zExp; 1935 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 1936 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 1937 1938 aSig1 = extractFloat64Frac1( a ); 1939 aSig0 = extractFloat64Frac0( a ); 1940 aExp = extractFloat64Exp( a ); 1941 aSign = extractFloat64Sign( a ); 1942 bSig1 = extractFloat64Frac1( b ); 1943 bSig0 = extractFloat64Frac0( b ); 1944 bExp = extractFloat64Exp( b ); 1945 bSign = extractFloat64Sign( b ); 1946 zSign = aSign ^ bSign; 1947 if ( aExp == 0x7FF ) { 1948 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); 1949 if ( bExp == 0x7FF ) { 1950 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1951 goto invalid; 1952 } 1953 return packFloat64( zSign, 0x7FF, 0, 0 ); 1954 } 1955 if ( bExp == 0x7FF ) { 1956 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 1957 return packFloat64( zSign, 0, 0, 0 ); 1958 } 1959 if ( bExp == 0 ) { 1960 if ( ( bSig0 | bSig1 ) == 0 ) { 1961 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 1962 invalid: 1963 float_raise( float_flag_invalid ); 1964 return float64_default_nan; 1965 } 1966 float_raise( float_flag_divbyzero ); 1967 return packFloat64( zSign, 0x7FF, 0, 0 ); 1968 } 1969 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 1970 } 1971 if ( aExp == 0 ) { 1972 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); 1973 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 1974 } 1975 zExp = aExp - bExp + 0x3FD; 1976 shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 ); 1977 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 1978 if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) { 1979 shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); 1980 ++zExp; 1981 } 1982 zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 ); 1983 mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); 1984 sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); 1985 while ( (sbits32) rem0 < 0 ) { 1986 --zSig0; 1987 add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); 1988 } 1989 zSig1 = estimateDiv64To32( rem1, rem2, bSig0 ); 1990 if ( ( zSig1 & 0x3FF ) <= 4 ) { 1991 mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); 1992 sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); 1993 while ( (sbits32) rem1 < 0 ) { 1994 --zSig1; 1995 add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); 1996 } 1997 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 1998 } 1999 shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 ); 2000 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); 2001 2002 } 2003 2004 #ifndef SOFTFLOAT_FOR_GCC 2005 /* 2006 ------------------------------------------------------------------------------- 2007 Returns the remainder of the double-precision floating-point value `a' 2008 with respect to the corresponding value `b'. The operation is performed 2009 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2010 ------------------------------------------------------------------------------- 2011 */ 2012 float64 float64_rem( float64 a, float64 b ) 2013 { 2014 flag aSign, bSign, zSign; 2015 int16 aExp, bExp, expDiff; 2016 bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; 2017 bits32 allZero, alternateASig0, alternateASig1, sigMean1; 2018 sbits32 sigMean0; 2019 float64 z; 2020 2021 aSig1 = extractFloat64Frac1( a ); 2022 aSig0 = extractFloat64Frac0( a ); 2023 aExp = extractFloat64Exp( a ); 2024 aSign = extractFloat64Sign( a ); 2025 bSig1 = extractFloat64Frac1( b ); 2026 bSig0 = extractFloat64Frac0( b ); 2027 bExp = extractFloat64Exp( b ); 2028 bSign = extractFloat64Sign( b ); 2029 if ( aExp == 0x7FF ) { 2030 if ( ( aSig0 | aSig1 ) 2031 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { 2032 return propagateFloat64NaN( a, b ); 2033 } 2034 goto invalid; 2035 } 2036 if ( bExp == 0x7FF ) { 2037 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); 2038 return a; 2039 } 2040 if ( bExp == 0 ) { 2041 if ( ( bSig0 | bSig1 ) == 0 ) { 2042 invalid: 2043 float_raise( float_flag_invalid ); 2044 return float64_default_nan; 2045 } 2046 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 2047 } 2048 if ( aExp == 0 ) { 2049 if ( ( aSig0 | aSig1 ) == 0 ) return a; 2050 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2051 } 2052 expDiff = aExp - bExp; 2053 if ( expDiff < -1 ) return a; 2054 shortShift64Left( 2055 aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 ); 2056 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); 2057 q = le64( bSig0, bSig1, aSig0, aSig1 ); 2058 if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2059 expDiff -= 32; 2060 while ( 0 < expDiff ) { 2061 q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2062 q = ( 4 < q ) ? q - 4 : 0; 2063 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2064 shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero ); 2065 shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero ); 2066 sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 ); 2067 expDiff -= 29; 2068 } 2069 if ( -32 < expDiff ) { 2070 q = estimateDiv64To32( aSig0, aSig1, bSig0 ); 2071 q = ( 4 < q ) ? q - 4 : 0; 2072 q >>= - expDiff; 2073 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2074 expDiff += 24; 2075 if ( expDiff < 0 ) { 2076 shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 2077 } 2078 else { 2079 shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); 2080 } 2081 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); 2082 sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); 2083 } 2084 else { 2085 shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 ); 2086 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); 2087 } 2088 do { 2089 alternateASig0 = aSig0; 2090 alternateASig1 = aSig1; 2091 ++q; 2092 sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 2093 } while ( 0 <= (sbits32) aSig0 ); 2094 add64( 2095 aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); 2096 if ( ( sigMean0 < 0 ) 2097 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { 2098 aSig0 = alternateASig0; 2099 aSig1 = alternateASig1; 2100 } 2101 zSign = ( (sbits32) aSig0 < 0 ); 2102 if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); 2103 return 2104 normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); 2105 2106 } 2107 #endif 2108 2109 #ifndef SOFTFLOAT_FOR_GCC 2110 /* 2111 ------------------------------------------------------------------------------- 2112 Returns the square root of the double-precision floating-point value `a'. 2113 The operation is performed according to the IEC/IEEE Standard for Binary 2114 Floating-Point Arithmetic. 2115 ------------------------------------------------------------------------------- 2116 */ 2117 float64 float64_sqrt( float64 a ) 2118 { 2119 flag aSign; 2120 int16 aExp, zExp; 2121 bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; 2122 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 2123 float64 z; 2124 2125 aSig1 = extractFloat64Frac1( a ); 2126 aSig0 = extractFloat64Frac0( a ); 2127 aExp = extractFloat64Exp( a ); 2128 aSign = extractFloat64Sign( a ); 2129 if ( aExp == 0x7FF ) { 2130 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a ); 2131 if ( ! aSign ) return a; 2132 goto invalid; 2133 } 2134 if ( aSign ) { 2135 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; 2136 invalid: 2137 float_raise( float_flag_invalid ); 2138 return float64_default_nan; 2139 } 2140 if ( aExp == 0 ) { 2141 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 ); 2142 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2143 } 2144 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 2145 aSig0 |= 0x00100000; 2146 shortShift64Left( aSig0, aSig1, 11, &term0, &term1 ); 2147 zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1; 2148 if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF; 2149 doubleZSig0 = zSig0 + zSig0; 2150 shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 ); 2151 mul32To64( zSig0, zSig0, &term0, &term1 ); 2152 sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 2153 while ( (sbits32) rem0 < 0 ) { 2154 --zSig0; 2155 doubleZSig0 -= 2; 2156 add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 ); 2157 } 2158 zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 ); 2159 if ( ( zSig1 & 0x1FF ) <= 5 ) { 2160 if ( zSig1 == 0 ) zSig1 = 1; 2161 mul32To64( doubleZSig0, zSig1, &term1, &term2 ); 2162 sub64( rem1, 0, term1, term2, &rem1, &rem2 ); 2163 mul32To64( zSig1, zSig1, &term2, &term3 ); 2164 sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 2165 while ( (sbits32) rem1 < 0 ) { 2166 --zSig1; 2167 shortShift64Left( 0, zSig1, 1, &term2, &term3 ); 2168 term3 |= 1; 2169 term2 |= doubleZSig0; 2170 add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 2171 } 2172 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 2173 } 2174 shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 ); 2175 return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 ); 2176 2177 } 2178 #endif 2179 2180 /* 2181 ------------------------------------------------------------------------------- 2182 Returns 1 if the double-precision floating-point value `a' is equal to 2183 the corresponding value `b', and 0 otherwise. The comparison is performed 2184 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2185 ------------------------------------------------------------------------------- 2186 */ 2187 flag float64_eq( float64 a, float64 b ) 2188 { 2189 2190 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2191 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2192 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2193 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2194 ) { 2195 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2196 float_raise( float_flag_invalid ); 2197 } 2198 return 0; 2199 } 2200 return ( a == b ) || 2201 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); 2202 2203 } 2204 2205 /* 2206 ------------------------------------------------------------------------------- 2207 Returns 1 if the double-precision floating-point value `a' is less than 2208 or equal to the corresponding value `b', and 0 otherwise. The comparison 2209 is performed according to the IEC/IEEE Standard for Binary Floating-Point 2210 Arithmetic. 2211 ------------------------------------------------------------------------------- 2212 */ 2213 flag float64_le( float64 a, float64 b ) 2214 { 2215 flag aSign, bSign; 2216 2217 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2218 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2219 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2220 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2221 ) { 2222 float_raise( float_flag_invalid ); 2223 return 0; 2224 } 2225 aSign = extractFloat64Sign( a ); 2226 bSign = extractFloat64Sign( b ); 2227 if ( aSign != bSign ) 2228 return aSign || 2229 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 2230 0 ); 2231 return ( a == b ) || 2232 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2233 } 2234 2235 /* 2236 ------------------------------------------------------------------------------- 2237 Returns 1 if the double-precision floating-point value `a' is less than 2238 the corresponding value `b', and 0 otherwise. The comparison is performed 2239 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2240 ------------------------------------------------------------------------------- 2241 */ 2242 flag float64_lt( float64 a, float64 b ) 2243 { 2244 flag aSign, bSign; 2245 2246 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2247 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2248 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2249 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2250 ) { 2251 float_raise( float_flag_invalid ); 2252 return 0; 2253 } 2254 aSign = extractFloat64Sign( a ); 2255 bSign = extractFloat64Sign( b ); 2256 if ( aSign != bSign ) 2257 return aSign && 2258 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 2259 0 ); 2260 return ( a != b ) && 2261 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 2262 2263 } 2264 2265 #ifndef SOFTFLOAT_FOR_GCC 2266 /* 2267 ------------------------------------------------------------------------------- 2268 Returns 1 if the double-precision floating-point value `a' is equal to 2269 the corresponding value `b', and 0 otherwise. The invalid exception is 2270 raised if either operand is a NaN. Otherwise, the comparison is performed 2271 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2272 ------------------------------------------------------------------------------- 2273 */ 2274 flag float64_eq_signaling( float64 a, float64 b ) 2275 { 2276 2277 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2278 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2279 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2280 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2281 ) { 2282 float_raise( float_flag_invalid ); 2283 return 0; 2284 } 2285 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2286 2287 } 2288 2289 /* 2290 ------------------------------------------------------------------------------- 2291 Returns 1 if the double-precision floating-point value `a' is less than or 2292 equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 2293 cause an exception. Otherwise, the comparison is performed according to the 2294 IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2295 ------------------------------------------------------------------------------- 2296 */ 2297 flag float64_le_quiet( float64 a, float64 b ) 2298 { 2299 flag aSign, bSign; 2300 2301 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2302 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2303 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2304 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2305 ) { 2306 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2307 float_raise( float_flag_invalid ); 2308 } 2309 return 0; 2310 } 2311 aSign = extractFloat64Sign( a ); 2312 bSign = extractFloat64Sign( b ); 2313 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 2314 return ( a == b ) || ( aSign ^ ( a < b ) ); 2315 2316 } 2317 2318 /* 2319 ------------------------------------------------------------------------------- 2320 Returns 1 if the double-precision floating-point value `a' is less than 2321 the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 2322 exception. Otherwise, the comparison is performed according to the IEC/IEEE 2323 Standard for Binary Floating-Point Arithmetic. 2324 ------------------------------------------------------------------------------- 2325 */ 2326 flag float64_lt_quiet( float64 a, float64 b ) 2327 { 2328 flag aSign, bSign; 2329 2330 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) 2331 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) 2332 || ( ( extractFloat64Exp( b ) == 0x7FF ) 2333 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) 2334 ) { 2335 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 2336 float_raise( float_flag_invalid ); 2337 } 2338 return 0; 2339 } 2340 aSign = extractFloat64Sign( a ); 2341 bSign = extractFloat64Sign( b ); 2342 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 2343 return ( a != b ) && ( aSign ^ ( a < b ) ); 2344 2345 } 2346 2347 #endif 2348