1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision floating 10 // point values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Config/llvm-config.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <cstring> 27 #include <limits.h> 28 29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 30 do { \ 31 if (usesLayout<IEEEFloat>(getSemantics())) \ 32 return U.IEEE.METHOD_CALL; \ 33 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 34 return U.Double.METHOD_CALL; \ 35 llvm_unreachable("Unexpected semantics"); \ 36 } while (false) 37 38 using namespace llvm; 39 40 /// A macro used to combine two fcCategory enums into one key which can be used 41 /// in a switch statement to classify how the interaction of two APFloat's 42 /// categories affects an operation. 43 /// 44 /// TODO: If clang source code is ever allowed to use constexpr in its own 45 /// codebase, change this into a static inline function. 46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 47 48 /* Assumed in hexadecimal significand parsing, and conversion to 49 hexadecimal strings. */ 50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 51 52 namespace llvm { 53 /* Represents floating point arithmetic semantics. */ 54 struct fltSemantics { 55 /* The largest E such that 2^E is representable; this matches the 56 definition of IEEE 754. */ 57 APFloatBase::ExponentType maxExponent; 58 59 /* The smallest E such that 2^E is a normalized number; this 60 matches the definition of IEEE 754. */ 61 APFloatBase::ExponentType minExponent; 62 63 /* Number of bits in the significand. This includes the integer 64 bit. */ 65 unsigned int precision; 66 67 /* Number of bits actually used in the semantics. */ 68 unsigned int sizeInBits; 69 70 // Returns true if any number described by this semantics can be precisely 71 // represented by the specified semantics. 72 bool isRepresentableBy(const fltSemantics &S) const { 73 return maxExponent <= S.maxExponent && minExponent >= S.minExponent && 74 precision <= S.precision; 75 } 76 }; 77 78 static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; 79 static const fltSemantics semBFloat = {127, -126, 8, 16}; 80 static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; 81 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; 82 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; 83 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 84 static const fltSemantics semBogus = {0, 0, 0, 0}; 85 86 /* The IBM double-double semantics. Such a number consists of a pair of IEEE 87 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, 88 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. 89 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent 90 to each other, and two 11-bit exponents. 91 92 Note: we need to make the value different from semBogus as otherwise 93 an unsafe optimization may collapse both values to a single address, 94 and we heavily rely on them having distinct addresses. */ 95 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; 96 97 /* These are legacy semantics for the fallback, inaccrurate implementation of 98 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the 99 operation. It's equivalent to having an IEEE number with consecutive 106 100 bits of mantissa and 11 bits of exponent. 101 102 It's not equivalent to IBM double-double. For example, a legit IBM 103 double-double, 1 + epsilon: 104 105 1 + epsilon = 1 + (1 >> 1076) 106 107 is not representable by a consecutive 106 bits of mantissa. 108 109 Currently, these semantics are used in the following way: 110 111 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> 112 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> 113 semPPCDoubleDoubleLegacy -> IEEE operations 114 115 We use bitcastToAPInt() to get the bit representation (in APInt) of the 116 underlying IEEEdouble, then use the APInt constructor to construct the 117 legacy IEEE float. 118 119 TODO: Implement all operations in semPPCDoubleDouble, and delete these 120 semantics. */ 121 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 122 53 + 53, 128}; 123 124 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 125 switch (S) { 126 case S_IEEEhalf: 127 return IEEEhalf(); 128 case S_BFloat: 129 return BFloat(); 130 case S_IEEEsingle: 131 return IEEEsingle(); 132 case S_IEEEdouble: 133 return IEEEdouble(); 134 case S_x87DoubleExtended: 135 return x87DoubleExtended(); 136 case S_IEEEquad: 137 return IEEEquad(); 138 case S_PPCDoubleDouble: 139 return PPCDoubleDouble(); 140 } 141 llvm_unreachable("Unrecognised floating semantics"); 142 } 143 144 APFloatBase::Semantics 145 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 146 if (&Sem == &llvm::APFloat::IEEEhalf()) 147 return S_IEEEhalf; 148 else if (&Sem == &llvm::APFloat::BFloat()) 149 return S_BFloat; 150 else if (&Sem == &llvm::APFloat::IEEEsingle()) 151 return S_IEEEsingle; 152 else if (&Sem == &llvm::APFloat::IEEEdouble()) 153 return S_IEEEdouble; 154 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 155 return S_x87DoubleExtended; 156 else if (&Sem == &llvm::APFloat::IEEEquad()) 157 return S_IEEEquad; 158 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 159 return S_PPCDoubleDouble; 160 else 161 llvm_unreachable("Unknown floating semantics"); 162 } 163 164 const fltSemantics &APFloatBase::IEEEhalf() { 165 return semIEEEhalf; 166 } 167 const fltSemantics &APFloatBase::BFloat() { 168 return semBFloat; 169 } 170 const fltSemantics &APFloatBase::IEEEsingle() { 171 return semIEEEsingle; 172 } 173 const fltSemantics &APFloatBase::IEEEdouble() { 174 return semIEEEdouble; 175 } 176 const fltSemantics &APFloatBase::IEEEquad() { 177 return semIEEEquad; 178 } 179 const fltSemantics &APFloatBase::x87DoubleExtended() { 180 return semX87DoubleExtended; 181 } 182 const fltSemantics &APFloatBase::Bogus() { 183 return semBogus; 184 } 185 const fltSemantics &APFloatBase::PPCDoubleDouble() { 186 return semPPCDoubleDouble; 187 } 188 189 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 190 constexpr RoundingMode APFloatBase::rmTowardPositive; 191 constexpr RoundingMode APFloatBase::rmTowardNegative; 192 constexpr RoundingMode APFloatBase::rmTowardZero; 193 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 194 195 /* A tight upper bound on number of parts required to hold the value 196 pow(5, power) is 197 198 power * 815 / (351 * integerPartWidth) + 1 199 200 However, whilst the result may require only this many parts, 201 because we are multiplying two values to get it, the 202 multiplication may require an extra part with the excess part 203 being zero (consider the trivial case of 1 * 1, tcFullMultiply 204 requires two parts to hold the single-part result). So we add an 205 extra one to guarantee enough space whilst multiplying. */ 206 const unsigned int maxExponent = 16383; 207 const unsigned int maxPrecision = 113; 208 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 209 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 210 211 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 212 return semantics.precision; 213 } 214 APFloatBase::ExponentType 215 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 216 return semantics.maxExponent; 217 } 218 APFloatBase::ExponentType 219 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 220 return semantics.minExponent; 221 } 222 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 223 return semantics.sizeInBits; 224 } 225 226 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 227 return Sem.sizeInBits; 228 } 229 230 /* A bunch of private, handy routines. */ 231 232 static inline Error createError(const Twine &Err) { 233 return make_error<StringError>(Err, inconvertibleErrorCode()); 234 } 235 236 static inline unsigned int 237 partCountForBits(unsigned int bits) 238 { 239 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 240 } 241 242 /* Returns 0U-9U. Return values >= 10U are not digits. */ 243 static inline unsigned int 244 decDigitValue(unsigned int c) 245 { 246 return c - '0'; 247 } 248 249 /* Return the value of a decimal exponent of the form 250 [+-]ddddddd. 251 252 If the exponent overflows, returns a large exponent with the 253 appropriate sign. */ 254 static Expected<int> readExponent(StringRef::iterator begin, 255 StringRef::iterator end) { 256 bool isNegative; 257 unsigned int absExponent; 258 const unsigned int overlargeExponent = 24000; /* FIXME. */ 259 StringRef::iterator p = begin; 260 261 // Treat no exponent as 0 to match binutils 262 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 263 return 0; 264 } 265 266 isNegative = (*p == '-'); 267 if (*p == '-' || *p == '+') { 268 p++; 269 if (p == end) 270 return createError("Exponent has no digits"); 271 } 272 273 absExponent = decDigitValue(*p++); 274 if (absExponent >= 10U) 275 return createError("Invalid character in exponent"); 276 277 for (; p != end; ++p) { 278 unsigned int value; 279 280 value = decDigitValue(*p); 281 if (value >= 10U) 282 return createError("Invalid character in exponent"); 283 284 absExponent = absExponent * 10U + value; 285 if (absExponent >= overlargeExponent) { 286 absExponent = overlargeExponent; 287 break; 288 } 289 } 290 291 if (isNegative) 292 return -(int) absExponent; 293 else 294 return (int) absExponent; 295 } 296 297 /* This is ugly and needs cleaning up, but I don't immediately see 298 how whilst remaining safe. */ 299 static Expected<int> totalExponent(StringRef::iterator p, 300 StringRef::iterator end, 301 int exponentAdjustment) { 302 int unsignedExponent; 303 bool negative, overflow; 304 int exponent = 0; 305 306 if (p == end) 307 return createError("Exponent has no digits"); 308 309 negative = *p == '-'; 310 if (*p == '-' || *p == '+') { 311 p++; 312 if (p == end) 313 return createError("Exponent has no digits"); 314 } 315 316 unsignedExponent = 0; 317 overflow = false; 318 for (; p != end; ++p) { 319 unsigned int value; 320 321 value = decDigitValue(*p); 322 if (value >= 10U) 323 return createError("Invalid character in exponent"); 324 325 unsignedExponent = unsignedExponent * 10 + value; 326 if (unsignedExponent > 32767) { 327 overflow = true; 328 break; 329 } 330 } 331 332 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 333 overflow = true; 334 335 if (!overflow) { 336 exponent = unsignedExponent; 337 if (negative) 338 exponent = -exponent; 339 exponent += exponentAdjustment; 340 if (exponent > 32767 || exponent < -32768) 341 overflow = true; 342 } 343 344 if (overflow) 345 exponent = negative ? -32768: 32767; 346 347 return exponent; 348 } 349 350 static Expected<StringRef::iterator> 351 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 352 StringRef::iterator *dot) { 353 StringRef::iterator p = begin; 354 *dot = end; 355 while (p != end && *p == '0') 356 p++; 357 358 if (p != end && *p == '.') { 359 *dot = p++; 360 361 if (end - begin == 1) 362 return createError("Significand has no digits"); 363 364 while (p != end && *p == '0') 365 p++; 366 } 367 368 return p; 369 } 370 371 /* Given a normal decimal floating point number of the form 372 373 dddd.dddd[eE][+-]ddd 374 375 where the decimal point and exponent are optional, fill out the 376 structure D. Exponent is appropriate if the significand is 377 treated as an integer, and normalizedExponent if the significand 378 is taken to have the decimal point after a single leading 379 non-zero digit. 380 381 If the value is zero, V->firstSigDigit points to a non-digit, and 382 the return exponent is zero. 383 */ 384 struct decimalInfo { 385 const char *firstSigDigit; 386 const char *lastSigDigit; 387 int exponent; 388 int normalizedExponent; 389 }; 390 391 static Error interpretDecimal(StringRef::iterator begin, 392 StringRef::iterator end, decimalInfo *D) { 393 StringRef::iterator dot = end; 394 395 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 396 if (!PtrOrErr) 397 return PtrOrErr.takeError(); 398 StringRef::iterator p = *PtrOrErr; 399 400 D->firstSigDigit = p; 401 D->exponent = 0; 402 D->normalizedExponent = 0; 403 404 for (; p != end; ++p) { 405 if (*p == '.') { 406 if (dot != end) 407 return createError("String contains multiple dots"); 408 dot = p++; 409 if (p == end) 410 break; 411 } 412 if (decDigitValue(*p) >= 10U) 413 break; 414 } 415 416 if (p != end) { 417 if (*p != 'e' && *p != 'E') 418 return createError("Invalid character in significand"); 419 if (p == begin) 420 return createError("Significand has no digits"); 421 if (dot != end && p - begin == 1) 422 return createError("Significand has no digits"); 423 424 /* p points to the first non-digit in the string */ 425 auto ExpOrErr = readExponent(p + 1, end); 426 if (!ExpOrErr) 427 return ExpOrErr.takeError(); 428 D->exponent = *ExpOrErr; 429 430 /* Implied decimal point? */ 431 if (dot == end) 432 dot = p; 433 } 434 435 /* If number is all zeroes accept any exponent. */ 436 if (p != D->firstSigDigit) { 437 /* Drop insignificant trailing zeroes. */ 438 if (p != begin) { 439 do 440 do 441 p--; 442 while (p != begin && *p == '0'); 443 while (p != begin && *p == '.'); 444 } 445 446 /* Adjust the exponents for any decimal point. */ 447 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 448 D->normalizedExponent = (D->exponent + 449 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 450 - (dot > D->firstSigDigit && dot < p))); 451 } 452 453 D->lastSigDigit = p; 454 return Error::success(); 455 } 456 457 /* Return the trailing fraction of a hexadecimal number. 458 DIGITVALUE is the first hex digit of the fraction, P points to 459 the next digit. */ 460 static Expected<lostFraction> 461 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 462 unsigned int digitValue) { 463 unsigned int hexDigit; 464 465 /* If the first trailing digit isn't 0 or 8 we can work out the 466 fraction immediately. */ 467 if (digitValue > 8) 468 return lfMoreThanHalf; 469 else if (digitValue < 8 && digitValue > 0) 470 return lfLessThanHalf; 471 472 // Otherwise we need to find the first non-zero digit. 473 while (p != end && (*p == '0' || *p == '.')) 474 p++; 475 476 if (p == end) 477 return createError("Invalid trailing hexadecimal fraction!"); 478 479 hexDigit = hexDigitValue(*p); 480 481 /* If we ran off the end it is exactly zero or one-half, otherwise 482 a little more. */ 483 if (hexDigit == -1U) 484 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 485 else 486 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 487 } 488 489 /* Return the fraction lost were a bignum truncated losing the least 490 significant BITS bits. */ 491 static lostFraction 492 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 493 unsigned int partCount, 494 unsigned int bits) 495 { 496 unsigned int lsb; 497 498 lsb = APInt::tcLSB(parts, partCount); 499 500 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 501 if (bits <= lsb) 502 return lfExactlyZero; 503 if (bits == lsb + 1) 504 return lfExactlyHalf; 505 if (bits <= partCount * APFloatBase::integerPartWidth && 506 APInt::tcExtractBit(parts, bits - 1)) 507 return lfMoreThanHalf; 508 509 return lfLessThanHalf; 510 } 511 512 /* Shift DST right BITS bits noting lost fraction. */ 513 static lostFraction 514 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 515 { 516 lostFraction lost_fraction; 517 518 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 519 520 APInt::tcShiftRight(dst, parts, bits); 521 522 return lost_fraction; 523 } 524 525 /* Combine the effect of two lost fractions. */ 526 static lostFraction 527 combineLostFractions(lostFraction moreSignificant, 528 lostFraction lessSignificant) 529 { 530 if (lessSignificant != lfExactlyZero) { 531 if (moreSignificant == lfExactlyZero) 532 moreSignificant = lfLessThanHalf; 533 else if (moreSignificant == lfExactlyHalf) 534 moreSignificant = lfMoreThanHalf; 535 } 536 537 return moreSignificant; 538 } 539 540 /* The error from the true value, in half-ulps, on multiplying two 541 floating point numbers, which differ from the value they 542 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 543 than the returned value. 544 545 See "How to Read Floating Point Numbers Accurately" by William D 546 Clinger. */ 547 static unsigned int 548 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 549 { 550 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 551 552 if (HUerr1 + HUerr2 == 0) 553 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 554 else 555 return inexactMultiply + 2 * (HUerr1 + HUerr2); 556 } 557 558 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 559 when the least significant BITS are truncated. BITS cannot be 560 zero. */ 561 static APFloatBase::integerPart 562 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, 563 bool isNearest) { 564 unsigned int count, partBits; 565 APFloatBase::integerPart part, boundary; 566 567 assert(bits != 0); 568 569 bits--; 570 count = bits / APFloatBase::integerPartWidth; 571 partBits = bits % APFloatBase::integerPartWidth + 1; 572 573 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); 574 575 if (isNearest) 576 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); 577 else 578 boundary = 0; 579 580 if (count == 0) { 581 if (part - boundary <= boundary - part) 582 return part - boundary; 583 else 584 return boundary - part; 585 } 586 587 if (part == boundary) { 588 while (--count) 589 if (parts[count]) 590 return ~(APFloatBase::integerPart) 0; /* A lot. */ 591 592 return parts[0]; 593 } else if (part == boundary - 1) { 594 while (--count) 595 if (~parts[count]) 596 return ~(APFloatBase::integerPart) 0; /* A lot. */ 597 598 return -parts[0]; 599 } 600 601 return ~(APFloatBase::integerPart) 0; /* A lot. */ 602 } 603 604 /* Place pow(5, power) in DST, and return the number of parts used. 605 DST must be at least one part larger than size of the answer. */ 606 static unsigned int 607 powerOf5(APFloatBase::integerPart *dst, unsigned int power) { 608 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; 609 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 610 pow5s[0] = 78125 * 5; 611 612 unsigned int partsCount[16] = { 1 }; 613 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 614 unsigned int result; 615 assert(power <= maxExponent); 616 617 p1 = dst; 618 p2 = scratch; 619 620 *p1 = firstEightPowers[power & 7]; 621 power >>= 3; 622 623 result = 1; 624 pow5 = pow5s; 625 626 for (unsigned int n = 0; power; power >>= 1, n++) { 627 unsigned int pc; 628 629 pc = partsCount[n]; 630 631 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 632 if (pc == 0) { 633 pc = partsCount[n - 1]; 634 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 635 pc *= 2; 636 if (pow5[pc - 1] == 0) 637 pc--; 638 partsCount[n] = pc; 639 } 640 641 if (power & 1) { 642 APFloatBase::integerPart *tmp; 643 644 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 645 result += pc; 646 if (p2[result - 1] == 0) 647 result--; 648 649 /* Now result is in p1 with partsCount parts and p2 is scratch 650 space. */ 651 tmp = p1; 652 p1 = p2; 653 p2 = tmp; 654 } 655 656 pow5 += pc; 657 } 658 659 if (p1 != dst) 660 APInt::tcAssign(dst, p1, result); 661 662 return result; 663 } 664 665 /* Zero at the end to avoid modular arithmetic when adding one; used 666 when rounding up during hexadecimal output. */ 667 static const char hexDigitsLower[] = "0123456789abcdef0"; 668 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 669 static const char infinityL[] = "infinity"; 670 static const char infinityU[] = "INFINITY"; 671 static const char NaNL[] = "nan"; 672 static const char NaNU[] = "NAN"; 673 674 /* Write out an integerPart in hexadecimal, starting with the most 675 significant nibble. Write out exactly COUNT hexdigits, return 676 COUNT. */ 677 static unsigned int 678 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, 679 const char *hexDigitChars) 680 { 681 unsigned int result = count; 682 683 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); 684 685 part >>= (APFloatBase::integerPartWidth - 4 * count); 686 while (count--) { 687 dst[count] = hexDigitChars[part & 0xf]; 688 part >>= 4; 689 } 690 691 return result; 692 } 693 694 /* Write out an unsigned decimal integer. */ 695 static char * 696 writeUnsignedDecimal (char *dst, unsigned int n) 697 { 698 char buff[40], *p; 699 700 p = buff; 701 do 702 *p++ = '0' + n % 10; 703 while (n /= 10); 704 705 do 706 *dst++ = *--p; 707 while (p != buff); 708 709 return dst; 710 } 711 712 /* Write out a signed decimal integer. */ 713 static char * 714 writeSignedDecimal (char *dst, int value) 715 { 716 if (value < 0) { 717 *dst++ = '-'; 718 dst = writeUnsignedDecimal(dst, -(unsigned) value); 719 } else 720 dst = writeUnsignedDecimal(dst, value); 721 722 return dst; 723 } 724 725 namespace detail { 726 /* Constructors. */ 727 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 728 unsigned int count; 729 730 semantics = ourSemantics; 731 count = partCount(); 732 if (count > 1) 733 significand.parts = new integerPart[count]; 734 } 735 736 void IEEEFloat::freeSignificand() { 737 if (needsCleanup()) 738 delete [] significand.parts; 739 } 740 741 void IEEEFloat::assign(const IEEEFloat &rhs) { 742 assert(semantics == rhs.semantics); 743 744 sign = rhs.sign; 745 category = rhs.category; 746 exponent = rhs.exponent; 747 if (isFiniteNonZero() || category == fcNaN) 748 copySignificand(rhs); 749 } 750 751 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 752 assert(isFiniteNonZero() || category == fcNaN); 753 assert(rhs.partCount() >= partCount()); 754 755 APInt::tcAssign(significandParts(), rhs.significandParts(), 756 partCount()); 757 } 758 759 /* Make this number a NaN, with an arbitrary but deterministic value 760 for the significand. If double or longer, this is a signalling NaN, 761 which may not be ideal. If float, this is QNaN(0). */ 762 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 763 category = fcNaN; 764 sign = Negative; 765 exponent = exponentNaN(); 766 767 integerPart *significand = significandParts(); 768 unsigned numParts = partCount(); 769 770 // Set the significand bits to the fill. 771 if (!fill || fill->getNumWords() < numParts) 772 APInt::tcSet(significand, 0, numParts); 773 if (fill) { 774 APInt::tcAssign(significand, fill->getRawData(), 775 std::min(fill->getNumWords(), numParts)); 776 777 // Zero out the excess bits of the significand. 778 unsigned bitsToPreserve = semantics->precision - 1; 779 unsigned part = bitsToPreserve / 64; 780 bitsToPreserve %= 64; 781 significand[part] &= ((1ULL << bitsToPreserve) - 1); 782 for (part++; part != numParts; ++part) 783 significand[part] = 0; 784 } 785 786 unsigned QNaNBit = semantics->precision - 2; 787 788 if (SNaN) { 789 // We always have to clear the QNaN bit to make it an SNaN. 790 APInt::tcClearBit(significand, QNaNBit); 791 792 // If there are no bits set in the payload, we have to set 793 // *something* to make it a NaN instead of an infinity; 794 // conventionally, this is the next bit down from the QNaN bit. 795 if (APInt::tcIsZero(significand, numParts)) 796 APInt::tcSetBit(significand, QNaNBit - 1); 797 } else { 798 // We always have to set the QNaN bit to make it a QNaN. 799 APInt::tcSetBit(significand, QNaNBit); 800 } 801 802 // For x87 extended precision, we want to make a NaN, not a 803 // pseudo-NaN. Maybe we should expose the ability to make 804 // pseudo-NaNs? 805 if (semantics == &semX87DoubleExtended) 806 APInt::tcSetBit(significand, QNaNBit + 1); 807 } 808 809 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 810 if (this != &rhs) { 811 if (semantics != rhs.semantics) { 812 freeSignificand(); 813 initialize(rhs.semantics); 814 } 815 assign(rhs); 816 } 817 818 return *this; 819 } 820 821 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 822 freeSignificand(); 823 824 semantics = rhs.semantics; 825 significand = rhs.significand; 826 exponent = rhs.exponent; 827 category = rhs.category; 828 sign = rhs.sign; 829 830 rhs.semantics = &semBogus; 831 return *this; 832 } 833 834 bool IEEEFloat::isDenormal() const { 835 return isFiniteNonZero() && (exponent == semantics->minExponent) && 836 (APInt::tcExtractBit(significandParts(), 837 semantics->precision - 1) == 0); 838 } 839 840 bool IEEEFloat::isSmallest() const { 841 // The smallest number by magnitude in our format will be the smallest 842 // denormal, i.e. the floating point number with exponent being minimum 843 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 844 return isFiniteNonZero() && exponent == semantics->minExponent && 845 significandMSB() == 0; 846 } 847 848 bool IEEEFloat::isSignificandAllOnes() const { 849 // Test if the significand excluding the integral bit is all ones. This allows 850 // us to test for binade boundaries. 851 const integerPart *Parts = significandParts(); 852 const unsigned PartCount = partCountForBits(semantics->precision); 853 for (unsigned i = 0; i < PartCount - 1; i++) 854 if (~Parts[i]) 855 return false; 856 857 // Set the unused high bits to all ones when we compare. 858 const unsigned NumHighBits = 859 PartCount*integerPartWidth - semantics->precision + 1; 860 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 861 "Can not have more high bits to fill than integerPartWidth"); 862 const integerPart HighBitFill = 863 ~integerPart(0) << (integerPartWidth - NumHighBits); 864 if (~(Parts[PartCount - 1] | HighBitFill)) 865 return false; 866 867 return true; 868 } 869 870 bool IEEEFloat::isSignificandAllZeros() const { 871 // Test if the significand excluding the integral bit is all zeros. This 872 // allows us to test for binade boundaries. 873 const integerPart *Parts = significandParts(); 874 const unsigned PartCount = partCountForBits(semantics->precision); 875 876 for (unsigned i = 0; i < PartCount - 1; i++) 877 if (Parts[i]) 878 return false; 879 880 // Compute how many bits are used in the final word. 881 const unsigned NumHighBits = 882 PartCount*integerPartWidth - semantics->precision + 1; 883 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 884 "clear than integerPartWidth"); 885 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 886 887 if (Parts[PartCount - 1] & HighBitMask) 888 return false; 889 890 return true; 891 } 892 893 bool IEEEFloat::isLargest() const { 894 // The largest number by magnitude in our format will be the floating point 895 // number with maximum exponent and with significand that is all ones. 896 return isFiniteNonZero() && exponent == semantics->maxExponent 897 && isSignificandAllOnes(); 898 } 899 900 bool IEEEFloat::isInteger() const { 901 // This could be made more efficient; I'm going for obviously correct. 902 if (!isFinite()) return false; 903 IEEEFloat truncated = *this; 904 truncated.roundToIntegral(rmTowardZero); 905 return compare(truncated) == cmpEqual; 906 } 907 908 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 909 if (this == &rhs) 910 return true; 911 if (semantics != rhs.semantics || 912 category != rhs.category || 913 sign != rhs.sign) 914 return false; 915 if (category==fcZero || category==fcInfinity) 916 return true; 917 918 if (isFiniteNonZero() && exponent != rhs.exponent) 919 return false; 920 921 return std::equal(significandParts(), significandParts() + partCount(), 922 rhs.significandParts()); 923 } 924 925 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 926 initialize(&ourSemantics); 927 sign = 0; 928 category = fcNormal; 929 zeroSignificand(); 930 exponent = ourSemantics.precision - 1; 931 significandParts()[0] = value; 932 normalize(rmNearestTiesToEven, lfExactlyZero); 933 } 934 935 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 936 initialize(&ourSemantics); 937 makeZero(false); 938 } 939 940 // Delegate to the previous constructor, because later copy constructor may 941 // actually inspects category, which can't be garbage. 942 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 943 : IEEEFloat(ourSemantics) {} 944 945 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 946 initialize(rhs.semantics); 947 assign(rhs); 948 } 949 950 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 951 *this = std::move(rhs); 952 } 953 954 IEEEFloat::~IEEEFloat() { freeSignificand(); } 955 956 unsigned int IEEEFloat::partCount() const { 957 return partCountForBits(semantics->precision + 1); 958 } 959 960 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 961 return const_cast<IEEEFloat *>(this)->significandParts(); 962 } 963 964 IEEEFloat::integerPart *IEEEFloat::significandParts() { 965 if (partCount() > 1) 966 return significand.parts; 967 else 968 return &significand.part; 969 } 970 971 void IEEEFloat::zeroSignificand() { 972 APInt::tcSet(significandParts(), 0, partCount()); 973 } 974 975 /* Increment an fcNormal floating point number's significand. */ 976 void IEEEFloat::incrementSignificand() { 977 integerPart carry; 978 979 carry = APInt::tcIncrement(significandParts(), partCount()); 980 981 /* Our callers should never cause us to overflow. */ 982 assert(carry == 0); 983 (void)carry; 984 } 985 986 /* Add the significand of the RHS. Returns the carry flag. */ 987 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 988 integerPart *parts; 989 990 parts = significandParts(); 991 992 assert(semantics == rhs.semantics); 993 assert(exponent == rhs.exponent); 994 995 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 996 } 997 998 /* Subtract the significand of the RHS with a borrow flag. Returns 999 the borrow flag. */ 1000 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 1001 integerPart borrow) { 1002 integerPart *parts; 1003 1004 parts = significandParts(); 1005 1006 assert(semantics == rhs.semantics); 1007 assert(exponent == rhs.exponent); 1008 1009 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 1010 partCount()); 1011 } 1012 1013 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 1014 on to the full-precision result of the multiplication. Returns the 1015 lost fraction. */ 1016 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 1017 IEEEFloat addend) { 1018 unsigned int omsb; // One, not zero, based MSB. 1019 unsigned int partsCount, newPartsCount, precision; 1020 integerPart *lhsSignificand; 1021 integerPart scratch[4]; 1022 integerPart *fullSignificand; 1023 lostFraction lost_fraction; 1024 bool ignored; 1025 1026 assert(semantics == rhs.semantics); 1027 1028 precision = semantics->precision; 1029 1030 // Allocate space for twice as many bits as the original significand, plus one 1031 // extra bit for the addition to overflow into. 1032 newPartsCount = partCountForBits(precision * 2 + 1); 1033 1034 if (newPartsCount > 4) 1035 fullSignificand = new integerPart[newPartsCount]; 1036 else 1037 fullSignificand = scratch; 1038 1039 lhsSignificand = significandParts(); 1040 partsCount = partCount(); 1041 1042 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 1043 rhs.significandParts(), partsCount, partsCount); 1044 1045 lost_fraction = lfExactlyZero; 1046 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1047 exponent += rhs.exponent; 1048 1049 // Assume the operands involved in the multiplication are single-precision 1050 // FP, and the two multiplicants are: 1051 // *this = a23 . a22 ... a0 * 2^e1 1052 // rhs = b23 . b22 ... b0 * 2^e2 1053 // the result of multiplication is: 1054 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 1055 // Note that there are three significant bits at the left-hand side of the 1056 // radix point: two for the multiplication, and an overflow bit for the 1057 // addition (that will always be zero at this point). Move the radix point 1058 // toward left by two bits, and adjust exponent accordingly. 1059 exponent += 2; 1060 1061 if (addend.isNonZero()) { 1062 // The intermediate result of the multiplication has "2 * precision" 1063 // signicant bit; adjust the addend to be consistent with mul result. 1064 // 1065 Significand savedSignificand = significand; 1066 const fltSemantics *savedSemantics = semantics; 1067 fltSemantics extendedSemantics; 1068 opStatus status; 1069 unsigned int extendedPrecision; 1070 1071 // Normalize our MSB to one below the top bit to allow for overflow. 1072 extendedPrecision = 2 * precision + 1; 1073 if (omsb != extendedPrecision - 1) { 1074 assert(extendedPrecision > omsb); 1075 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1076 (extendedPrecision - 1) - omsb); 1077 exponent -= (extendedPrecision - 1) - omsb; 1078 } 1079 1080 /* Create new semantics. */ 1081 extendedSemantics = *semantics; 1082 extendedSemantics.precision = extendedPrecision; 1083 1084 if (newPartsCount == 1) 1085 significand.part = fullSignificand[0]; 1086 else 1087 significand.parts = fullSignificand; 1088 semantics = &extendedSemantics; 1089 1090 // Make a copy so we can convert it to the extended semantics. 1091 // Note that we cannot convert the addend directly, as the extendedSemantics 1092 // is a local variable (which we take a reference to). 1093 IEEEFloat extendedAddend(addend); 1094 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1095 assert(status == opOK); 1096 (void)status; 1097 1098 // Shift the significand of the addend right by one bit. This guarantees 1099 // that the high bit of the significand is zero (same as fullSignificand), 1100 // so the addition will overflow (if it does overflow at all) into the top bit. 1101 lost_fraction = extendedAddend.shiftSignificandRight(1); 1102 assert(lost_fraction == lfExactlyZero && 1103 "Lost precision while shifting addend for fused-multiply-add."); 1104 1105 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1106 1107 /* Restore our state. */ 1108 if (newPartsCount == 1) 1109 fullSignificand[0] = significand.part; 1110 significand = savedSignificand; 1111 semantics = savedSemantics; 1112 1113 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1114 } 1115 1116 // Convert the result having "2 * precision" significant-bits back to the one 1117 // having "precision" significant-bits. First, move the radix point from 1118 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1119 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1120 exponent -= precision + 1; 1121 1122 // In case MSB resides at the left-hand side of radix point, shift the 1123 // mantissa right by some amount to make sure the MSB reside right before 1124 // the radix point (i.e. "MSB . rest-significant-bits"). 1125 // 1126 // Note that the result is not normalized when "omsb < precision". So, the 1127 // caller needs to call IEEEFloat::normalize() if normalized value is 1128 // expected. 1129 if (omsb > precision) { 1130 unsigned int bits, significantParts; 1131 lostFraction lf; 1132 1133 bits = omsb - precision; 1134 significantParts = partCountForBits(omsb); 1135 lf = shiftRight(fullSignificand, significantParts, bits); 1136 lost_fraction = combineLostFractions(lf, lost_fraction); 1137 exponent += bits; 1138 } 1139 1140 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1141 1142 if (newPartsCount > 4) 1143 delete [] fullSignificand; 1144 1145 return lost_fraction; 1146 } 1147 1148 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { 1149 return multiplySignificand(rhs, IEEEFloat(*semantics)); 1150 } 1151 1152 /* Multiply the significands of LHS and RHS to DST. */ 1153 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1154 unsigned int bit, i, partsCount; 1155 const integerPart *rhsSignificand; 1156 integerPart *lhsSignificand, *dividend, *divisor; 1157 integerPart scratch[4]; 1158 lostFraction lost_fraction; 1159 1160 assert(semantics == rhs.semantics); 1161 1162 lhsSignificand = significandParts(); 1163 rhsSignificand = rhs.significandParts(); 1164 partsCount = partCount(); 1165 1166 if (partsCount > 2) 1167 dividend = new integerPart[partsCount * 2]; 1168 else 1169 dividend = scratch; 1170 1171 divisor = dividend + partsCount; 1172 1173 /* Copy the dividend and divisor as they will be modified in-place. */ 1174 for (i = 0; i < partsCount; i++) { 1175 dividend[i] = lhsSignificand[i]; 1176 divisor[i] = rhsSignificand[i]; 1177 lhsSignificand[i] = 0; 1178 } 1179 1180 exponent -= rhs.exponent; 1181 1182 unsigned int precision = semantics->precision; 1183 1184 /* Normalize the divisor. */ 1185 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1186 if (bit) { 1187 exponent += bit; 1188 APInt::tcShiftLeft(divisor, partsCount, bit); 1189 } 1190 1191 /* Normalize the dividend. */ 1192 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1193 if (bit) { 1194 exponent -= bit; 1195 APInt::tcShiftLeft(dividend, partsCount, bit); 1196 } 1197 1198 /* Ensure the dividend >= divisor initially for the loop below. 1199 Incidentally, this means that the division loop below is 1200 guaranteed to set the integer bit to one. */ 1201 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1202 exponent--; 1203 APInt::tcShiftLeft(dividend, partsCount, 1); 1204 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1205 } 1206 1207 /* Long division. */ 1208 for (bit = precision; bit; bit -= 1) { 1209 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1210 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1211 APInt::tcSetBit(lhsSignificand, bit - 1); 1212 } 1213 1214 APInt::tcShiftLeft(dividend, partsCount, 1); 1215 } 1216 1217 /* Figure out the lost fraction. */ 1218 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1219 1220 if (cmp > 0) 1221 lost_fraction = lfMoreThanHalf; 1222 else if (cmp == 0) 1223 lost_fraction = lfExactlyHalf; 1224 else if (APInt::tcIsZero(dividend, partsCount)) 1225 lost_fraction = lfExactlyZero; 1226 else 1227 lost_fraction = lfLessThanHalf; 1228 1229 if (partsCount > 2) 1230 delete [] dividend; 1231 1232 return lost_fraction; 1233 } 1234 1235 unsigned int IEEEFloat::significandMSB() const { 1236 return APInt::tcMSB(significandParts(), partCount()); 1237 } 1238 1239 unsigned int IEEEFloat::significandLSB() const { 1240 return APInt::tcLSB(significandParts(), partCount()); 1241 } 1242 1243 /* Note that a zero result is NOT normalized to fcZero. */ 1244 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1245 /* Our exponent should not overflow. */ 1246 assert((ExponentType) (exponent + bits) >= exponent); 1247 1248 exponent += bits; 1249 1250 return shiftRight(significandParts(), partCount(), bits); 1251 } 1252 1253 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1254 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1255 assert(bits < semantics->precision); 1256 1257 if (bits) { 1258 unsigned int partsCount = partCount(); 1259 1260 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1261 exponent -= bits; 1262 1263 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1264 } 1265 } 1266 1267 IEEEFloat::cmpResult 1268 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1269 int compare; 1270 1271 assert(semantics == rhs.semantics); 1272 assert(isFiniteNonZero()); 1273 assert(rhs.isFiniteNonZero()); 1274 1275 compare = exponent - rhs.exponent; 1276 1277 /* If exponents are equal, do an unsigned bignum comparison of the 1278 significands. */ 1279 if (compare == 0) 1280 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1281 partCount()); 1282 1283 if (compare > 0) 1284 return cmpGreaterThan; 1285 else if (compare < 0) 1286 return cmpLessThan; 1287 else 1288 return cmpEqual; 1289 } 1290 1291 /* Set the least significant BITS bits of a bignum, clear the 1292 rest. */ 1293 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, 1294 unsigned bits) { 1295 unsigned i = 0; 1296 while (bits > APInt::APINT_BITS_PER_WORD) { 1297 dst[i++] = ~(APInt::WordType)0; 1298 bits -= APInt::APINT_BITS_PER_WORD; 1299 } 1300 1301 if (bits) 1302 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits); 1303 1304 while (i < parts) 1305 dst[i++] = 0; 1306 } 1307 1308 /* Handle overflow. Sign is preserved. We either become infinity or 1309 the largest finite number. */ 1310 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1311 /* Infinity? */ 1312 if (rounding_mode == rmNearestTiesToEven || 1313 rounding_mode == rmNearestTiesToAway || 1314 (rounding_mode == rmTowardPositive && !sign) || 1315 (rounding_mode == rmTowardNegative && sign)) { 1316 category = fcInfinity; 1317 return (opStatus) (opOverflow | opInexact); 1318 } 1319 1320 /* Otherwise we become the largest finite number. */ 1321 category = fcNormal; 1322 exponent = semantics->maxExponent; 1323 tcSetLeastSignificantBits(significandParts(), partCount(), 1324 semantics->precision); 1325 1326 return opInexact; 1327 } 1328 1329 /* Returns TRUE if, when truncating the current number, with BIT the 1330 new LSB, with the given lost fraction and rounding mode, the result 1331 would need to be rounded away from zero (i.e., by increasing the 1332 signficand). This routine must work for fcZero of both signs, and 1333 fcNormal numbers. */ 1334 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1335 lostFraction lost_fraction, 1336 unsigned int bit) const { 1337 /* NaNs and infinities should not have lost fractions. */ 1338 assert(isFiniteNonZero() || category == fcZero); 1339 1340 /* Current callers never pass this so we don't handle it. */ 1341 assert(lost_fraction != lfExactlyZero); 1342 1343 switch (rounding_mode) { 1344 case rmNearestTiesToAway: 1345 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1346 1347 case rmNearestTiesToEven: 1348 if (lost_fraction == lfMoreThanHalf) 1349 return true; 1350 1351 /* Our zeroes don't have a significand to test. */ 1352 if (lost_fraction == lfExactlyHalf && category != fcZero) 1353 return APInt::tcExtractBit(significandParts(), bit); 1354 1355 return false; 1356 1357 case rmTowardZero: 1358 return false; 1359 1360 case rmTowardPositive: 1361 return !sign; 1362 1363 case rmTowardNegative: 1364 return sign; 1365 1366 default: 1367 break; 1368 } 1369 llvm_unreachable("Invalid rounding mode found"); 1370 } 1371 1372 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1373 lostFraction lost_fraction) { 1374 unsigned int omsb; /* One, not zero, based MSB. */ 1375 int exponentChange; 1376 1377 if (!isFiniteNonZero()) 1378 return opOK; 1379 1380 /* Before rounding normalize the exponent of fcNormal numbers. */ 1381 omsb = significandMSB() + 1; 1382 1383 if (omsb) { 1384 /* OMSB is numbered from 1. We want to place it in the integer 1385 bit numbered PRECISION if possible, with a compensating change in 1386 the exponent. */ 1387 exponentChange = omsb - semantics->precision; 1388 1389 /* If the resulting exponent is too high, overflow according to 1390 the rounding mode. */ 1391 if (exponent + exponentChange > semantics->maxExponent) 1392 return handleOverflow(rounding_mode); 1393 1394 /* Subnormal numbers have exponent minExponent, and their MSB 1395 is forced based on that. */ 1396 if (exponent + exponentChange < semantics->minExponent) 1397 exponentChange = semantics->minExponent - exponent; 1398 1399 /* Shifting left is easy as we don't lose precision. */ 1400 if (exponentChange < 0) { 1401 assert(lost_fraction == lfExactlyZero); 1402 1403 shiftSignificandLeft(-exponentChange); 1404 1405 return opOK; 1406 } 1407 1408 if (exponentChange > 0) { 1409 lostFraction lf; 1410 1411 /* Shift right and capture any new lost fraction. */ 1412 lf = shiftSignificandRight(exponentChange); 1413 1414 lost_fraction = combineLostFractions(lf, lost_fraction); 1415 1416 /* Keep OMSB up-to-date. */ 1417 if (omsb > (unsigned) exponentChange) 1418 omsb -= exponentChange; 1419 else 1420 omsb = 0; 1421 } 1422 } 1423 1424 /* Now round the number according to rounding_mode given the lost 1425 fraction. */ 1426 1427 /* As specified in IEEE 754, since we do not trap we do not report 1428 underflow for exact results. */ 1429 if (lost_fraction == lfExactlyZero) { 1430 /* Canonicalize zeroes. */ 1431 if (omsb == 0) 1432 category = fcZero; 1433 1434 return opOK; 1435 } 1436 1437 /* Increment the significand if we're rounding away from zero. */ 1438 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1439 if (omsb == 0) 1440 exponent = semantics->minExponent; 1441 1442 incrementSignificand(); 1443 omsb = significandMSB() + 1; 1444 1445 /* Did the significand increment overflow? */ 1446 if (omsb == (unsigned) semantics->precision + 1) { 1447 /* Renormalize by incrementing the exponent and shifting our 1448 significand right one. However if we already have the 1449 maximum exponent we overflow to infinity. */ 1450 if (exponent == semantics->maxExponent) { 1451 category = fcInfinity; 1452 1453 return (opStatus) (opOverflow | opInexact); 1454 } 1455 1456 shiftSignificandRight(1); 1457 1458 return opInexact; 1459 } 1460 } 1461 1462 /* The normal case - we were and are not denormal, and any 1463 significand increment above didn't overflow. */ 1464 if (omsb == semantics->precision) 1465 return opInexact; 1466 1467 /* We have a non-zero denormal. */ 1468 assert(omsb < semantics->precision); 1469 1470 /* Canonicalize zeroes. */ 1471 if (omsb == 0) 1472 category = fcZero; 1473 1474 /* The fcZero case is a denormal that underflowed to zero. */ 1475 return (opStatus) (opUnderflow | opInexact); 1476 } 1477 1478 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1479 bool subtract) { 1480 switch (PackCategoriesIntoKey(category, rhs.category)) { 1481 default: 1482 llvm_unreachable(nullptr); 1483 1484 case PackCategoriesIntoKey(fcZero, fcNaN): 1485 case PackCategoriesIntoKey(fcNormal, fcNaN): 1486 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1487 assign(rhs); 1488 LLVM_FALLTHROUGH; 1489 case PackCategoriesIntoKey(fcNaN, fcZero): 1490 case PackCategoriesIntoKey(fcNaN, fcNormal): 1491 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1492 case PackCategoriesIntoKey(fcNaN, fcNaN): 1493 if (isSignaling()) { 1494 makeQuiet(); 1495 return opInvalidOp; 1496 } 1497 return rhs.isSignaling() ? opInvalidOp : opOK; 1498 1499 case PackCategoriesIntoKey(fcNormal, fcZero): 1500 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1501 case PackCategoriesIntoKey(fcInfinity, fcZero): 1502 return opOK; 1503 1504 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1505 case PackCategoriesIntoKey(fcZero, fcInfinity): 1506 category = fcInfinity; 1507 sign = rhs.sign ^ subtract; 1508 return opOK; 1509 1510 case PackCategoriesIntoKey(fcZero, fcNormal): 1511 assign(rhs); 1512 sign = rhs.sign ^ subtract; 1513 return opOK; 1514 1515 case PackCategoriesIntoKey(fcZero, fcZero): 1516 /* Sign depends on rounding mode; handled by caller. */ 1517 return opOK; 1518 1519 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1520 /* Differently signed infinities can only be validly 1521 subtracted. */ 1522 if (((sign ^ rhs.sign)!=0) != subtract) { 1523 makeNaN(); 1524 return opInvalidOp; 1525 } 1526 1527 return opOK; 1528 1529 case PackCategoriesIntoKey(fcNormal, fcNormal): 1530 return opDivByZero; 1531 } 1532 } 1533 1534 /* Add or subtract two normal numbers. */ 1535 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1536 bool subtract) { 1537 integerPart carry; 1538 lostFraction lost_fraction; 1539 int bits; 1540 1541 /* Determine if the operation on the absolute values is effectively 1542 an addition or subtraction. */ 1543 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1544 1545 /* Are we bigger exponent-wise than the RHS? */ 1546 bits = exponent - rhs.exponent; 1547 1548 /* Subtraction is more subtle than one might naively expect. */ 1549 if (subtract) { 1550 IEEEFloat temp_rhs(rhs); 1551 1552 if (bits == 0) 1553 lost_fraction = lfExactlyZero; 1554 else if (bits > 0) { 1555 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1556 shiftSignificandLeft(1); 1557 } else { 1558 lost_fraction = shiftSignificandRight(-bits - 1); 1559 temp_rhs.shiftSignificandLeft(1); 1560 } 1561 1562 // Should we reverse the subtraction. 1563 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1564 carry = temp_rhs.subtractSignificand 1565 (*this, lost_fraction != lfExactlyZero); 1566 copySignificand(temp_rhs); 1567 sign = !sign; 1568 } else { 1569 carry = subtractSignificand 1570 (temp_rhs, lost_fraction != lfExactlyZero); 1571 } 1572 1573 /* Invert the lost fraction - it was on the RHS and 1574 subtracted. */ 1575 if (lost_fraction == lfLessThanHalf) 1576 lost_fraction = lfMoreThanHalf; 1577 else if (lost_fraction == lfMoreThanHalf) 1578 lost_fraction = lfLessThanHalf; 1579 1580 /* The code above is intended to ensure that no borrow is 1581 necessary. */ 1582 assert(!carry); 1583 (void)carry; 1584 } else { 1585 if (bits > 0) { 1586 IEEEFloat temp_rhs(rhs); 1587 1588 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1589 carry = addSignificand(temp_rhs); 1590 } else { 1591 lost_fraction = shiftSignificandRight(-bits); 1592 carry = addSignificand(rhs); 1593 } 1594 1595 /* We have a guard bit; generating a carry cannot happen. */ 1596 assert(!carry); 1597 (void)carry; 1598 } 1599 1600 return lost_fraction; 1601 } 1602 1603 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1604 switch (PackCategoriesIntoKey(category, rhs.category)) { 1605 default: 1606 llvm_unreachable(nullptr); 1607 1608 case PackCategoriesIntoKey(fcZero, fcNaN): 1609 case PackCategoriesIntoKey(fcNormal, fcNaN): 1610 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1611 assign(rhs); 1612 sign = false; 1613 LLVM_FALLTHROUGH; 1614 case PackCategoriesIntoKey(fcNaN, fcZero): 1615 case PackCategoriesIntoKey(fcNaN, fcNormal): 1616 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1617 case PackCategoriesIntoKey(fcNaN, fcNaN): 1618 sign ^= rhs.sign; // restore the original sign 1619 if (isSignaling()) { 1620 makeQuiet(); 1621 return opInvalidOp; 1622 } 1623 return rhs.isSignaling() ? opInvalidOp : opOK; 1624 1625 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1626 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1627 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1628 category = fcInfinity; 1629 return opOK; 1630 1631 case PackCategoriesIntoKey(fcZero, fcNormal): 1632 case PackCategoriesIntoKey(fcNormal, fcZero): 1633 case PackCategoriesIntoKey(fcZero, fcZero): 1634 category = fcZero; 1635 return opOK; 1636 1637 case PackCategoriesIntoKey(fcZero, fcInfinity): 1638 case PackCategoriesIntoKey(fcInfinity, fcZero): 1639 makeNaN(); 1640 return opInvalidOp; 1641 1642 case PackCategoriesIntoKey(fcNormal, fcNormal): 1643 return opOK; 1644 } 1645 } 1646 1647 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1648 switch (PackCategoriesIntoKey(category, rhs.category)) { 1649 default: 1650 llvm_unreachable(nullptr); 1651 1652 case PackCategoriesIntoKey(fcZero, fcNaN): 1653 case PackCategoriesIntoKey(fcNormal, fcNaN): 1654 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1655 assign(rhs); 1656 sign = false; 1657 LLVM_FALLTHROUGH; 1658 case PackCategoriesIntoKey(fcNaN, fcZero): 1659 case PackCategoriesIntoKey(fcNaN, fcNormal): 1660 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1661 case PackCategoriesIntoKey(fcNaN, fcNaN): 1662 sign ^= rhs.sign; // restore the original sign 1663 if (isSignaling()) { 1664 makeQuiet(); 1665 return opInvalidOp; 1666 } 1667 return rhs.isSignaling() ? opInvalidOp : opOK; 1668 1669 case PackCategoriesIntoKey(fcInfinity, fcZero): 1670 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1671 case PackCategoriesIntoKey(fcZero, fcInfinity): 1672 case PackCategoriesIntoKey(fcZero, fcNormal): 1673 return opOK; 1674 1675 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1676 category = fcZero; 1677 return opOK; 1678 1679 case PackCategoriesIntoKey(fcNormal, fcZero): 1680 category = fcInfinity; 1681 return opDivByZero; 1682 1683 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1684 case PackCategoriesIntoKey(fcZero, fcZero): 1685 makeNaN(); 1686 return opInvalidOp; 1687 1688 case PackCategoriesIntoKey(fcNormal, fcNormal): 1689 return opOK; 1690 } 1691 } 1692 1693 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1694 switch (PackCategoriesIntoKey(category, rhs.category)) { 1695 default: 1696 llvm_unreachable(nullptr); 1697 1698 case PackCategoriesIntoKey(fcZero, fcNaN): 1699 case PackCategoriesIntoKey(fcNormal, fcNaN): 1700 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1701 assign(rhs); 1702 LLVM_FALLTHROUGH; 1703 case PackCategoriesIntoKey(fcNaN, fcZero): 1704 case PackCategoriesIntoKey(fcNaN, fcNormal): 1705 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1706 case PackCategoriesIntoKey(fcNaN, fcNaN): 1707 if (isSignaling()) { 1708 makeQuiet(); 1709 return opInvalidOp; 1710 } 1711 return rhs.isSignaling() ? opInvalidOp : opOK; 1712 1713 case PackCategoriesIntoKey(fcZero, fcInfinity): 1714 case PackCategoriesIntoKey(fcZero, fcNormal): 1715 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1716 return opOK; 1717 1718 case PackCategoriesIntoKey(fcNormal, fcZero): 1719 case PackCategoriesIntoKey(fcInfinity, fcZero): 1720 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1721 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1722 case PackCategoriesIntoKey(fcZero, fcZero): 1723 makeNaN(); 1724 return opInvalidOp; 1725 1726 case PackCategoriesIntoKey(fcNormal, fcNormal): 1727 return opOK; 1728 } 1729 } 1730 1731 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 1732 switch (PackCategoriesIntoKey(category, rhs.category)) { 1733 default: 1734 llvm_unreachable(nullptr); 1735 1736 case PackCategoriesIntoKey(fcZero, fcNaN): 1737 case PackCategoriesIntoKey(fcNormal, fcNaN): 1738 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1739 assign(rhs); 1740 LLVM_FALLTHROUGH; 1741 case PackCategoriesIntoKey(fcNaN, fcZero): 1742 case PackCategoriesIntoKey(fcNaN, fcNormal): 1743 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1744 case PackCategoriesIntoKey(fcNaN, fcNaN): 1745 if (isSignaling()) { 1746 makeQuiet(); 1747 return opInvalidOp; 1748 } 1749 return rhs.isSignaling() ? opInvalidOp : opOK; 1750 1751 case PackCategoriesIntoKey(fcZero, fcInfinity): 1752 case PackCategoriesIntoKey(fcZero, fcNormal): 1753 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1754 return opOK; 1755 1756 case PackCategoriesIntoKey(fcNormal, fcZero): 1757 case PackCategoriesIntoKey(fcInfinity, fcZero): 1758 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1759 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1760 case PackCategoriesIntoKey(fcZero, fcZero): 1761 makeNaN(); 1762 return opInvalidOp; 1763 1764 case PackCategoriesIntoKey(fcNormal, fcNormal): 1765 return opDivByZero; // fake status, indicating this is not a special case 1766 } 1767 } 1768 1769 /* Change sign. */ 1770 void IEEEFloat::changeSign() { 1771 /* Look mummy, this one's easy. */ 1772 sign = !sign; 1773 } 1774 1775 /* Normalized addition or subtraction. */ 1776 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 1777 roundingMode rounding_mode, 1778 bool subtract) { 1779 opStatus fs; 1780 1781 fs = addOrSubtractSpecials(rhs, subtract); 1782 1783 /* This return code means it was not a simple case. */ 1784 if (fs == opDivByZero) { 1785 lostFraction lost_fraction; 1786 1787 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1788 fs = normalize(rounding_mode, lost_fraction); 1789 1790 /* Can only be zero if we lost no fraction. */ 1791 assert(category != fcZero || lost_fraction == lfExactlyZero); 1792 } 1793 1794 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1795 positive zero unless rounding to minus infinity, except that 1796 adding two like-signed zeroes gives that zero. */ 1797 if (category == fcZero) { 1798 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1799 sign = (rounding_mode == rmTowardNegative); 1800 } 1801 1802 return fs; 1803 } 1804 1805 /* Normalized addition. */ 1806 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 1807 roundingMode rounding_mode) { 1808 return addOrSubtract(rhs, rounding_mode, false); 1809 } 1810 1811 /* Normalized subtraction. */ 1812 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 1813 roundingMode rounding_mode) { 1814 return addOrSubtract(rhs, rounding_mode, true); 1815 } 1816 1817 /* Normalized multiply. */ 1818 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 1819 roundingMode rounding_mode) { 1820 opStatus fs; 1821 1822 sign ^= rhs.sign; 1823 fs = multiplySpecials(rhs); 1824 1825 if (isFiniteNonZero()) { 1826 lostFraction lost_fraction = multiplySignificand(rhs); 1827 fs = normalize(rounding_mode, lost_fraction); 1828 if (lost_fraction != lfExactlyZero) 1829 fs = (opStatus) (fs | opInexact); 1830 } 1831 1832 return fs; 1833 } 1834 1835 /* Normalized divide. */ 1836 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 1837 roundingMode rounding_mode) { 1838 opStatus fs; 1839 1840 sign ^= rhs.sign; 1841 fs = divideSpecials(rhs); 1842 1843 if (isFiniteNonZero()) { 1844 lostFraction lost_fraction = divideSignificand(rhs); 1845 fs = normalize(rounding_mode, lost_fraction); 1846 if (lost_fraction != lfExactlyZero) 1847 fs = (opStatus) (fs | opInexact); 1848 } 1849 1850 return fs; 1851 } 1852 1853 /* Normalized remainder. */ 1854 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 1855 opStatus fs; 1856 unsigned int origSign = sign; 1857 1858 // First handle the special cases. 1859 fs = remainderSpecials(rhs); 1860 if (fs != opDivByZero) 1861 return fs; 1862 1863 fs = opOK; 1864 1865 // Make sure the current value is less than twice the denom. If the addition 1866 // did not succeed (an overflow has happened), which means that the finite 1867 // value we currently posses must be less than twice the denom (as we are 1868 // using the same semantics). 1869 IEEEFloat P2 = rhs; 1870 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 1871 fs = mod(P2); 1872 assert(fs == opOK); 1873 } 1874 1875 // Lets work with absolute numbers. 1876 IEEEFloat P = rhs; 1877 P.sign = false; 1878 sign = false; 1879 1880 // 1881 // To calculate the remainder we use the following scheme. 1882 // 1883 // The remainder is defained as follows: 1884 // 1885 // remainder = numer - rquot * denom = x - r * p 1886 // 1887 // Where r is the result of: x/p, rounded toward the nearest integral value 1888 // (with halfway cases rounded toward the even number). 1889 // 1890 // Currently, (after x mod 2p): 1891 // r is the number of 2p's present inside x, which is inherently, an even 1892 // number of p's. 1893 // 1894 // We may split the remaining calculation into 4 options: 1895 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1896 // - if x == 0.5p then we round to the nearest even number which is 0, and we 1897 // are done as well. 1898 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 1899 // to subtract 1p at least once. 1900 // - if x >= p then we must subtract p at least once, as x must be a 1901 // remainder. 1902 // 1903 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 1904 // 1905 // We can now split the remaining calculation to the following 3 options: 1906 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1907 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 1908 // must round up to the next even number. so we must subtract p once more. 1909 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 1910 // integral, and subtract p once more. 1911 // 1912 1913 // Extend the semantics to prevent an overflow/underflow or inexact result. 1914 bool losesInfo; 1915 fltSemantics extendedSemantics = *semantics; 1916 extendedSemantics.maxExponent++; 1917 extendedSemantics.minExponent--; 1918 extendedSemantics.precision += 2; 1919 1920 IEEEFloat VEx = *this; 1921 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1922 assert(fs == opOK && !losesInfo); 1923 IEEEFloat PEx = P; 1924 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1925 assert(fs == opOK && !losesInfo); 1926 1927 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 1928 // any fraction. 1929 fs = VEx.add(VEx, rmNearestTiesToEven); 1930 assert(fs == opOK); 1931 1932 if (VEx.compare(PEx) == cmpGreaterThan) { 1933 fs = subtract(P, rmNearestTiesToEven); 1934 assert(fs == opOK); 1935 1936 // Make VEx = this.add(this), but because we have different semantics, we do 1937 // not want to `convert` again, so we just subtract PEx twice (which equals 1938 // to the desired value). 1939 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1940 assert(fs == opOK); 1941 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1942 assert(fs == opOK); 1943 1944 cmpResult result = VEx.compare(PEx); 1945 if (result == cmpGreaterThan || result == cmpEqual) { 1946 fs = subtract(P, rmNearestTiesToEven); 1947 assert(fs == opOK); 1948 } 1949 } 1950 1951 if (isZero()) 1952 sign = origSign; // IEEE754 requires this 1953 else 1954 sign ^= origSign; 1955 return fs; 1956 } 1957 1958 /* Normalized llvm frem (C fmod). */ 1959 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 1960 opStatus fs; 1961 fs = modSpecials(rhs); 1962 unsigned int origSign = sign; 1963 1964 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 1965 compareAbsoluteValue(rhs) != cmpLessThan) { 1966 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven); 1967 if (compareAbsoluteValue(V) == cmpLessThan) 1968 V = scalbn(V, -1, rmNearestTiesToEven); 1969 V.sign = sign; 1970 1971 fs = subtract(V, rmNearestTiesToEven); 1972 assert(fs==opOK); 1973 } 1974 if (isZero()) 1975 sign = origSign; // fmod requires this 1976 return fs; 1977 } 1978 1979 /* Normalized fused-multiply-add. */ 1980 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 1981 const IEEEFloat &addend, 1982 roundingMode rounding_mode) { 1983 opStatus fs; 1984 1985 /* Post-multiplication sign, before addition. */ 1986 sign ^= multiplicand.sign; 1987 1988 /* If and only if all arguments are normal do we need to do an 1989 extended-precision calculation. */ 1990 if (isFiniteNonZero() && 1991 multiplicand.isFiniteNonZero() && 1992 addend.isFinite()) { 1993 lostFraction lost_fraction; 1994 1995 lost_fraction = multiplySignificand(multiplicand, addend); 1996 fs = normalize(rounding_mode, lost_fraction); 1997 if (lost_fraction != lfExactlyZero) 1998 fs = (opStatus) (fs | opInexact); 1999 2000 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2001 positive zero unless rounding to minus infinity, except that 2002 adding two like-signed zeroes gives that zero. */ 2003 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 2004 sign = (rounding_mode == rmTowardNegative); 2005 } else { 2006 fs = multiplySpecials(multiplicand); 2007 2008 /* FS can only be opOK or opInvalidOp. There is no more work 2009 to do in the latter case. The IEEE-754R standard says it is 2010 implementation-defined in this case whether, if ADDEND is a 2011 quiet NaN, we raise invalid op; this implementation does so. 2012 2013 If we need to do the addition we can do so with normal 2014 precision. */ 2015 if (fs == opOK) 2016 fs = addOrSubtract(addend, rounding_mode, false); 2017 } 2018 2019 return fs; 2020 } 2021 2022 /* Rounding-mode correct round to integral value. */ 2023 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2024 opStatus fs; 2025 2026 if (isInfinity()) 2027 // [IEEE Std 754-2008 6.1]: 2028 // The behavior of infinity in floating-point arithmetic is derived from the 2029 // limiting cases of real arithmetic with operands of arbitrarily 2030 // large magnitude, when such a limit exists. 2031 // ... 2032 // Operations on infinite operands are usually exact and therefore signal no 2033 // exceptions ... 2034 return opOK; 2035 2036 if (isNaN()) { 2037 if (isSignaling()) { 2038 // [IEEE Std 754-2008 6.2]: 2039 // Under default exception handling, any operation signaling an invalid 2040 // operation exception and for which a floating-point result is to be 2041 // delivered shall deliver a quiet NaN. 2042 makeQuiet(); 2043 // [IEEE Std 754-2008 6.2]: 2044 // Signaling NaNs shall be reserved operands that, under default exception 2045 // handling, signal the invalid operation exception(see 7.2) for every 2046 // general-computational and signaling-computational operation except for 2047 // the conversions described in 5.12. 2048 return opInvalidOp; 2049 } else { 2050 // [IEEE Std 754-2008 6.2]: 2051 // For an operation with quiet NaN inputs, other than maximum and minimum 2052 // operations, if a floating-point result is to be delivered the result 2053 // shall be a quiet NaN which should be one of the input NaNs. 2054 // ... 2055 // Every general-computational and quiet-computational operation involving 2056 // one or more input NaNs, none of them signaling, shall signal no 2057 // exception, except fusedMultiplyAdd might signal the invalid operation 2058 // exception(see 7.2). 2059 return opOK; 2060 } 2061 } 2062 2063 if (isZero()) { 2064 // [IEEE Std 754-2008 6.3]: 2065 // ... the sign of the result of conversions, the quantize operation, the 2066 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2067 // the sign of the first or only operand. 2068 return opOK; 2069 } 2070 2071 // If the exponent is large enough, we know that this value is already 2072 // integral, and the arithmetic below would potentially cause it to saturate 2073 // to +/-Inf. Bail out early instead. 2074 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2075 return opOK; 2076 2077 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2078 // precision of our format, and then subtract it back off again. The choice 2079 // of rounding modes for the addition/subtraction determines the rounding mode 2080 // for our integral rounding as well. 2081 // NOTE: When the input value is negative, we do subtraction followed by 2082 // addition instead. 2083 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2084 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2085 IEEEFloat MagicConstant(*semantics); 2086 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2087 rmNearestTiesToEven); 2088 assert(fs == opOK); 2089 MagicConstant.sign = sign; 2090 2091 // Preserve the input sign so that we can handle the case of zero result 2092 // correctly. 2093 bool inputSign = isNegative(); 2094 2095 fs = add(MagicConstant, rounding_mode); 2096 2097 // Current value and 'MagicConstant' are both integers, so the result of the 2098 // subtraction is always exact according to Sterbenz' lemma. 2099 subtract(MagicConstant, rounding_mode); 2100 2101 // Restore the input sign. 2102 if (inputSign != isNegative()) 2103 changeSign(); 2104 2105 return fs; 2106 } 2107 2108 2109 /* Comparison requires normalized numbers. */ 2110 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2111 cmpResult result; 2112 2113 assert(semantics == rhs.semantics); 2114 2115 switch (PackCategoriesIntoKey(category, rhs.category)) { 2116 default: 2117 llvm_unreachable(nullptr); 2118 2119 case PackCategoriesIntoKey(fcNaN, fcZero): 2120 case PackCategoriesIntoKey(fcNaN, fcNormal): 2121 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2122 case PackCategoriesIntoKey(fcNaN, fcNaN): 2123 case PackCategoriesIntoKey(fcZero, fcNaN): 2124 case PackCategoriesIntoKey(fcNormal, fcNaN): 2125 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2126 return cmpUnordered; 2127 2128 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2129 case PackCategoriesIntoKey(fcInfinity, fcZero): 2130 case PackCategoriesIntoKey(fcNormal, fcZero): 2131 if (sign) 2132 return cmpLessThan; 2133 else 2134 return cmpGreaterThan; 2135 2136 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2137 case PackCategoriesIntoKey(fcZero, fcInfinity): 2138 case PackCategoriesIntoKey(fcZero, fcNormal): 2139 if (rhs.sign) 2140 return cmpGreaterThan; 2141 else 2142 return cmpLessThan; 2143 2144 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2145 if (sign == rhs.sign) 2146 return cmpEqual; 2147 else if (sign) 2148 return cmpLessThan; 2149 else 2150 return cmpGreaterThan; 2151 2152 case PackCategoriesIntoKey(fcZero, fcZero): 2153 return cmpEqual; 2154 2155 case PackCategoriesIntoKey(fcNormal, fcNormal): 2156 break; 2157 } 2158 2159 /* Two normal numbers. Do they have the same sign? */ 2160 if (sign != rhs.sign) { 2161 if (sign) 2162 result = cmpLessThan; 2163 else 2164 result = cmpGreaterThan; 2165 } else { 2166 /* Compare absolute values; invert result if negative. */ 2167 result = compareAbsoluteValue(rhs); 2168 2169 if (sign) { 2170 if (result == cmpLessThan) 2171 result = cmpGreaterThan; 2172 else if (result == cmpGreaterThan) 2173 result = cmpLessThan; 2174 } 2175 } 2176 2177 return result; 2178 } 2179 2180 /// IEEEFloat::convert - convert a value of one floating point type to another. 2181 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2182 /// records whether the transformation lost information, i.e. whether 2183 /// converting the result back to the original type will produce the 2184 /// original value (this is almost the same as return value==fsOK, but there 2185 /// are edge cases where this is not so). 2186 2187 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2188 roundingMode rounding_mode, 2189 bool *losesInfo) { 2190 lostFraction lostFraction; 2191 unsigned int newPartCount, oldPartCount; 2192 opStatus fs; 2193 int shift; 2194 const fltSemantics &fromSemantics = *semantics; 2195 2196 lostFraction = lfExactlyZero; 2197 newPartCount = partCountForBits(toSemantics.precision + 1); 2198 oldPartCount = partCount(); 2199 shift = toSemantics.precision - fromSemantics.precision; 2200 2201 bool X86SpecialNan = false; 2202 if (&fromSemantics == &semX87DoubleExtended && 2203 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2204 (!(*significandParts() & 0x8000000000000000ULL) || 2205 !(*significandParts() & 0x4000000000000000ULL))) { 2206 // x86 has some unusual NaNs which cannot be represented in any other 2207 // format; note them here. 2208 X86SpecialNan = true; 2209 } 2210 2211 // If this is a truncation of a denormal number, and the target semantics 2212 // has larger exponent range than the source semantics (this can happen 2213 // when truncating from PowerPC double-double to double format), the 2214 // right shift could lose result mantissa bits. Adjust exponent instead 2215 // of performing excessive shift. 2216 // Also do a similar trick in case shifting denormal would produce zero 2217 // significand as this case isn't handled correctly by normalize. 2218 if (shift < 0 && isFiniteNonZero()) { 2219 int omsb = significandMSB() + 1; 2220 int exponentChange = omsb - fromSemantics.precision; 2221 if (exponent + exponentChange < toSemantics.minExponent) 2222 exponentChange = toSemantics.minExponent - exponent; 2223 if (exponentChange < shift) 2224 exponentChange = shift; 2225 if (exponentChange < 0) { 2226 shift -= exponentChange; 2227 exponent += exponentChange; 2228 } else if (omsb <= -shift) { 2229 exponentChange = omsb + shift - 1; // leave at least one bit set 2230 shift -= exponentChange; 2231 exponent += exponentChange; 2232 } 2233 } 2234 2235 // If this is a truncation, perform the shift before we narrow the storage. 2236 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2237 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2238 2239 // Fix the storage so it can hold to new value. 2240 if (newPartCount > oldPartCount) { 2241 // The new type requires more storage; make it available. 2242 integerPart *newParts; 2243 newParts = new integerPart[newPartCount]; 2244 APInt::tcSet(newParts, 0, newPartCount); 2245 if (isFiniteNonZero() || category==fcNaN) 2246 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2247 freeSignificand(); 2248 significand.parts = newParts; 2249 } else if (newPartCount == 1 && oldPartCount != 1) { 2250 // Switch to built-in storage for a single part. 2251 integerPart newPart = 0; 2252 if (isFiniteNonZero() || category==fcNaN) 2253 newPart = significandParts()[0]; 2254 freeSignificand(); 2255 significand.part = newPart; 2256 } 2257 2258 // Now that we have the right storage, switch the semantics. 2259 semantics = &toSemantics; 2260 2261 // If this is an extension, perform the shift now that the storage is 2262 // available. 2263 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2264 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2265 2266 if (isFiniteNonZero()) { 2267 fs = normalize(rounding_mode, lostFraction); 2268 *losesInfo = (fs != opOK); 2269 } else if (category == fcNaN) { 2270 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2271 2272 // For x87 extended precision, we want to make a NaN, not a special NaN if 2273 // the input wasn't special either. 2274 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2275 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2276 2277 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2278 // This also guarantees that a sNaN does not become Inf on a truncation 2279 // that loses all payload bits. 2280 if (isSignaling()) { 2281 makeQuiet(); 2282 fs = opInvalidOp; 2283 } else { 2284 fs = opOK; 2285 } 2286 } else { 2287 *losesInfo = false; 2288 fs = opOK; 2289 } 2290 2291 return fs; 2292 } 2293 2294 /* Convert a floating point number to an integer according to the 2295 rounding mode. If the rounded integer value is out of range this 2296 returns an invalid operation exception and the contents of the 2297 destination parts are unspecified. If the rounded value is in 2298 range but the floating point number is not the exact integer, the C 2299 standard doesn't require an inexact exception to be raised. IEEE 2300 854 does require it so we do that. 2301 2302 Note that for conversions to integer type the C standard requires 2303 round-to-zero to always be used. */ 2304 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2305 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2306 roundingMode rounding_mode, bool *isExact) const { 2307 lostFraction lost_fraction; 2308 const integerPart *src; 2309 unsigned int dstPartsCount, truncatedBits; 2310 2311 *isExact = false; 2312 2313 /* Handle the three special cases first. */ 2314 if (category == fcInfinity || category == fcNaN) 2315 return opInvalidOp; 2316 2317 dstPartsCount = partCountForBits(width); 2318 assert(dstPartsCount <= parts.size() && "Integer too big"); 2319 2320 if (category == fcZero) { 2321 APInt::tcSet(parts.data(), 0, dstPartsCount); 2322 // Negative zero can't be represented as an int. 2323 *isExact = !sign; 2324 return opOK; 2325 } 2326 2327 src = significandParts(); 2328 2329 /* Step 1: place our absolute value, with any fraction truncated, in 2330 the destination. */ 2331 if (exponent < 0) { 2332 /* Our absolute value is less than one; truncate everything. */ 2333 APInt::tcSet(parts.data(), 0, dstPartsCount); 2334 /* For exponent -1 the integer bit represents .5, look at that. 2335 For smaller exponents leftmost truncated bit is 0. */ 2336 truncatedBits = semantics->precision -1U - exponent; 2337 } else { 2338 /* We want the most significant (exponent + 1) bits; the rest are 2339 truncated. */ 2340 unsigned int bits = exponent + 1U; 2341 2342 /* Hopelessly large in magnitude? */ 2343 if (bits > width) 2344 return opInvalidOp; 2345 2346 if (bits < semantics->precision) { 2347 /* We truncate (semantics->precision - bits) bits. */ 2348 truncatedBits = semantics->precision - bits; 2349 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2350 } else { 2351 /* We want at least as many bits as are available. */ 2352 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2353 0); 2354 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2355 bits - semantics->precision); 2356 truncatedBits = 0; 2357 } 2358 } 2359 2360 /* Step 2: work out any lost fraction, and increment the absolute 2361 value if we would round away from zero. */ 2362 if (truncatedBits) { 2363 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2364 truncatedBits); 2365 if (lost_fraction != lfExactlyZero && 2366 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2367 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2368 return opInvalidOp; /* Overflow. */ 2369 } 2370 } else { 2371 lost_fraction = lfExactlyZero; 2372 } 2373 2374 /* Step 3: check if we fit in the destination. */ 2375 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2376 2377 if (sign) { 2378 if (!isSigned) { 2379 /* Negative numbers cannot be represented as unsigned. */ 2380 if (omsb != 0) 2381 return opInvalidOp; 2382 } else { 2383 /* It takes omsb bits to represent the unsigned integer value. 2384 We lose a bit for the sign, but care is needed as the 2385 maximally negative integer is a special case. */ 2386 if (omsb == width && 2387 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2388 return opInvalidOp; 2389 2390 /* This case can happen because of rounding. */ 2391 if (omsb > width) 2392 return opInvalidOp; 2393 } 2394 2395 APInt::tcNegate (parts.data(), dstPartsCount); 2396 } else { 2397 if (omsb >= width + !isSigned) 2398 return opInvalidOp; 2399 } 2400 2401 if (lost_fraction == lfExactlyZero) { 2402 *isExact = true; 2403 return opOK; 2404 } else 2405 return opInexact; 2406 } 2407 2408 /* Same as convertToSignExtendedInteger, except we provide 2409 deterministic values in case of an invalid operation exception, 2410 namely zero for NaNs and the minimal or maximal value respectively 2411 for underflow or overflow. 2412 The *isExact output tells whether the result is exact, in the sense 2413 that converting it back to the original floating point type produces 2414 the original value. This is almost equivalent to result==opOK, 2415 except for negative zeroes. 2416 */ 2417 IEEEFloat::opStatus 2418 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2419 unsigned int width, bool isSigned, 2420 roundingMode rounding_mode, bool *isExact) const { 2421 opStatus fs; 2422 2423 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2424 isExact); 2425 2426 if (fs == opInvalidOp) { 2427 unsigned int bits, dstPartsCount; 2428 2429 dstPartsCount = partCountForBits(width); 2430 assert(dstPartsCount <= parts.size() && "Integer too big"); 2431 2432 if (category == fcNaN) 2433 bits = 0; 2434 else if (sign) 2435 bits = isSigned; 2436 else 2437 bits = width - isSigned; 2438 2439 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2440 if (sign && isSigned) 2441 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2442 } 2443 2444 return fs; 2445 } 2446 2447 /* Convert an unsigned integer SRC to a floating point number, 2448 rounding according to ROUNDING_MODE. The sign of the floating 2449 point number is not modified. */ 2450 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2451 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2452 unsigned int omsb, precision, dstCount; 2453 integerPart *dst; 2454 lostFraction lost_fraction; 2455 2456 category = fcNormal; 2457 omsb = APInt::tcMSB(src, srcCount) + 1; 2458 dst = significandParts(); 2459 dstCount = partCount(); 2460 precision = semantics->precision; 2461 2462 /* We want the most significant PRECISION bits of SRC. There may not 2463 be that many; extract what we can. */ 2464 if (precision <= omsb) { 2465 exponent = omsb - 1; 2466 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2467 omsb - precision); 2468 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2469 } else { 2470 exponent = precision - 1; 2471 lost_fraction = lfExactlyZero; 2472 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2473 } 2474 2475 return normalize(rounding_mode, lost_fraction); 2476 } 2477 2478 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2479 roundingMode rounding_mode) { 2480 unsigned int partCount = Val.getNumWords(); 2481 APInt api = Val; 2482 2483 sign = false; 2484 if (isSigned && api.isNegative()) { 2485 sign = true; 2486 api = -api; 2487 } 2488 2489 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2490 } 2491 2492 /* Convert a two's complement integer SRC to a floating point number, 2493 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2494 integer is signed, in which case it must be sign-extended. */ 2495 IEEEFloat::opStatus 2496 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2497 unsigned int srcCount, bool isSigned, 2498 roundingMode rounding_mode) { 2499 opStatus status; 2500 2501 if (isSigned && 2502 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2503 integerPart *copy; 2504 2505 /* If we're signed and negative negate a copy. */ 2506 sign = true; 2507 copy = new integerPart[srcCount]; 2508 APInt::tcAssign(copy, src, srcCount); 2509 APInt::tcNegate(copy, srcCount); 2510 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2511 delete [] copy; 2512 } else { 2513 sign = false; 2514 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2515 } 2516 2517 return status; 2518 } 2519 2520 /* FIXME: should this just take a const APInt reference? */ 2521 IEEEFloat::opStatus 2522 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2523 unsigned int width, bool isSigned, 2524 roundingMode rounding_mode) { 2525 unsigned int partCount = partCountForBits(width); 2526 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2527 2528 sign = false; 2529 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2530 sign = true; 2531 api = -api; 2532 } 2533 2534 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2535 } 2536 2537 Expected<IEEEFloat::opStatus> 2538 IEEEFloat::convertFromHexadecimalString(StringRef s, 2539 roundingMode rounding_mode) { 2540 lostFraction lost_fraction = lfExactlyZero; 2541 2542 category = fcNormal; 2543 zeroSignificand(); 2544 exponent = 0; 2545 2546 integerPart *significand = significandParts(); 2547 unsigned partsCount = partCount(); 2548 unsigned bitPos = partsCount * integerPartWidth; 2549 bool computedTrailingFraction = false; 2550 2551 // Skip leading zeroes and any (hexa)decimal point. 2552 StringRef::iterator begin = s.begin(); 2553 StringRef::iterator end = s.end(); 2554 StringRef::iterator dot; 2555 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2556 if (!PtrOrErr) 2557 return PtrOrErr.takeError(); 2558 StringRef::iterator p = *PtrOrErr; 2559 StringRef::iterator firstSignificantDigit = p; 2560 2561 while (p != end) { 2562 integerPart hex_value; 2563 2564 if (*p == '.') { 2565 if (dot != end) 2566 return createError("String contains multiple dots"); 2567 dot = p++; 2568 continue; 2569 } 2570 2571 hex_value = hexDigitValue(*p); 2572 if (hex_value == -1U) 2573 break; 2574 2575 p++; 2576 2577 // Store the number while we have space. 2578 if (bitPos) { 2579 bitPos -= 4; 2580 hex_value <<= bitPos % integerPartWidth; 2581 significand[bitPos / integerPartWidth] |= hex_value; 2582 } else if (!computedTrailingFraction) { 2583 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2584 if (!FractOrErr) 2585 return FractOrErr.takeError(); 2586 lost_fraction = *FractOrErr; 2587 computedTrailingFraction = true; 2588 } 2589 } 2590 2591 /* Hex floats require an exponent but not a hexadecimal point. */ 2592 if (p == end) 2593 return createError("Hex strings require an exponent"); 2594 if (*p != 'p' && *p != 'P') 2595 return createError("Invalid character in significand"); 2596 if (p == begin) 2597 return createError("Significand has no digits"); 2598 if (dot != end && p - begin == 1) 2599 return createError("Significand has no digits"); 2600 2601 /* Ignore the exponent if we are zero. */ 2602 if (p != firstSignificantDigit) { 2603 int expAdjustment; 2604 2605 /* Implicit hexadecimal point? */ 2606 if (dot == end) 2607 dot = p; 2608 2609 /* Calculate the exponent adjustment implicit in the number of 2610 significant digits. */ 2611 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2612 if (expAdjustment < 0) 2613 expAdjustment++; 2614 expAdjustment = expAdjustment * 4 - 1; 2615 2616 /* Adjust for writing the significand starting at the most 2617 significant nibble. */ 2618 expAdjustment += semantics->precision; 2619 expAdjustment -= partsCount * integerPartWidth; 2620 2621 /* Adjust for the given exponent. */ 2622 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 2623 if (!ExpOrErr) 2624 return ExpOrErr.takeError(); 2625 exponent = *ExpOrErr; 2626 } 2627 2628 return normalize(rounding_mode, lost_fraction); 2629 } 2630 2631 IEEEFloat::opStatus 2632 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2633 unsigned sigPartCount, int exp, 2634 roundingMode rounding_mode) { 2635 unsigned int parts, pow5PartCount; 2636 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2637 integerPart pow5Parts[maxPowerOfFiveParts]; 2638 bool isNearest; 2639 2640 isNearest = (rounding_mode == rmNearestTiesToEven || 2641 rounding_mode == rmNearestTiesToAway); 2642 2643 parts = partCountForBits(semantics->precision + 11); 2644 2645 /* Calculate pow(5, abs(exp)). */ 2646 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2647 2648 for (;; parts *= 2) { 2649 opStatus sigStatus, powStatus; 2650 unsigned int excessPrecision, truncatedBits; 2651 2652 calcSemantics.precision = parts * integerPartWidth - 1; 2653 excessPrecision = calcSemantics.precision - semantics->precision; 2654 truncatedBits = excessPrecision; 2655 2656 IEEEFloat decSig(calcSemantics, uninitialized); 2657 decSig.makeZero(sign); 2658 IEEEFloat pow5(calcSemantics); 2659 2660 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2661 rmNearestTiesToEven); 2662 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2663 rmNearestTiesToEven); 2664 /* Add exp, as 10^n = 5^n * 2^n. */ 2665 decSig.exponent += exp; 2666 2667 lostFraction calcLostFraction; 2668 integerPart HUerr, HUdistance; 2669 unsigned int powHUerr; 2670 2671 if (exp >= 0) { 2672 /* multiplySignificand leaves the precision-th bit set to 1. */ 2673 calcLostFraction = decSig.multiplySignificand(pow5); 2674 powHUerr = powStatus != opOK; 2675 } else { 2676 calcLostFraction = decSig.divideSignificand(pow5); 2677 /* Denormal numbers have less precision. */ 2678 if (decSig.exponent < semantics->minExponent) { 2679 excessPrecision += (semantics->minExponent - decSig.exponent); 2680 truncatedBits = excessPrecision; 2681 if (excessPrecision > calcSemantics.precision) 2682 excessPrecision = calcSemantics.precision; 2683 } 2684 /* Extra half-ulp lost in reciprocal of exponent. */ 2685 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2686 } 2687 2688 /* Both multiplySignificand and divideSignificand return the 2689 result with the integer bit set. */ 2690 assert(APInt::tcExtractBit 2691 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2692 2693 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2694 powHUerr); 2695 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2696 excessPrecision, isNearest); 2697 2698 /* Are we guaranteed to round correctly if we truncate? */ 2699 if (HUdistance >= HUerr) { 2700 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2701 calcSemantics.precision - excessPrecision, 2702 excessPrecision); 2703 /* Take the exponent of decSig. If we tcExtract-ed less bits 2704 above we must adjust our exponent to compensate for the 2705 implicit right shift. */ 2706 exponent = (decSig.exponent + semantics->precision 2707 - (calcSemantics.precision - excessPrecision)); 2708 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2709 decSig.partCount(), 2710 truncatedBits); 2711 return normalize(rounding_mode, calcLostFraction); 2712 } 2713 } 2714 } 2715 2716 Expected<IEEEFloat::opStatus> 2717 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2718 decimalInfo D; 2719 opStatus fs; 2720 2721 /* Scan the text. */ 2722 StringRef::iterator p = str.begin(); 2723 if (Error Err = interpretDecimal(p, str.end(), &D)) 2724 return std::move(Err); 2725 2726 /* Handle the quick cases. First the case of no significant digits, 2727 i.e. zero, and then exponents that are obviously too large or too 2728 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2729 definitely overflows if 2730 2731 (exp - 1) * L >= maxExponent 2732 2733 and definitely underflows to zero where 2734 2735 (exp + 1) * L <= minExponent - precision 2736 2737 With integer arithmetic the tightest bounds for L are 2738 2739 93/28 < L < 196/59 [ numerator <= 256 ] 2740 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2741 */ 2742 2743 // Test if we have a zero number allowing for strings with no null terminators 2744 // and zero decimals with non-zero exponents. 2745 // 2746 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2747 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2748 // be at most one dot. On the other hand, if we have a zero with a non-zero 2749 // exponent, then we know that D.firstSigDigit will be non-numeric. 2750 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2751 category = fcZero; 2752 fs = opOK; 2753 2754 /* Check whether the normalized exponent is high enough to overflow 2755 max during the log-rebasing in the max-exponent check below. */ 2756 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2757 fs = handleOverflow(rounding_mode); 2758 2759 /* If it wasn't, then it also wasn't high enough to overflow max 2760 during the log-rebasing in the min-exponent check. Check that it 2761 won't overflow min in either check, then perform the min-exponent 2762 check. */ 2763 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2764 (D.normalizedExponent + 1) * 28738 <= 2765 8651 * (semantics->minExponent - (int) semantics->precision)) { 2766 /* Underflow to zero and round. */ 2767 category = fcNormal; 2768 zeroSignificand(); 2769 fs = normalize(rounding_mode, lfLessThanHalf); 2770 2771 /* We can finally safely perform the max-exponent check. */ 2772 } else if ((D.normalizedExponent - 1) * 42039 2773 >= 12655 * semantics->maxExponent) { 2774 /* Overflow and round. */ 2775 fs = handleOverflow(rounding_mode); 2776 } else { 2777 integerPart *decSignificand; 2778 unsigned int partCount; 2779 2780 /* A tight upper bound on number of bits required to hold an 2781 N-digit decimal integer is N * 196 / 59. Allocate enough space 2782 to hold the full significand, and an extra part required by 2783 tcMultiplyPart. */ 2784 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2785 partCount = partCountForBits(1 + 196 * partCount / 59); 2786 decSignificand = new integerPart[partCount + 1]; 2787 partCount = 0; 2788 2789 /* Convert to binary efficiently - we do almost all multiplication 2790 in an integerPart. When this would overflow do we do a single 2791 bignum multiplication, and then revert again to multiplication 2792 in an integerPart. */ 2793 do { 2794 integerPart decValue, val, multiplier; 2795 2796 val = 0; 2797 multiplier = 1; 2798 2799 do { 2800 if (*p == '.') { 2801 p++; 2802 if (p == str.end()) { 2803 break; 2804 } 2805 } 2806 decValue = decDigitValue(*p++); 2807 if (decValue >= 10U) { 2808 delete[] decSignificand; 2809 return createError("Invalid character in significand"); 2810 } 2811 multiplier *= 10; 2812 val = val * 10 + decValue; 2813 /* The maximum number that can be multiplied by ten with any 2814 digit added without overflowing an integerPart. */ 2815 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2816 2817 /* Multiply out the current part. */ 2818 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2819 partCount, partCount + 1, false); 2820 2821 /* If we used another part (likely but not guaranteed), increase 2822 the count. */ 2823 if (decSignificand[partCount]) 2824 partCount++; 2825 } while (p <= D.lastSigDigit); 2826 2827 category = fcNormal; 2828 fs = roundSignificandWithExponent(decSignificand, partCount, 2829 D.exponent, rounding_mode); 2830 2831 delete [] decSignificand; 2832 } 2833 2834 return fs; 2835 } 2836 2837 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 2838 const size_t MIN_NAME_SIZE = 3; 2839 2840 if (str.size() < MIN_NAME_SIZE) 2841 return false; 2842 2843 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { 2844 makeInf(false); 2845 return true; 2846 } 2847 2848 bool IsNegative = str.front() == '-'; 2849 if (IsNegative) { 2850 str = str.drop_front(); 2851 if (str.size() < MIN_NAME_SIZE) 2852 return false; 2853 2854 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) { 2855 makeInf(true); 2856 return true; 2857 } 2858 } 2859 2860 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 2861 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 2862 if (IsSignaling) { 2863 str = str.drop_front(); 2864 if (str.size() < MIN_NAME_SIZE) 2865 return false; 2866 } 2867 2868 if (str.startswith("nan") || str.startswith("NaN")) { 2869 str = str.drop_front(3); 2870 2871 // A NaN without payload. 2872 if (str.empty()) { 2873 makeNaN(IsSignaling, IsNegative); 2874 return true; 2875 } 2876 2877 // Allow the payload to be inside parentheses. 2878 if (str.front() == '(') { 2879 // Parentheses should be balanced (and not empty). 2880 if (str.size() <= 2 || str.back() != ')') 2881 return false; 2882 2883 str = str.slice(1, str.size() - 1); 2884 } 2885 2886 // Determine the payload number's radix. 2887 unsigned Radix = 10; 2888 if (str[0] == '0') { 2889 if (str.size() > 1 && tolower(str[1]) == 'x') { 2890 str = str.drop_front(2); 2891 Radix = 16; 2892 } else 2893 Radix = 8; 2894 } 2895 2896 // Parse the payload and make the NaN. 2897 APInt Payload; 2898 if (!str.getAsInteger(Radix, Payload)) { 2899 makeNaN(IsSignaling, IsNegative, &Payload); 2900 return true; 2901 } 2902 } 2903 2904 return false; 2905 } 2906 2907 Expected<IEEEFloat::opStatus> 2908 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 2909 if (str.empty()) 2910 return createError("Invalid string length"); 2911 2912 // Handle special cases. 2913 if (convertFromStringSpecials(str)) 2914 return opOK; 2915 2916 /* Handle a leading minus sign. */ 2917 StringRef::iterator p = str.begin(); 2918 size_t slen = str.size(); 2919 sign = *p == '-' ? 1 : 0; 2920 if (*p == '-' || *p == '+') { 2921 p++; 2922 slen--; 2923 if (!slen) 2924 return createError("String has no digits"); 2925 } 2926 2927 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2928 if (slen == 2) 2929 return createError("Invalid string"); 2930 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2931 rounding_mode); 2932 } 2933 2934 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2935 } 2936 2937 /* Write out a hexadecimal representation of the floating point value 2938 to DST, which must be of sufficient size, in the C99 form 2939 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2940 excluding the terminating NUL. 2941 2942 If UPPERCASE, the output is in upper case, otherwise in lower case. 2943 2944 HEXDIGITS digits appear altogether, rounding the value if 2945 necessary. If HEXDIGITS is 0, the minimal precision to display the 2946 number precisely is used instead. If nothing would appear after 2947 the decimal point it is suppressed. 2948 2949 The decimal exponent is always printed and has at least one digit. 2950 Zero values display an exponent of zero. Infinities and NaNs 2951 appear as "infinity" or "nan" respectively. 2952 2953 The above rules are as specified by C99. There is ambiguity about 2954 what the leading hexadecimal digit should be. This implementation 2955 uses whatever is necessary so that the exponent is displayed as 2956 stored. This implies the exponent will fall within the IEEE format 2957 range, and the leading hexadecimal digit will be 0 (for denormals), 2958 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2959 any other digits zero). 2960 */ 2961 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 2962 bool upperCase, 2963 roundingMode rounding_mode) const { 2964 char *p; 2965 2966 p = dst; 2967 if (sign) 2968 *dst++ = '-'; 2969 2970 switch (category) { 2971 case fcInfinity: 2972 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2973 dst += sizeof infinityL - 1; 2974 break; 2975 2976 case fcNaN: 2977 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2978 dst += sizeof NaNU - 1; 2979 break; 2980 2981 case fcZero: 2982 *dst++ = '0'; 2983 *dst++ = upperCase ? 'X': 'x'; 2984 *dst++ = '0'; 2985 if (hexDigits > 1) { 2986 *dst++ = '.'; 2987 memset (dst, '0', hexDigits - 1); 2988 dst += hexDigits - 1; 2989 } 2990 *dst++ = upperCase ? 'P': 'p'; 2991 *dst++ = '0'; 2992 break; 2993 2994 case fcNormal: 2995 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2996 break; 2997 } 2998 2999 *dst = 0; 3000 3001 return static_cast<unsigned int>(dst - p); 3002 } 3003 3004 /* Does the hard work of outputting the correctly rounded hexadecimal 3005 form of a normal floating point number with the specified number of 3006 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3007 digits necessary to print the value precisely is output. */ 3008 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3009 bool upperCase, 3010 roundingMode rounding_mode) const { 3011 unsigned int count, valueBits, shift, partsCount, outputDigits; 3012 const char *hexDigitChars; 3013 const integerPart *significand; 3014 char *p; 3015 bool roundUp; 3016 3017 *dst++ = '0'; 3018 *dst++ = upperCase ? 'X': 'x'; 3019 3020 roundUp = false; 3021 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3022 3023 significand = significandParts(); 3024 partsCount = partCount(); 3025 3026 /* +3 because the first digit only uses the single integer bit, so 3027 we have 3 virtual zero most-significant-bits. */ 3028 valueBits = semantics->precision + 3; 3029 shift = integerPartWidth - valueBits % integerPartWidth; 3030 3031 /* The natural number of digits required ignoring trailing 3032 insignificant zeroes. */ 3033 outputDigits = (valueBits - significandLSB () + 3) / 4; 3034 3035 /* hexDigits of zero means use the required number for the 3036 precision. Otherwise, see if we are truncating. If we are, 3037 find out if we need to round away from zero. */ 3038 if (hexDigits) { 3039 if (hexDigits < outputDigits) { 3040 /* We are dropping non-zero bits, so need to check how to round. 3041 "bits" is the number of dropped bits. */ 3042 unsigned int bits; 3043 lostFraction fraction; 3044 3045 bits = valueBits - hexDigits * 4; 3046 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3047 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3048 } 3049 outputDigits = hexDigits; 3050 } 3051 3052 /* Write the digits consecutively, and start writing in the location 3053 of the hexadecimal point. We move the most significant digit 3054 left and add the hexadecimal point later. */ 3055 p = ++dst; 3056 3057 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3058 3059 while (outputDigits && count) { 3060 integerPart part; 3061 3062 /* Put the most significant integerPartWidth bits in "part". */ 3063 if (--count == partsCount) 3064 part = 0; /* An imaginary higher zero part. */ 3065 else 3066 part = significand[count] << shift; 3067 3068 if (count && shift) 3069 part |= significand[count - 1] >> (integerPartWidth - shift); 3070 3071 /* Convert as much of "part" to hexdigits as we can. */ 3072 unsigned int curDigits = integerPartWidth / 4; 3073 3074 if (curDigits > outputDigits) 3075 curDigits = outputDigits; 3076 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3077 outputDigits -= curDigits; 3078 } 3079 3080 if (roundUp) { 3081 char *q = dst; 3082 3083 /* Note that hexDigitChars has a trailing '0'. */ 3084 do { 3085 q--; 3086 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3087 } while (*q == '0'); 3088 assert(q >= p); 3089 } else { 3090 /* Add trailing zeroes. */ 3091 memset (dst, '0', outputDigits); 3092 dst += outputDigits; 3093 } 3094 3095 /* Move the most significant digit to before the point, and if there 3096 is something after the decimal point add it. This must come 3097 after rounding above. */ 3098 p[-1] = p[0]; 3099 if (dst -1 == p) 3100 dst--; 3101 else 3102 p[0] = '.'; 3103 3104 /* Finally output the exponent. */ 3105 *dst++ = upperCase ? 'P': 'p'; 3106 3107 return writeSignedDecimal (dst, exponent); 3108 } 3109 3110 hash_code hash_value(const IEEEFloat &Arg) { 3111 if (!Arg.isFiniteNonZero()) 3112 return hash_combine((uint8_t)Arg.category, 3113 // NaN has no sign, fix it at zero. 3114 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3115 Arg.semantics->precision); 3116 3117 // Normal floats need their exponent and significand hashed. 3118 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3119 Arg.semantics->precision, Arg.exponent, 3120 hash_combine_range( 3121 Arg.significandParts(), 3122 Arg.significandParts() + Arg.partCount())); 3123 } 3124 3125 // Conversion from APFloat to/from host float/double. It may eventually be 3126 // possible to eliminate these and have everybody deal with APFloats, but that 3127 // will take a while. This approach will not easily extend to long double. 3128 // Current implementation requires integerPartWidth==64, which is correct at 3129 // the moment but could be made more general. 3130 3131 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3132 // the actual IEEE respresentations. We compensate for that here. 3133 3134 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3135 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3136 assert(partCount()==2); 3137 3138 uint64_t myexponent, mysignificand; 3139 3140 if (isFiniteNonZero()) { 3141 myexponent = exponent+16383; //bias 3142 mysignificand = significandParts()[0]; 3143 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3144 myexponent = 0; // denormal 3145 } else if (category==fcZero) { 3146 myexponent = 0; 3147 mysignificand = 0; 3148 } else if (category==fcInfinity) { 3149 myexponent = 0x7fff; 3150 mysignificand = 0x8000000000000000ULL; 3151 } else { 3152 assert(category == fcNaN && "Unknown category"); 3153 myexponent = 0x7fff; 3154 mysignificand = significandParts()[0]; 3155 } 3156 3157 uint64_t words[2]; 3158 words[0] = mysignificand; 3159 words[1] = ((uint64_t)(sign & 1) << 15) | 3160 (myexponent & 0x7fffLL); 3161 return APInt(80, words); 3162 } 3163 3164 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3165 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3166 assert(partCount()==2); 3167 3168 uint64_t words[2]; 3169 opStatus fs; 3170 bool losesInfo; 3171 3172 // Convert number to double. To avoid spurious underflows, we re- 3173 // normalize against the "double" minExponent first, and only *then* 3174 // truncate the mantissa. The result of that second conversion 3175 // may be inexact, but should never underflow. 3176 // Declare fltSemantics before APFloat that uses it (and 3177 // saves pointer to it) to ensure correct destruction order. 3178 fltSemantics extendedSemantics = *semantics; 3179 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3180 IEEEFloat extended(*this); 3181 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3182 assert(fs == opOK && !losesInfo); 3183 (void)fs; 3184 3185 IEEEFloat u(extended); 3186 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3187 assert(fs == opOK || fs == opInexact); 3188 (void)fs; 3189 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3190 3191 // If conversion was exact or resulted in a special case, we're done; 3192 // just set the second double to zero. Otherwise, re-convert back to 3193 // the extended format and compute the difference. This now should 3194 // convert exactly to double. 3195 if (u.isFiniteNonZero() && losesInfo) { 3196 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3197 assert(fs == opOK && !losesInfo); 3198 (void)fs; 3199 3200 IEEEFloat v(extended); 3201 v.subtract(u, rmNearestTiesToEven); 3202 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3203 assert(fs == opOK && !losesInfo); 3204 (void)fs; 3205 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3206 } else { 3207 words[1] = 0; 3208 } 3209 3210 return APInt(128, words); 3211 } 3212 3213 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3214 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); 3215 assert(partCount()==2); 3216 3217 uint64_t myexponent, mysignificand, mysignificand2; 3218 3219 if (isFiniteNonZero()) { 3220 myexponent = exponent+16383; //bias 3221 mysignificand = significandParts()[0]; 3222 mysignificand2 = significandParts()[1]; 3223 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 3224 myexponent = 0; // denormal 3225 } else if (category==fcZero) { 3226 myexponent = 0; 3227 mysignificand = mysignificand2 = 0; 3228 } else if (category==fcInfinity) { 3229 myexponent = 0x7fff; 3230 mysignificand = mysignificand2 = 0; 3231 } else { 3232 assert(category == fcNaN && "Unknown category!"); 3233 myexponent = 0x7fff; 3234 mysignificand = significandParts()[0]; 3235 mysignificand2 = significandParts()[1]; 3236 } 3237 3238 uint64_t words[2]; 3239 words[0] = mysignificand; 3240 words[1] = ((uint64_t)(sign & 1) << 63) | 3241 ((myexponent & 0x7fff) << 48) | 3242 (mysignificand2 & 0xffffffffffffLL); 3243 3244 return APInt(128, words); 3245 } 3246 3247 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3248 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); 3249 assert(partCount()==1); 3250 3251 uint64_t myexponent, mysignificand; 3252 3253 if (isFiniteNonZero()) { 3254 myexponent = exponent+1023; //bias 3255 mysignificand = *significandParts(); 3256 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3257 myexponent = 0; // denormal 3258 } else if (category==fcZero) { 3259 myexponent = 0; 3260 mysignificand = 0; 3261 } else if (category==fcInfinity) { 3262 myexponent = 0x7ff; 3263 mysignificand = 0; 3264 } else { 3265 assert(category == fcNaN && "Unknown category!"); 3266 myexponent = 0x7ff; 3267 mysignificand = *significandParts(); 3268 } 3269 3270 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3271 ((myexponent & 0x7ff) << 52) | 3272 (mysignificand & 0xfffffffffffffLL)))); 3273 } 3274 3275 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3276 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); 3277 assert(partCount()==1); 3278 3279 uint32_t myexponent, mysignificand; 3280 3281 if (isFiniteNonZero()) { 3282 myexponent = exponent+127; //bias 3283 mysignificand = (uint32_t)*significandParts(); 3284 if (myexponent == 1 && !(mysignificand & 0x800000)) 3285 myexponent = 0; // denormal 3286 } else if (category==fcZero) { 3287 myexponent = 0; 3288 mysignificand = 0; 3289 } else if (category==fcInfinity) { 3290 myexponent = 0xff; 3291 mysignificand = 0; 3292 } else { 3293 assert(category == fcNaN && "Unknown category!"); 3294 myexponent = 0xff; 3295 mysignificand = (uint32_t)*significandParts(); 3296 } 3297 3298 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3299 (mysignificand & 0x7fffff))); 3300 } 3301 3302 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3303 assert(semantics == (const llvm::fltSemantics *)&semBFloat); 3304 assert(partCount() == 1); 3305 3306 uint32_t myexponent, mysignificand; 3307 3308 if (isFiniteNonZero()) { 3309 myexponent = exponent + 127; // bias 3310 mysignificand = (uint32_t)*significandParts(); 3311 if (myexponent == 1 && !(mysignificand & 0x80)) 3312 myexponent = 0; // denormal 3313 } else if (category == fcZero) { 3314 myexponent = 0; 3315 mysignificand = 0; 3316 } else if (category == fcInfinity) { 3317 myexponent = 0xff; 3318 mysignificand = 0; 3319 } else { 3320 assert(category == fcNaN && "Unknown category!"); 3321 myexponent = 0xff; 3322 mysignificand = (uint32_t)*significandParts(); 3323 } 3324 3325 return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) | 3326 (mysignificand & 0x7f))); 3327 } 3328 3329 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3330 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); 3331 assert(partCount()==1); 3332 3333 uint32_t myexponent, mysignificand; 3334 3335 if (isFiniteNonZero()) { 3336 myexponent = exponent+15; //bias 3337 mysignificand = (uint32_t)*significandParts(); 3338 if (myexponent == 1 && !(mysignificand & 0x400)) 3339 myexponent = 0; // denormal 3340 } else if (category==fcZero) { 3341 myexponent = 0; 3342 mysignificand = 0; 3343 } else if (category==fcInfinity) { 3344 myexponent = 0x1f; 3345 mysignificand = 0; 3346 } else { 3347 assert(category == fcNaN && "Unknown category!"); 3348 myexponent = 0x1f; 3349 mysignificand = (uint32_t)*significandParts(); 3350 } 3351 3352 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3353 (mysignificand & 0x3ff))); 3354 } 3355 3356 // This function creates an APInt that is just a bit map of the floating 3357 // point constant as it would appear in memory. It is not a conversion, 3358 // and treating the result as a normal integer is unlikely to be useful. 3359 3360 APInt IEEEFloat::bitcastToAPInt() const { 3361 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3362 return convertHalfAPFloatToAPInt(); 3363 3364 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3365 return convertBFloatAPFloatToAPInt(); 3366 3367 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3368 return convertFloatAPFloatToAPInt(); 3369 3370 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3371 return convertDoubleAPFloatToAPInt(); 3372 3373 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3374 return convertQuadrupleAPFloatToAPInt(); 3375 3376 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3377 return convertPPCDoubleDoubleAPFloatToAPInt(); 3378 3379 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3380 "unknown format!"); 3381 return convertF80LongDoubleAPFloatToAPInt(); 3382 } 3383 3384 float IEEEFloat::convertToFloat() const { 3385 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3386 "Float semantics are not IEEEsingle"); 3387 APInt api = bitcastToAPInt(); 3388 return api.bitsToFloat(); 3389 } 3390 3391 double IEEEFloat::convertToDouble() const { 3392 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3393 "Float semantics are not IEEEdouble"); 3394 APInt api = bitcastToAPInt(); 3395 return api.bitsToDouble(); 3396 } 3397 3398 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3399 /// does not support these bit patterns: 3400 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3401 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3402 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3403 /// exponent = 0, integer bit 1 ("pseudodenormal") 3404 /// At the moment, the first three are treated as NaNs, the last one as Normal. 3405 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3406 uint64_t i1 = api.getRawData()[0]; 3407 uint64_t i2 = api.getRawData()[1]; 3408 uint64_t myexponent = (i2 & 0x7fff); 3409 uint64_t mysignificand = i1; 3410 uint8_t myintegerbit = mysignificand >> 63; 3411 3412 initialize(&semX87DoubleExtended); 3413 assert(partCount()==2); 3414 3415 sign = static_cast<unsigned int>(i2>>15); 3416 if (myexponent == 0 && mysignificand == 0) { 3417 makeZero(sign); 3418 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3419 makeInf(sign); 3420 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3421 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3422 category = fcNaN; 3423 exponent = exponentNaN(); 3424 significandParts()[0] = mysignificand; 3425 significandParts()[1] = 0; 3426 } else { 3427 category = fcNormal; 3428 exponent = myexponent - 16383; 3429 significandParts()[0] = mysignificand; 3430 significandParts()[1] = 0; 3431 if (myexponent==0) // denormal 3432 exponent = -16382; 3433 } 3434 } 3435 3436 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3437 uint64_t i1 = api.getRawData()[0]; 3438 uint64_t i2 = api.getRawData()[1]; 3439 opStatus fs; 3440 bool losesInfo; 3441 3442 // Get the first double and convert to our format. 3443 initFromDoubleAPInt(APInt(64, i1)); 3444 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3445 assert(fs == opOK && !losesInfo); 3446 (void)fs; 3447 3448 // Unless we have a special case, add in second double. 3449 if (isFiniteNonZero()) { 3450 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3451 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3452 assert(fs == opOK && !losesInfo); 3453 (void)fs; 3454 3455 add(v, rmNearestTiesToEven); 3456 } 3457 } 3458 3459 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3460 uint64_t i1 = api.getRawData()[0]; 3461 uint64_t i2 = api.getRawData()[1]; 3462 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3463 uint64_t mysignificand = i1; 3464 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3465 3466 initialize(&semIEEEquad); 3467 assert(partCount()==2); 3468 3469 sign = static_cast<unsigned int>(i2>>63); 3470 if (myexponent==0 && 3471 (mysignificand==0 && mysignificand2==0)) { 3472 makeZero(sign); 3473 } else if (myexponent==0x7fff && 3474 (mysignificand==0 && mysignificand2==0)) { 3475 makeInf(sign); 3476 } else if (myexponent==0x7fff && 3477 (mysignificand!=0 || mysignificand2 !=0)) { 3478 category = fcNaN; 3479 exponent = exponentNaN(); 3480 significandParts()[0] = mysignificand; 3481 significandParts()[1] = mysignificand2; 3482 } else { 3483 category = fcNormal; 3484 exponent = myexponent - 16383; 3485 significandParts()[0] = mysignificand; 3486 significandParts()[1] = mysignificand2; 3487 if (myexponent==0) // denormal 3488 exponent = -16382; 3489 else 3490 significandParts()[1] |= 0x1000000000000LL; // integer bit 3491 } 3492 } 3493 3494 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3495 uint64_t i = *api.getRawData(); 3496 uint64_t myexponent = (i >> 52) & 0x7ff; 3497 uint64_t mysignificand = i & 0xfffffffffffffLL; 3498 3499 initialize(&semIEEEdouble); 3500 assert(partCount()==1); 3501 3502 sign = static_cast<unsigned int>(i>>63); 3503 if (myexponent==0 && mysignificand==0) { 3504 makeZero(sign); 3505 } else if (myexponent==0x7ff && mysignificand==0) { 3506 makeInf(sign); 3507 } else if (myexponent==0x7ff && mysignificand!=0) { 3508 category = fcNaN; 3509 exponent = exponentNaN(); 3510 *significandParts() = mysignificand; 3511 } else { 3512 category = fcNormal; 3513 exponent = myexponent - 1023; 3514 *significandParts() = mysignificand; 3515 if (myexponent==0) // denormal 3516 exponent = -1022; 3517 else 3518 *significandParts() |= 0x10000000000000LL; // integer bit 3519 } 3520 } 3521 3522 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3523 uint32_t i = (uint32_t)*api.getRawData(); 3524 uint32_t myexponent = (i >> 23) & 0xff; 3525 uint32_t mysignificand = i & 0x7fffff; 3526 3527 initialize(&semIEEEsingle); 3528 assert(partCount()==1); 3529 3530 sign = i >> 31; 3531 if (myexponent==0 && mysignificand==0) { 3532 makeZero(sign); 3533 } else if (myexponent==0xff && mysignificand==0) { 3534 makeInf(sign); 3535 } else if (myexponent==0xff && mysignificand!=0) { 3536 category = fcNaN; 3537 exponent = exponentNaN(); 3538 *significandParts() = mysignificand; 3539 } else { 3540 category = fcNormal; 3541 exponent = myexponent - 127; //bias 3542 *significandParts() = mysignificand; 3543 if (myexponent==0) // denormal 3544 exponent = -126; 3545 else 3546 *significandParts() |= 0x800000; // integer bit 3547 } 3548 } 3549 3550 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 3551 uint32_t i = (uint32_t)*api.getRawData(); 3552 uint32_t myexponent = (i >> 7) & 0xff; 3553 uint32_t mysignificand = i & 0x7f; 3554 3555 initialize(&semBFloat); 3556 assert(partCount() == 1); 3557 3558 sign = i >> 15; 3559 if (myexponent == 0 && mysignificand == 0) { 3560 makeZero(sign); 3561 } else if (myexponent == 0xff && mysignificand == 0) { 3562 makeInf(sign); 3563 } else if (myexponent == 0xff && mysignificand != 0) { 3564 category = fcNaN; 3565 exponent = exponentNaN(); 3566 *significandParts() = mysignificand; 3567 } else { 3568 category = fcNormal; 3569 exponent = myexponent - 127; // bias 3570 *significandParts() = mysignificand; 3571 if (myexponent == 0) // denormal 3572 exponent = -126; 3573 else 3574 *significandParts() |= 0x80; // integer bit 3575 } 3576 } 3577 3578 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3579 uint32_t i = (uint32_t)*api.getRawData(); 3580 uint32_t myexponent = (i >> 10) & 0x1f; 3581 uint32_t mysignificand = i & 0x3ff; 3582 3583 initialize(&semIEEEhalf); 3584 assert(partCount()==1); 3585 3586 sign = i >> 15; 3587 if (myexponent==0 && mysignificand==0) { 3588 makeZero(sign); 3589 } else if (myexponent==0x1f && mysignificand==0) { 3590 makeInf(sign); 3591 } else if (myexponent==0x1f && mysignificand!=0) { 3592 category = fcNaN; 3593 exponent = exponentNaN(); 3594 *significandParts() = mysignificand; 3595 } else { 3596 category = fcNormal; 3597 exponent = myexponent - 15; //bias 3598 *significandParts() = mysignificand; 3599 if (myexponent==0) // denormal 3600 exponent = -14; 3601 else 3602 *significandParts() |= 0x400; // integer bit 3603 } 3604 } 3605 3606 /// Treat api as containing the bits of a floating point number. Currently 3607 /// we infer the floating point type from the size of the APInt. The 3608 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3609 /// when the size is anything else). 3610 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3611 assert(api.getBitWidth() == Sem->sizeInBits); 3612 if (Sem == &semIEEEhalf) 3613 return initFromHalfAPInt(api); 3614 if (Sem == &semBFloat) 3615 return initFromBFloatAPInt(api); 3616 if (Sem == &semIEEEsingle) 3617 return initFromFloatAPInt(api); 3618 if (Sem == &semIEEEdouble) 3619 return initFromDoubleAPInt(api); 3620 if (Sem == &semX87DoubleExtended) 3621 return initFromF80LongDoubleAPInt(api); 3622 if (Sem == &semIEEEquad) 3623 return initFromQuadrupleAPInt(api); 3624 if (Sem == &semPPCDoubleDoubleLegacy) 3625 return initFromPPCDoubleDoubleAPInt(api); 3626 3627 llvm_unreachable(nullptr); 3628 } 3629 3630 /// Make this number the largest magnitude normal number in the given 3631 /// semantics. 3632 void IEEEFloat::makeLargest(bool Negative) { 3633 // We want (in interchange format): 3634 // sign = {Negative} 3635 // exponent = 1..10 3636 // significand = 1..1 3637 category = fcNormal; 3638 sign = Negative; 3639 exponent = semantics->maxExponent; 3640 3641 // Use memset to set all but the highest integerPart to all ones. 3642 integerPart *significand = significandParts(); 3643 unsigned PartCount = partCount(); 3644 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3645 3646 // Set the high integerPart especially setting all unused top bits for 3647 // internal consistency. 3648 const unsigned NumUnusedHighBits = 3649 PartCount*integerPartWidth - semantics->precision; 3650 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3651 ? (~integerPart(0) >> NumUnusedHighBits) 3652 : 0; 3653 } 3654 3655 /// Make this number the smallest magnitude denormal number in the given 3656 /// semantics. 3657 void IEEEFloat::makeSmallest(bool Negative) { 3658 // We want (in interchange format): 3659 // sign = {Negative} 3660 // exponent = 0..0 3661 // significand = 0..01 3662 category = fcNormal; 3663 sign = Negative; 3664 exponent = semantics->minExponent; 3665 APInt::tcSet(significandParts(), 1, partCount()); 3666 } 3667 3668 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3669 // We want (in interchange format): 3670 // sign = {Negative} 3671 // exponent = 0..0 3672 // significand = 10..0 3673 3674 category = fcNormal; 3675 zeroSignificand(); 3676 sign = Negative; 3677 exponent = semantics->minExponent; 3678 significandParts()[partCountForBits(semantics->precision) - 1] |= 3679 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth)); 3680 } 3681 3682 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3683 initFromAPInt(&Sem, API); 3684 } 3685 3686 IEEEFloat::IEEEFloat(float f) { 3687 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 3688 } 3689 3690 IEEEFloat::IEEEFloat(double d) { 3691 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 3692 } 3693 3694 namespace { 3695 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3696 Buffer.append(Str.begin(), Str.end()); 3697 } 3698 3699 /// Removes data from the given significand until it is no more 3700 /// precise than is required for the desired precision. 3701 void AdjustToPrecision(APInt &significand, 3702 int &exp, unsigned FormatPrecision) { 3703 unsigned bits = significand.getActiveBits(); 3704 3705 // 196/59 is a very slight overestimate of lg_2(10). 3706 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3707 3708 if (bits <= bitsRequired) return; 3709 3710 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3711 if (!tensRemovable) return; 3712 3713 exp += tensRemovable; 3714 3715 APInt divisor(significand.getBitWidth(), 1); 3716 APInt powten(significand.getBitWidth(), 10); 3717 while (true) { 3718 if (tensRemovable & 1) 3719 divisor *= powten; 3720 tensRemovable >>= 1; 3721 if (!tensRemovable) break; 3722 powten *= powten; 3723 } 3724 3725 significand = significand.udiv(divisor); 3726 3727 // Truncate the significand down to its active bit count. 3728 significand = significand.trunc(significand.getActiveBits()); 3729 } 3730 3731 3732 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3733 int &exp, unsigned FormatPrecision) { 3734 unsigned N = buffer.size(); 3735 if (N <= FormatPrecision) return; 3736 3737 // The most significant figures are the last ones in the buffer. 3738 unsigned FirstSignificant = N - FormatPrecision; 3739 3740 // Round. 3741 // FIXME: this probably shouldn't use 'round half up'. 3742 3743 // Rounding down is just a truncation, except we also want to drop 3744 // trailing zeros from the new result. 3745 if (buffer[FirstSignificant - 1] < '5') { 3746 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3747 FirstSignificant++; 3748 3749 exp += FirstSignificant; 3750 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3751 return; 3752 } 3753 3754 // Rounding up requires a decimal add-with-carry. If we continue 3755 // the carry, the newly-introduced zeros will just be truncated. 3756 for (unsigned I = FirstSignificant; I != N; ++I) { 3757 if (buffer[I] == '9') { 3758 FirstSignificant++; 3759 } else { 3760 buffer[I]++; 3761 break; 3762 } 3763 } 3764 3765 // If we carried through, we have exactly one digit of precision. 3766 if (FirstSignificant == N) { 3767 exp += FirstSignificant; 3768 buffer.clear(); 3769 buffer.push_back('1'); 3770 return; 3771 } 3772 3773 exp += FirstSignificant; 3774 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3775 } 3776 } // namespace 3777 3778 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 3779 unsigned FormatMaxPadding, bool TruncateZero) const { 3780 switch (category) { 3781 case fcInfinity: 3782 if (isNegative()) 3783 return append(Str, "-Inf"); 3784 else 3785 return append(Str, "+Inf"); 3786 3787 case fcNaN: return append(Str, "NaN"); 3788 3789 case fcZero: 3790 if (isNegative()) 3791 Str.push_back('-'); 3792 3793 if (!FormatMaxPadding) { 3794 if (TruncateZero) 3795 append(Str, "0.0E+0"); 3796 else { 3797 append(Str, "0.0"); 3798 if (FormatPrecision > 1) 3799 Str.append(FormatPrecision - 1, '0'); 3800 append(Str, "e+00"); 3801 } 3802 } else 3803 Str.push_back('0'); 3804 return; 3805 3806 case fcNormal: 3807 break; 3808 } 3809 3810 if (isNegative()) 3811 Str.push_back('-'); 3812 3813 // Decompose the number into an APInt and an exponent. 3814 int exp = exponent - ((int) semantics->precision - 1); 3815 APInt significand(semantics->precision, 3816 makeArrayRef(significandParts(), 3817 partCountForBits(semantics->precision))); 3818 3819 // Set FormatPrecision if zero. We want to do this before we 3820 // truncate trailing zeros, as those are part of the precision. 3821 if (!FormatPrecision) { 3822 // We use enough digits so the number can be round-tripped back to an 3823 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3824 // Accurately" by Steele and White. 3825 // FIXME: Using a formula based purely on the precision is conservative; 3826 // we can print fewer digits depending on the actual value being printed. 3827 3828 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3829 FormatPrecision = 2 + semantics->precision * 59 / 196; 3830 } 3831 3832 // Ignore trailing binary zeros. 3833 int trailingZeros = significand.countTrailingZeros(); 3834 exp += trailingZeros; 3835 significand.lshrInPlace(trailingZeros); 3836 3837 // Change the exponent from 2^e to 10^e. 3838 if (exp == 0) { 3839 // Nothing to do. 3840 } else if (exp > 0) { 3841 // Just shift left. 3842 significand = significand.zext(semantics->precision + exp); 3843 significand <<= exp; 3844 exp = 0; 3845 } else { /* exp < 0 */ 3846 int texp = -exp; 3847 3848 // We transform this using the identity: 3849 // (N)(2^-e) == (N)(5^e)(10^-e) 3850 // This means we have to multiply N (the significand) by 5^e. 3851 // To avoid overflow, we have to operate on numbers large 3852 // enough to store N * 5^e: 3853 // log2(N * 5^e) == log2(N) + e * log2(5) 3854 // <= semantics->precision + e * 137 / 59 3855 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3856 3857 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3858 3859 // Multiply significand by 5^e. 3860 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3861 significand = significand.zext(precision); 3862 APInt five_to_the_i(precision, 5); 3863 while (true) { 3864 if (texp & 1) significand *= five_to_the_i; 3865 3866 texp >>= 1; 3867 if (!texp) break; 3868 five_to_the_i *= five_to_the_i; 3869 } 3870 } 3871 3872 AdjustToPrecision(significand, exp, FormatPrecision); 3873 3874 SmallVector<char, 256> buffer; 3875 3876 // Fill the buffer. 3877 unsigned precision = significand.getBitWidth(); 3878 APInt ten(precision, 10); 3879 APInt digit(precision, 0); 3880 3881 bool inTrail = true; 3882 while (significand != 0) { 3883 // digit <- significand % 10 3884 // significand <- significand / 10 3885 APInt::udivrem(significand, ten, significand, digit); 3886 3887 unsigned d = digit.getZExtValue(); 3888 3889 // Drop trailing zeros. 3890 if (inTrail && !d) exp++; 3891 else { 3892 buffer.push_back((char) ('0' + d)); 3893 inTrail = false; 3894 } 3895 } 3896 3897 assert(!buffer.empty() && "no characters in buffer!"); 3898 3899 // Drop down to FormatPrecision. 3900 // TODO: don't do more precise calculations above than are required. 3901 AdjustToPrecision(buffer, exp, FormatPrecision); 3902 3903 unsigned NDigits = buffer.size(); 3904 3905 // Check whether we should use scientific notation. 3906 bool FormatScientific; 3907 if (!FormatMaxPadding) 3908 FormatScientific = true; 3909 else { 3910 if (exp >= 0) { 3911 // 765e3 --> 765000 3912 // ^^^ 3913 // But we shouldn't make the number look more precise than it is. 3914 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3915 NDigits + (unsigned) exp > FormatPrecision); 3916 } else { 3917 // Power of the most significant digit. 3918 int MSD = exp + (int) (NDigits - 1); 3919 if (MSD >= 0) { 3920 // 765e-2 == 7.65 3921 FormatScientific = false; 3922 } else { 3923 // 765e-5 == 0.00765 3924 // ^ ^^ 3925 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3926 } 3927 } 3928 } 3929 3930 // Scientific formatting is pretty straightforward. 3931 if (FormatScientific) { 3932 exp += (NDigits - 1); 3933 3934 Str.push_back(buffer[NDigits-1]); 3935 Str.push_back('.'); 3936 if (NDigits == 1 && TruncateZero) 3937 Str.push_back('0'); 3938 else 3939 for (unsigned I = 1; I != NDigits; ++I) 3940 Str.push_back(buffer[NDigits-1-I]); 3941 // Fill with zeros up to FormatPrecision. 3942 if (!TruncateZero && FormatPrecision > NDigits - 1) 3943 Str.append(FormatPrecision - NDigits + 1, '0'); 3944 // For !TruncateZero we use lower 'e'. 3945 Str.push_back(TruncateZero ? 'E' : 'e'); 3946 3947 Str.push_back(exp >= 0 ? '+' : '-'); 3948 if (exp < 0) exp = -exp; 3949 SmallVector<char, 6> expbuf; 3950 do { 3951 expbuf.push_back((char) ('0' + (exp % 10))); 3952 exp /= 10; 3953 } while (exp); 3954 // Exponent always at least two digits if we do not truncate zeros. 3955 if (!TruncateZero && expbuf.size() < 2) 3956 expbuf.push_back('0'); 3957 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3958 Str.push_back(expbuf[E-1-I]); 3959 return; 3960 } 3961 3962 // Non-scientific, positive exponents. 3963 if (exp >= 0) { 3964 for (unsigned I = 0; I != NDigits; ++I) 3965 Str.push_back(buffer[NDigits-1-I]); 3966 for (unsigned I = 0; I != (unsigned) exp; ++I) 3967 Str.push_back('0'); 3968 return; 3969 } 3970 3971 // Non-scientific, negative exponents. 3972 3973 // The number of digits to the left of the decimal point. 3974 int NWholeDigits = exp + (int) NDigits; 3975 3976 unsigned I = 0; 3977 if (NWholeDigits > 0) { 3978 for (; I != (unsigned) NWholeDigits; ++I) 3979 Str.push_back(buffer[NDigits-I-1]); 3980 Str.push_back('.'); 3981 } else { 3982 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3983 3984 Str.push_back('0'); 3985 Str.push_back('.'); 3986 for (unsigned Z = 1; Z != NZeros; ++Z) 3987 Str.push_back('0'); 3988 } 3989 3990 for (; I != NDigits; ++I) 3991 Str.push_back(buffer[NDigits-I-1]); 3992 } 3993 3994 bool IEEEFloat::getExactInverse(APFloat *inv) const { 3995 // Special floats and denormals have no exact inverse. 3996 if (!isFiniteNonZero()) 3997 return false; 3998 3999 // Check that the number is a power of two by making sure that only the 4000 // integer bit is set in the significand. 4001 if (significandLSB() != semantics->precision - 1) 4002 return false; 4003 4004 // Get the inverse. 4005 IEEEFloat reciprocal(*semantics, 1ULL); 4006 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 4007 return false; 4008 4009 // Avoid multiplication with a denormal, it is not safe on all platforms and 4010 // may be slower than a normal division. 4011 if (reciprocal.isDenormal()) 4012 return false; 4013 4014 assert(reciprocal.isFiniteNonZero() && 4015 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 4016 4017 if (inv) 4018 *inv = APFloat(reciprocal, *semantics); 4019 4020 return true; 4021 } 4022 4023 bool IEEEFloat::isSignaling() const { 4024 if (!isNaN()) 4025 return false; 4026 4027 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 4028 // first bit of the trailing significand being 0. 4029 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 4030 } 4031 4032 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 4033 /// 4034 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 4035 /// appropriate sign switching before/after the computation. 4036 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 4037 // If we are performing nextDown, swap sign so we have -x. 4038 if (nextDown) 4039 changeSign(); 4040 4041 // Compute nextUp(x) 4042 opStatus result = opOK; 4043 4044 // Handle each float category separately. 4045 switch (category) { 4046 case fcInfinity: 4047 // nextUp(+inf) = +inf 4048 if (!isNegative()) 4049 break; 4050 // nextUp(-inf) = -getLargest() 4051 makeLargest(true); 4052 break; 4053 case fcNaN: 4054 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 4055 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 4056 // change the payload. 4057 if (isSignaling()) { 4058 result = opInvalidOp; 4059 // For consistency, propagate the sign of the sNaN to the qNaN. 4060 makeNaN(false, isNegative(), nullptr); 4061 } 4062 break; 4063 case fcZero: 4064 // nextUp(pm 0) = +getSmallest() 4065 makeSmallest(false); 4066 break; 4067 case fcNormal: 4068 // nextUp(-getSmallest()) = -0 4069 if (isSmallest() && isNegative()) { 4070 APInt::tcSet(significandParts(), 0, partCount()); 4071 category = fcZero; 4072 exponent = 0; 4073 break; 4074 } 4075 4076 // nextUp(getLargest()) == INFINITY 4077 if (isLargest() && !isNegative()) { 4078 APInt::tcSet(significandParts(), 0, partCount()); 4079 category = fcInfinity; 4080 exponent = semantics->maxExponent + 1; 4081 break; 4082 } 4083 4084 // nextUp(normal) == normal + inc. 4085 if (isNegative()) { 4086 // If we are negative, we need to decrement the significand. 4087 4088 // We only cross a binade boundary that requires adjusting the exponent 4089 // if: 4090 // 1. exponent != semantics->minExponent. This implies we are not in the 4091 // smallest binade or are dealing with denormals. 4092 // 2. Our significand excluding the integral bit is all zeros. 4093 bool WillCrossBinadeBoundary = 4094 exponent != semantics->minExponent && isSignificandAllZeros(); 4095 4096 // Decrement the significand. 4097 // 4098 // We always do this since: 4099 // 1. If we are dealing with a non-binade decrement, by definition we 4100 // just decrement the significand. 4101 // 2. If we are dealing with a normal -> normal binade decrement, since 4102 // we have an explicit integral bit the fact that all bits but the 4103 // integral bit are zero implies that subtracting one will yield a 4104 // significand with 0 integral bit and 1 in all other spots. Thus we 4105 // must just adjust the exponent and set the integral bit to 1. 4106 // 3. If we are dealing with a normal -> denormal binade decrement, 4107 // since we set the integral bit to 0 when we represent denormals, we 4108 // just decrement the significand. 4109 integerPart *Parts = significandParts(); 4110 APInt::tcDecrement(Parts, partCount()); 4111 4112 if (WillCrossBinadeBoundary) { 4113 // Our result is a normal number. Do the following: 4114 // 1. Set the integral bit to 1. 4115 // 2. Decrement the exponent. 4116 APInt::tcSetBit(Parts, semantics->precision - 1); 4117 exponent--; 4118 } 4119 } else { 4120 // If we are positive, we need to increment the significand. 4121 4122 // We only cross a binade boundary that requires adjusting the exponent if 4123 // the input is not a denormal and all of said input's significand bits 4124 // are set. If all of said conditions are true: clear the significand, set 4125 // the integral bit to 1, and increment the exponent. If we have a 4126 // denormal always increment since moving denormals and the numbers in the 4127 // smallest normal binade have the same exponent in our representation. 4128 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 4129 4130 if (WillCrossBinadeBoundary) { 4131 integerPart *Parts = significandParts(); 4132 APInt::tcSet(Parts, 0, partCount()); 4133 APInt::tcSetBit(Parts, semantics->precision - 1); 4134 assert(exponent != semantics->maxExponent && 4135 "We can not increment an exponent beyond the maxExponent allowed" 4136 " by the given floating point semantics."); 4137 exponent++; 4138 } else { 4139 incrementSignificand(); 4140 } 4141 } 4142 break; 4143 } 4144 4145 // If we are performing nextDown, swap sign so we have -nextUp(-x) 4146 if (nextDown) 4147 changeSign(); 4148 4149 return result; 4150 } 4151 4152 APFloatBase::ExponentType IEEEFloat::exponentNaN() const { 4153 return semantics->maxExponent + 1; 4154 } 4155 4156 APFloatBase::ExponentType IEEEFloat::exponentInf() const { 4157 return semantics->maxExponent + 1; 4158 } 4159 4160 APFloatBase::ExponentType IEEEFloat::exponentZero() const { 4161 return semantics->minExponent - 1; 4162 } 4163 4164 void IEEEFloat::makeInf(bool Negative) { 4165 category = fcInfinity; 4166 sign = Negative; 4167 exponent = exponentInf(); 4168 APInt::tcSet(significandParts(), 0, partCount()); 4169 } 4170 4171 void IEEEFloat::makeZero(bool Negative) { 4172 category = fcZero; 4173 sign = Negative; 4174 exponent = exponentZero(); 4175 APInt::tcSet(significandParts(), 0, partCount()); 4176 } 4177 4178 void IEEEFloat::makeQuiet() { 4179 assert(isNaN()); 4180 APInt::tcSetBit(significandParts(), semantics->precision - 2); 4181 } 4182 4183 int ilogb(const IEEEFloat &Arg) { 4184 if (Arg.isNaN()) 4185 return IEEEFloat::IEK_NaN; 4186 if (Arg.isZero()) 4187 return IEEEFloat::IEK_Zero; 4188 if (Arg.isInfinity()) 4189 return IEEEFloat::IEK_Inf; 4190 if (!Arg.isDenormal()) 4191 return Arg.exponent; 4192 4193 IEEEFloat Normalized(Arg); 4194 int SignificandBits = Arg.getSemantics().precision - 1; 4195 4196 Normalized.exponent += SignificandBits; 4197 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 4198 return Normalized.exponent - SignificandBits; 4199 } 4200 4201 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 4202 auto MaxExp = X.getSemantics().maxExponent; 4203 auto MinExp = X.getSemantics().minExponent; 4204 4205 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 4206 // overflow; clamp it to a safe range before adding, but ensure that the range 4207 // is large enough that the clamp does not change the result. The range we 4208 // need to support is the difference between the largest possible exponent and 4209 // the normalized exponent of half the smallest denormal. 4210 4211 int SignificandBits = X.getSemantics().precision - 1; 4212 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4213 4214 // Clamp to one past the range ends to let normalize handle overlflow. 4215 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); 4216 X.normalize(RoundingMode, lfExactlyZero); 4217 if (X.isNaN()) 4218 X.makeQuiet(); 4219 return X; 4220 } 4221 4222 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 4223 Exp = ilogb(Val); 4224 4225 // Quiet signalling nans. 4226 if (Exp == IEEEFloat::IEK_NaN) { 4227 IEEEFloat Quiet(Val); 4228 Quiet.makeQuiet(); 4229 return Quiet; 4230 } 4231 4232 if (Exp == IEEEFloat::IEK_Inf) 4233 return Val; 4234 4235 // 1 is added because frexp is defined to return a normalized fraction in 4236 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4237 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 4238 return scalbn(Val, -Exp, RM); 4239 } 4240 4241 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4242 : Semantics(&S), 4243 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4244 assert(Semantics == &semPPCDoubleDouble); 4245 } 4246 4247 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4248 : Semantics(&S), 4249 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4250 APFloat(semIEEEdouble, uninitialized)}) { 4251 assert(Semantics == &semPPCDoubleDouble); 4252 } 4253 4254 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4255 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4256 APFloat(semIEEEdouble)}) { 4257 assert(Semantics == &semPPCDoubleDouble); 4258 } 4259 4260 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4261 : Semantics(&S), 4262 Floats(new APFloat[2]{ 4263 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4264 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4265 assert(Semantics == &semPPCDoubleDouble); 4266 } 4267 4268 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4269 APFloat &&Second) 4270 : Semantics(&S), 4271 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4272 assert(Semantics == &semPPCDoubleDouble); 4273 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4274 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4275 } 4276 4277 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4278 : Semantics(RHS.Semantics), 4279 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4280 APFloat(RHS.Floats[1])} 4281 : nullptr) { 4282 assert(Semantics == &semPPCDoubleDouble); 4283 } 4284 4285 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4286 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4287 RHS.Semantics = &semBogus; 4288 assert(Semantics == &semPPCDoubleDouble); 4289 } 4290 4291 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4292 if (Semantics == RHS.Semantics && RHS.Floats) { 4293 Floats[0] = RHS.Floats[0]; 4294 Floats[1] = RHS.Floats[1]; 4295 } else if (this != &RHS) { 4296 this->~DoubleAPFloat(); 4297 new (this) DoubleAPFloat(RHS); 4298 } 4299 return *this; 4300 } 4301 4302 // Implement addition, subtraction, multiplication and division based on: 4303 // "Software for Doubled-Precision Floating-Point Computations", 4304 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 4305 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4306 const APFloat &c, const APFloat &cc, 4307 roundingMode RM) { 4308 int Status = opOK; 4309 APFloat z = a; 4310 Status |= z.add(c, RM); 4311 if (!z.isFinite()) { 4312 if (!z.isInfinity()) { 4313 Floats[0] = std::move(z); 4314 Floats[1].makeZero(/* Neg = */ false); 4315 return (opStatus)Status; 4316 } 4317 Status = opOK; 4318 auto AComparedToC = a.compareAbsoluteValue(c); 4319 z = cc; 4320 Status |= z.add(aa, RM); 4321 if (AComparedToC == APFloat::cmpGreaterThan) { 4322 // z = cc + aa + c + a; 4323 Status |= z.add(c, RM); 4324 Status |= z.add(a, RM); 4325 } else { 4326 // z = cc + aa + a + c; 4327 Status |= z.add(a, RM); 4328 Status |= z.add(c, RM); 4329 } 4330 if (!z.isFinite()) { 4331 Floats[0] = std::move(z); 4332 Floats[1].makeZero(/* Neg = */ false); 4333 return (opStatus)Status; 4334 } 4335 Floats[0] = z; 4336 APFloat zz = aa; 4337 Status |= zz.add(cc, RM); 4338 if (AComparedToC == APFloat::cmpGreaterThan) { 4339 // Floats[1] = a - z + c + zz; 4340 Floats[1] = a; 4341 Status |= Floats[1].subtract(z, RM); 4342 Status |= Floats[1].add(c, RM); 4343 Status |= Floats[1].add(zz, RM); 4344 } else { 4345 // Floats[1] = c - z + a + zz; 4346 Floats[1] = c; 4347 Status |= Floats[1].subtract(z, RM); 4348 Status |= Floats[1].add(a, RM); 4349 Status |= Floats[1].add(zz, RM); 4350 } 4351 } else { 4352 // q = a - z; 4353 APFloat q = a; 4354 Status |= q.subtract(z, RM); 4355 4356 // zz = q + c + (a - (q + z)) + aa + cc; 4357 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 4358 auto zz = q; 4359 Status |= zz.add(c, RM); 4360 Status |= q.add(z, RM); 4361 Status |= q.subtract(a, RM); 4362 q.changeSign(); 4363 Status |= zz.add(q, RM); 4364 Status |= zz.add(aa, RM); 4365 Status |= zz.add(cc, RM); 4366 if (zz.isZero() && !zz.isNegative()) { 4367 Floats[0] = std::move(z); 4368 Floats[1].makeZero(/* Neg = */ false); 4369 return opOK; 4370 } 4371 Floats[0] = z; 4372 Status |= Floats[0].add(zz, RM); 4373 if (!Floats[0].isFinite()) { 4374 Floats[1].makeZero(/* Neg = */ false); 4375 return (opStatus)Status; 4376 } 4377 Floats[1] = std::move(z); 4378 Status |= Floats[1].subtract(Floats[0], RM); 4379 Status |= Floats[1].add(zz, RM); 4380 } 4381 return (opStatus)Status; 4382 } 4383 4384 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4385 const DoubleAPFloat &RHS, 4386 DoubleAPFloat &Out, 4387 roundingMode RM) { 4388 if (LHS.getCategory() == fcNaN) { 4389 Out = LHS; 4390 return opOK; 4391 } 4392 if (RHS.getCategory() == fcNaN) { 4393 Out = RHS; 4394 return opOK; 4395 } 4396 if (LHS.getCategory() == fcZero) { 4397 Out = RHS; 4398 return opOK; 4399 } 4400 if (RHS.getCategory() == fcZero) { 4401 Out = LHS; 4402 return opOK; 4403 } 4404 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4405 LHS.isNegative() != RHS.isNegative()) { 4406 Out.makeNaN(false, Out.isNegative(), nullptr); 4407 return opInvalidOp; 4408 } 4409 if (LHS.getCategory() == fcInfinity) { 4410 Out = LHS; 4411 return opOK; 4412 } 4413 if (RHS.getCategory() == fcInfinity) { 4414 Out = RHS; 4415 return opOK; 4416 } 4417 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4418 4419 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4420 CC(RHS.Floats[1]); 4421 assert(&A.getSemantics() == &semIEEEdouble); 4422 assert(&AA.getSemantics() == &semIEEEdouble); 4423 assert(&C.getSemantics() == &semIEEEdouble); 4424 assert(&CC.getSemantics() == &semIEEEdouble); 4425 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble); 4426 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4427 return Out.addImpl(A, AA, C, CC, RM); 4428 } 4429 4430 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4431 roundingMode RM) { 4432 return addWithSpecial(*this, RHS, *this, RM); 4433 } 4434 4435 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4436 roundingMode RM) { 4437 changeSign(); 4438 auto Ret = add(RHS, RM); 4439 changeSign(); 4440 return Ret; 4441 } 4442 4443 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4444 APFloat::roundingMode RM) { 4445 const auto &LHS = *this; 4446 auto &Out = *this; 4447 /* Interesting observation: For special categories, finding the lowest 4448 common ancestor of the following layered graph gives the correct 4449 return category: 4450 4451 NaN 4452 / \ 4453 Zero Inf 4454 \ / 4455 Normal 4456 4457 e.g. NaN * NaN = NaN 4458 Zero * Inf = NaN 4459 Normal * Zero = Zero 4460 Normal * Inf = Inf 4461 */ 4462 if (LHS.getCategory() == fcNaN) { 4463 Out = LHS; 4464 return opOK; 4465 } 4466 if (RHS.getCategory() == fcNaN) { 4467 Out = RHS; 4468 return opOK; 4469 } 4470 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4471 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4472 Out.makeNaN(false, false, nullptr); 4473 return opOK; 4474 } 4475 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4476 Out = LHS; 4477 return opOK; 4478 } 4479 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4480 Out = RHS; 4481 return opOK; 4482 } 4483 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4484 "Special cases not handled exhaustively"); 4485 4486 int Status = opOK; 4487 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4488 // t = a * c 4489 APFloat T = A; 4490 Status |= T.multiply(C, RM); 4491 if (!T.isFiniteNonZero()) { 4492 Floats[0] = T; 4493 Floats[1].makeZero(/* Neg = */ false); 4494 return (opStatus)Status; 4495 } 4496 4497 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4498 APFloat Tau = A; 4499 T.changeSign(); 4500 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4501 T.changeSign(); 4502 { 4503 // v = a * d 4504 APFloat V = A; 4505 Status |= V.multiply(D, RM); 4506 // w = b * c 4507 APFloat W = B; 4508 Status |= W.multiply(C, RM); 4509 Status |= V.add(W, RM); 4510 // tau += v + w 4511 Status |= Tau.add(V, RM); 4512 } 4513 // u = t + tau 4514 APFloat U = T; 4515 Status |= U.add(Tau, RM); 4516 4517 Floats[0] = U; 4518 if (!U.isFinite()) { 4519 Floats[1].makeZero(/* Neg = */ false); 4520 } else { 4521 // Floats[1] = (t - u) + tau 4522 Status |= T.subtract(U, RM); 4523 Status |= T.add(Tau, RM); 4524 Floats[1] = T; 4525 } 4526 return (opStatus)Status; 4527 } 4528 4529 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4530 APFloat::roundingMode RM) { 4531 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4532 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4533 auto Ret = 4534 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4535 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4536 return Ret; 4537 } 4538 4539 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4540 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4541 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4542 auto Ret = 4543 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4544 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4545 return Ret; 4546 } 4547 4548 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4549 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4550 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4551 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4552 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4553 return Ret; 4554 } 4555 4556 APFloat::opStatus 4557 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4558 const DoubleAPFloat &Addend, 4559 APFloat::roundingMode RM) { 4560 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4561 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4562 auto Ret = Tmp.fusedMultiplyAdd( 4563 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4564 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4565 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4566 return Ret; 4567 } 4568 4569 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4570 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4571 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4572 auto Ret = Tmp.roundToIntegral(RM); 4573 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4574 return Ret; 4575 } 4576 4577 void DoubleAPFloat::changeSign() { 4578 Floats[0].changeSign(); 4579 Floats[1].changeSign(); 4580 } 4581 4582 APFloat::cmpResult 4583 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4584 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4585 if (Result != cmpEqual) 4586 return Result; 4587 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4588 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4589 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4590 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4591 if (Against && !RHSAgainst) 4592 return cmpLessThan; 4593 if (!Against && RHSAgainst) 4594 return cmpGreaterThan; 4595 if (!Against && !RHSAgainst) 4596 return Result; 4597 if (Against && RHSAgainst) 4598 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4599 } 4600 return Result; 4601 } 4602 4603 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4604 return Floats[0].getCategory(); 4605 } 4606 4607 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } 4608 4609 void DoubleAPFloat::makeInf(bool Neg) { 4610 Floats[0].makeInf(Neg); 4611 Floats[1].makeZero(/* Neg = */ false); 4612 } 4613 4614 void DoubleAPFloat::makeZero(bool Neg) { 4615 Floats[0].makeZero(Neg); 4616 Floats[1].makeZero(/* Neg = */ false); 4617 } 4618 4619 void DoubleAPFloat::makeLargest(bool Neg) { 4620 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4621 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 4622 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 4623 if (Neg) 4624 changeSign(); 4625 } 4626 4627 void DoubleAPFloat::makeSmallest(bool Neg) { 4628 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4629 Floats[0].makeSmallest(Neg); 4630 Floats[1].makeZero(/* Neg = */ false); 4631 } 4632 4633 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 4634 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4635 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 4636 if (Neg) 4637 Floats[0].changeSign(); 4638 Floats[1].makeZero(/* Neg = */ false); 4639 } 4640 4641 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4642 Floats[0].makeNaN(SNaN, Neg, fill); 4643 Floats[1].makeZero(/* Neg = */ false); 4644 } 4645 4646 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 4647 auto Result = Floats[0].compare(RHS.Floats[0]); 4648 // |Float[0]| > |Float[1]| 4649 if (Result == APFloat::cmpEqual) 4650 return Floats[1].compare(RHS.Floats[1]); 4651 return Result; 4652 } 4653 4654 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 4655 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 4656 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 4657 } 4658 4659 hash_code hash_value(const DoubleAPFloat &Arg) { 4660 if (Arg.Floats) 4661 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 4662 return hash_combine(Arg.Semantics); 4663 } 4664 4665 APInt DoubleAPFloat::bitcastToAPInt() const { 4666 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4667 uint64_t Data[] = { 4668 Floats[0].bitcastToAPInt().getRawData()[0], 4669 Floats[1].bitcastToAPInt().getRawData()[0], 4670 }; 4671 return APInt(128, 2, Data); 4672 } 4673 4674 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 4675 roundingMode RM) { 4676 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4677 APFloat Tmp(semPPCDoubleDoubleLegacy); 4678 auto Ret = Tmp.convertFromString(S, RM); 4679 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4680 return Ret; 4681 } 4682 4683 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 4684 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4685 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4686 auto Ret = Tmp.next(nextDown); 4687 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4688 return Ret; 4689 } 4690 4691 APFloat::opStatus 4692 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 4693 unsigned int Width, bool IsSigned, 4694 roundingMode RM, bool *IsExact) const { 4695 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4696 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4697 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 4698 } 4699 4700 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 4701 bool IsSigned, 4702 roundingMode RM) { 4703 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4704 APFloat Tmp(semPPCDoubleDoubleLegacy); 4705 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 4706 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4707 return Ret; 4708 } 4709 4710 APFloat::opStatus 4711 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 4712 unsigned int InputSize, 4713 bool IsSigned, roundingMode RM) { 4714 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4715 APFloat Tmp(semPPCDoubleDoubleLegacy); 4716 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 4717 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4718 return Ret; 4719 } 4720 4721 APFloat::opStatus 4722 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 4723 unsigned int InputSize, 4724 bool IsSigned, roundingMode RM) { 4725 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4726 APFloat Tmp(semPPCDoubleDoubleLegacy); 4727 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 4728 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4729 return Ret; 4730 } 4731 4732 unsigned int DoubleAPFloat::convertToHexString(char *DST, 4733 unsigned int HexDigits, 4734 bool UpperCase, 4735 roundingMode RM) const { 4736 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4737 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4738 .convertToHexString(DST, HexDigits, UpperCase, RM); 4739 } 4740 4741 bool DoubleAPFloat::isDenormal() const { 4742 return getCategory() == fcNormal && 4743 (Floats[0].isDenormal() || Floats[1].isDenormal() || 4744 // (double)(Hi + Lo) == Hi defines a normal number. 4745 Floats[0] != Floats[0] + Floats[1]); 4746 } 4747 4748 bool DoubleAPFloat::isSmallest() const { 4749 if (getCategory() != fcNormal) 4750 return false; 4751 DoubleAPFloat Tmp(*this); 4752 Tmp.makeSmallest(this->isNegative()); 4753 return Tmp.compare(*this) == cmpEqual; 4754 } 4755 4756 bool DoubleAPFloat::isLargest() const { 4757 if (getCategory() != fcNormal) 4758 return false; 4759 DoubleAPFloat Tmp(*this); 4760 Tmp.makeLargest(this->isNegative()); 4761 return Tmp.compare(*this) == cmpEqual; 4762 } 4763 4764 bool DoubleAPFloat::isInteger() const { 4765 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4766 return Floats[0].isInteger() && Floats[1].isInteger(); 4767 } 4768 4769 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 4770 unsigned FormatPrecision, 4771 unsigned FormatMaxPadding, 4772 bool TruncateZero) const { 4773 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4774 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4775 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 4776 } 4777 4778 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 4779 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4780 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4781 if (!inv) 4782 return Tmp.getExactInverse(nullptr); 4783 APFloat Inv(semPPCDoubleDoubleLegacy); 4784 auto Ret = Tmp.getExactInverse(&Inv); 4785 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 4786 return Ret; 4787 } 4788 4789 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp, 4790 APFloat::roundingMode RM) { 4791 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4792 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 4793 scalbn(Arg.Floats[1], Exp, RM)); 4794 } 4795 4796 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 4797 APFloat::roundingMode RM) { 4798 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4799 APFloat First = frexp(Arg.Floats[0], Exp, RM); 4800 APFloat Second = Arg.Floats[1]; 4801 if (Arg.getCategory() == APFloat::fcNormal) 4802 Second = scalbn(Second, -Exp, RM); 4803 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 4804 } 4805 4806 } // namespace detail 4807 4808 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 4809 if (usesLayout<IEEEFloat>(Semantics)) { 4810 new (&IEEE) IEEEFloat(std::move(F)); 4811 return; 4812 } 4813 if (usesLayout<DoubleAPFloat>(Semantics)) { 4814 const fltSemantics& S = F.getSemantics(); 4815 new (&Double) 4816 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 4817 APFloat(semIEEEdouble)); 4818 return; 4819 } 4820 llvm_unreachable("Unexpected semantics"); 4821 } 4822 4823 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 4824 roundingMode RM) { 4825 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 4826 } 4827 4828 hash_code hash_value(const APFloat &Arg) { 4829 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 4830 return hash_value(Arg.U.IEEE); 4831 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 4832 return hash_value(Arg.U.Double); 4833 llvm_unreachable("Unexpected semantics"); 4834 } 4835 4836 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 4837 : APFloat(Semantics) { 4838 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 4839 assert(StatusOrErr && "Invalid floating point representation"); 4840 consumeError(StatusOrErr.takeError()); 4841 } 4842 4843 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 4844 roundingMode RM, bool *losesInfo) { 4845 if (&getSemantics() == &ToSemantics) { 4846 *losesInfo = false; 4847 return opOK; 4848 } 4849 if (usesLayout<IEEEFloat>(getSemantics()) && 4850 usesLayout<IEEEFloat>(ToSemantics)) 4851 return U.IEEE.convert(ToSemantics, RM, losesInfo); 4852 if (usesLayout<IEEEFloat>(getSemantics()) && 4853 usesLayout<DoubleAPFloat>(ToSemantics)) { 4854 assert(&ToSemantics == &semPPCDoubleDouble); 4855 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 4856 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 4857 return Ret; 4858 } 4859 if (usesLayout<DoubleAPFloat>(getSemantics()) && 4860 usesLayout<IEEEFloat>(ToSemantics)) { 4861 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 4862 *this = APFloat(std::move(getIEEE()), ToSemantics); 4863 return Ret; 4864 } 4865 llvm_unreachable("Unexpected semantics"); 4866 } 4867 4868 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 4869 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 4870 } 4871 4872 void APFloat::print(raw_ostream &OS) const { 4873 SmallVector<char, 16> Buffer; 4874 toString(Buffer); 4875 OS << Buffer << "\n"; 4876 } 4877 4878 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 4879 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 4880 #endif 4881 4882 void APFloat::Profile(FoldingSetNodeID &NID) const { 4883 NID.Add(bitcastToAPInt()); 4884 } 4885 4886 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 4887 an APSInt, whose initial bit-width and signed-ness are used to determine the 4888 precision of the conversion. 4889 */ 4890 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 4891 roundingMode rounding_mode, 4892 bool *isExact) const { 4893 unsigned bitWidth = result.getBitWidth(); 4894 SmallVector<uint64_t, 4> parts(result.getNumWords()); 4895 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 4896 rounding_mode, isExact); 4897 // Keeps the original signed-ness. 4898 result = APInt(bitWidth, parts); 4899 return status; 4900 } 4901 4902 double APFloat::convertToDouble() const { 4903 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 4904 return getIEEE().convertToDouble(); 4905 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 4906 "Float semantics is not representable by IEEEdouble"); 4907 APFloat Temp = *this; 4908 bool LosesInfo; 4909 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 4910 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 4911 (void)St; 4912 return Temp.getIEEE().convertToDouble(); 4913 } 4914 4915 float APFloat::convertToFloat() const { 4916 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 4917 return getIEEE().convertToFloat(); 4918 assert(getSemantics().isRepresentableBy(semIEEEsingle) && 4919 "Float semantics is not representable by IEEEsingle"); 4920 APFloat Temp = *this; 4921 bool LosesInfo; 4922 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 4923 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 4924 (void)St; 4925 return Temp.getIEEE().convertToFloat(); 4926 } 4927 4928 } // namespace llvm 4929 4930 #undef APFLOAT_DISPATCH_ON_SEMANTICS 4931