1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision floating 10 // point values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Config/llvm-config.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <cstring> 27 #include <limits.h> 28 29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 30 do { \ 31 if (usesLayout<IEEEFloat>(getSemantics())) \ 32 return U.IEEE.METHOD_CALL; \ 33 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 34 return U.Double.METHOD_CALL; \ 35 llvm_unreachable("Unexpected semantics"); \ 36 } while (false) 37 38 using namespace llvm; 39 40 /// A macro used to combine two fcCategory enums into one key which can be used 41 /// in a switch statement to classify how the interaction of two APFloat's 42 /// categories affects an operation. 43 /// 44 /// TODO: If clang source code is ever allowed to use constexpr in its own 45 /// codebase, change this into a static inline function. 46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 47 48 /* Assumed in hexadecimal significand parsing, and conversion to 49 hexadecimal strings. */ 50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 51 52 namespace llvm { 53 /* Represents floating point arithmetic semantics. */ 54 struct fltSemantics { 55 /* The largest E such that 2^E is representable; this matches the 56 definition of IEEE 754. */ 57 APFloatBase::ExponentType maxExponent; 58 59 /* The smallest E such that 2^E is a normalized number; this 60 matches the definition of IEEE 754. */ 61 APFloatBase::ExponentType minExponent; 62 63 /* Number of bits in the significand. This includes the integer 64 bit. */ 65 unsigned int precision; 66 67 /* Number of bits actually used in the semantics. */ 68 unsigned int sizeInBits; 69 70 // Returns true if any number described by this semantics can be precisely 71 // represented by the specified semantics. 72 bool isRepresentableBy(const fltSemantics &S) const { 73 return maxExponent <= S.maxExponent && minExponent >= S.minExponent && 74 precision <= S.precision; 75 } 76 }; 77 78 static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; 79 static const fltSemantics semBFloat = {127, -126, 8, 16}; 80 static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; 81 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; 82 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; 83 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 84 static const fltSemantics semBogus = {0, 0, 0, 0}; 85 86 /* The IBM double-double semantics. Such a number consists of a pair of IEEE 87 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, 88 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. 89 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent 90 to each other, and two 11-bit exponents. 91 92 Note: we need to make the value different from semBogus as otherwise 93 an unsafe optimization may collapse both values to a single address, 94 and we heavily rely on them having distinct addresses. */ 95 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; 96 97 /* These are legacy semantics for the fallback, inaccrurate implementation of 98 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the 99 operation. It's equivalent to having an IEEE number with consecutive 106 100 bits of mantissa and 11 bits of exponent. 101 102 It's not equivalent to IBM double-double. For example, a legit IBM 103 double-double, 1 + epsilon: 104 105 1 + epsilon = 1 + (1 >> 1076) 106 107 is not representable by a consecutive 106 bits of mantissa. 108 109 Currently, these semantics are used in the following way: 110 111 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> 112 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> 113 semPPCDoubleDoubleLegacy -> IEEE operations 114 115 We use bitcastToAPInt() to get the bit representation (in APInt) of the 116 underlying IEEEdouble, then use the APInt constructor to construct the 117 legacy IEEE float. 118 119 TODO: Implement all operations in semPPCDoubleDouble, and delete these 120 semantics. */ 121 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 122 53 + 53, 128}; 123 124 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 125 switch (S) { 126 case S_IEEEhalf: 127 return IEEEhalf(); 128 case S_BFloat: 129 return BFloat(); 130 case S_IEEEsingle: 131 return IEEEsingle(); 132 case S_IEEEdouble: 133 return IEEEdouble(); 134 case S_x87DoubleExtended: 135 return x87DoubleExtended(); 136 case S_IEEEquad: 137 return IEEEquad(); 138 case S_PPCDoubleDouble: 139 return PPCDoubleDouble(); 140 } 141 llvm_unreachable("Unrecognised floating semantics"); 142 } 143 144 APFloatBase::Semantics 145 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 146 if (&Sem == &llvm::APFloat::IEEEhalf()) 147 return S_IEEEhalf; 148 else if (&Sem == &llvm::APFloat::BFloat()) 149 return S_BFloat; 150 else if (&Sem == &llvm::APFloat::IEEEsingle()) 151 return S_IEEEsingle; 152 else if (&Sem == &llvm::APFloat::IEEEdouble()) 153 return S_IEEEdouble; 154 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 155 return S_x87DoubleExtended; 156 else if (&Sem == &llvm::APFloat::IEEEquad()) 157 return S_IEEEquad; 158 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 159 return S_PPCDoubleDouble; 160 else 161 llvm_unreachable("Unknown floating semantics"); 162 } 163 164 const fltSemantics &APFloatBase::IEEEhalf() { 165 return semIEEEhalf; 166 } 167 const fltSemantics &APFloatBase::BFloat() { 168 return semBFloat; 169 } 170 const fltSemantics &APFloatBase::IEEEsingle() { 171 return semIEEEsingle; 172 } 173 const fltSemantics &APFloatBase::IEEEdouble() { 174 return semIEEEdouble; 175 } 176 const fltSemantics &APFloatBase::IEEEquad() { 177 return semIEEEquad; 178 } 179 const fltSemantics &APFloatBase::x87DoubleExtended() { 180 return semX87DoubleExtended; 181 } 182 const fltSemantics &APFloatBase::Bogus() { 183 return semBogus; 184 } 185 const fltSemantics &APFloatBase::PPCDoubleDouble() { 186 return semPPCDoubleDouble; 187 } 188 189 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 190 constexpr RoundingMode APFloatBase::rmTowardPositive; 191 constexpr RoundingMode APFloatBase::rmTowardNegative; 192 constexpr RoundingMode APFloatBase::rmTowardZero; 193 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 194 195 /* A tight upper bound on number of parts required to hold the value 196 pow(5, power) is 197 198 power * 815 / (351 * integerPartWidth) + 1 199 200 However, whilst the result may require only this many parts, 201 because we are multiplying two values to get it, the 202 multiplication may require an extra part with the excess part 203 being zero (consider the trivial case of 1 * 1, tcFullMultiply 204 requires two parts to hold the single-part result). So we add an 205 extra one to guarantee enough space whilst multiplying. */ 206 const unsigned int maxExponent = 16383; 207 const unsigned int maxPrecision = 113; 208 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 209 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 210 211 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 212 return semantics.precision; 213 } 214 APFloatBase::ExponentType 215 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 216 return semantics.maxExponent; 217 } 218 APFloatBase::ExponentType 219 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 220 return semantics.minExponent; 221 } 222 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 223 return semantics.sizeInBits; 224 } 225 226 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 227 return Sem.sizeInBits; 228 } 229 230 /* A bunch of private, handy routines. */ 231 232 static inline Error createError(const Twine &Err) { 233 return make_error<StringError>(Err, inconvertibleErrorCode()); 234 } 235 236 static inline unsigned int 237 partCountForBits(unsigned int bits) 238 { 239 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 240 } 241 242 /* Returns 0U-9U. Return values >= 10U are not digits. */ 243 static inline unsigned int 244 decDigitValue(unsigned int c) 245 { 246 return c - '0'; 247 } 248 249 /* Return the value of a decimal exponent of the form 250 [+-]ddddddd. 251 252 If the exponent overflows, returns a large exponent with the 253 appropriate sign. */ 254 static Expected<int> readExponent(StringRef::iterator begin, 255 StringRef::iterator end) { 256 bool isNegative; 257 unsigned int absExponent; 258 const unsigned int overlargeExponent = 24000; /* FIXME. */ 259 StringRef::iterator p = begin; 260 261 // Treat no exponent as 0 to match binutils 262 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 263 return 0; 264 } 265 266 isNegative = (*p == '-'); 267 if (*p == '-' || *p == '+') { 268 p++; 269 if (p == end) 270 return createError("Exponent has no digits"); 271 } 272 273 absExponent = decDigitValue(*p++); 274 if (absExponent >= 10U) 275 return createError("Invalid character in exponent"); 276 277 for (; p != end; ++p) { 278 unsigned int value; 279 280 value = decDigitValue(*p); 281 if (value >= 10U) 282 return createError("Invalid character in exponent"); 283 284 absExponent = absExponent * 10U + value; 285 if (absExponent >= overlargeExponent) { 286 absExponent = overlargeExponent; 287 break; 288 } 289 } 290 291 if (isNegative) 292 return -(int) absExponent; 293 else 294 return (int) absExponent; 295 } 296 297 /* This is ugly and needs cleaning up, but I don't immediately see 298 how whilst remaining safe. */ 299 static Expected<int> totalExponent(StringRef::iterator p, 300 StringRef::iterator end, 301 int exponentAdjustment) { 302 int unsignedExponent; 303 bool negative, overflow; 304 int exponent = 0; 305 306 if (p == end) 307 return createError("Exponent has no digits"); 308 309 negative = *p == '-'; 310 if (*p == '-' || *p == '+') { 311 p++; 312 if (p == end) 313 return createError("Exponent has no digits"); 314 } 315 316 unsignedExponent = 0; 317 overflow = false; 318 for (; p != end; ++p) { 319 unsigned int value; 320 321 value = decDigitValue(*p); 322 if (value >= 10U) 323 return createError("Invalid character in exponent"); 324 325 unsignedExponent = unsignedExponent * 10 + value; 326 if (unsignedExponent > 32767) { 327 overflow = true; 328 break; 329 } 330 } 331 332 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 333 overflow = true; 334 335 if (!overflow) { 336 exponent = unsignedExponent; 337 if (negative) 338 exponent = -exponent; 339 exponent += exponentAdjustment; 340 if (exponent > 32767 || exponent < -32768) 341 overflow = true; 342 } 343 344 if (overflow) 345 exponent = negative ? -32768: 32767; 346 347 return exponent; 348 } 349 350 static Expected<StringRef::iterator> 351 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 352 StringRef::iterator *dot) { 353 StringRef::iterator p = begin; 354 *dot = end; 355 while (p != end && *p == '0') 356 p++; 357 358 if (p != end && *p == '.') { 359 *dot = p++; 360 361 if (end - begin == 1) 362 return createError("Significand has no digits"); 363 364 while (p != end && *p == '0') 365 p++; 366 } 367 368 return p; 369 } 370 371 /* Given a normal decimal floating point number of the form 372 373 dddd.dddd[eE][+-]ddd 374 375 where the decimal point and exponent are optional, fill out the 376 structure D. Exponent is appropriate if the significand is 377 treated as an integer, and normalizedExponent if the significand 378 is taken to have the decimal point after a single leading 379 non-zero digit. 380 381 If the value is zero, V->firstSigDigit points to a non-digit, and 382 the return exponent is zero. 383 */ 384 struct decimalInfo { 385 const char *firstSigDigit; 386 const char *lastSigDigit; 387 int exponent; 388 int normalizedExponent; 389 }; 390 391 static Error interpretDecimal(StringRef::iterator begin, 392 StringRef::iterator end, decimalInfo *D) { 393 StringRef::iterator dot = end; 394 395 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 396 if (!PtrOrErr) 397 return PtrOrErr.takeError(); 398 StringRef::iterator p = *PtrOrErr; 399 400 D->firstSigDigit = p; 401 D->exponent = 0; 402 D->normalizedExponent = 0; 403 404 for (; p != end; ++p) { 405 if (*p == '.') { 406 if (dot != end) 407 return createError("String contains multiple dots"); 408 dot = p++; 409 if (p == end) 410 break; 411 } 412 if (decDigitValue(*p) >= 10U) 413 break; 414 } 415 416 if (p != end) { 417 if (*p != 'e' && *p != 'E') 418 return createError("Invalid character in significand"); 419 if (p == begin) 420 return createError("Significand has no digits"); 421 if (dot != end && p - begin == 1) 422 return createError("Significand has no digits"); 423 424 /* p points to the first non-digit in the string */ 425 auto ExpOrErr = readExponent(p + 1, end); 426 if (!ExpOrErr) 427 return ExpOrErr.takeError(); 428 D->exponent = *ExpOrErr; 429 430 /* Implied decimal point? */ 431 if (dot == end) 432 dot = p; 433 } 434 435 /* If number is all zeroes accept any exponent. */ 436 if (p != D->firstSigDigit) { 437 /* Drop insignificant trailing zeroes. */ 438 if (p != begin) { 439 do 440 do 441 p--; 442 while (p != begin && *p == '0'); 443 while (p != begin && *p == '.'); 444 } 445 446 /* Adjust the exponents for any decimal point. */ 447 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 448 D->normalizedExponent = (D->exponent + 449 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 450 - (dot > D->firstSigDigit && dot < p))); 451 } 452 453 D->lastSigDigit = p; 454 return Error::success(); 455 } 456 457 /* Return the trailing fraction of a hexadecimal number. 458 DIGITVALUE is the first hex digit of the fraction, P points to 459 the next digit. */ 460 static Expected<lostFraction> 461 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 462 unsigned int digitValue) { 463 unsigned int hexDigit; 464 465 /* If the first trailing digit isn't 0 or 8 we can work out the 466 fraction immediately. */ 467 if (digitValue > 8) 468 return lfMoreThanHalf; 469 else if (digitValue < 8 && digitValue > 0) 470 return lfLessThanHalf; 471 472 // Otherwise we need to find the first non-zero digit. 473 while (p != end && (*p == '0' || *p == '.')) 474 p++; 475 476 if (p == end) 477 return createError("Invalid trailing hexadecimal fraction!"); 478 479 hexDigit = hexDigitValue(*p); 480 481 /* If we ran off the end it is exactly zero or one-half, otherwise 482 a little more. */ 483 if (hexDigit == -1U) 484 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 485 else 486 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 487 } 488 489 /* Return the fraction lost were a bignum truncated losing the least 490 significant BITS bits. */ 491 static lostFraction 492 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 493 unsigned int partCount, 494 unsigned int bits) 495 { 496 unsigned int lsb; 497 498 lsb = APInt::tcLSB(parts, partCount); 499 500 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 501 if (bits <= lsb) 502 return lfExactlyZero; 503 if (bits == lsb + 1) 504 return lfExactlyHalf; 505 if (bits <= partCount * APFloatBase::integerPartWidth && 506 APInt::tcExtractBit(parts, bits - 1)) 507 return lfMoreThanHalf; 508 509 return lfLessThanHalf; 510 } 511 512 /* Shift DST right BITS bits noting lost fraction. */ 513 static lostFraction 514 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 515 { 516 lostFraction lost_fraction; 517 518 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 519 520 APInt::tcShiftRight(dst, parts, bits); 521 522 return lost_fraction; 523 } 524 525 /* Combine the effect of two lost fractions. */ 526 static lostFraction 527 combineLostFractions(lostFraction moreSignificant, 528 lostFraction lessSignificant) 529 { 530 if (lessSignificant != lfExactlyZero) { 531 if (moreSignificant == lfExactlyZero) 532 moreSignificant = lfLessThanHalf; 533 else if (moreSignificant == lfExactlyHalf) 534 moreSignificant = lfMoreThanHalf; 535 } 536 537 return moreSignificant; 538 } 539 540 /* The error from the true value, in half-ulps, on multiplying two 541 floating point numbers, which differ from the value they 542 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 543 than the returned value. 544 545 See "How to Read Floating Point Numbers Accurately" by William D 546 Clinger. */ 547 static unsigned int 548 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 549 { 550 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 551 552 if (HUerr1 + HUerr2 == 0) 553 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 554 else 555 return inexactMultiply + 2 * (HUerr1 + HUerr2); 556 } 557 558 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 559 when the least significant BITS are truncated. BITS cannot be 560 zero. */ 561 static APFloatBase::integerPart 562 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, 563 bool isNearest) { 564 unsigned int count, partBits; 565 APFloatBase::integerPart part, boundary; 566 567 assert(bits != 0); 568 569 bits--; 570 count = bits / APFloatBase::integerPartWidth; 571 partBits = bits % APFloatBase::integerPartWidth + 1; 572 573 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); 574 575 if (isNearest) 576 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); 577 else 578 boundary = 0; 579 580 if (count == 0) { 581 if (part - boundary <= boundary - part) 582 return part - boundary; 583 else 584 return boundary - part; 585 } 586 587 if (part == boundary) { 588 while (--count) 589 if (parts[count]) 590 return ~(APFloatBase::integerPart) 0; /* A lot. */ 591 592 return parts[0]; 593 } else if (part == boundary - 1) { 594 while (--count) 595 if (~parts[count]) 596 return ~(APFloatBase::integerPart) 0; /* A lot. */ 597 598 return -parts[0]; 599 } 600 601 return ~(APFloatBase::integerPart) 0; /* A lot. */ 602 } 603 604 /* Place pow(5, power) in DST, and return the number of parts used. 605 DST must be at least one part larger than size of the answer. */ 606 static unsigned int 607 powerOf5(APFloatBase::integerPart *dst, unsigned int power) { 608 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; 609 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 610 pow5s[0] = 78125 * 5; 611 612 unsigned int partsCount[16] = { 1 }; 613 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 614 unsigned int result; 615 assert(power <= maxExponent); 616 617 p1 = dst; 618 p2 = scratch; 619 620 *p1 = firstEightPowers[power & 7]; 621 power >>= 3; 622 623 result = 1; 624 pow5 = pow5s; 625 626 for (unsigned int n = 0; power; power >>= 1, n++) { 627 unsigned int pc; 628 629 pc = partsCount[n]; 630 631 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 632 if (pc == 0) { 633 pc = partsCount[n - 1]; 634 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 635 pc *= 2; 636 if (pow5[pc - 1] == 0) 637 pc--; 638 partsCount[n] = pc; 639 } 640 641 if (power & 1) { 642 APFloatBase::integerPart *tmp; 643 644 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 645 result += pc; 646 if (p2[result - 1] == 0) 647 result--; 648 649 /* Now result is in p1 with partsCount parts and p2 is scratch 650 space. */ 651 tmp = p1; 652 p1 = p2; 653 p2 = tmp; 654 } 655 656 pow5 += pc; 657 } 658 659 if (p1 != dst) 660 APInt::tcAssign(dst, p1, result); 661 662 return result; 663 } 664 665 /* Zero at the end to avoid modular arithmetic when adding one; used 666 when rounding up during hexadecimal output. */ 667 static const char hexDigitsLower[] = "0123456789abcdef0"; 668 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 669 static const char infinityL[] = "infinity"; 670 static const char infinityU[] = "INFINITY"; 671 static const char NaNL[] = "nan"; 672 static const char NaNU[] = "NAN"; 673 674 /* Write out an integerPart in hexadecimal, starting with the most 675 significant nibble. Write out exactly COUNT hexdigits, return 676 COUNT. */ 677 static unsigned int 678 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, 679 const char *hexDigitChars) 680 { 681 unsigned int result = count; 682 683 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); 684 685 part >>= (APFloatBase::integerPartWidth - 4 * count); 686 while (count--) { 687 dst[count] = hexDigitChars[part & 0xf]; 688 part >>= 4; 689 } 690 691 return result; 692 } 693 694 /* Write out an unsigned decimal integer. */ 695 static char * 696 writeUnsignedDecimal (char *dst, unsigned int n) 697 { 698 char buff[40], *p; 699 700 p = buff; 701 do 702 *p++ = '0' + n % 10; 703 while (n /= 10); 704 705 do 706 *dst++ = *--p; 707 while (p != buff); 708 709 return dst; 710 } 711 712 /* Write out a signed decimal integer. */ 713 static char * 714 writeSignedDecimal (char *dst, int value) 715 { 716 if (value < 0) { 717 *dst++ = '-'; 718 dst = writeUnsignedDecimal(dst, -(unsigned) value); 719 } else 720 dst = writeUnsignedDecimal(dst, value); 721 722 return dst; 723 } 724 725 namespace detail { 726 /* Constructors. */ 727 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 728 unsigned int count; 729 730 semantics = ourSemantics; 731 count = partCount(); 732 if (count > 1) 733 significand.parts = new integerPart[count]; 734 } 735 736 void IEEEFloat::freeSignificand() { 737 if (needsCleanup()) 738 delete [] significand.parts; 739 } 740 741 void IEEEFloat::assign(const IEEEFloat &rhs) { 742 assert(semantics == rhs.semantics); 743 744 sign = rhs.sign; 745 category = rhs.category; 746 exponent = rhs.exponent; 747 if (isFiniteNonZero() || category == fcNaN) 748 copySignificand(rhs); 749 } 750 751 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 752 assert(isFiniteNonZero() || category == fcNaN); 753 assert(rhs.partCount() >= partCount()); 754 755 APInt::tcAssign(significandParts(), rhs.significandParts(), 756 partCount()); 757 } 758 759 /* Make this number a NaN, with an arbitrary but deterministic value 760 for the significand. If double or longer, this is a signalling NaN, 761 which may not be ideal. If float, this is QNaN(0). */ 762 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 763 category = fcNaN; 764 sign = Negative; 765 exponent = exponentNaN(); 766 767 integerPart *significand = significandParts(); 768 unsigned numParts = partCount(); 769 770 // Set the significand bits to the fill. 771 if (!fill || fill->getNumWords() < numParts) 772 APInt::tcSet(significand, 0, numParts); 773 if (fill) { 774 APInt::tcAssign(significand, fill->getRawData(), 775 std::min(fill->getNumWords(), numParts)); 776 777 // Zero out the excess bits of the significand. 778 unsigned bitsToPreserve = semantics->precision - 1; 779 unsigned part = bitsToPreserve / 64; 780 bitsToPreserve %= 64; 781 significand[part] &= ((1ULL << bitsToPreserve) - 1); 782 for (part++; part != numParts; ++part) 783 significand[part] = 0; 784 } 785 786 unsigned QNaNBit = semantics->precision - 2; 787 788 if (SNaN) { 789 // We always have to clear the QNaN bit to make it an SNaN. 790 APInt::tcClearBit(significand, QNaNBit); 791 792 // If there are no bits set in the payload, we have to set 793 // *something* to make it a NaN instead of an infinity; 794 // conventionally, this is the next bit down from the QNaN bit. 795 if (APInt::tcIsZero(significand, numParts)) 796 APInt::tcSetBit(significand, QNaNBit - 1); 797 } else { 798 // We always have to set the QNaN bit to make it a QNaN. 799 APInt::tcSetBit(significand, QNaNBit); 800 } 801 802 // For x87 extended precision, we want to make a NaN, not a 803 // pseudo-NaN. Maybe we should expose the ability to make 804 // pseudo-NaNs? 805 if (semantics == &semX87DoubleExtended) 806 APInt::tcSetBit(significand, QNaNBit + 1); 807 } 808 809 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 810 if (this != &rhs) { 811 if (semantics != rhs.semantics) { 812 freeSignificand(); 813 initialize(rhs.semantics); 814 } 815 assign(rhs); 816 } 817 818 return *this; 819 } 820 821 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 822 freeSignificand(); 823 824 semantics = rhs.semantics; 825 significand = rhs.significand; 826 exponent = rhs.exponent; 827 category = rhs.category; 828 sign = rhs.sign; 829 830 rhs.semantics = &semBogus; 831 return *this; 832 } 833 834 bool IEEEFloat::isDenormal() const { 835 return isFiniteNonZero() && (exponent == semantics->minExponent) && 836 (APInt::tcExtractBit(significandParts(), 837 semantics->precision - 1) == 0); 838 } 839 840 bool IEEEFloat::isSmallest() const { 841 // The smallest number by magnitude in our format will be the smallest 842 // denormal, i.e. the floating point number with exponent being minimum 843 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 844 return isFiniteNonZero() && exponent == semantics->minExponent && 845 significandMSB() == 0; 846 } 847 848 bool IEEEFloat::isSignificandAllOnes() const { 849 // Test if the significand excluding the integral bit is all ones. This allows 850 // us to test for binade boundaries. 851 const integerPart *Parts = significandParts(); 852 const unsigned PartCount = partCountForBits(semantics->precision); 853 for (unsigned i = 0; i < PartCount - 1; i++) 854 if (~Parts[i]) 855 return false; 856 857 // Set the unused high bits to all ones when we compare. 858 const unsigned NumHighBits = 859 PartCount*integerPartWidth - semantics->precision + 1; 860 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 861 "Can not have more high bits to fill than integerPartWidth"); 862 const integerPart HighBitFill = 863 ~integerPart(0) << (integerPartWidth - NumHighBits); 864 if (~(Parts[PartCount - 1] | HighBitFill)) 865 return false; 866 867 return true; 868 } 869 870 bool IEEEFloat::isSignificandAllZeros() const { 871 // Test if the significand excluding the integral bit is all zeros. This 872 // allows us to test for binade boundaries. 873 const integerPart *Parts = significandParts(); 874 const unsigned PartCount = partCountForBits(semantics->precision); 875 876 for (unsigned i = 0; i < PartCount - 1; i++) 877 if (Parts[i]) 878 return false; 879 880 // Compute how many bits are used in the final word. 881 const unsigned NumHighBits = 882 PartCount*integerPartWidth - semantics->precision + 1; 883 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 884 "clear than integerPartWidth"); 885 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 886 887 if (Parts[PartCount - 1] & HighBitMask) 888 return false; 889 890 return true; 891 } 892 893 bool IEEEFloat::isLargest() const { 894 // The largest number by magnitude in our format will be the floating point 895 // number with maximum exponent and with significand that is all ones. 896 return isFiniteNonZero() && exponent == semantics->maxExponent 897 && isSignificandAllOnes(); 898 } 899 900 bool IEEEFloat::isInteger() const { 901 // This could be made more efficient; I'm going for obviously correct. 902 if (!isFinite()) return false; 903 IEEEFloat truncated = *this; 904 truncated.roundToIntegral(rmTowardZero); 905 return compare(truncated) == cmpEqual; 906 } 907 908 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 909 if (this == &rhs) 910 return true; 911 if (semantics != rhs.semantics || 912 category != rhs.category || 913 sign != rhs.sign) 914 return false; 915 if (category==fcZero || category==fcInfinity) 916 return true; 917 918 if (isFiniteNonZero() && exponent != rhs.exponent) 919 return false; 920 921 return std::equal(significandParts(), significandParts() + partCount(), 922 rhs.significandParts()); 923 } 924 925 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 926 initialize(&ourSemantics); 927 sign = 0; 928 category = fcNormal; 929 zeroSignificand(); 930 exponent = ourSemantics.precision - 1; 931 significandParts()[0] = value; 932 normalize(rmNearestTiesToEven, lfExactlyZero); 933 } 934 935 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 936 initialize(&ourSemantics); 937 makeZero(false); 938 } 939 940 // Delegate to the previous constructor, because later copy constructor may 941 // actually inspects category, which can't be garbage. 942 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 943 : IEEEFloat(ourSemantics) {} 944 945 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 946 initialize(rhs.semantics); 947 assign(rhs); 948 } 949 950 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 951 *this = std::move(rhs); 952 } 953 954 IEEEFloat::~IEEEFloat() { freeSignificand(); } 955 956 unsigned int IEEEFloat::partCount() const { 957 return partCountForBits(semantics->precision + 1); 958 } 959 960 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 961 return const_cast<IEEEFloat *>(this)->significandParts(); 962 } 963 964 IEEEFloat::integerPart *IEEEFloat::significandParts() { 965 if (partCount() > 1) 966 return significand.parts; 967 else 968 return &significand.part; 969 } 970 971 void IEEEFloat::zeroSignificand() { 972 APInt::tcSet(significandParts(), 0, partCount()); 973 } 974 975 /* Increment an fcNormal floating point number's significand. */ 976 void IEEEFloat::incrementSignificand() { 977 integerPart carry; 978 979 carry = APInt::tcIncrement(significandParts(), partCount()); 980 981 /* Our callers should never cause us to overflow. */ 982 assert(carry == 0); 983 (void)carry; 984 } 985 986 /* Add the significand of the RHS. Returns the carry flag. */ 987 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 988 integerPart *parts; 989 990 parts = significandParts(); 991 992 assert(semantics == rhs.semantics); 993 assert(exponent == rhs.exponent); 994 995 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 996 } 997 998 /* Subtract the significand of the RHS with a borrow flag. Returns 999 the borrow flag. */ 1000 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 1001 integerPart borrow) { 1002 integerPart *parts; 1003 1004 parts = significandParts(); 1005 1006 assert(semantics == rhs.semantics); 1007 assert(exponent == rhs.exponent); 1008 1009 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 1010 partCount()); 1011 } 1012 1013 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 1014 on to the full-precision result of the multiplication. Returns the 1015 lost fraction. */ 1016 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 1017 IEEEFloat addend) { 1018 unsigned int omsb; // One, not zero, based MSB. 1019 unsigned int partsCount, newPartsCount, precision; 1020 integerPart *lhsSignificand; 1021 integerPart scratch[4]; 1022 integerPart *fullSignificand; 1023 lostFraction lost_fraction; 1024 bool ignored; 1025 1026 assert(semantics == rhs.semantics); 1027 1028 precision = semantics->precision; 1029 1030 // Allocate space for twice as many bits as the original significand, plus one 1031 // extra bit for the addition to overflow into. 1032 newPartsCount = partCountForBits(precision * 2 + 1); 1033 1034 if (newPartsCount > 4) 1035 fullSignificand = new integerPart[newPartsCount]; 1036 else 1037 fullSignificand = scratch; 1038 1039 lhsSignificand = significandParts(); 1040 partsCount = partCount(); 1041 1042 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 1043 rhs.significandParts(), partsCount, partsCount); 1044 1045 lost_fraction = lfExactlyZero; 1046 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1047 exponent += rhs.exponent; 1048 1049 // Assume the operands involved in the multiplication are single-precision 1050 // FP, and the two multiplicants are: 1051 // *this = a23 . a22 ... a0 * 2^e1 1052 // rhs = b23 . b22 ... b0 * 2^e2 1053 // the result of multiplication is: 1054 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 1055 // Note that there are three significant bits at the left-hand side of the 1056 // radix point: two for the multiplication, and an overflow bit for the 1057 // addition (that will always be zero at this point). Move the radix point 1058 // toward left by two bits, and adjust exponent accordingly. 1059 exponent += 2; 1060 1061 if (addend.isNonZero()) { 1062 // The intermediate result of the multiplication has "2 * precision" 1063 // signicant bit; adjust the addend to be consistent with mul result. 1064 // 1065 Significand savedSignificand = significand; 1066 const fltSemantics *savedSemantics = semantics; 1067 fltSemantics extendedSemantics; 1068 opStatus status; 1069 unsigned int extendedPrecision; 1070 1071 // Normalize our MSB to one below the top bit to allow for overflow. 1072 extendedPrecision = 2 * precision + 1; 1073 if (omsb != extendedPrecision - 1) { 1074 assert(extendedPrecision > omsb); 1075 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1076 (extendedPrecision - 1) - omsb); 1077 exponent -= (extendedPrecision - 1) - omsb; 1078 } 1079 1080 /* Create new semantics. */ 1081 extendedSemantics = *semantics; 1082 extendedSemantics.precision = extendedPrecision; 1083 1084 if (newPartsCount == 1) 1085 significand.part = fullSignificand[0]; 1086 else 1087 significand.parts = fullSignificand; 1088 semantics = &extendedSemantics; 1089 1090 // Make a copy so we can convert it to the extended semantics. 1091 // Note that we cannot convert the addend directly, as the extendedSemantics 1092 // is a local variable (which we take a reference to). 1093 IEEEFloat extendedAddend(addend); 1094 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1095 assert(status == opOK); 1096 (void)status; 1097 1098 // Shift the significand of the addend right by one bit. This guarantees 1099 // that the high bit of the significand is zero (same as fullSignificand), 1100 // so the addition will overflow (if it does overflow at all) into the top bit. 1101 lost_fraction = extendedAddend.shiftSignificandRight(1); 1102 assert(lost_fraction == lfExactlyZero && 1103 "Lost precision while shifting addend for fused-multiply-add."); 1104 1105 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1106 1107 /* Restore our state. */ 1108 if (newPartsCount == 1) 1109 fullSignificand[0] = significand.part; 1110 significand = savedSignificand; 1111 semantics = savedSemantics; 1112 1113 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1114 } 1115 1116 // Convert the result having "2 * precision" significant-bits back to the one 1117 // having "precision" significant-bits. First, move the radix point from 1118 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1119 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1120 exponent -= precision + 1; 1121 1122 // In case MSB resides at the left-hand side of radix point, shift the 1123 // mantissa right by some amount to make sure the MSB reside right before 1124 // the radix point (i.e. "MSB . rest-significant-bits"). 1125 // 1126 // Note that the result is not normalized when "omsb < precision". So, the 1127 // caller needs to call IEEEFloat::normalize() if normalized value is 1128 // expected. 1129 if (omsb > precision) { 1130 unsigned int bits, significantParts; 1131 lostFraction lf; 1132 1133 bits = omsb - precision; 1134 significantParts = partCountForBits(omsb); 1135 lf = shiftRight(fullSignificand, significantParts, bits); 1136 lost_fraction = combineLostFractions(lf, lost_fraction); 1137 exponent += bits; 1138 } 1139 1140 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1141 1142 if (newPartsCount > 4) 1143 delete [] fullSignificand; 1144 1145 return lost_fraction; 1146 } 1147 1148 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { 1149 return multiplySignificand(rhs, IEEEFloat(*semantics)); 1150 } 1151 1152 /* Multiply the significands of LHS and RHS to DST. */ 1153 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1154 unsigned int bit, i, partsCount; 1155 const integerPart *rhsSignificand; 1156 integerPart *lhsSignificand, *dividend, *divisor; 1157 integerPart scratch[4]; 1158 lostFraction lost_fraction; 1159 1160 assert(semantics == rhs.semantics); 1161 1162 lhsSignificand = significandParts(); 1163 rhsSignificand = rhs.significandParts(); 1164 partsCount = partCount(); 1165 1166 if (partsCount > 2) 1167 dividend = new integerPart[partsCount * 2]; 1168 else 1169 dividend = scratch; 1170 1171 divisor = dividend + partsCount; 1172 1173 /* Copy the dividend and divisor as they will be modified in-place. */ 1174 for (i = 0; i < partsCount; i++) { 1175 dividend[i] = lhsSignificand[i]; 1176 divisor[i] = rhsSignificand[i]; 1177 lhsSignificand[i] = 0; 1178 } 1179 1180 exponent -= rhs.exponent; 1181 1182 unsigned int precision = semantics->precision; 1183 1184 /* Normalize the divisor. */ 1185 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1186 if (bit) { 1187 exponent += bit; 1188 APInt::tcShiftLeft(divisor, partsCount, bit); 1189 } 1190 1191 /* Normalize the dividend. */ 1192 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1193 if (bit) { 1194 exponent -= bit; 1195 APInt::tcShiftLeft(dividend, partsCount, bit); 1196 } 1197 1198 /* Ensure the dividend >= divisor initially for the loop below. 1199 Incidentally, this means that the division loop below is 1200 guaranteed to set the integer bit to one. */ 1201 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1202 exponent--; 1203 APInt::tcShiftLeft(dividend, partsCount, 1); 1204 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1205 } 1206 1207 /* Long division. */ 1208 for (bit = precision; bit; bit -= 1) { 1209 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1210 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1211 APInt::tcSetBit(lhsSignificand, bit - 1); 1212 } 1213 1214 APInt::tcShiftLeft(dividend, partsCount, 1); 1215 } 1216 1217 /* Figure out the lost fraction. */ 1218 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1219 1220 if (cmp > 0) 1221 lost_fraction = lfMoreThanHalf; 1222 else if (cmp == 0) 1223 lost_fraction = lfExactlyHalf; 1224 else if (APInt::tcIsZero(dividend, partsCount)) 1225 lost_fraction = lfExactlyZero; 1226 else 1227 lost_fraction = lfLessThanHalf; 1228 1229 if (partsCount > 2) 1230 delete [] dividend; 1231 1232 return lost_fraction; 1233 } 1234 1235 unsigned int IEEEFloat::significandMSB() const { 1236 return APInt::tcMSB(significandParts(), partCount()); 1237 } 1238 1239 unsigned int IEEEFloat::significandLSB() const { 1240 return APInt::tcLSB(significandParts(), partCount()); 1241 } 1242 1243 /* Note that a zero result is NOT normalized to fcZero. */ 1244 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1245 /* Our exponent should not overflow. */ 1246 assert((ExponentType) (exponent + bits) >= exponent); 1247 1248 exponent += bits; 1249 1250 return shiftRight(significandParts(), partCount(), bits); 1251 } 1252 1253 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1254 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1255 assert(bits < semantics->precision); 1256 1257 if (bits) { 1258 unsigned int partsCount = partCount(); 1259 1260 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1261 exponent -= bits; 1262 1263 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1264 } 1265 } 1266 1267 IEEEFloat::cmpResult 1268 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1269 int compare; 1270 1271 assert(semantics == rhs.semantics); 1272 assert(isFiniteNonZero()); 1273 assert(rhs.isFiniteNonZero()); 1274 1275 compare = exponent - rhs.exponent; 1276 1277 /* If exponents are equal, do an unsigned bignum comparison of the 1278 significands. */ 1279 if (compare == 0) 1280 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1281 partCount()); 1282 1283 if (compare > 0) 1284 return cmpGreaterThan; 1285 else if (compare < 0) 1286 return cmpLessThan; 1287 else 1288 return cmpEqual; 1289 } 1290 1291 /* Set the least significant BITS bits of a bignum, clear the 1292 rest. */ 1293 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, 1294 unsigned bits) { 1295 unsigned i = 0; 1296 while (bits > APInt::APINT_BITS_PER_WORD) { 1297 dst[i++] = ~(APInt::WordType)0; 1298 bits -= APInt::APINT_BITS_PER_WORD; 1299 } 1300 1301 if (bits) 1302 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits); 1303 1304 while (i < parts) 1305 dst[i++] = 0; 1306 } 1307 1308 /* Handle overflow. Sign is preserved. We either become infinity or 1309 the largest finite number. */ 1310 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1311 /* Infinity? */ 1312 if (rounding_mode == rmNearestTiesToEven || 1313 rounding_mode == rmNearestTiesToAway || 1314 (rounding_mode == rmTowardPositive && !sign) || 1315 (rounding_mode == rmTowardNegative && sign)) { 1316 category = fcInfinity; 1317 return (opStatus) (opOverflow | opInexact); 1318 } 1319 1320 /* Otherwise we become the largest finite number. */ 1321 category = fcNormal; 1322 exponent = semantics->maxExponent; 1323 tcSetLeastSignificantBits(significandParts(), partCount(), 1324 semantics->precision); 1325 1326 return opInexact; 1327 } 1328 1329 /* Returns TRUE if, when truncating the current number, with BIT the 1330 new LSB, with the given lost fraction and rounding mode, the result 1331 would need to be rounded away from zero (i.e., by increasing the 1332 signficand). This routine must work for fcZero of both signs, and 1333 fcNormal numbers. */ 1334 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1335 lostFraction lost_fraction, 1336 unsigned int bit) const { 1337 /* NaNs and infinities should not have lost fractions. */ 1338 assert(isFiniteNonZero() || category == fcZero); 1339 1340 /* Current callers never pass this so we don't handle it. */ 1341 assert(lost_fraction != lfExactlyZero); 1342 1343 switch (rounding_mode) { 1344 case rmNearestTiesToAway: 1345 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1346 1347 case rmNearestTiesToEven: 1348 if (lost_fraction == lfMoreThanHalf) 1349 return true; 1350 1351 /* Our zeroes don't have a significand to test. */ 1352 if (lost_fraction == lfExactlyHalf && category != fcZero) 1353 return APInt::tcExtractBit(significandParts(), bit); 1354 1355 return false; 1356 1357 case rmTowardZero: 1358 return false; 1359 1360 case rmTowardPositive: 1361 return !sign; 1362 1363 case rmTowardNegative: 1364 return sign; 1365 1366 default: 1367 break; 1368 } 1369 llvm_unreachable("Invalid rounding mode found"); 1370 } 1371 1372 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1373 lostFraction lost_fraction) { 1374 unsigned int omsb; /* One, not zero, based MSB. */ 1375 int exponentChange; 1376 1377 if (!isFiniteNonZero()) 1378 return opOK; 1379 1380 /* Before rounding normalize the exponent of fcNormal numbers. */ 1381 omsb = significandMSB() + 1; 1382 1383 if (omsb) { 1384 /* OMSB is numbered from 1. We want to place it in the integer 1385 bit numbered PRECISION if possible, with a compensating change in 1386 the exponent. */ 1387 exponentChange = omsb - semantics->precision; 1388 1389 /* If the resulting exponent is too high, overflow according to 1390 the rounding mode. */ 1391 if (exponent + exponentChange > semantics->maxExponent) 1392 return handleOverflow(rounding_mode); 1393 1394 /* Subnormal numbers have exponent minExponent, and their MSB 1395 is forced based on that. */ 1396 if (exponent + exponentChange < semantics->minExponent) 1397 exponentChange = semantics->minExponent - exponent; 1398 1399 /* Shifting left is easy as we don't lose precision. */ 1400 if (exponentChange < 0) { 1401 assert(lost_fraction == lfExactlyZero); 1402 1403 shiftSignificandLeft(-exponentChange); 1404 1405 return opOK; 1406 } 1407 1408 if (exponentChange > 0) { 1409 lostFraction lf; 1410 1411 /* Shift right and capture any new lost fraction. */ 1412 lf = shiftSignificandRight(exponentChange); 1413 1414 lost_fraction = combineLostFractions(lf, lost_fraction); 1415 1416 /* Keep OMSB up-to-date. */ 1417 if (omsb > (unsigned) exponentChange) 1418 omsb -= exponentChange; 1419 else 1420 omsb = 0; 1421 } 1422 } 1423 1424 /* Now round the number according to rounding_mode given the lost 1425 fraction. */ 1426 1427 /* As specified in IEEE 754, since we do not trap we do not report 1428 underflow for exact results. */ 1429 if (lost_fraction == lfExactlyZero) { 1430 /* Canonicalize zeroes. */ 1431 if (omsb == 0) 1432 category = fcZero; 1433 1434 return opOK; 1435 } 1436 1437 /* Increment the significand if we're rounding away from zero. */ 1438 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1439 if (omsb == 0) 1440 exponent = semantics->minExponent; 1441 1442 incrementSignificand(); 1443 omsb = significandMSB() + 1; 1444 1445 /* Did the significand increment overflow? */ 1446 if (omsb == (unsigned) semantics->precision + 1) { 1447 /* Renormalize by incrementing the exponent and shifting our 1448 significand right one. However if we already have the 1449 maximum exponent we overflow to infinity. */ 1450 if (exponent == semantics->maxExponent) { 1451 category = fcInfinity; 1452 1453 return (opStatus) (opOverflow | opInexact); 1454 } 1455 1456 shiftSignificandRight(1); 1457 1458 return opInexact; 1459 } 1460 } 1461 1462 /* The normal case - we were and are not denormal, and any 1463 significand increment above didn't overflow. */ 1464 if (omsb == semantics->precision) 1465 return opInexact; 1466 1467 /* We have a non-zero denormal. */ 1468 assert(omsb < semantics->precision); 1469 1470 /* Canonicalize zeroes. */ 1471 if (omsb == 0) 1472 category = fcZero; 1473 1474 /* The fcZero case is a denormal that underflowed to zero. */ 1475 return (opStatus) (opUnderflow | opInexact); 1476 } 1477 1478 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1479 bool subtract) { 1480 switch (PackCategoriesIntoKey(category, rhs.category)) { 1481 default: 1482 llvm_unreachable(nullptr); 1483 1484 case PackCategoriesIntoKey(fcZero, fcNaN): 1485 case PackCategoriesIntoKey(fcNormal, fcNaN): 1486 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1487 assign(rhs); 1488 LLVM_FALLTHROUGH; 1489 case PackCategoriesIntoKey(fcNaN, fcZero): 1490 case PackCategoriesIntoKey(fcNaN, fcNormal): 1491 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1492 case PackCategoriesIntoKey(fcNaN, fcNaN): 1493 if (isSignaling()) { 1494 makeQuiet(); 1495 return opInvalidOp; 1496 } 1497 return rhs.isSignaling() ? opInvalidOp : opOK; 1498 1499 case PackCategoriesIntoKey(fcNormal, fcZero): 1500 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1501 case PackCategoriesIntoKey(fcInfinity, fcZero): 1502 return opOK; 1503 1504 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1505 case PackCategoriesIntoKey(fcZero, fcInfinity): 1506 category = fcInfinity; 1507 sign = rhs.sign ^ subtract; 1508 return opOK; 1509 1510 case PackCategoriesIntoKey(fcZero, fcNormal): 1511 assign(rhs); 1512 sign = rhs.sign ^ subtract; 1513 return opOK; 1514 1515 case PackCategoriesIntoKey(fcZero, fcZero): 1516 /* Sign depends on rounding mode; handled by caller. */ 1517 return opOK; 1518 1519 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1520 /* Differently signed infinities can only be validly 1521 subtracted. */ 1522 if (((sign ^ rhs.sign)!=0) != subtract) { 1523 makeNaN(); 1524 return opInvalidOp; 1525 } 1526 1527 return opOK; 1528 1529 case PackCategoriesIntoKey(fcNormal, fcNormal): 1530 return opDivByZero; 1531 } 1532 } 1533 1534 /* Add or subtract two normal numbers. */ 1535 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1536 bool subtract) { 1537 integerPart carry; 1538 lostFraction lost_fraction; 1539 int bits; 1540 1541 /* Determine if the operation on the absolute values is effectively 1542 an addition or subtraction. */ 1543 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1544 1545 /* Are we bigger exponent-wise than the RHS? */ 1546 bits = exponent - rhs.exponent; 1547 1548 /* Subtraction is more subtle than one might naively expect. */ 1549 if (subtract) { 1550 IEEEFloat temp_rhs(rhs); 1551 1552 if (bits == 0) 1553 lost_fraction = lfExactlyZero; 1554 else if (bits > 0) { 1555 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1556 shiftSignificandLeft(1); 1557 } else { 1558 lost_fraction = shiftSignificandRight(-bits - 1); 1559 temp_rhs.shiftSignificandLeft(1); 1560 } 1561 1562 // Should we reverse the subtraction. 1563 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1564 carry = temp_rhs.subtractSignificand 1565 (*this, lost_fraction != lfExactlyZero); 1566 copySignificand(temp_rhs); 1567 sign = !sign; 1568 } else { 1569 carry = subtractSignificand 1570 (temp_rhs, lost_fraction != lfExactlyZero); 1571 } 1572 1573 /* Invert the lost fraction - it was on the RHS and 1574 subtracted. */ 1575 if (lost_fraction == lfLessThanHalf) 1576 lost_fraction = lfMoreThanHalf; 1577 else if (lost_fraction == lfMoreThanHalf) 1578 lost_fraction = lfLessThanHalf; 1579 1580 /* The code above is intended to ensure that no borrow is 1581 necessary. */ 1582 assert(!carry); 1583 (void)carry; 1584 } else { 1585 if (bits > 0) { 1586 IEEEFloat temp_rhs(rhs); 1587 1588 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1589 carry = addSignificand(temp_rhs); 1590 } else { 1591 lost_fraction = shiftSignificandRight(-bits); 1592 carry = addSignificand(rhs); 1593 } 1594 1595 /* We have a guard bit; generating a carry cannot happen. */ 1596 assert(!carry); 1597 (void)carry; 1598 } 1599 1600 return lost_fraction; 1601 } 1602 1603 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1604 switch (PackCategoriesIntoKey(category, rhs.category)) { 1605 default: 1606 llvm_unreachable(nullptr); 1607 1608 case PackCategoriesIntoKey(fcZero, fcNaN): 1609 case PackCategoriesIntoKey(fcNormal, fcNaN): 1610 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1611 assign(rhs); 1612 sign = false; 1613 LLVM_FALLTHROUGH; 1614 case PackCategoriesIntoKey(fcNaN, fcZero): 1615 case PackCategoriesIntoKey(fcNaN, fcNormal): 1616 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1617 case PackCategoriesIntoKey(fcNaN, fcNaN): 1618 sign ^= rhs.sign; // restore the original sign 1619 if (isSignaling()) { 1620 makeQuiet(); 1621 return opInvalidOp; 1622 } 1623 return rhs.isSignaling() ? opInvalidOp : opOK; 1624 1625 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1626 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1627 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1628 category = fcInfinity; 1629 return opOK; 1630 1631 case PackCategoriesIntoKey(fcZero, fcNormal): 1632 case PackCategoriesIntoKey(fcNormal, fcZero): 1633 case PackCategoriesIntoKey(fcZero, fcZero): 1634 category = fcZero; 1635 return opOK; 1636 1637 case PackCategoriesIntoKey(fcZero, fcInfinity): 1638 case PackCategoriesIntoKey(fcInfinity, fcZero): 1639 makeNaN(); 1640 return opInvalidOp; 1641 1642 case PackCategoriesIntoKey(fcNormal, fcNormal): 1643 return opOK; 1644 } 1645 } 1646 1647 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1648 switch (PackCategoriesIntoKey(category, rhs.category)) { 1649 default: 1650 llvm_unreachable(nullptr); 1651 1652 case PackCategoriesIntoKey(fcZero, fcNaN): 1653 case PackCategoriesIntoKey(fcNormal, fcNaN): 1654 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1655 assign(rhs); 1656 sign = false; 1657 LLVM_FALLTHROUGH; 1658 case PackCategoriesIntoKey(fcNaN, fcZero): 1659 case PackCategoriesIntoKey(fcNaN, fcNormal): 1660 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1661 case PackCategoriesIntoKey(fcNaN, fcNaN): 1662 sign ^= rhs.sign; // restore the original sign 1663 if (isSignaling()) { 1664 makeQuiet(); 1665 return opInvalidOp; 1666 } 1667 return rhs.isSignaling() ? opInvalidOp : opOK; 1668 1669 case PackCategoriesIntoKey(fcInfinity, fcZero): 1670 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1671 case PackCategoriesIntoKey(fcZero, fcInfinity): 1672 case PackCategoriesIntoKey(fcZero, fcNormal): 1673 return opOK; 1674 1675 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1676 category = fcZero; 1677 return opOK; 1678 1679 case PackCategoriesIntoKey(fcNormal, fcZero): 1680 category = fcInfinity; 1681 return opDivByZero; 1682 1683 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1684 case PackCategoriesIntoKey(fcZero, fcZero): 1685 makeNaN(); 1686 return opInvalidOp; 1687 1688 case PackCategoriesIntoKey(fcNormal, fcNormal): 1689 return opOK; 1690 } 1691 } 1692 1693 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1694 switch (PackCategoriesIntoKey(category, rhs.category)) { 1695 default: 1696 llvm_unreachable(nullptr); 1697 1698 case PackCategoriesIntoKey(fcZero, fcNaN): 1699 case PackCategoriesIntoKey(fcNormal, fcNaN): 1700 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1701 assign(rhs); 1702 LLVM_FALLTHROUGH; 1703 case PackCategoriesIntoKey(fcNaN, fcZero): 1704 case PackCategoriesIntoKey(fcNaN, fcNormal): 1705 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1706 case PackCategoriesIntoKey(fcNaN, fcNaN): 1707 if (isSignaling()) { 1708 makeQuiet(); 1709 return opInvalidOp; 1710 } 1711 return rhs.isSignaling() ? opInvalidOp : opOK; 1712 1713 case PackCategoriesIntoKey(fcZero, fcInfinity): 1714 case PackCategoriesIntoKey(fcZero, fcNormal): 1715 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1716 return opOK; 1717 1718 case PackCategoriesIntoKey(fcNormal, fcZero): 1719 case PackCategoriesIntoKey(fcInfinity, fcZero): 1720 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1721 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1722 case PackCategoriesIntoKey(fcZero, fcZero): 1723 makeNaN(); 1724 return opInvalidOp; 1725 1726 case PackCategoriesIntoKey(fcNormal, fcNormal): 1727 return opOK; 1728 } 1729 } 1730 1731 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 1732 switch (PackCategoriesIntoKey(category, rhs.category)) { 1733 default: 1734 llvm_unreachable(nullptr); 1735 1736 case PackCategoriesIntoKey(fcZero, fcNaN): 1737 case PackCategoriesIntoKey(fcNormal, fcNaN): 1738 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1739 assign(rhs); 1740 LLVM_FALLTHROUGH; 1741 case PackCategoriesIntoKey(fcNaN, fcZero): 1742 case PackCategoriesIntoKey(fcNaN, fcNormal): 1743 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1744 case PackCategoriesIntoKey(fcNaN, fcNaN): 1745 if (isSignaling()) { 1746 makeQuiet(); 1747 return opInvalidOp; 1748 } 1749 return rhs.isSignaling() ? opInvalidOp : opOK; 1750 1751 case PackCategoriesIntoKey(fcZero, fcInfinity): 1752 case PackCategoriesIntoKey(fcZero, fcNormal): 1753 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1754 return opOK; 1755 1756 case PackCategoriesIntoKey(fcNormal, fcZero): 1757 case PackCategoriesIntoKey(fcInfinity, fcZero): 1758 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1759 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1760 case PackCategoriesIntoKey(fcZero, fcZero): 1761 makeNaN(); 1762 return opInvalidOp; 1763 1764 case PackCategoriesIntoKey(fcNormal, fcNormal): 1765 return opDivByZero; // fake status, indicating this is not a special case 1766 } 1767 } 1768 1769 /* Change sign. */ 1770 void IEEEFloat::changeSign() { 1771 /* Look mummy, this one's easy. */ 1772 sign = !sign; 1773 } 1774 1775 /* Normalized addition or subtraction. */ 1776 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 1777 roundingMode rounding_mode, 1778 bool subtract) { 1779 opStatus fs; 1780 1781 fs = addOrSubtractSpecials(rhs, subtract); 1782 1783 /* This return code means it was not a simple case. */ 1784 if (fs == opDivByZero) { 1785 lostFraction lost_fraction; 1786 1787 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1788 fs = normalize(rounding_mode, lost_fraction); 1789 1790 /* Can only be zero if we lost no fraction. */ 1791 assert(category != fcZero || lost_fraction == lfExactlyZero); 1792 } 1793 1794 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1795 positive zero unless rounding to minus infinity, except that 1796 adding two like-signed zeroes gives that zero. */ 1797 if (category == fcZero) { 1798 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1799 sign = (rounding_mode == rmTowardNegative); 1800 } 1801 1802 return fs; 1803 } 1804 1805 /* Normalized addition. */ 1806 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 1807 roundingMode rounding_mode) { 1808 return addOrSubtract(rhs, rounding_mode, false); 1809 } 1810 1811 /* Normalized subtraction. */ 1812 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 1813 roundingMode rounding_mode) { 1814 return addOrSubtract(rhs, rounding_mode, true); 1815 } 1816 1817 /* Normalized multiply. */ 1818 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 1819 roundingMode rounding_mode) { 1820 opStatus fs; 1821 1822 sign ^= rhs.sign; 1823 fs = multiplySpecials(rhs); 1824 1825 if (isFiniteNonZero()) { 1826 lostFraction lost_fraction = multiplySignificand(rhs); 1827 fs = normalize(rounding_mode, lost_fraction); 1828 if (lost_fraction != lfExactlyZero) 1829 fs = (opStatus) (fs | opInexact); 1830 } 1831 1832 return fs; 1833 } 1834 1835 /* Normalized divide. */ 1836 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 1837 roundingMode rounding_mode) { 1838 opStatus fs; 1839 1840 sign ^= rhs.sign; 1841 fs = divideSpecials(rhs); 1842 1843 if (isFiniteNonZero()) { 1844 lostFraction lost_fraction = divideSignificand(rhs); 1845 fs = normalize(rounding_mode, lost_fraction); 1846 if (lost_fraction != lfExactlyZero) 1847 fs = (opStatus) (fs | opInexact); 1848 } 1849 1850 return fs; 1851 } 1852 1853 /* Normalized remainder. */ 1854 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 1855 opStatus fs; 1856 unsigned int origSign = sign; 1857 1858 // First handle the special cases. 1859 fs = remainderSpecials(rhs); 1860 if (fs != opDivByZero) 1861 return fs; 1862 1863 fs = opOK; 1864 1865 // Make sure the current value is less than twice the denom. If the addition 1866 // did not succeed (an overflow has happened), which means that the finite 1867 // value we currently posses must be less than twice the denom (as we are 1868 // using the same semantics). 1869 IEEEFloat P2 = rhs; 1870 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 1871 fs = mod(P2); 1872 assert(fs == opOK); 1873 } 1874 1875 // Lets work with absolute numbers. 1876 IEEEFloat P = rhs; 1877 P.sign = false; 1878 sign = false; 1879 1880 // 1881 // To calculate the remainder we use the following scheme. 1882 // 1883 // The remainder is defained as follows: 1884 // 1885 // remainder = numer - rquot * denom = x - r * p 1886 // 1887 // Where r is the result of: x/p, rounded toward the nearest integral value 1888 // (with halfway cases rounded toward the even number). 1889 // 1890 // Currently, (after x mod 2p): 1891 // r is the number of 2p's present inside x, which is inherently, an even 1892 // number of p's. 1893 // 1894 // We may split the remaining calculation into 4 options: 1895 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1896 // - if x == 0.5p then we round to the nearest even number which is 0, and we 1897 // are done as well. 1898 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 1899 // to subtract 1p at least once. 1900 // - if x >= p then we must subtract p at least once, as x must be a 1901 // remainder. 1902 // 1903 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 1904 // 1905 // We can now split the remaining calculation to the following 3 options: 1906 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 1907 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 1908 // must round up to the next even number. so we must subtract p once more. 1909 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 1910 // integral, and subtract p once more. 1911 // 1912 1913 // Extend the semantics to prevent an overflow/underflow or inexact result. 1914 bool losesInfo; 1915 fltSemantics extendedSemantics = *semantics; 1916 extendedSemantics.maxExponent++; 1917 extendedSemantics.minExponent--; 1918 extendedSemantics.precision += 2; 1919 1920 IEEEFloat VEx = *this; 1921 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1922 assert(fs == opOK && !losesInfo); 1923 IEEEFloat PEx = P; 1924 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 1925 assert(fs == opOK && !losesInfo); 1926 1927 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 1928 // any fraction. 1929 fs = VEx.add(VEx, rmNearestTiesToEven); 1930 assert(fs == opOK); 1931 1932 if (VEx.compare(PEx) == cmpGreaterThan) { 1933 fs = subtract(P, rmNearestTiesToEven); 1934 assert(fs == opOK); 1935 1936 // Make VEx = this.add(this), but because we have different semantics, we do 1937 // not want to `convert` again, so we just subtract PEx twice (which equals 1938 // to the desired value). 1939 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1940 assert(fs == opOK); 1941 fs = VEx.subtract(PEx, rmNearestTiesToEven); 1942 assert(fs == opOK); 1943 1944 cmpResult result = VEx.compare(PEx); 1945 if (result == cmpGreaterThan || result == cmpEqual) { 1946 fs = subtract(P, rmNearestTiesToEven); 1947 assert(fs == opOK); 1948 } 1949 } 1950 1951 if (isZero()) 1952 sign = origSign; // IEEE754 requires this 1953 else 1954 sign ^= origSign; 1955 return fs; 1956 } 1957 1958 /* Normalized llvm frem (C fmod). */ 1959 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 1960 opStatus fs; 1961 fs = modSpecials(rhs); 1962 unsigned int origSign = sign; 1963 1964 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 1965 compareAbsoluteValue(rhs) != cmpLessThan) { 1966 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven); 1967 if (compareAbsoluteValue(V) == cmpLessThan) 1968 V = scalbn(V, -1, rmNearestTiesToEven); 1969 V.sign = sign; 1970 1971 fs = subtract(V, rmNearestTiesToEven); 1972 assert(fs==opOK); 1973 } 1974 if (isZero()) 1975 sign = origSign; // fmod requires this 1976 return fs; 1977 } 1978 1979 /* Normalized fused-multiply-add. */ 1980 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 1981 const IEEEFloat &addend, 1982 roundingMode rounding_mode) { 1983 opStatus fs; 1984 1985 /* Post-multiplication sign, before addition. */ 1986 sign ^= multiplicand.sign; 1987 1988 /* If and only if all arguments are normal do we need to do an 1989 extended-precision calculation. */ 1990 if (isFiniteNonZero() && 1991 multiplicand.isFiniteNonZero() && 1992 addend.isFinite()) { 1993 lostFraction lost_fraction; 1994 1995 lost_fraction = multiplySignificand(multiplicand, addend); 1996 fs = normalize(rounding_mode, lost_fraction); 1997 if (lost_fraction != lfExactlyZero) 1998 fs = (opStatus) (fs | opInexact); 1999 2000 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2001 positive zero unless rounding to minus infinity, except that 2002 adding two like-signed zeroes gives that zero. */ 2003 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 2004 sign = (rounding_mode == rmTowardNegative); 2005 } else { 2006 fs = multiplySpecials(multiplicand); 2007 2008 /* FS can only be opOK or opInvalidOp. There is no more work 2009 to do in the latter case. The IEEE-754R standard says it is 2010 implementation-defined in this case whether, if ADDEND is a 2011 quiet NaN, we raise invalid op; this implementation does so. 2012 2013 If we need to do the addition we can do so with normal 2014 precision. */ 2015 if (fs == opOK) 2016 fs = addOrSubtract(addend, rounding_mode, false); 2017 } 2018 2019 return fs; 2020 } 2021 2022 /* Rounding-mode correct round to integral value. */ 2023 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2024 opStatus fs; 2025 2026 if (isInfinity()) 2027 // [IEEE Std 754-2008 6.1]: 2028 // The behavior of infinity in floating-point arithmetic is derived from the 2029 // limiting cases of real arithmetic with operands of arbitrarily 2030 // large magnitude, when such a limit exists. 2031 // ... 2032 // Operations on infinite operands are usually exact and therefore signal no 2033 // exceptions ... 2034 return opOK; 2035 2036 if (isNaN()) { 2037 if (isSignaling()) { 2038 // [IEEE Std 754-2008 6.2]: 2039 // Under default exception handling, any operation signaling an invalid 2040 // operation exception and for which a floating-point result is to be 2041 // delivered shall deliver a quiet NaN. 2042 makeQuiet(); 2043 // [IEEE Std 754-2008 6.2]: 2044 // Signaling NaNs shall be reserved operands that, under default exception 2045 // handling, signal the invalid operation exception(see 7.2) for every 2046 // general-computational and signaling-computational operation except for 2047 // the conversions described in 5.12. 2048 return opInvalidOp; 2049 } else { 2050 // [IEEE Std 754-2008 6.2]: 2051 // For an operation with quiet NaN inputs, other than maximum and minimum 2052 // operations, if a floating-point result is to be delivered the result 2053 // shall be a quiet NaN which should be one of the input NaNs. 2054 // ... 2055 // Every general-computational and quiet-computational operation involving 2056 // one or more input NaNs, none of them signaling, shall signal no 2057 // exception, except fusedMultiplyAdd might signal the invalid operation 2058 // exception(see 7.2). 2059 return opOK; 2060 } 2061 } 2062 2063 if (isZero()) { 2064 // [IEEE Std 754-2008 6.3]: 2065 // ... the sign of the result of conversions, the quantize operation, the 2066 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2067 // the sign of the first or only operand. 2068 return opOK; 2069 } 2070 2071 // If the exponent is large enough, we know that this value is already 2072 // integral, and the arithmetic below would potentially cause it to saturate 2073 // to +/-Inf. Bail out early instead. 2074 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2075 return opOK; 2076 2077 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2078 // precision of our format, and then subtract it back off again. The choice 2079 // of rounding modes for the addition/subtraction determines the rounding mode 2080 // for our integral rounding as well. 2081 // NOTE: When the input value is negative, we do subtraction followed by 2082 // addition instead. 2083 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2084 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2085 IEEEFloat MagicConstant(*semantics); 2086 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2087 rmNearestTiesToEven); 2088 assert(fs == opOK); 2089 MagicConstant.sign = sign; 2090 2091 // Preserve the input sign so that we can handle the case of zero result 2092 // correctly. 2093 bool inputSign = isNegative(); 2094 2095 fs = add(MagicConstant, rounding_mode); 2096 2097 // Current value and 'MagicConstant' are both integers, so the result of the 2098 // subtraction is always exact according to Sterbenz' lemma. 2099 subtract(MagicConstant, rounding_mode); 2100 2101 // Restore the input sign. 2102 if (inputSign != isNegative()) 2103 changeSign(); 2104 2105 return fs; 2106 } 2107 2108 2109 /* Comparison requires normalized numbers. */ 2110 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2111 cmpResult result; 2112 2113 assert(semantics == rhs.semantics); 2114 2115 switch (PackCategoriesIntoKey(category, rhs.category)) { 2116 default: 2117 llvm_unreachable(nullptr); 2118 2119 case PackCategoriesIntoKey(fcNaN, fcZero): 2120 case PackCategoriesIntoKey(fcNaN, fcNormal): 2121 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2122 case PackCategoriesIntoKey(fcNaN, fcNaN): 2123 case PackCategoriesIntoKey(fcZero, fcNaN): 2124 case PackCategoriesIntoKey(fcNormal, fcNaN): 2125 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2126 return cmpUnordered; 2127 2128 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2129 case PackCategoriesIntoKey(fcInfinity, fcZero): 2130 case PackCategoriesIntoKey(fcNormal, fcZero): 2131 if (sign) 2132 return cmpLessThan; 2133 else 2134 return cmpGreaterThan; 2135 2136 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2137 case PackCategoriesIntoKey(fcZero, fcInfinity): 2138 case PackCategoriesIntoKey(fcZero, fcNormal): 2139 if (rhs.sign) 2140 return cmpGreaterThan; 2141 else 2142 return cmpLessThan; 2143 2144 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2145 if (sign == rhs.sign) 2146 return cmpEqual; 2147 else if (sign) 2148 return cmpLessThan; 2149 else 2150 return cmpGreaterThan; 2151 2152 case PackCategoriesIntoKey(fcZero, fcZero): 2153 return cmpEqual; 2154 2155 case PackCategoriesIntoKey(fcNormal, fcNormal): 2156 break; 2157 } 2158 2159 /* Two normal numbers. Do they have the same sign? */ 2160 if (sign != rhs.sign) { 2161 if (sign) 2162 result = cmpLessThan; 2163 else 2164 result = cmpGreaterThan; 2165 } else { 2166 /* Compare absolute values; invert result if negative. */ 2167 result = compareAbsoluteValue(rhs); 2168 2169 if (sign) { 2170 if (result == cmpLessThan) 2171 result = cmpGreaterThan; 2172 else if (result == cmpGreaterThan) 2173 result = cmpLessThan; 2174 } 2175 } 2176 2177 return result; 2178 } 2179 2180 /// IEEEFloat::convert - convert a value of one floating point type to another. 2181 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2182 /// records whether the transformation lost information, i.e. whether 2183 /// converting the result back to the original type will produce the 2184 /// original value (this is almost the same as return value==fsOK, but there 2185 /// are edge cases where this is not so). 2186 2187 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2188 roundingMode rounding_mode, 2189 bool *losesInfo) { 2190 lostFraction lostFraction; 2191 unsigned int newPartCount, oldPartCount; 2192 opStatus fs; 2193 int shift; 2194 const fltSemantics &fromSemantics = *semantics; 2195 2196 lostFraction = lfExactlyZero; 2197 newPartCount = partCountForBits(toSemantics.precision + 1); 2198 oldPartCount = partCount(); 2199 shift = toSemantics.precision - fromSemantics.precision; 2200 2201 bool X86SpecialNan = false; 2202 if (&fromSemantics == &semX87DoubleExtended && 2203 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2204 (!(*significandParts() & 0x8000000000000000ULL) || 2205 !(*significandParts() & 0x4000000000000000ULL))) { 2206 // x86 has some unusual NaNs which cannot be represented in any other 2207 // format; note them here. 2208 X86SpecialNan = true; 2209 } 2210 2211 // If this is a truncation of a denormal number, and the target semantics 2212 // has larger exponent range than the source semantics (this can happen 2213 // when truncating from PowerPC double-double to double format), the 2214 // right shift could lose result mantissa bits. Adjust exponent instead 2215 // of performing excessive shift. 2216 if (shift < 0 && isFiniteNonZero()) { 2217 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 2218 if (exponent + exponentChange < toSemantics.minExponent) 2219 exponentChange = toSemantics.minExponent - exponent; 2220 if (exponentChange < shift) 2221 exponentChange = shift; 2222 if (exponentChange < 0) { 2223 shift -= exponentChange; 2224 exponent += exponentChange; 2225 } 2226 } 2227 2228 // If this is a truncation, perform the shift before we narrow the storage. 2229 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2230 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2231 2232 // Fix the storage so it can hold to new value. 2233 if (newPartCount > oldPartCount) { 2234 // The new type requires more storage; make it available. 2235 integerPart *newParts; 2236 newParts = new integerPart[newPartCount]; 2237 APInt::tcSet(newParts, 0, newPartCount); 2238 if (isFiniteNonZero() || category==fcNaN) 2239 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2240 freeSignificand(); 2241 significand.parts = newParts; 2242 } else if (newPartCount == 1 && oldPartCount != 1) { 2243 // Switch to built-in storage for a single part. 2244 integerPart newPart = 0; 2245 if (isFiniteNonZero() || category==fcNaN) 2246 newPart = significandParts()[0]; 2247 freeSignificand(); 2248 significand.part = newPart; 2249 } 2250 2251 // Now that we have the right storage, switch the semantics. 2252 semantics = &toSemantics; 2253 2254 // If this is an extension, perform the shift now that the storage is 2255 // available. 2256 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2257 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2258 2259 if (isFiniteNonZero()) { 2260 fs = normalize(rounding_mode, lostFraction); 2261 *losesInfo = (fs != opOK); 2262 } else if (category == fcNaN) { 2263 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2264 2265 // For x87 extended precision, we want to make a NaN, not a special NaN if 2266 // the input wasn't special either. 2267 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2268 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2269 2270 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2271 // This also guarantees that a sNaN does not become Inf on a truncation 2272 // that loses all payload bits. 2273 if (isSignaling()) { 2274 makeQuiet(); 2275 fs = opInvalidOp; 2276 } else { 2277 fs = opOK; 2278 } 2279 } else { 2280 *losesInfo = false; 2281 fs = opOK; 2282 } 2283 2284 return fs; 2285 } 2286 2287 /* Convert a floating point number to an integer according to the 2288 rounding mode. If the rounded integer value is out of range this 2289 returns an invalid operation exception and the contents of the 2290 destination parts are unspecified. If the rounded value is in 2291 range but the floating point number is not the exact integer, the C 2292 standard doesn't require an inexact exception to be raised. IEEE 2293 854 does require it so we do that. 2294 2295 Note that for conversions to integer type the C standard requires 2296 round-to-zero to always be used. */ 2297 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2298 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2299 roundingMode rounding_mode, bool *isExact) const { 2300 lostFraction lost_fraction; 2301 const integerPart *src; 2302 unsigned int dstPartsCount, truncatedBits; 2303 2304 *isExact = false; 2305 2306 /* Handle the three special cases first. */ 2307 if (category == fcInfinity || category == fcNaN) 2308 return opInvalidOp; 2309 2310 dstPartsCount = partCountForBits(width); 2311 assert(dstPartsCount <= parts.size() && "Integer too big"); 2312 2313 if (category == fcZero) { 2314 APInt::tcSet(parts.data(), 0, dstPartsCount); 2315 // Negative zero can't be represented as an int. 2316 *isExact = !sign; 2317 return opOK; 2318 } 2319 2320 src = significandParts(); 2321 2322 /* Step 1: place our absolute value, with any fraction truncated, in 2323 the destination. */ 2324 if (exponent < 0) { 2325 /* Our absolute value is less than one; truncate everything. */ 2326 APInt::tcSet(parts.data(), 0, dstPartsCount); 2327 /* For exponent -1 the integer bit represents .5, look at that. 2328 For smaller exponents leftmost truncated bit is 0. */ 2329 truncatedBits = semantics->precision -1U - exponent; 2330 } else { 2331 /* We want the most significant (exponent + 1) bits; the rest are 2332 truncated. */ 2333 unsigned int bits = exponent + 1U; 2334 2335 /* Hopelessly large in magnitude? */ 2336 if (bits > width) 2337 return opInvalidOp; 2338 2339 if (bits < semantics->precision) { 2340 /* We truncate (semantics->precision - bits) bits. */ 2341 truncatedBits = semantics->precision - bits; 2342 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2343 } else { 2344 /* We want at least as many bits as are available. */ 2345 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2346 0); 2347 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2348 bits - semantics->precision); 2349 truncatedBits = 0; 2350 } 2351 } 2352 2353 /* Step 2: work out any lost fraction, and increment the absolute 2354 value if we would round away from zero. */ 2355 if (truncatedBits) { 2356 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2357 truncatedBits); 2358 if (lost_fraction != lfExactlyZero && 2359 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2360 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2361 return opInvalidOp; /* Overflow. */ 2362 } 2363 } else { 2364 lost_fraction = lfExactlyZero; 2365 } 2366 2367 /* Step 3: check if we fit in the destination. */ 2368 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2369 2370 if (sign) { 2371 if (!isSigned) { 2372 /* Negative numbers cannot be represented as unsigned. */ 2373 if (omsb != 0) 2374 return opInvalidOp; 2375 } else { 2376 /* It takes omsb bits to represent the unsigned integer value. 2377 We lose a bit for the sign, but care is needed as the 2378 maximally negative integer is a special case. */ 2379 if (omsb == width && 2380 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2381 return opInvalidOp; 2382 2383 /* This case can happen because of rounding. */ 2384 if (omsb > width) 2385 return opInvalidOp; 2386 } 2387 2388 APInt::tcNegate (parts.data(), dstPartsCount); 2389 } else { 2390 if (omsb >= width + !isSigned) 2391 return opInvalidOp; 2392 } 2393 2394 if (lost_fraction == lfExactlyZero) { 2395 *isExact = true; 2396 return opOK; 2397 } else 2398 return opInexact; 2399 } 2400 2401 /* Same as convertToSignExtendedInteger, except we provide 2402 deterministic values in case of an invalid operation exception, 2403 namely zero for NaNs and the minimal or maximal value respectively 2404 for underflow or overflow. 2405 The *isExact output tells whether the result is exact, in the sense 2406 that converting it back to the original floating point type produces 2407 the original value. This is almost equivalent to result==opOK, 2408 except for negative zeroes. 2409 */ 2410 IEEEFloat::opStatus 2411 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2412 unsigned int width, bool isSigned, 2413 roundingMode rounding_mode, bool *isExact) const { 2414 opStatus fs; 2415 2416 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2417 isExact); 2418 2419 if (fs == opInvalidOp) { 2420 unsigned int bits, dstPartsCount; 2421 2422 dstPartsCount = partCountForBits(width); 2423 assert(dstPartsCount <= parts.size() && "Integer too big"); 2424 2425 if (category == fcNaN) 2426 bits = 0; 2427 else if (sign) 2428 bits = isSigned; 2429 else 2430 bits = width - isSigned; 2431 2432 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2433 if (sign && isSigned) 2434 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2435 } 2436 2437 return fs; 2438 } 2439 2440 /* Convert an unsigned integer SRC to a floating point number, 2441 rounding according to ROUNDING_MODE. The sign of the floating 2442 point number is not modified. */ 2443 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2444 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2445 unsigned int omsb, precision, dstCount; 2446 integerPart *dst; 2447 lostFraction lost_fraction; 2448 2449 category = fcNormal; 2450 omsb = APInt::tcMSB(src, srcCount) + 1; 2451 dst = significandParts(); 2452 dstCount = partCount(); 2453 precision = semantics->precision; 2454 2455 /* We want the most significant PRECISION bits of SRC. There may not 2456 be that many; extract what we can. */ 2457 if (precision <= omsb) { 2458 exponent = omsb - 1; 2459 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2460 omsb - precision); 2461 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2462 } else { 2463 exponent = precision - 1; 2464 lost_fraction = lfExactlyZero; 2465 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2466 } 2467 2468 return normalize(rounding_mode, lost_fraction); 2469 } 2470 2471 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2472 roundingMode rounding_mode) { 2473 unsigned int partCount = Val.getNumWords(); 2474 APInt api = Val; 2475 2476 sign = false; 2477 if (isSigned && api.isNegative()) { 2478 sign = true; 2479 api = -api; 2480 } 2481 2482 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2483 } 2484 2485 /* Convert a two's complement integer SRC to a floating point number, 2486 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2487 integer is signed, in which case it must be sign-extended. */ 2488 IEEEFloat::opStatus 2489 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2490 unsigned int srcCount, bool isSigned, 2491 roundingMode rounding_mode) { 2492 opStatus status; 2493 2494 if (isSigned && 2495 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2496 integerPart *copy; 2497 2498 /* If we're signed and negative negate a copy. */ 2499 sign = true; 2500 copy = new integerPart[srcCount]; 2501 APInt::tcAssign(copy, src, srcCount); 2502 APInt::tcNegate(copy, srcCount); 2503 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2504 delete [] copy; 2505 } else { 2506 sign = false; 2507 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2508 } 2509 2510 return status; 2511 } 2512 2513 /* FIXME: should this just take a const APInt reference? */ 2514 IEEEFloat::opStatus 2515 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2516 unsigned int width, bool isSigned, 2517 roundingMode rounding_mode) { 2518 unsigned int partCount = partCountForBits(width); 2519 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2520 2521 sign = false; 2522 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2523 sign = true; 2524 api = -api; 2525 } 2526 2527 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2528 } 2529 2530 Expected<IEEEFloat::opStatus> 2531 IEEEFloat::convertFromHexadecimalString(StringRef s, 2532 roundingMode rounding_mode) { 2533 lostFraction lost_fraction = lfExactlyZero; 2534 2535 category = fcNormal; 2536 zeroSignificand(); 2537 exponent = 0; 2538 2539 integerPart *significand = significandParts(); 2540 unsigned partsCount = partCount(); 2541 unsigned bitPos = partsCount * integerPartWidth; 2542 bool computedTrailingFraction = false; 2543 2544 // Skip leading zeroes and any (hexa)decimal point. 2545 StringRef::iterator begin = s.begin(); 2546 StringRef::iterator end = s.end(); 2547 StringRef::iterator dot; 2548 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2549 if (!PtrOrErr) 2550 return PtrOrErr.takeError(); 2551 StringRef::iterator p = *PtrOrErr; 2552 StringRef::iterator firstSignificantDigit = p; 2553 2554 while (p != end) { 2555 integerPart hex_value; 2556 2557 if (*p == '.') { 2558 if (dot != end) 2559 return createError("String contains multiple dots"); 2560 dot = p++; 2561 continue; 2562 } 2563 2564 hex_value = hexDigitValue(*p); 2565 if (hex_value == -1U) 2566 break; 2567 2568 p++; 2569 2570 // Store the number while we have space. 2571 if (bitPos) { 2572 bitPos -= 4; 2573 hex_value <<= bitPos % integerPartWidth; 2574 significand[bitPos / integerPartWidth] |= hex_value; 2575 } else if (!computedTrailingFraction) { 2576 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2577 if (!FractOrErr) 2578 return FractOrErr.takeError(); 2579 lost_fraction = *FractOrErr; 2580 computedTrailingFraction = true; 2581 } 2582 } 2583 2584 /* Hex floats require an exponent but not a hexadecimal point. */ 2585 if (p == end) 2586 return createError("Hex strings require an exponent"); 2587 if (*p != 'p' && *p != 'P') 2588 return createError("Invalid character in significand"); 2589 if (p == begin) 2590 return createError("Significand has no digits"); 2591 if (dot != end && p - begin == 1) 2592 return createError("Significand has no digits"); 2593 2594 /* Ignore the exponent if we are zero. */ 2595 if (p != firstSignificantDigit) { 2596 int expAdjustment; 2597 2598 /* Implicit hexadecimal point? */ 2599 if (dot == end) 2600 dot = p; 2601 2602 /* Calculate the exponent adjustment implicit in the number of 2603 significant digits. */ 2604 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2605 if (expAdjustment < 0) 2606 expAdjustment++; 2607 expAdjustment = expAdjustment * 4 - 1; 2608 2609 /* Adjust for writing the significand starting at the most 2610 significant nibble. */ 2611 expAdjustment += semantics->precision; 2612 expAdjustment -= partsCount * integerPartWidth; 2613 2614 /* Adjust for the given exponent. */ 2615 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 2616 if (!ExpOrErr) 2617 return ExpOrErr.takeError(); 2618 exponent = *ExpOrErr; 2619 } 2620 2621 return normalize(rounding_mode, lost_fraction); 2622 } 2623 2624 IEEEFloat::opStatus 2625 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2626 unsigned sigPartCount, int exp, 2627 roundingMode rounding_mode) { 2628 unsigned int parts, pow5PartCount; 2629 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2630 integerPart pow5Parts[maxPowerOfFiveParts]; 2631 bool isNearest; 2632 2633 isNearest = (rounding_mode == rmNearestTiesToEven || 2634 rounding_mode == rmNearestTiesToAway); 2635 2636 parts = partCountForBits(semantics->precision + 11); 2637 2638 /* Calculate pow(5, abs(exp)). */ 2639 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2640 2641 for (;; parts *= 2) { 2642 opStatus sigStatus, powStatus; 2643 unsigned int excessPrecision, truncatedBits; 2644 2645 calcSemantics.precision = parts * integerPartWidth - 1; 2646 excessPrecision = calcSemantics.precision - semantics->precision; 2647 truncatedBits = excessPrecision; 2648 2649 IEEEFloat decSig(calcSemantics, uninitialized); 2650 decSig.makeZero(sign); 2651 IEEEFloat pow5(calcSemantics); 2652 2653 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2654 rmNearestTiesToEven); 2655 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2656 rmNearestTiesToEven); 2657 /* Add exp, as 10^n = 5^n * 2^n. */ 2658 decSig.exponent += exp; 2659 2660 lostFraction calcLostFraction; 2661 integerPart HUerr, HUdistance; 2662 unsigned int powHUerr; 2663 2664 if (exp >= 0) { 2665 /* multiplySignificand leaves the precision-th bit set to 1. */ 2666 calcLostFraction = decSig.multiplySignificand(pow5); 2667 powHUerr = powStatus != opOK; 2668 } else { 2669 calcLostFraction = decSig.divideSignificand(pow5); 2670 /* Denormal numbers have less precision. */ 2671 if (decSig.exponent < semantics->minExponent) { 2672 excessPrecision += (semantics->minExponent - decSig.exponent); 2673 truncatedBits = excessPrecision; 2674 if (excessPrecision > calcSemantics.precision) 2675 excessPrecision = calcSemantics.precision; 2676 } 2677 /* Extra half-ulp lost in reciprocal of exponent. */ 2678 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2679 } 2680 2681 /* Both multiplySignificand and divideSignificand return the 2682 result with the integer bit set. */ 2683 assert(APInt::tcExtractBit 2684 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2685 2686 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2687 powHUerr); 2688 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2689 excessPrecision, isNearest); 2690 2691 /* Are we guaranteed to round correctly if we truncate? */ 2692 if (HUdistance >= HUerr) { 2693 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2694 calcSemantics.precision - excessPrecision, 2695 excessPrecision); 2696 /* Take the exponent of decSig. If we tcExtract-ed less bits 2697 above we must adjust our exponent to compensate for the 2698 implicit right shift. */ 2699 exponent = (decSig.exponent + semantics->precision 2700 - (calcSemantics.precision - excessPrecision)); 2701 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2702 decSig.partCount(), 2703 truncatedBits); 2704 return normalize(rounding_mode, calcLostFraction); 2705 } 2706 } 2707 } 2708 2709 Expected<IEEEFloat::opStatus> 2710 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2711 decimalInfo D; 2712 opStatus fs; 2713 2714 /* Scan the text. */ 2715 StringRef::iterator p = str.begin(); 2716 if (Error Err = interpretDecimal(p, str.end(), &D)) 2717 return std::move(Err); 2718 2719 /* Handle the quick cases. First the case of no significant digits, 2720 i.e. zero, and then exponents that are obviously too large or too 2721 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2722 definitely overflows if 2723 2724 (exp - 1) * L >= maxExponent 2725 2726 and definitely underflows to zero where 2727 2728 (exp + 1) * L <= minExponent - precision 2729 2730 With integer arithmetic the tightest bounds for L are 2731 2732 93/28 < L < 196/59 [ numerator <= 256 ] 2733 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2734 */ 2735 2736 // Test if we have a zero number allowing for strings with no null terminators 2737 // and zero decimals with non-zero exponents. 2738 // 2739 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2740 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2741 // be at most one dot. On the other hand, if we have a zero with a non-zero 2742 // exponent, then we know that D.firstSigDigit will be non-numeric. 2743 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2744 category = fcZero; 2745 fs = opOK; 2746 2747 /* Check whether the normalized exponent is high enough to overflow 2748 max during the log-rebasing in the max-exponent check below. */ 2749 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2750 fs = handleOverflow(rounding_mode); 2751 2752 /* If it wasn't, then it also wasn't high enough to overflow max 2753 during the log-rebasing in the min-exponent check. Check that it 2754 won't overflow min in either check, then perform the min-exponent 2755 check. */ 2756 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2757 (D.normalizedExponent + 1) * 28738 <= 2758 8651 * (semantics->minExponent - (int) semantics->precision)) { 2759 /* Underflow to zero and round. */ 2760 category = fcNormal; 2761 zeroSignificand(); 2762 fs = normalize(rounding_mode, lfLessThanHalf); 2763 2764 /* We can finally safely perform the max-exponent check. */ 2765 } else if ((D.normalizedExponent - 1) * 42039 2766 >= 12655 * semantics->maxExponent) { 2767 /* Overflow and round. */ 2768 fs = handleOverflow(rounding_mode); 2769 } else { 2770 integerPart *decSignificand; 2771 unsigned int partCount; 2772 2773 /* A tight upper bound on number of bits required to hold an 2774 N-digit decimal integer is N * 196 / 59. Allocate enough space 2775 to hold the full significand, and an extra part required by 2776 tcMultiplyPart. */ 2777 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2778 partCount = partCountForBits(1 + 196 * partCount / 59); 2779 decSignificand = new integerPart[partCount + 1]; 2780 partCount = 0; 2781 2782 /* Convert to binary efficiently - we do almost all multiplication 2783 in an integerPart. When this would overflow do we do a single 2784 bignum multiplication, and then revert again to multiplication 2785 in an integerPart. */ 2786 do { 2787 integerPart decValue, val, multiplier; 2788 2789 val = 0; 2790 multiplier = 1; 2791 2792 do { 2793 if (*p == '.') { 2794 p++; 2795 if (p == str.end()) { 2796 break; 2797 } 2798 } 2799 decValue = decDigitValue(*p++); 2800 if (decValue >= 10U) { 2801 delete[] decSignificand; 2802 return createError("Invalid character in significand"); 2803 } 2804 multiplier *= 10; 2805 val = val * 10 + decValue; 2806 /* The maximum number that can be multiplied by ten with any 2807 digit added without overflowing an integerPart. */ 2808 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2809 2810 /* Multiply out the current part. */ 2811 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2812 partCount, partCount + 1, false); 2813 2814 /* If we used another part (likely but not guaranteed), increase 2815 the count. */ 2816 if (decSignificand[partCount]) 2817 partCount++; 2818 } while (p <= D.lastSigDigit); 2819 2820 category = fcNormal; 2821 fs = roundSignificandWithExponent(decSignificand, partCount, 2822 D.exponent, rounding_mode); 2823 2824 delete [] decSignificand; 2825 } 2826 2827 return fs; 2828 } 2829 2830 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 2831 const size_t MIN_NAME_SIZE = 3; 2832 2833 if (str.size() < MIN_NAME_SIZE) 2834 return false; 2835 2836 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { 2837 makeInf(false); 2838 return true; 2839 } 2840 2841 bool IsNegative = str.front() == '-'; 2842 if (IsNegative) { 2843 str = str.drop_front(); 2844 if (str.size() < MIN_NAME_SIZE) 2845 return false; 2846 2847 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) { 2848 makeInf(true); 2849 return true; 2850 } 2851 } 2852 2853 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 2854 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 2855 if (IsSignaling) { 2856 str = str.drop_front(); 2857 if (str.size() < MIN_NAME_SIZE) 2858 return false; 2859 } 2860 2861 if (str.startswith("nan") || str.startswith("NaN")) { 2862 str = str.drop_front(3); 2863 2864 // A NaN without payload. 2865 if (str.empty()) { 2866 makeNaN(IsSignaling, IsNegative); 2867 return true; 2868 } 2869 2870 // Allow the payload to be inside parentheses. 2871 if (str.front() == '(') { 2872 // Parentheses should be balanced (and not empty). 2873 if (str.size() <= 2 || str.back() != ')') 2874 return false; 2875 2876 str = str.slice(1, str.size() - 1); 2877 } 2878 2879 // Determine the payload number's radix. 2880 unsigned Radix = 10; 2881 if (str[0] == '0') { 2882 if (str.size() > 1 && tolower(str[1]) == 'x') { 2883 str = str.drop_front(2); 2884 Radix = 16; 2885 } else 2886 Radix = 8; 2887 } 2888 2889 // Parse the payload and make the NaN. 2890 APInt Payload; 2891 if (!str.getAsInteger(Radix, Payload)) { 2892 makeNaN(IsSignaling, IsNegative, &Payload); 2893 return true; 2894 } 2895 } 2896 2897 return false; 2898 } 2899 2900 Expected<IEEEFloat::opStatus> 2901 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 2902 if (str.empty()) 2903 return createError("Invalid string length"); 2904 2905 // Handle special cases. 2906 if (convertFromStringSpecials(str)) 2907 return opOK; 2908 2909 /* Handle a leading minus sign. */ 2910 StringRef::iterator p = str.begin(); 2911 size_t slen = str.size(); 2912 sign = *p == '-' ? 1 : 0; 2913 if (*p == '-' || *p == '+') { 2914 p++; 2915 slen--; 2916 if (!slen) 2917 return createError("String has no digits"); 2918 } 2919 2920 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2921 if (slen == 2) 2922 return createError("Invalid string"); 2923 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2924 rounding_mode); 2925 } 2926 2927 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2928 } 2929 2930 /* Write out a hexadecimal representation of the floating point value 2931 to DST, which must be of sufficient size, in the C99 form 2932 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2933 excluding the terminating NUL. 2934 2935 If UPPERCASE, the output is in upper case, otherwise in lower case. 2936 2937 HEXDIGITS digits appear altogether, rounding the value if 2938 necessary. If HEXDIGITS is 0, the minimal precision to display the 2939 number precisely is used instead. If nothing would appear after 2940 the decimal point it is suppressed. 2941 2942 The decimal exponent is always printed and has at least one digit. 2943 Zero values display an exponent of zero. Infinities and NaNs 2944 appear as "infinity" or "nan" respectively. 2945 2946 The above rules are as specified by C99. There is ambiguity about 2947 what the leading hexadecimal digit should be. This implementation 2948 uses whatever is necessary so that the exponent is displayed as 2949 stored. This implies the exponent will fall within the IEEE format 2950 range, and the leading hexadecimal digit will be 0 (for denormals), 2951 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2952 any other digits zero). 2953 */ 2954 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 2955 bool upperCase, 2956 roundingMode rounding_mode) const { 2957 char *p; 2958 2959 p = dst; 2960 if (sign) 2961 *dst++ = '-'; 2962 2963 switch (category) { 2964 case fcInfinity: 2965 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2966 dst += sizeof infinityL - 1; 2967 break; 2968 2969 case fcNaN: 2970 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2971 dst += sizeof NaNU - 1; 2972 break; 2973 2974 case fcZero: 2975 *dst++ = '0'; 2976 *dst++ = upperCase ? 'X': 'x'; 2977 *dst++ = '0'; 2978 if (hexDigits > 1) { 2979 *dst++ = '.'; 2980 memset (dst, '0', hexDigits - 1); 2981 dst += hexDigits - 1; 2982 } 2983 *dst++ = upperCase ? 'P': 'p'; 2984 *dst++ = '0'; 2985 break; 2986 2987 case fcNormal: 2988 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2989 break; 2990 } 2991 2992 *dst = 0; 2993 2994 return static_cast<unsigned int>(dst - p); 2995 } 2996 2997 /* Does the hard work of outputting the correctly rounded hexadecimal 2998 form of a normal floating point number with the specified number of 2999 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3000 digits necessary to print the value precisely is output. */ 3001 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3002 bool upperCase, 3003 roundingMode rounding_mode) const { 3004 unsigned int count, valueBits, shift, partsCount, outputDigits; 3005 const char *hexDigitChars; 3006 const integerPart *significand; 3007 char *p; 3008 bool roundUp; 3009 3010 *dst++ = '0'; 3011 *dst++ = upperCase ? 'X': 'x'; 3012 3013 roundUp = false; 3014 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3015 3016 significand = significandParts(); 3017 partsCount = partCount(); 3018 3019 /* +3 because the first digit only uses the single integer bit, so 3020 we have 3 virtual zero most-significant-bits. */ 3021 valueBits = semantics->precision + 3; 3022 shift = integerPartWidth - valueBits % integerPartWidth; 3023 3024 /* The natural number of digits required ignoring trailing 3025 insignificant zeroes. */ 3026 outputDigits = (valueBits - significandLSB () + 3) / 4; 3027 3028 /* hexDigits of zero means use the required number for the 3029 precision. Otherwise, see if we are truncating. If we are, 3030 find out if we need to round away from zero. */ 3031 if (hexDigits) { 3032 if (hexDigits < outputDigits) { 3033 /* We are dropping non-zero bits, so need to check how to round. 3034 "bits" is the number of dropped bits. */ 3035 unsigned int bits; 3036 lostFraction fraction; 3037 3038 bits = valueBits - hexDigits * 4; 3039 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3040 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3041 } 3042 outputDigits = hexDigits; 3043 } 3044 3045 /* Write the digits consecutively, and start writing in the location 3046 of the hexadecimal point. We move the most significant digit 3047 left and add the hexadecimal point later. */ 3048 p = ++dst; 3049 3050 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3051 3052 while (outputDigits && count) { 3053 integerPart part; 3054 3055 /* Put the most significant integerPartWidth bits in "part". */ 3056 if (--count == partsCount) 3057 part = 0; /* An imaginary higher zero part. */ 3058 else 3059 part = significand[count] << shift; 3060 3061 if (count && shift) 3062 part |= significand[count - 1] >> (integerPartWidth - shift); 3063 3064 /* Convert as much of "part" to hexdigits as we can. */ 3065 unsigned int curDigits = integerPartWidth / 4; 3066 3067 if (curDigits > outputDigits) 3068 curDigits = outputDigits; 3069 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3070 outputDigits -= curDigits; 3071 } 3072 3073 if (roundUp) { 3074 char *q = dst; 3075 3076 /* Note that hexDigitChars has a trailing '0'. */ 3077 do { 3078 q--; 3079 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3080 } while (*q == '0'); 3081 assert(q >= p); 3082 } else { 3083 /* Add trailing zeroes. */ 3084 memset (dst, '0', outputDigits); 3085 dst += outputDigits; 3086 } 3087 3088 /* Move the most significant digit to before the point, and if there 3089 is something after the decimal point add it. This must come 3090 after rounding above. */ 3091 p[-1] = p[0]; 3092 if (dst -1 == p) 3093 dst--; 3094 else 3095 p[0] = '.'; 3096 3097 /* Finally output the exponent. */ 3098 *dst++ = upperCase ? 'P': 'p'; 3099 3100 return writeSignedDecimal (dst, exponent); 3101 } 3102 3103 hash_code hash_value(const IEEEFloat &Arg) { 3104 if (!Arg.isFiniteNonZero()) 3105 return hash_combine((uint8_t)Arg.category, 3106 // NaN has no sign, fix it at zero. 3107 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3108 Arg.semantics->precision); 3109 3110 // Normal floats need their exponent and significand hashed. 3111 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3112 Arg.semantics->precision, Arg.exponent, 3113 hash_combine_range( 3114 Arg.significandParts(), 3115 Arg.significandParts() + Arg.partCount())); 3116 } 3117 3118 // Conversion from APFloat to/from host float/double. It may eventually be 3119 // possible to eliminate these and have everybody deal with APFloats, but that 3120 // will take a while. This approach will not easily extend to long double. 3121 // Current implementation requires integerPartWidth==64, which is correct at 3122 // the moment but could be made more general. 3123 3124 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3125 // the actual IEEE respresentations. We compensate for that here. 3126 3127 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3128 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3129 assert(partCount()==2); 3130 3131 uint64_t myexponent, mysignificand; 3132 3133 if (isFiniteNonZero()) { 3134 myexponent = exponent+16383; //bias 3135 mysignificand = significandParts()[0]; 3136 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3137 myexponent = 0; // denormal 3138 } else if (category==fcZero) { 3139 myexponent = 0; 3140 mysignificand = 0; 3141 } else if (category==fcInfinity) { 3142 myexponent = 0x7fff; 3143 mysignificand = 0x8000000000000000ULL; 3144 } else { 3145 assert(category == fcNaN && "Unknown category"); 3146 myexponent = 0x7fff; 3147 mysignificand = significandParts()[0]; 3148 } 3149 3150 uint64_t words[2]; 3151 words[0] = mysignificand; 3152 words[1] = ((uint64_t)(sign & 1) << 15) | 3153 (myexponent & 0x7fffLL); 3154 return APInt(80, words); 3155 } 3156 3157 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3158 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3159 assert(partCount()==2); 3160 3161 uint64_t words[2]; 3162 opStatus fs; 3163 bool losesInfo; 3164 3165 // Convert number to double. To avoid spurious underflows, we re- 3166 // normalize against the "double" minExponent first, and only *then* 3167 // truncate the mantissa. The result of that second conversion 3168 // may be inexact, but should never underflow. 3169 // Declare fltSemantics before APFloat that uses it (and 3170 // saves pointer to it) to ensure correct destruction order. 3171 fltSemantics extendedSemantics = *semantics; 3172 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3173 IEEEFloat extended(*this); 3174 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3175 assert(fs == opOK && !losesInfo); 3176 (void)fs; 3177 3178 IEEEFloat u(extended); 3179 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3180 assert(fs == opOK || fs == opInexact); 3181 (void)fs; 3182 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3183 3184 // If conversion was exact or resulted in a special case, we're done; 3185 // just set the second double to zero. Otherwise, re-convert back to 3186 // the extended format and compute the difference. This now should 3187 // convert exactly to double. 3188 if (u.isFiniteNonZero() && losesInfo) { 3189 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3190 assert(fs == opOK && !losesInfo); 3191 (void)fs; 3192 3193 IEEEFloat v(extended); 3194 v.subtract(u, rmNearestTiesToEven); 3195 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3196 assert(fs == opOK && !losesInfo); 3197 (void)fs; 3198 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3199 } else { 3200 words[1] = 0; 3201 } 3202 3203 return APInt(128, words); 3204 } 3205 3206 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3207 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); 3208 assert(partCount()==2); 3209 3210 uint64_t myexponent, mysignificand, mysignificand2; 3211 3212 if (isFiniteNonZero()) { 3213 myexponent = exponent+16383; //bias 3214 mysignificand = significandParts()[0]; 3215 mysignificand2 = significandParts()[1]; 3216 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 3217 myexponent = 0; // denormal 3218 } else if (category==fcZero) { 3219 myexponent = 0; 3220 mysignificand = mysignificand2 = 0; 3221 } else if (category==fcInfinity) { 3222 myexponent = 0x7fff; 3223 mysignificand = mysignificand2 = 0; 3224 } else { 3225 assert(category == fcNaN && "Unknown category!"); 3226 myexponent = 0x7fff; 3227 mysignificand = significandParts()[0]; 3228 mysignificand2 = significandParts()[1]; 3229 } 3230 3231 uint64_t words[2]; 3232 words[0] = mysignificand; 3233 words[1] = ((uint64_t)(sign & 1) << 63) | 3234 ((myexponent & 0x7fff) << 48) | 3235 (mysignificand2 & 0xffffffffffffLL); 3236 3237 return APInt(128, words); 3238 } 3239 3240 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3241 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); 3242 assert(partCount()==1); 3243 3244 uint64_t myexponent, mysignificand; 3245 3246 if (isFiniteNonZero()) { 3247 myexponent = exponent+1023; //bias 3248 mysignificand = *significandParts(); 3249 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3250 myexponent = 0; // denormal 3251 } else if (category==fcZero) { 3252 myexponent = 0; 3253 mysignificand = 0; 3254 } else if (category==fcInfinity) { 3255 myexponent = 0x7ff; 3256 mysignificand = 0; 3257 } else { 3258 assert(category == fcNaN && "Unknown category!"); 3259 myexponent = 0x7ff; 3260 mysignificand = *significandParts(); 3261 } 3262 3263 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3264 ((myexponent & 0x7ff) << 52) | 3265 (mysignificand & 0xfffffffffffffLL)))); 3266 } 3267 3268 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3269 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); 3270 assert(partCount()==1); 3271 3272 uint32_t myexponent, mysignificand; 3273 3274 if (isFiniteNonZero()) { 3275 myexponent = exponent+127; //bias 3276 mysignificand = (uint32_t)*significandParts(); 3277 if (myexponent == 1 && !(mysignificand & 0x800000)) 3278 myexponent = 0; // denormal 3279 } else if (category==fcZero) { 3280 myexponent = 0; 3281 mysignificand = 0; 3282 } else if (category==fcInfinity) { 3283 myexponent = 0xff; 3284 mysignificand = 0; 3285 } else { 3286 assert(category == fcNaN && "Unknown category!"); 3287 myexponent = 0xff; 3288 mysignificand = (uint32_t)*significandParts(); 3289 } 3290 3291 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3292 (mysignificand & 0x7fffff))); 3293 } 3294 3295 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3296 assert(semantics == (const llvm::fltSemantics *)&semBFloat); 3297 assert(partCount() == 1); 3298 3299 uint32_t myexponent, mysignificand; 3300 3301 if (isFiniteNonZero()) { 3302 myexponent = exponent + 127; // bias 3303 mysignificand = (uint32_t)*significandParts(); 3304 if (myexponent == 1 && !(mysignificand & 0x80)) 3305 myexponent = 0; // denormal 3306 } else if (category == fcZero) { 3307 myexponent = 0; 3308 mysignificand = 0; 3309 } else if (category == fcInfinity) { 3310 myexponent = 0xff; 3311 mysignificand = 0; 3312 } else { 3313 assert(category == fcNaN && "Unknown category!"); 3314 myexponent = 0xff; 3315 mysignificand = (uint32_t)*significandParts(); 3316 } 3317 3318 return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) | 3319 (mysignificand & 0x7f))); 3320 } 3321 3322 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3323 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); 3324 assert(partCount()==1); 3325 3326 uint32_t myexponent, mysignificand; 3327 3328 if (isFiniteNonZero()) { 3329 myexponent = exponent+15; //bias 3330 mysignificand = (uint32_t)*significandParts(); 3331 if (myexponent == 1 && !(mysignificand & 0x400)) 3332 myexponent = 0; // denormal 3333 } else if (category==fcZero) { 3334 myexponent = 0; 3335 mysignificand = 0; 3336 } else if (category==fcInfinity) { 3337 myexponent = 0x1f; 3338 mysignificand = 0; 3339 } else { 3340 assert(category == fcNaN && "Unknown category!"); 3341 myexponent = 0x1f; 3342 mysignificand = (uint32_t)*significandParts(); 3343 } 3344 3345 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3346 (mysignificand & 0x3ff))); 3347 } 3348 3349 // This function creates an APInt that is just a bit map of the floating 3350 // point constant as it would appear in memory. It is not a conversion, 3351 // and treating the result as a normal integer is unlikely to be useful. 3352 3353 APInt IEEEFloat::bitcastToAPInt() const { 3354 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3355 return convertHalfAPFloatToAPInt(); 3356 3357 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3358 return convertBFloatAPFloatToAPInt(); 3359 3360 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3361 return convertFloatAPFloatToAPInt(); 3362 3363 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3364 return convertDoubleAPFloatToAPInt(); 3365 3366 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3367 return convertQuadrupleAPFloatToAPInt(); 3368 3369 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3370 return convertPPCDoubleDoubleAPFloatToAPInt(); 3371 3372 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3373 "unknown format!"); 3374 return convertF80LongDoubleAPFloatToAPInt(); 3375 } 3376 3377 float IEEEFloat::convertToFloat() const { 3378 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3379 "Float semantics are not IEEEsingle"); 3380 APInt api = bitcastToAPInt(); 3381 return api.bitsToFloat(); 3382 } 3383 3384 double IEEEFloat::convertToDouble() const { 3385 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3386 "Float semantics are not IEEEdouble"); 3387 APInt api = bitcastToAPInt(); 3388 return api.bitsToDouble(); 3389 } 3390 3391 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3392 /// does not support these bit patterns: 3393 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3394 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3395 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3396 /// exponent = 0, integer bit 1 ("pseudodenormal") 3397 /// At the moment, the first three are treated as NaNs, the last one as Normal. 3398 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3399 uint64_t i1 = api.getRawData()[0]; 3400 uint64_t i2 = api.getRawData()[1]; 3401 uint64_t myexponent = (i2 & 0x7fff); 3402 uint64_t mysignificand = i1; 3403 uint8_t myintegerbit = mysignificand >> 63; 3404 3405 initialize(&semX87DoubleExtended); 3406 assert(partCount()==2); 3407 3408 sign = static_cast<unsigned int>(i2>>15); 3409 if (myexponent == 0 && mysignificand == 0) { 3410 makeZero(sign); 3411 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3412 makeInf(sign); 3413 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3414 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3415 category = fcNaN; 3416 exponent = exponentNaN(); 3417 significandParts()[0] = mysignificand; 3418 significandParts()[1] = 0; 3419 } else { 3420 category = fcNormal; 3421 exponent = myexponent - 16383; 3422 significandParts()[0] = mysignificand; 3423 significandParts()[1] = 0; 3424 if (myexponent==0) // denormal 3425 exponent = -16382; 3426 } 3427 } 3428 3429 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3430 uint64_t i1 = api.getRawData()[0]; 3431 uint64_t i2 = api.getRawData()[1]; 3432 opStatus fs; 3433 bool losesInfo; 3434 3435 // Get the first double and convert to our format. 3436 initFromDoubleAPInt(APInt(64, i1)); 3437 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3438 assert(fs == opOK && !losesInfo); 3439 (void)fs; 3440 3441 // Unless we have a special case, add in second double. 3442 if (isFiniteNonZero()) { 3443 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3444 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3445 assert(fs == opOK && !losesInfo); 3446 (void)fs; 3447 3448 add(v, rmNearestTiesToEven); 3449 } 3450 } 3451 3452 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3453 uint64_t i1 = api.getRawData()[0]; 3454 uint64_t i2 = api.getRawData()[1]; 3455 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3456 uint64_t mysignificand = i1; 3457 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3458 3459 initialize(&semIEEEquad); 3460 assert(partCount()==2); 3461 3462 sign = static_cast<unsigned int>(i2>>63); 3463 if (myexponent==0 && 3464 (mysignificand==0 && mysignificand2==0)) { 3465 makeZero(sign); 3466 } else if (myexponent==0x7fff && 3467 (mysignificand==0 && mysignificand2==0)) { 3468 makeInf(sign); 3469 } else if (myexponent==0x7fff && 3470 (mysignificand!=0 || mysignificand2 !=0)) { 3471 category = fcNaN; 3472 exponent = exponentNaN(); 3473 significandParts()[0] = mysignificand; 3474 significandParts()[1] = mysignificand2; 3475 } else { 3476 category = fcNormal; 3477 exponent = myexponent - 16383; 3478 significandParts()[0] = mysignificand; 3479 significandParts()[1] = mysignificand2; 3480 if (myexponent==0) // denormal 3481 exponent = -16382; 3482 else 3483 significandParts()[1] |= 0x1000000000000LL; // integer bit 3484 } 3485 } 3486 3487 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3488 uint64_t i = *api.getRawData(); 3489 uint64_t myexponent = (i >> 52) & 0x7ff; 3490 uint64_t mysignificand = i & 0xfffffffffffffLL; 3491 3492 initialize(&semIEEEdouble); 3493 assert(partCount()==1); 3494 3495 sign = static_cast<unsigned int>(i>>63); 3496 if (myexponent==0 && mysignificand==0) { 3497 makeZero(sign); 3498 } else if (myexponent==0x7ff && mysignificand==0) { 3499 makeInf(sign); 3500 } else if (myexponent==0x7ff && mysignificand!=0) { 3501 category = fcNaN; 3502 exponent = exponentNaN(); 3503 *significandParts() = mysignificand; 3504 } else { 3505 category = fcNormal; 3506 exponent = myexponent - 1023; 3507 *significandParts() = mysignificand; 3508 if (myexponent==0) // denormal 3509 exponent = -1022; 3510 else 3511 *significandParts() |= 0x10000000000000LL; // integer bit 3512 } 3513 } 3514 3515 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3516 uint32_t i = (uint32_t)*api.getRawData(); 3517 uint32_t myexponent = (i >> 23) & 0xff; 3518 uint32_t mysignificand = i & 0x7fffff; 3519 3520 initialize(&semIEEEsingle); 3521 assert(partCount()==1); 3522 3523 sign = i >> 31; 3524 if (myexponent==0 && mysignificand==0) { 3525 makeZero(sign); 3526 } else if (myexponent==0xff && mysignificand==0) { 3527 makeInf(sign); 3528 } else if (myexponent==0xff && mysignificand!=0) { 3529 category = fcNaN; 3530 exponent = exponentNaN(); 3531 *significandParts() = mysignificand; 3532 } else { 3533 category = fcNormal; 3534 exponent = myexponent - 127; //bias 3535 *significandParts() = mysignificand; 3536 if (myexponent==0) // denormal 3537 exponent = -126; 3538 else 3539 *significandParts() |= 0x800000; // integer bit 3540 } 3541 } 3542 3543 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 3544 uint32_t i = (uint32_t)*api.getRawData(); 3545 uint32_t myexponent = (i >> 7) & 0xff; 3546 uint32_t mysignificand = i & 0x7f; 3547 3548 initialize(&semBFloat); 3549 assert(partCount() == 1); 3550 3551 sign = i >> 15; 3552 if (myexponent == 0 && mysignificand == 0) { 3553 makeZero(sign); 3554 } else if (myexponent == 0xff && mysignificand == 0) { 3555 makeInf(sign); 3556 } else if (myexponent == 0xff && mysignificand != 0) { 3557 category = fcNaN; 3558 exponent = exponentNaN(); 3559 *significandParts() = mysignificand; 3560 } else { 3561 category = fcNormal; 3562 exponent = myexponent - 127; // bias 3563 *significandParts() = mysignificand; 3564 if (myexponent == 0) // denormal 3565 exponent = -126; 3566 else 3567 *significandParts() |= 0x80; // integer bit 3568 } 3569 } 3570 3571 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3572 uint32_t i = (uint32_t)*api.getRawData(); 3573 uint32_t myexponent = (i >> 10) & 0x1f; 3574 uint32_t mysignificand = i & 0x3ff; 3575 3576 initialize(&semIEEEhalf); 3577 assert(partCount()==1); 3578 3579 sign = i >> 15; 3580 if (myexponent==0 && mysignificand==0) { 3581 makeZero(sign); 3582 } else if (myexponent==0x1f && mysignificand==0) { 3583 makeInf(sign); 3584 } else if (myexponent==0x1f && mysignificand!=0) { 3585 category = fcNaN; 3586 exponent = exponentNaN(); 3587 *significandParts() = mysignificand; 3588 } else { 3589 category = fcNormal; 3590 exponent = myexponent - 15; //bias 3591 *significandParts() = mysignificand; 3592 if (myexponent==0) // denormal 3593 exponent = -14; 3594 else 3595 *significandParts() |= 0x400; // integer bit 3596 } 3597 } 3598 3599 /// Treat api as containing the bits of a floating point number. Currently 3600 /// we infer the floating point type from the size of the APInt. The 3601 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3602 /// when the size is anything else). 3603 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3604 assert(api.getBitWidth() == Sem->sizeInBits); 3605 if (Sem == &semIEEEhalf) 3606 return initFromHalfAPInt(api); 3607 if (Sem == &semBFloat) 3608 return initFromBFloatAPInt(api); 3609 if (Sem == &semIEEEsingle) 3610 return initFromFloatAPInt(api); 3611 if (Sem == &semIEEEdouble) 3612 return initFromDoubleAPInt(api); 3613 if (Sem == &semX87DoubleExtended) 3614 return initFromF80LongDoubleAPInt(api); 3615 if (Sem == &semIEEEquad) 3616 return initFromQuadrupleAPInt(api); 3617 if (Sem == &semPPCDoubleDoubleLegacy) 3618 return initFromPPCDoubleDoubleAPInt(api); 3619 3620 llvm_unreachable(nullptr); 3621 } 3622 3623 /// Make this number the largest magnitude normal number in the given 3624 /// semantics. 3625 void IEEEFloat::makeLargest(bool Negative) { 3626 // We want (in interchange format): 3627 // sign = {Negative} 3628 // exponent = 1..10 3629 // significand = 1..1 3630 category = fcNormal; 3631 sign = Negative; 3632 exponent = semantics->maxExponent; 3633 3634 // Use memset to set all but the highest integerPart to all ones. 3635 integerPart *significand = significandParts(); 3636 unsigned PartCount = partCount(); 3637 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3638 3639 // Set the high integerPart especially setting all unused top bits for 3640 // internal consistency. 3641 const unsigned NumUnusedHighBits = 3642 PartCount*integerPartWidth - semantics->precision; 3643 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3644 ? (~integerPart(0) >> NumUnusedHighBits) 3645 : 0; 3646 } 3647 3648 /// Make this number the smallest magnitude denormal number in the given 3649 /// semantics. 3650 void IEEEFloat::makeSmallest(bool Negative) { 3651 // We want (in interchange format): 3652 // sign = {Negative} 3653 // exponent = 0..0 3654 // significand = 0..01 3655 category = fcNormal; 3656 sign = Negative; 3657 exponent = semantics->minExponent; 3658 APInt::tcSet(significandParts(), 1, partCount()); 3659 } 3660 3661 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3662 // We want (in interchange format): 3663 // sign = {Negative} 3664 // exponent = 0..0 3665 // significand = 10..0 3666 3667 category = fcNormal; 3668 zeroSignificand(); 3669 sign = Negative; 3670 exponent = semantics->minExponent; 3671 significandParts()[partCountForBits(semantics->precision) - 1] |= 3672 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth)); 3673 } 3674 3675 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3676 initFromAPInt(&Sem, API); 3677 } 3678 3679 IEEEFloat::IEEEFloat(float f) { 3680 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 3681 } 3682 3683 IEEEFloat::IEEEFloat(double d) { 3684 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 3685 } 3686 3687 namespace { 3688 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3689 Buffer.append(Str.begin(), Str.end()); 3690 } 3691 3692 /// Removes data from the given significand until it is no more 3693 /// precise than is required for the desired precision. 3694 void AdjustToPrecision(APInt &significand, 3695 int &exp, unsigned FormatPrecision) { 3696 unsigned bits = significand.getActiveBits(); 3697 3698 // 196/59 is a very slight overestimate of lg_2(10). 3699 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3700 3701 if (bits <= bitsRequired) return; 3702 3703 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3704 if (!tensRemovable) return; 3705 3706 exp += tensRemovable; 3707 3708 APInt divisor(significand.getBitWidth(), 1); 3709 APInt powten(significand.getBitWidth(), 10); 3710 while (true) { 3711 if (tensRemovable & 1) 3712 divisor *= powten; 3713 tensRemovable >>= 1; 3714 if (!tensRemovable) break; 3715 powten *= powten; 3716 } 3717 3718 significand = significand.udiv(divisor); 3719 3720 // Truncate the significand down to its active bit count. 3721 significand = significand.trunc(significand.getActiveBits()); 3722 } 3723 3724 3725 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3726 int &exp, unsigned FormatPrecision) { 3727 unsigned N = buffer.size(); 3728 if (N <= FormatPrecision) return; 3729 3730 // The most significant figures are the last ones in the buffer. 3731 unsigned FirstSignificant = N - FormatPrecision; 3732 3733 // Round. 3734 // FIXME: this probably shouldn't use 'round half up'. 3735 3736 // Rounding down is just a truncation, except we also want to drop 3737 // trailing zeros from the new result. 3738 if (buffer[FirstSignificant - 1] < '5') { 3739 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3740 FirstSignificant++; 3741 3742 exp += FirstSignificant; 3743 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3744 return; 3745 } 3746 3747 // Rounding up requires a decimal add-with-carry. If we continue 3748 // the carry, the newly-introduced zeros will just be truncated. 3749 for (unsigned I = FirstSignificant; I != N; ++I) { 3750 if (buffer[I] == '9') { 3751 FirstSignificant++; 3752 } else { 3753 buffer[I]++; 3754 break; 3755 } 3756 } 3757 3758 // If we carried through, we have exactly one digit of precision. 3759 if (FirstSignificant == N) { 3760 exp += FirstSignificant; 3761 buffer.clear(); 3762 buffer.push_back('1'); 3763 return; 3764 } 3765 3766 exp += FirstSignificant; 3767 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3768 } 3769 } // namespace 3770 3771 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 3772 unsigned FormatMaxPadding, bool TruncateZero) const { 3773 switch (category) { 3774 case fcInfinity: 3775 if (isNegative()) 3776 return append(Str, "-Inf"); 3777 else 3778 return append(Str, "+Inf"); 3779 3780 case fcNaN: return append(Str, "NaN"); 3781 3782 case fcZero: 3783 if (isNegative()) 3784 Str.push_back('-'); 3785 3786 if (!FormatMaxPadding) { 3787 if (TruncateZero) 3788 append(Str, "0.0E+0"); 3789 else { 3790 append(Str, "0.0"); 3791 if (FormatPrecision > 1) 3792 Str.append(FormatPrecision - 1, '0'); 3793 append(Str, "e+00"); 3794 } 3795 } else 3796 Str.push_back('0'); 3797 return; 3798 3799 case fcNormal: 3800 break; 3801 } 3802 3803 if (isNegative()) 3804 Str.push_back('-'); 3805 3806 // Decompose the number into an APInt and an exponent. 3807 int exp = exponent - ((int) semantics->precision - 1); 3808 APInt significand(semantics->precision, 3809 makeArrayRef(significandParts(), 3810 partCountForBits(semantics->precision))); 3811 3812 // Set FormatPrecision if zero. We want to do this before we 3813 // truncate trailing zeros, as those are part of the precision. 3814 if (!FormatPrecision) { 3815 // We use enough digits so the number can be round-tripped back to an 3816 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3817 // Accurately" by Steele and White. 3818 // FIXME: Using a formula based purely on the precision is conservative; 3819 // we can print fewer digits depending on the actual value being printed. 3820 3821 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3822 FormatPrecision = 2 + semantics->precision * 59 / 196; 3823 } 3824 3825 // Ignore trailing binary zeros. 3826 int trailingZeros = significand.countTrailingZeros(); 3827 exp += trailingZeros; 3828 significand.lshrInPlace(trailingZeros); 3829 3830 // Change the exponent from 2^e to 10^e. 3831 if (exp == 0) { 3832 // Nothing to do. 3833 } else if (exp > 0) { 3834 // Just shift left. 3835 significand = significand.zext(semantics->precision + exp); 3836 significand <<= exp; 3837 exp = 0; 3838 } else { /* exp < 0 */ 3839 int texp = -exp; 3840 3841 // We transform this using the identity: 3842 // (N)(2^-e) == (N)(5^e)(10^-e) 3843 // This means we have to multiply N (the significand) by 5^e. 3844 // To avoid overflow, we have to operate on numbers large 3845 // enough to store N * 5^e: 3846 // log2(N * 5^e) == log2(N) + e * log2(5) 3847 // <= semantics->precision + e * 137 / 59 3848 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3849 3850 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3851 3852 // Multiply significand by 5^e. 3853 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3854 significand = significand.zext(precision); 3855 APInt five_to_the_i(precision, 5); 3856 while (true) { 3857 if (texp & 1) significand *= five_to_the_i; 3858 3859 texp >>= 1; 3860 if (!texp) break; 3861 five_to_the_i *= five_to_the_i; 3862 } 3863 } 3864 3865 AdjustToPrecision(significand, exp, FormatPrecision); 3866 3867 SmallVector<char, 256> buffer; 3868 3869 // Fill the buffer. 3870 unsigned precision = significand.getBitWidth(); 3871 APInt ten(precision, 10); 3872 APInt digit(precision, 0); 3873 3874 bool inTrail = true; 3875 while (significand != 0) { 3876 // digit <- significand % 10 3877 // significand <- significand / 10 3878 APInt::udivrem(significand, ten, significand, digit); 3879 3880 unsigned d = digit.getZExtValue(); 3881 3882 // Drop trailing zeros. 3883 if (inTrail && !d) exp++; 3884 else { 3885 buffer.push_back((char) ('0' + d)); 3886 inTrail = false; 3887 } 3888 } 3889 3890 assert(!buffer.empty() && "no characters in buffer!"); 3891 3892 // Drop down to FormatPrecision. 3893 // TODO: don't do more precise calculations above than are required. 3894 AdjustToPrecision(buffer, exp, FormatPrecision); 3895 3896 unsigned NDigits = buffer.size(); 3897 3898 // Check whether we should use scientific notation. 3899 bool FormatScientific; 3900 if (!FormatMaxPadding) 3901 FormatScientific = true; 3902 else { 3903 if (exp >= 0) { 3904 // 765e3 --> 765000 3905 // ^^^ 3906 // But we shouldn't make the number look more precise than it is. 3907 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3908 NDigits + (unsigned) exp > FormatPrecision); 3909 } else { 3910 // Power of the most significant digit. 3911 int MSD = exp + (int) (NDigits - 1); 3912 if (MSD >= 0) { 3913 // 765e-2 == 7.65 3914 FormatScientific = false; 3915 } else { 3916 // 765e-5 == 0.00765 3917 // ^ ^^ 3918 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3919 } 3920 } 3921 } 3922 3923 // Scientific formatting is pretty straightforward. 3924 if (FormatScientific) { 3925 exp += (NDigits - 1); 3926 3927 Str.push_back(buffer[NDigits-1]); 3928 Str.push_back('.'); 3929 if (NDigits == 1 && TruncateZero) 3930 Str.push_back('0'); 3931 else 3932 for (unsigned I = 1; I != NDigits; ++I) 3933 Str.push_back(buffer[NDigits-1-I]); 3934 // Fill with zeros up to FormatPrecision. 3935 if (!TruncateZero && FormatPrecision > NDigits - 1) 3936 Str.append(FormatPrecision - NDigits + 1, '0'); 3937 // For !TruncateZero we use lower 'e'. 3938 Str.push_back(TruncateZero ? 'E' : 'e'); 3939 3940 Str.push_back(exp >= 0 ? '+' : '-'); 3941 if (exp < 0) exp = -exp; 3942 SmallVector<char, 6> expbuf; 3943 do { 3944 expbuf.push_back((char) ('0' + (exp % 10))); 3945 exp /= 10; 3946 } while (exp); 3947 // Exponent always at least two digits if we do not truncate zeros. 3948 if (!TruncateZero && expbuf.size() < 2) 3949 expbuf.push_back('0'); 3950 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3951 Str.push_back(expbuf[E-1-I]); 3952 return; 3953 } 3954 3955 // Non-scientific, positive exponents. 3956 if (exp >= 0) { 3957 for (unsigned I = 0; I != NDigits; ++I) 3958 Str.push_back(buffer[NDigits-1-I]); 3959 for (unsigned I = 0; I != (unsigned) exp; ++I) 3960 Str.push_back('0'); 3961 return; 3962 } 3963 3964 // Non-scientific, negative exponents. 3965 3966 // The number of digits to the left of the decimal point. 3967 int NWholeDigits = exp + (int) NDigits; 3968 3969 unsigned I = 0; 3970 if (NWholeDigits > 0) { 3971 for (; I != (unsigned) NWholeDigits; ++I) 3972 Str.push_back(buffer[NDigits-I-1]); 3973 Str.push_back('.'); 3974 } else { 3975 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3976 3977 Str.push_back('0'); 3978 Str.push_back('.'); 3979 for (unsigned Z = 1; Z != NZeros; ++Z) 3980 Str.push_back('0'); 3981 } 3982 3983 for (; I != NDigits; ++I) 3984 Str.push_back(buffer[NDigits-I-1]); 3985 } 3986 3987 bool IEEEFloat::getExactInverse(APFloat *inv) const { 3988 // Special floats and denormals have no exact inverse. 3989 if (!isFiniteNonZero()) 3990 return false; 3991 3992 // Check that the number is a power of two by making sure that only the 3993 // integer bit is set in the significand. 3994 if (significandLSB() != semantics->precision - 1) 3995 return false; 3996 3997 // Get the inverse. 3998 IEEEFloat reciprocal(*semantics, 1ULL); 3999 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 4000 return false; 4001 4002 // Avoid multiplication with a denormal, it is not safe on all platforms and 4003 // may be slower than a normal division. 4004 if (reciprocal.isDenormal()) 4005 return false; 4006 4007 assert(reciprocal.isFiniteNonZero() && 4008 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 4009 4010 if (inv) 4011 *inv = APFloat(reciprocal, *semantics); 4012 4013 return true; 4014 } 4015 4016 bool IEEEFloat::isSignaling() const { 4017 if (!isNaN()) 4018 return false; 4019 4020 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 4021 // first bit of the trailing significand being 0. 4022 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 4023 } 4024 4025 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 4026 /// 4027 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 4028 /// appropriate sign switching before/after the computation. 4029 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 4030 // If we are performing nextDown, swap sign so we have -x. 4031 if (nextDown) 4032 changeSign(); 4033 4034 // Compute nextUp(x) 4035 opStatus result = opOK; 4036 4037 // Handle each float category separately. 4038 switch (category) { 4039 case fcInfinity: 4040 // nextUp(+inf) = +inf 4041 if (!isNegative()) 4042 break; 4043 // nextUp(-inf) = -getLargest() 4044 makeLargest(true); 4045 break; 4046 case fcNaN: 4047 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 4048 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 4049 // change the payload. 4050 if (isSignaling()) { 4051 result = opInvalidOp; 4052 // For consistency, propagate the sign of the sNaN to the qNaN. 4053 makeNaN(false, isNegative(), nullptr); 4054 } 4055 break; 4056 case fcZero: 4057 // nextUp(pm 0) = +getSmallest() 4058 makeSmallest(false); 4059 break; 4060 case fcNormal: 4061 // nextUp(-getSmallest()) = -0 4062 if (isSmallest() && isNegative()) { 4063 APInt::tcSet(significandParts(), 0, partCount()); 4064 category = fcZero; 4065 exponent = 0; 4066 break; 4067 } 4068 4069 // nextUp(getLargest()) == INFINITY 4070 if (isLargest() && !isNegative()) { 4071 APInt::tcSet(significandParts(), 0, partCount()); 4072 category = fcInfinity; 4073 exponent = semantics->maxExponent + 1; 4074 break; 4075 } 4076 4077 // nextUp(normal) == normal + inc. 4078 if (isNegative()) { 4079 // If we are negative, we need to decrement the significand. 4080 4081 // We only cross a binade boundary that requires adjusting the exponent 4082 // if: 4083 // 1. exponent != semantics->minExponent. This implies we are not in the 4084 // smallest binade or are dealing with denormals. 4085 // 2. Our significand excluding the integral bit is all zeros. 4086 bool WillCrossBinadeBoundary = 4087 exponent != semantics->minExponent && isSignificandAllZeros(); 4088 4089 // Decrement the significand. 4090 // 4091 // We always do this since: 4092 // 1. If we are dealing with a non-binade decrement, by definition we 4093 // just decrement the significand. 4094 // 2. If we are dealing with a normal -> normal binade decrement, since 4095 // we have an explicit integral bit the fact that all bits but the 4096 // integral bit are zero implies that subtracting one will yield a 4097 // significand with 0 integral bit and 1 in all other spots. Thus we 4098 // must just adjust the exponent and set the integral bit to 1. 4099 // 3. If we are dealing with a normal -> denormal binade decrement, 4100 // since we set the integral bit to 0 when we represent denormals, we 4101 // just decrement the significand. 4102 integerPart *Parts = significandParts(); 4103 APInt::tcDecrement(Parts, partCount()); 4104 4105 if (WillCrossBinadeBoundary) { 4106 // Our result is a normal number. Do the following: 4107 // 1. Set the integral bit to 1. 4108 // 2. Decrement the exponent. 4109 APInt::tcSetBit(Parts, semantics->precision - 1); 4110 exponent--; 4111 } 4112 } else { 4113 // If we are positive, we need to increment the significand. 4114 4115 // We only cross a binade boundary that requires adjusting the exponent if 4116 // the input is not a denormal and all of said input's significand bits 4117 // are set. If all of said conditions are true: clear the significand, set 4118 // the integral bit to 1, and increment the exponent. If we have a 4119 // denormal always increment since moving denormals and the numbers in the 4120 // smallest normal binade have the same exponent in our representation. 4121 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 4122 4123 if (WillCrossBinadeBoundary) { 4124 integerPart *Parts = significandParts(); 4125 APInt::tcSet(Parts, 0, partCount()); 4126 APInt::tcSetBit(Parts, semantics->precision - 1); 4127 assert(exponent != semantics->maxExponent && 4128 "We can not increment an exponent beyond the maxExponent allowed" 4129 " by the given floating point semantics."); 4130 exponent++; 4131 } else { 4132 incrementSignificand(); 4133 } 4134 } 4135 break; 4136 } 4137 4138 // If we are performing nextDown, swap sign so we have -nextUp(-x) 4139 if (nextDown) 4140 changeSign(); 4141 4142 return result; 4143 } 4144 4145 APFloatBase::ExponentType IEEEFloat::exponentNaN() const { 4146 return semantics->maxExponent + 1; 4147 } 4148 4149 APFloatBase::ExponentType IEEEFloat::exponentInf() const { 4150 return semantics->maxExponent + 1; 4151 } 4152 4153 APFloatBase::ExponentType IEEEFloat::exponentZero() const { 4154 return semantics->minExponent - 1; 4155 } 4156 4157 void IEEEFloat::makeInf(bool Negative) { 4158 category = fcInfinity; 4159 sign = Negative; 4160 exponent = exponentInf(); 4161 APInt::tcSet(significandParts(), 0, partCount()); 4162 } 4163 4164 void IEEEFloat::makeZero(bool Negative) { 4165 category = fcZero; 4166 sign = Negative; 4167 exponent = exponentZero(); 4168 APInt::tcSet(significandParts(), 0, partCount()); 4169 } 4170 4171 void IEEEFloat::makeQuiet() { 4172 assert(isNaN()); 4173 APInt::tcSetBit(significandParts(), semantics->precision - 2); 4174 } 4175 4176 int ilogb(const IEEEFloat &Arg) { 4177 if (Arg.isNaN()) 4178 return IEEEFloat::IEK_NaN; 4179 if (Arg.isZero()) 4180 return IEEEFloat::IEK_Zero; 4181 if (Arg.isInfinity()) 4182 return IEEEFloat::IEK_Inf; 4183 if (!Arg.isDenormal()) 4184 return Arg.exponent; 4185 4186 IEEEFloat Normalized(Arg); 4187 int SignificandBits = Arg.getSemantics().precision - 1; 4188 4189 Normalized.exponent += SignificandBits; 4190 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 4191 return Normalized.exponent - SignificandBits; 4192 } 4193 4194 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 4195 auto MaxExp = X.getSemantics().maxExponent; 4196 auto MinExp = X.getSemantics().minExponent; 4197 4198 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 4199 // overflow; clamp it to a safe range before adding, but ensure that the range 4200 // is large enough that the clamp does not change the result. The range we 4201 // need to support is the difference between the largest possible exponent and 4202 // the normalized exponent of half the smallest denormal. 4203 4204 int SignificandBits = X.getSemantics().precision - 1; 4205 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4206 4207 // Clamp to one past the range ends to let normalize handle overlflow. 4208 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); 4209 X.normalize(RoundingMode, lfExactlyZero); 4210 if (X.isNaN()) 4211 X.makeQuiet(); 4212 return X; 4213 } 4214 4215 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 4216 Exp = ilogb(Val); 4217 4218 // Quiet signalling nans. 4219 if (Exp == IEEEFloat::IEK_NaN) { 4220 IEEEFloat Quiet(Val); 4221 Quiet.makeQuiet(); 4222 return Quiet; 4223 } 4224 4225 if (Exp == IEEEFloat::IEK_Inf) 4226 return Val; 4227 4228 // 1 is added because frexp is defined to return a normalized fraction in 4229 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4230 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 4231 return scalbn(Val, -Exp, RM); 4232 } 4233 4234 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4235 : Semantics(&S), 4236 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4237 assert(Semantics == &semPPCDoubleDouble); 4238 } 4239 4240 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4241 : Semantics(&S), 4242 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4243 APFloat(semIEEEdouble, uninitialized)}) { 4244 assert(Semantics == &semPPCDoubleDouble); 4245 } 4246 4247 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4248 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4249 APFloat(semIEEEdouble)}) { 4250 assert(Semantics == &semPPCDoubleDouble); 4251 } 4252 4253 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4254 : Semantics(&S), 4255 Floats(new APFloat[2]{ 4256 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4257 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4258 assert(Semantics == &semPPCDoubleDouble); 4259 } 4260 4261 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4262 APFloat &&Second) 4263 : Semantics(&S), 4264 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4265 assert(Semantics == &semPPCDoubleDouble); 4266 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4267 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4268 } 4269 4270 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4271 : Semantics(RHS.Semantics), 4272 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4273 APFloat(RHS.Floats[1])} 4274 : nullptr) { 4275 assert(Semantics == &semPPCDoubleDouble); 4276 } 4277 4278 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4279 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4280 RHS.Semantics = &semBogus; 4281 assert(Semantics == &semPPCDoubleDouble); 4282 } 4283 4284 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4285 if (Semantics == RHS.Semantics && RHS.Floats) { 4286 Floats[0] = RHS.Floats[0]; 4287 Floats[1] = RHS.Floats[1]; 4288 } else if (this != &RHS) { 4289 this->~DoubleAPFloat(); 4290 new (this) DoubleAPFloat(RHS); 4291 } 4292 return *this; 4293 } 4294 4295 // Implement addition, subtraction, multiplication and division based on: 4296 // "Software for Doubled-Precision Floating-Point Computations", 4297 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 4298 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4299 const APFloat &c, const APFloat &cc, 4300 roundingMode RM) { 4301 int Status = opOK; 4302 APFloat z = a; 4303 Status |= z.add(c, RM); 4304 if (!z.isFinite()) { 4305 if (!z.isInfinity()) { 4306 Floats[0] = std::move(z); 4307 Floats[1].makeZero(/* Neg = */ false); 4308 return (opStatus)Status; 4309 } 4310 Status = opOK; 4311 auto AComparedToC = a.compareAbsoluteValue(c); 4312 z = cc; 4313 Status |= z.add(aa, RM); 4314 if (AComparedToC == APFloat::cmpGreaterThan) { 4315 // z = cc + aa + c + a; 4316 Status |= z.add(c, RM); 4317 Status |= z.add(a, RM); 4318 } else { 4319 // z = cc + aa + a + c; 4320 Status |= z.add(a, RM); 4321 Status |= z.add(c, RM); 4322 } 4323 if (!z.isFinite()) { 4324 Floats[0] = std::move(z); 4325 Floats[1].makeZero(/* Neg = */ false); 4326 return (opStatus)Status; 4327 } 4328 Floats[0] = z; 4329 APFloat zz = aa; 4330 Status |= zz.add(cc, RM); 4331 if (AComparedToC == APFloat::cmpGreaterThan) { 4332 // Floats[1] = a - z + c + zz; 4333 Floats[1] = a; 4334 Status |= Floats[1].subtract(z, RM); 4335 Status |= Floats[1].add(c, RM); 4336 Status |= Floats[1].add(zz, RM); 4337 } else { 4338 // Floats[1] = c - z + a + zz; 4339 Floats[1] = c; 4340 Status |= Floats[1].subtract(z, RM); 4341 Status |= Floats[1].add(a, RM); 4342 Status |= Floats[1].add(zz, RM); 4343 } 4344 } else { 4345 // q = a - z; 4346 APFloat q = a; 4347 Status |= q.subtract(z, RM); 4348 4349 // zz = q + c + (a - (q + z)) + aa + cc; 4350 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 4351 auto zz = q; 4352 Status |= zz.add(c, RM); 4353 Status |= q.add(z, RM); 4354 Status |= q.subtract(a, RM); 4355 q.changeSign(); 4356 Status |= zz.add(q, RM); 4357 Status |= zz.add(aa, RM); 4358 Status |= zz.add(cc, RM); 4359 if (zz.isZero() && !zz.isNegative()) { 4360 Floats[0] = std::move(z); 4361 Floats[1].makeZero(/* Neg = */ false); 4362 return opOK; 4363 } 4364 Floats[0] = z; 4365 Status |= Floats[0].add(zz, RM); 4366 if (!Floats[0].isFinite()) { 4367 Floats[1].makeZero(/* Neg = */ false); 4368 return (opStatus)Status; 4369 } 4370 Floats[1] = std::move(z); 4371 Status |= Floats[1].subtract(Floats[0], RM); 4372 Status |= Floats[1].add(zz, RM); 4373 } 4374 return (opStatus)Status; 4375 } 4376 4377 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4378 const DoubleAPFloat &RHS, 4379 DoubleAPFloat &Out, 4380 roundingMode RM) { 4381 if (LHS.getCategory() == fcNaN) { 4382 Out = LHS; 4383 return opOK; 4384 } 4385 if (RHS.getCategory() == fcNaN) { 4386 Out = RHS; 4387 return opOK; 4388 } 4389 if (LHS.getCategory() == fcZero) { 4390 Out = RHS; 4391 return opOK; 4392 } 4393 if (RHS.getCategory() == fcZero) { 4394 Out = LHS; 4395 return opOK; 4396 } 4397 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4398 LHS.isNegative() != RHS.isNegative()) { 4399 Out.makeNaN(false, Out.isNegative(), nullptr); 4400 return opInvalidOp; 4401 } 4402 if (LHS.getCategory() == fcInfinity) { 4403 Out = LHS; 4404 return opOK; 4405 } 4406 if (RHS.getCategory() == fcInfinity) { 4407 Out = RHS; 4408 return opOK; 4409 } 4410 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4411 4412 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4413 CC(RHS.Floats[1]); 4414 assert(&A.getSemantics() == &semIEEEdouble); 4415 assert(&AA.getSemantics() == &semIEEEdouble); 4416 assert(&C.getSemantics() == &semIEEEdouble); 4417 assert(&CC.getSemantics() == &semIEEEdouble); 4418 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble); 4419 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4420 return Out.addImpl(A, AA, C, CC, RM); 4421 } 4422 4423 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4424 roundingMode RM) { 4425 return addWithSpecial(*this, RHS, *this, RM); 4426 } 4427 4428 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4429 roundingMode RM) { 4430 changeSign(); 4431 auto Ret = add(RHS, RM); 4432 changeSign(); 4433 return Ret; 4434 } 4435 4436 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4437 APFloat::roundingMode RM) { 4438 const auto &LHS = *this; 4439 auto &Out = *this; 4440 /* Interesting observation: For special categories, finding the lowest 4441 common ancestor of the following layered graph gives the correct 4442 return category: 4443 4444 NaN 4445 / \ 4446 Zero Inf 4447 \ / 4448 Normal 4449 4450 e.g. NaN * NaN = NaN 4451 Zero * Inf = NaN 4452 Normal * Zero = Zero 4453 Normal * Inf = Inf 4454 */ 4455 if (LHS.getCategory() == fcNaN) { 4456 Out = LHS; 4457 return opOK; 4458 } 4459 if (RHS.getCategory() == fcNaN) { 4460 Out = RHS; 4461 return opOK; 4462 } 4463 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4464 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4465 Out.makeNaN(false, false, nullptr); 4466 return opOK; 4467 } 4468 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4469 Out = LHS; 4470 return opOK; 4471 } 4472 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4473 Out = RHS; 4474 return opOK; 4475 } 4476 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4477 "Special cases not handled exhaustively"); 4478 4479 int Status = opOK; 4480 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4481 // t = a * c 4482 APFloat T = A; 4483 Status |= T.multiply(C, RM); 4484 if (!T.isFiniteNonZero()) { 4485 Floats[0] = T; 4486 Floats[1].makeZero(/* Neg = */ false); 4487 return (opStatus)Status; 4488 } 4489 4490 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4491 APFloat Tau = A; 4492 T.changeSign(); 4493 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4494 T.changeSign(); 4495 { 4496 // v = a * d 4497 APFloat V = A; 4498 Status |= V.multiply(D, RM); 4499 // w = b * c 4500 APFloat W = B; 4501 Status |= W.multiply(C, RM); 4502 Status |= V.add(W, RM); 4503 // tau += v + w 4504 Status |= Tau.add(V, RM); 4505 } 4506 // u = t + tau 4507 APFloat U = T; 4508 Status |= U.add(Tau, RM); 4509 4510 Floats[0] = U; 4511 if (!U.isFinite()) { 4512 Floats[1].makeZero(/* Neg = */ false); 4513 } else { 4514 // Floats[1] = (t - u) + tau 4515 Status |= T.subtract(U, RM); 4516 Status |= T.add(Tau, RM); 4517 Floats[1] = T; 4518 } 4519 return (opStatus)Status; 4520 } 4521 4522 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4523 APFloat::roundingMode RM) { 4524 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4525 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4526 auto Ret = 4527 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4528 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4529 return Ret; 4530 } 4531 4532 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4533 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4534 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4535 auto Ret = 4536 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4537 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4538 return Ret; 4539 } 4540 4541 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4542 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4543 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4544 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4545 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4546 return Ret; 4547 } 4548 4549 APFloat::opStatus 4550 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4551 const DoubleAPFloat &Addend, 4552 APFloat::roundingMode RM) { 4553 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4554 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4555 auto Ret = Tmp.fusedMultiplyAdd( 4556 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4557 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4558 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4559 return Ret; 4560 } 4561 4562 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4563 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4564 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4565 auto Ret = Tmp.roundToIntegral(RM); 4566 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4567 return Ret; 4568 } 4569 4570 void DoubleAPFloat::changeSign() { 4571 Floats[0].changeSign(); 4572 Floats[1].changeSign(); 4573 } 4574 4575 APFloat::cmpResult 4576 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4577 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4578 if (Result != cmpEqual) 4579 return Result; 4580 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4581 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4582 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4583 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4584 if (Against && !RHSAgainst) 4585 return cmpLessThan; 4586 if (!Against && RHSAgainst) 4587 return cmpGreaterThan; 4588 if (!Against && !RHSAgainst) 4589 return Result; 4590 if (Against && RHSAgainst) 4591 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4592 } 4593 return Result; 4594 } 4595 4596 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4597 return Floats[0].getCategory(); 4598 } 4599 4600 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } 4601 4602 void DoubleAPFloat::makeInf(bool Neg) { 4603 Floats[0].makeInf(Neg); 4604 Floats[1].makeZero(/* Neg = */ false); 4605 } 4606 4607 void DoubleAPFloat::makeZero(bool Neg) { 4608 Floats[0].makeZero(Neg); 4609 Floats[1].makeZero(/* Neg = */ false); 4610 } 4611 4612 void DoubleAPFloat::makeLargest(bool Neg) { 4613 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4614 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 4615 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 4616 if (Neg) 4617 changeSign(); 4618 } 4619 4620 void DoubleAPFloat::makeSmallest(bool Neg) { 4621 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4622 Floats[0].makeSmallest(Neg); 4623 Floats[1].makeZero(/* Neg = */ false); 4624 } 4625 4626 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 4627 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4628 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 4629 if (Neg) 4630 Floats[0].changeSign(); 4631 Floats[1].makeZero(/* Neg = */ false); 4632 } 4633 4634 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4635 Floats[0].makeNaN(SNaN, Neg, fill); 4636 Floats[1].makeZero(/* Neg = */ false); 4637 } 4638 4639 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 4640 auto Result = Floats[0].compare(RHS.Floats[0]); 4641 // |Float[0]| > |Float[1]| 4642 if (Result == APFloat::cmpEqual) 4643 return Floats[1].compare(RHS.Floats[1]); 4644 return Result; 4645 } 4646 4647 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 4648 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 4649 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 4650 } 4651 4652 hash_code hash_value(const DoubleAPFloat &Arg) { 4653 if (Arg.Floats) 4654 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 4655 return hash_combine(Arg.Semantics); 4656 } 4657 4658 APInt DoubleAPFloat::bitcastToAPInt() const { 4659 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4660 uint64_t Data[] = { 4661 Floats[0].bitcastToAPInt().getRawData()[0], 4662 Floats[1].bitcastToAPInt().getRawData()[0], 4663 }; 4664 return APInt(128, 2, Data); 4665 } 4666 4667 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 4668 roundingMode RM) { 4669 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4670 APFloat Tmp(semPPCDoubleDoubleLegacy); 4671 auto Ret = Tmp.convertFromString(S, RM); 4672 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4673 return Ret; 4674 } 4675 4676 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 4677 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4678 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4679 auto Ret = Tmp.next(nextDown); 4680 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4681 return Ret; 4682 } 4683 4684 APFloat::opStatus 4685 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 4686 unsigned int Width, bool IsSigned, 4687 roundingMode RM, bool *IsExact) const { 4688 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4689 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4690 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 4691 } 4692 4693 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 4694 bool IsSigned, 4695 roundingMode RM) { 4696 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4697 APFloat Tmp(semPPCDoubleDoubleLegacy); 4698 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 4699 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4700 return Ret; 4701 } 4702 4703 APFloat::opStatus 4704 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 4705 unsigned int InputSize, 4706 bool IsSigned, roundingMode RM) { 4707 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4708 APFloat Tmp(semPPCDoubleDoubleLegacy); 4709 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 4710 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4711 return Ret; 4712 } 4713 4714 APFloat::opStatus 4715 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 4716 unsigned int InputSize, 4717 bool IsSigned, roundingMode RM) { 4718 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4719 APFloat Tmp(semPPCDoubleDoubleLegacy); 4720 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 4721 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4722 return Ret; 4723 } 4724 4725 unsigned int DoubleAPFloat::convertToHexString(char *DST, 4726 unsigned int HexDigits, 4727 bool UpperCase, 4728 roundingMode RM) const { 4729 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4730 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4731 .convertToHexString(DST, HexDigits, UpperCase, RM); 4732 } 4733 4734 bool DoubleAPFloat::isDenormal() const { 4735 return getCategory() == fcNormal && 4736 (Floats[0].isDenormal() || Floats[1].isDenormal() || 4737 // (double)(Hi + Lo) == Hi defines a normal number. 4738 Floats[0] != Floats[0] + Floats[1]); 4739 } 4740 4741 bool DoubleAPFloat::isSmallest() const { 4742 if (getCategory() != fcNormal) 4743 return false; 4744 DoubleAPFloat Tmp(*this); 4745 Tmp.makeSmallest(this->isNegative()); 4746 return Tmp.compare(*this) == cmpEqual; 4747 } 4748 4749 bool DoubleAPFloat::isLargest() const { 4750 if (getCategory() != fcNormal) 4751 return false; 4752 DoubleAPFloat Tmp(*this); 4753 Tmp.makeLargest(this->isNegative()); 4754 return Tmp.compare(*this) == cmpEqual; 4755 } 4756 4757 bool DoubleAPFloat::isInteger() const { 4758 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4759 return Floats[0].isInteger() && Floats[1].isInteger(); 4760 } 4761 4762 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 4763 unsigned FormatPrecision, 4764 unsigned FormatMaxPadding, 4765 bool TruncateZero) const { 4766 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4767 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4768 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 4769 } 4770 4771 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 4772 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4773 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4774 if (!inv) 4775 return Tmp.getExactInverse(nullptr); 4776 APFloat Inv(semPPCDoubleDoubleLegacy); 4777 auto Ret = Tmp.getExactInverse(&Inv); 4778 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 4779 return Ret; 4780 } 4781 4782 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp, 4783 APFloat::roundingMode RM) { 4784 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4785 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 4786 scalbn(Arg.Floats[1], Exp, RM)); 4787 } 4788 4789 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 4790 APFloat::roundingMode RM) { 4791 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4792 APFloat First = frexp(Arg.Floats[0], Exp, RM); 4793 APFloat Second = Arg.Floats[1]; 4794 if (Arg.getCategory() == APFloat::fcNormal) 4795 Second = scalbn(Second, -Exp, RM); 4796 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 4797 } 4798 4799 } // namespace detail 4800 4801 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 4802 if (usesLayout<IEEEFloat>(Semantics)) { 4803 new (&IEEE) IEEEFloat(std::move(F)); 4804 return; 4805 } 4806 if (usesLayout<DoubleAPFloat>(Semantics)) { 4807 const fltSemantics& S = F.getSemantics(); 4808 new (&Double) 4809 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 4810 APFloat(semIEEEdouble)); 4811 return; 4812 } 4813 llvm_unreachable("Unexpected semantics"); 4814 } 4815 4816 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 4817 roundingMode RM) { 4818 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 4819 } 4820 4821 hash_code hash_value(const APFloat &Arg) { 4822 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 4823 return hash_value(Arg.U.IEEE); 4824 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 4825 return hash_value(Arg.U.Double); 4826 llvm_unreachable("Unexpected semantics"); 4827 } 4828 4829 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 4830 : APFloat(Semantics) { 4831 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 4832 assert(StatusOrErr && "Invalid floating point representation"); 4833 consumeError(StatusOrErr.takeError()); 4834 } 4835 4836 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 4837 roundingMode RM, bool *losesInfo) { 4838 if (&getSemantics() == &ToSemantics) { 4839 *losesInfo = false; 4840 return opOK; 4841 } 4842 if (usesLayout<IEEEFloat>(getSemantics()) && 4843 usesLayout<IEEEFloat>(ToSemantics)) 4844 return U.IEEE.convert(ToSemantics, RM, losesInfo); 4845 if (usesLayout<IEEEFloat>(getSemantics()) && 4846 usesLayout<DoubleAPFloat>(ToSemantics)) { 4847 assert(&ToSemantics == &semPPCDoubleDouble); 4848 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 4849 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 4850 return Ret; 4851 } 4852 if (usesLayout<DoubleAPFloat>(getSemantics()) && 4853 usesLayout<IEEEFloat>(ToSemantics)) { 4854 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 4855 *this = APFloat(std::move(getIEEE()), ToSemantics); 4856 return Ret; 4857 } 4858 llvm_unreachable("Unexpected semantics"); 4859 } 4860 4861 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 4862 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 4863 } 4864 4865 void APFloat::print(raw_ostream &OS) const { 4866 SmallVector<char, 16> Buffer; 4867 toString(Buffer); 4868 OS << Buffer << "\n"; 4869 } 4870 4871 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 4872 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 4873 #endif 4874 4875 void APFloat::Profile(FoldingSetNodeID &NID) const { 4876 NID.Add(bitcastToAPInt()); 4877 } 4878 4879 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 4880 an APSInt, whose initial bit-width and signed-ness are used to determine the 4881 precision of the conversion. 4882 */ 4883 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 4884 roundingMode rounding_mode, 4885 bool *isExact) const { 4886 unsigned bitWidth = result.getBitWidth(); 4887 SmallVector<uint64_t, 4> parts(result.getNumWords()); 4888 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 4889 rounding_mode, isExact); 4890 // Keeps the original signed-ness. 4891 result = APInt(bitWidth, parts); 4892 return status; 4893 } 4894 4895 double APFloat::convertToDouble() const { 4896 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 4897 return getIEEE().convertToDouble(); 4898 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 4899 "Float semantics is not representable by IEEEdouble"); 4900 APFloat Temp = *this; 4901 bool LosesInfo; 4902 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 4903 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 4904 (void)St; 4905 return Temp.getIEEE().convertToDouble(); 4906 } 4907 4908 float APFloat::convertToFloat() const { 4909 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 4910 return getIEEE().convertToFloat(); 4911 assert(getSemantics().isRepresentableBy(semIEEEsingle) && 4912 "Float semantics is not representable by IEEEsingle"); 4913 APFloat Temp = *this; 4914 bool LosesInfo; 4915 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 4916 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 4917 (void)St; 4918 return Temp.getIEEE().convertToFloat(); 4919 } 4920 4921 } // namespace llvm 4922 4923 #undef APFLOAT_DISPATCH_ON_SEMANTICS 4924