1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision floating 10 // point values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Config/llvm-config.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <cstring> 27 #include <limits.h> 28 29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 30 do { \ 31 if (usesLayout<IEEEFloat>(getSemantics())) \ 32 return U.IEEE.METHOD_CALL; \ 33 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 34 return U.Double.METHOD_CALL; \ 35 llvm_unreachable("Unexpected semantics"); \ 36 } while (false) 37 38 using namespace llvm; 39 40 /// A macro used to combine two fcCategory enums into one key which can be used 41 /// in a switch statement to classify how the interaction of two APFloat's 42 /// categories affects an operation. 43 /// 44 /// TODO: If clang source code is ever allowed to use constexpr in its own 45 /// codebase, change this into a static inline function. 46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 47 48 /* Assumed in hexadecimal significand parsing, and conversion to 49 hexadecimal strings. */ 50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 51 52 namespace llvm { 53 54 // How the nonfinite values Inf and NaN are represented. 55 enum class fltNonfiniteBehavior { 56 // Represents standard IEEE 754 behavior. A value is nonfinite if the 57 // exponent field is all 1s. In such cases, a value is Inf if the 58 // significand bits are all zero, and NaN otherwise 59 IEEE754, 60 61 // Only the Float8E5M2 has this behavior. There is no Inf representation. A 62 // value is NaN if the exponent field and the mantissa field are all 1s. 63 // This behavior matches the FP8 E4M3 type described in 64 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs 65 // as non-signalling, although the paper does not state whether the NaN 66 // values are signalling or not. 67 NanOnly, 68 }; 69 70 /* Represents floating point arithmetic semantics. */ 71 struct fltSemantics { 72 /* The largest E such that 2^E is representable; this matches the 73 definition of IEEE 754. */ 74 APFloatBase::ExponentType maxExponent; 75 76 /* The smallest E such that 2^E is a normalized number; this 77 matches the definition of IEEE 754. */ 78 APFloatBase::ExponentType minExponent; 79 80 /* Number of bits in the significand. This includes the integer 81 bit. */ 82 unsigned int precision; 83 84 /* Number of bits actually used in the semantics. */ 85 unsigned int sizeInBits; 86 87 fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; 88 89 // Returns true if any number described by this semantics can be precisely 90 // represented by the specified semantics. Does not take into account 91 // the value of fltNonfiniteBehavior. 92 bool isRepresentableBy(const fltSemantics &S) const { 93 return maxExponent <= S.maxExponent && minExponent >= S.minExponent && 94 precision <= S.precision; 95 } 96 }; 97 98 static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; 99 static const fltSemantics semBFloat = {127, -126, 8, 16}; 100 static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; 101 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; 102 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; 103 static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; 104 static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8, 105 fltNonfiniteBehavior::NanOnly}; 106 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 107 static const fltSemantics semBogus = {0, 0, 0, 0}; 108 109 /* The IBM double-double semantics. Such a number consists of a pair of IEEE 110 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, 111 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. 112 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent 113 to each other, and two 11-bit exponents. 114 115 Note: we need to make the value different from semBogus as otherwise 116 an unsafe optimization may collapse both values to a single address, 117 and we heavily rely on them having distinct addresses. */ 118 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; 119 120 /* These are legacy semantics for the fallback, inaccrurate implementation of 121 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the 122 operation. It's equivalent to having an IEEE number with consecutive 106 123 bits of mantissa and 11 bits of exponent. 124 125 It's not equivalent to IBM double-double. For example, a legit IBM 126 double-double, 1 + epsilon: 127 128 1 + epsilon = 1 + (1 >> 1076) 129 130 is not representable by a consecutive 106 bits of mantissa. 131 132 Currently, these semantics are used in the following way: 133 134 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> 135 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> 136 semPPCDoubleDoubleLegacy -> IEEE operations 137 138 We use bitcastToAPInt() to get the bit representation (in APInt) of the 139 underlying IEEEdouble, then use the APInt constructor to construct the 140 legacy IEEE float. 141 142 TODO: Implement all operations in semPPCDoubleDouble, and delete these 143 semantics. */ 144 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 145 53 + 53, 128}; 146 147 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 148 switch (S) { 149 case S_IEEEhalf: 150 return IEEEhalf(); 151 case S_BFloat: 152 return BFloat(); 153 case S_IEEEsingle: 154 return IEEEsingle(); 155 case S_IEEEdouble: 156 return IEEEdouble(); 157 case S_IEEEquad: 158 return IEEEquad(); 159 case S_PPCDoubleDouble: 160 return PPCDoubleDouble(); 161 case S_Float8E5M2: 162 return Float8E5M2(); 163 case S_Float8E4M3FN: 164 return Float8E4M3FN(); 165 case S_x87DoubleExtended: 166 return x87DoubleExtended(); 167 } 168 llvm_unreachable("Unrecognised floating semantics"); 169 } 170 171 APFloatBase::Semantics 172 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 173 if (&Sem == &llvm::APFloat::IEEEhalf()) 174 return S_IEEEhalf; 175 else if (&Sem == &llvm::APFloat::BFloat()) 176 return S_BFloat; 177 else if (&Sem == &llvm::APFloat::IEEEsingle()) 178 return S_IEEEsingle; 179 else if (&Sem == &llvm::APFloat::IEEEdouble()) 180 return S_IEEEdouble; 181 else if (&Sem == &llvm::APFloat::IEEEquad()) 182 return S_IEEEquad; 183 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 184 return S_PPCDoubleDouble; 185 else if (&Sem == &llvm::APFloat::Float8E5M2()) 186 return S_Float8E5M2; 187 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 188 return S_Float8E4M3FN; 189 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 190 return S_x87DoubleExtended; 191 else 192 llvm_unreachable("Unknown floating semantics"); 193 } 194 195 const fltSemantics &APFloatBase::IEEEhalf() { 196 return semIEEEhalf; 197 } 198 const fltSemantics &APFloatBase::BFloat() { 199 return semBFloat; 200 } 201 const fltSemantics &APFloatBase::IEEEsingle() { 202 return semIEEEsingle; 203 } 204 const fltSemantics &APFloatBase::IEEEdouble() { 205 return semIEEEdouble; 206 } 207 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 208 const fltSemantics &APFloatBase::PPCDoubleDouble() { 209 return semPPCDoubleDouble; 210 } 211 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 212 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 213 const fltSemantics &APFloatBase::x87DoubleExtended() { 214 return semX87DoubleExtended; 215 } 216 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 217 218 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 219 constexpr RoundingMode APFloatBase::rmTowardPositive; 220 constexpr RoundingMode APFloatBase::rmTowardNegative; 221 constexpr RoundingMode APFloatBase::rmTowardZero; 222 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 223 224 /* A tight upper bound on number of parts required to hold the value 225 pow(5, power) is 226 227 power * 815 / (351 * integerPartWidth) + 1 228 229 However, whilst the result may require only this many parts, 230 because we are multiplying two values to get it, the 231 multiplication may require an extra part with the excess part 232 being zero (consider the trivial case of 1 * 1, tcFullMultiply 233 requires two parts to hold the single-part result). So we add an 234 extra one to guarantee enough space whilst multiplying. */ 235 const unsigned int maxExponent = 16383; 236 const unsigned int maxPrecision = 113; 237 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 238 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 239 240 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 241 return semantics.precision; 242 } 243 APFloatBase::ExponentType 244 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 245 return semantics.maxExponent; 246 } 247 APFloatBase::ExponentType 248 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 249 return semantics.minExponent; 250 } 251 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 252 return semantics.sizeInBits; 253 } 254 255 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 256 return Sem.sizeInBits; 257 } 258 259 /* A bunch of private, handy routines. */ 260 261 static inline Error createError(const Twine &Err) { 262 return make_error<StringError>(Err, inconvertibleErrorCode()); 263 } 264 265 static inline unsigned int 266 partCountForBits(unsigned int bits) 267 { 268 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 269 } 270 271 /* Returns 0U-9U. Return values >= 10U are not digits. */ 272 static inline unsigned int 273 decDigitValue(unsigned int c) 274 { 275 return c - '0'; 276 } 277 278 /* Return the value of a decimal exponent of the form 279 [+-]ddddddd. 280 281 If the exponent overflows, returns a large exponent with the 282 appropriate sign. */ 283 static Expected<int> readExponent(StringRef::iterator begin, 284 StringRef::iterator end) { 285 bool isNegative; 286 unsigned int absExponent; 287 const unsigned int overlargeExponent = 24000; /* FIXME. */ 288 StringRef::iterator p = begin; 289 290 // Treat no exponent as 0 to match binutils 291 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 292 return 0; 293 } 294 295 isNegative = (*p == '-'); 296 if (*p == '-' || *p == '+') { 297 p++; 298 if (p == end) 299 return createError("Exponent has no digits"); 300 } 301 302 absExponent = decDigitValue(*p++); 303 if (absExponent >= 10U) 304 return createError("Invalid character in exponent"); 305 306 for (; p != end; ++p) { 307 unsigned int value; 308 309 value = decDigitValue(*p); 310 if (value >= 10U) 311 return createError("Invalid character in exponent"); 312 313 absExponent = absExponent * 10U + value; 314 if (absExponent >= overlargeExponent) { 315 absExponent = overlargeExponent; 316 break; 317 } 318 } 319 320 if (isNegative) 321 return -(int) absExponent; 322 else 323 return (int) absExponent; 324 } 325 326 /* This is ugly and needs cleaning up, but I don't immediately see 327 how whilst remaining safe. */ 328 static Expected<int> totalExponent(StringRef::iterator p, 329 StringRef::iterator end, 330 int exponentAdjustment) { 331 int unsignedExponent; 332 bool negative, overflow; 333 int exponent = 0; 334 335 if (p == end) 336 return createError("Exponent has no digits"); 337 338 negative = *p == '-'; 339 if (*p == '-' || *p == '+') { 340 p++; 341 if (p == end) 342 return createError("Exponent has no digits"); 343 } 344 345 unsignedExponent = 0; 346 overflow = false; 347 for (; p != end; ++p) { 348 unsigned int value; 349 350 value = decDigitValue(*p); 351 if (value >= 10U) 352 return createError("Invalid character in exponent"); 353 354 unsignedExponent = unsignedExponent * 10 + value; 355 if (unsignedExponent > 32767) { 356 overflow = true; 357 break; 358 } 359 } 360 361 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 362 overflow = true; 363 364 if (!overflow) { 365 exponent = unsignedExponent; 366 if (negative) 367 exponent = -exponent; 368 exponent += exponentAdjustment; 369 if (exponent > 32767 || exponent < -32768) 370 overflow = true; 371 } 372 373 if (overflow) 374 exponent = negative ? -32768: 32767; 375 376 return exponent; 377 } 378 379 static Expected<StringRef::iterator> 380 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 381 StringRef::iterator *dot) { 382 StringRef::iterator p = begin; 383 *dot = end; 384 while (p != end && *p == '0') 385 p++; 386 387 if (p != end && *p == '.') { 388 *dot = p++; 389 390 if (end - begin == 1) 391 return createError("Significand has no digits"); 392 393 while (p != end && *p == '0') 394 p++; 395 } 396 397 return p; 398 } 399 400 /* Given a normal decimal floating point number of the form 401 402 dddd.dddd[eE][+-]ddd 403 404 where the decimal point and exponent are optional, fill out the 405 structure D. Exponent is appropriate if the significand is 406 treated as an integer, and normalizedExponent if the significand 407 is taken to have the decimal point after a single leading 408 non-zero digit. 409 410 If the value is zero, V->firstSigDigit points to a non-digit, and 411 the return exponent is zero. 412 */ 413 struct decimalInfo { 414 const char *firstSigDigit; 415 const char *lastSigDigit; 416 int exponent; 417 int normalizedExponent; 418 }; 419 420 static Error interpretDecimal(StringRef::iterator begin, 421 StringRef::iterator end, decimalInfo *D) { 422 StringRef::iterator dot = end; 423 424 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 425 if (!PtrOrErr) 426 return PtrOrErr.takeError(); 427 StringRef::iterator p = *PtrOrErr; 428 429 D->firstSigDigit = p; 430 D->exponent = 0; 431 D->normalizedExponent = 0; 432 433 for (; p != end; ++p) { 434 if (*p == '.') { 435 if (dot != end) 436 return createError("String contains multiple dots"); 437 dot = p++; 438 if (p == end) 439 break; 440 } 441 if (decDigitValue(*p) >= 10U) 442 break; 443 } 444 445 if (p != end) { 446 if (*p != 'e' && *p != 'E') 447 return createError("Invalid character in significand"); 448 if (p == begin) 449 return createError("Significand has no digits"); 450 if (dot != end && p - begin == 1) 451 return createError("Significand has no digits"); 452 453 /* p points to the first non-digit in the string */ 454 auto ExpOrErr = readExponent(p + 1, end); 455 if (!ExpOrErr) 456 return ExpOrErr.takeError(); 457 D->exponent = *ExpOrErr; 458 459 /* Implied decimal point? */ 460 if (dot == end) 461 dot = p; 462 } 463 464 /* If number is all zeroes accept any exponent. */ 465 if (p != D->firstSigDigit) { 466 /* Drop insignificant trailing zeroes. */ 467 if (p != begin) { 468 do 469 do 470 p--; 471 while (p != begin && *p == '0'); 472 while (p != begin && *p == '.'); 473 } 474 475 /* Adjust the exponents for any decimal point. */ 476 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 477 D->normalizedExponent = (D->exponent + 478 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 479 - (dot > D->firstSigDigit && dot < p))); 480 } 481 482 D->lastSigDigit = p; 483 return Error::success(); 484 } 485 486 /* Return the trailing fraction of a hexadecimal number. 487 DIGITVALUE is the first hex digit of the fraction, P points to 488 the next digit. */ 489 static Expected<lostFraction> 490 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 491 unsigned int digitValue) { 492 unsigned int hexDigit; 493 494 /* If the first trailing digit isn't 0 or 8 we can work out the 495 fraction immediately. */ 496 if (digitValue > 8) 497 return lfMoreThanHalf; 498 else if (digitValue < 8 && digitValue > 0) 499 return lfLessThanHalf; 500 501 // Otherwise we need to find the first non-zero digit. 502 while (p != end && (*p == '0' || *p == '.')) 503 p++; 504 505 if (p == end) 506 return createError("Invalid trailing hexadecimal fraction!"); 507 508 hexDigit = hexDigitValue(*p); 509 510 /* If we ran off the end it is exactly zero or one-half, otherwise 511 a little more. */ 512 if (hexDigit == -1U) 513 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 514 else 515 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 516 } 517 518 /* Return the fraction lost were a bignum truncated losing the least 519 significant BITS bits. */ 520 static lostFraction 521 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 522 unsigned int partCount, 523 unsigned int bits) 524 { 525 unsigned int lsb; 526 527 lsb = APInt::tcLSB(parts, partCount); 528 529 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 530 if (bits <= lsb) 531 return lfExactlyZero; 532 if (bits == lsb + 1) 533 return lfExactlyHalf; 534 if (bits <= partCount * APFloatBase::integerPartWidth && 535 APInt::tcExtractBit(parts, bits - 1)) 536 return lfMoreThanHalf; 537 538 return lfLessThanHalf; 539 } 540 541 /* Shift DST right BITS bits noting lost fraction. */ 542 static lostFraction 543 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 544 { 545 lostFraction lost_fraction; 546 547 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 548 549 APInt::tcShiftRight(dst, parts, bits); 550 551 return lost_fraction; 552 } 553 554 /* Combine the effect of two lost fractions. */ 555 static lostFraction 556 combineLostFractions(lostFraction moreSignificant, 557 lostFraction lessSignificant) 558 { 559 if (lessSignificant != lfExactlyZero) { 560 if (moreSignificant == lfExactlyZero) 561 moreSignificant = lfLessThanHalf; 562 else if (moreSignificant == lfExactlyHalf) 563 moreSignificant = lfMoreThanHalf; 564 } 565 566 return moreSignificant; 567 } 568 569 /* The error from the true value, in half-ulps, on multiplying two 570 floating point numbers, which differ from the value they 571 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 572 than the returned value. 573 574 See "How to Read Floating Point Numbers Accurately" by William D 575 Clinger. */ 576 static unsigned int 577 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 578 { 579 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 580 581 if (HUerr1 + HUerr2 == 0) 582 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 583 else 584 return inexactMultiply + 2 * (HUerr1 + HUerr2); 585 } 586 587 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 588 when the least significant BITS are truncated. BITS cannot be 589 zero. */ 590 static APFloatBase::integerPart 591 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, 592 bool isNearest) { 593 unsigned int count, partBits; 594 APFloatBase::integerPart part, boundary; 595 596 assert(bits != 0); 597 598 bits--; 599 count = bits / APFloatBase::integerPartWidth; 600 partBits = bits % APFloatBase::integerPartWidth + 1; 601 602 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); 603 604 if (isNearest) 605 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); 606 else 607 boundary = 0; 608 609 if (count == 0) { 610 if (part - boundary <= boundary - part) 611 return part - boundary; 612 else 613 return boundary - part; 614 } 615 616 if (part == boundary) { 617 while (--count) 618 if (parts[count]) 619 return ~(APFloatBase::integerPart) 0; /* A lot. */ 620 621 return parts[0]; 622 } else if (part == boundary - 1) { 623 while (--count) 624 if (~parts[count]) 625 return ~(APFloatBase::integerPart) 0; /* A lot. */ 626 627 return -parts[0]; 628 } 629 630 return ~(APFloatBase::integerPart) 0; /* A lot. */ 631 } 632 633 /* Place pow(5, power) in DST, and return the number of parts used. 634 DST must be at least one part larger than size of the answer. */ 635 static unsigned int 636 powerOf5(APFloatBase::integerPart *dst, unsigned int power) { 637 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; 638 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 639 pow5s[0] = 78125 * 5; 640 641 unsigned int partsCount[16] = { 1 }; 642 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 643 unsigned int result; 644 assert(power <= maxExponent); 645 646 p1 = dst; 647 p2 = scratch; 648 649 *p1 = firstEightPowers[power & 7]; 650 power >>= 3; 651 652 result = 1; 653 pow5 = pow5s; 654 655 for (unsigned int n = 0; power; power >>= 1, n++) { 656 unsigned int pc; 657 658 pc = partsCount[n]; 659 660 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 661 if (pc == 0) { 662 pc = partsCount[n - 1]; 663 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 664 pc *= 2; 665 if (pow5[pc - 1] == 0) 666 pc--; 667 partsCount[n] = pc; 668 } 669 670 if (power & 1) { 671 APFloatBase::integerPart *tmp; 672 673 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 674 result += pc; 675 if (p2[result - 1] == 0) 676 result--; 677 678 /* Now result is in p1 with partsCount parts and p2 is scratch 679 space. */ 680 tmp = p1; 681 p1 = p2; 682 p2 = tmp; 683 } 684 685 pow5 += pc; 686 } 687 688 if (p1 != dst) 689 APInt::tcAssign(dst, p1, result); 690 691 return result; 692 } 693 694 /* Zero at the end to avoid modular arithmetic when adding one; used 695 when rounding up during hexadecimal output. */ 696 static const char hexDigitsLower[] = "0123456789abcdef0"; 697 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 698 static const char infinityL[] = "infinity"; 699 static const char infinityU[] = "INFINITY"; 700 static const char NaNL[] = "nan"; 701 static const char NaNU[] = "NAN"; 702 703 /* Write out an integerPart in hexadecimal, starting with the most 704 significant nibble. Write out exactly COUNT hexdigits, return 705 COUNT. */ 706 static unsigned int 707 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, 708 const char *hexDigitChars) 709 { 710 unsigned int result = count; 711 712 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); 713 714 part >>= (APFloatBase::integerPartWidth - 4 * count); 715 while (count--) { 716 dst[count] = hexDigitChars[part & 0xf]; 717 part >>= 4; 718 } 719 720 return result; 721 } 722 723 /* Write out an unsigned decimal integer. */ 724 static char * 725 writeUnsignedDecimal (char *dst, unsigned int n) 726 { 727 char buff[40], *p; 728 729 p = buff; 730 do 731 *p++ = '0' + n % 10; 732 while (n /= 10); 733 734 do 735 *dst++ = *--p; 736 while (p != buff); 737 738 return dst; 739 } 740 741 /* Write out a signed decimal integer. */ 742 static char * 743 writeSignedDecimal (char *dst, int value) 744 { 745 if (value < 0) { 746 *dst++ = '-'; 747 dst = writeUnsignedDecimal(dst, -(unsigned) value); 748 } else 749 dst = writeUnsignedDecimal(dst, value); 750 751 return dst; 752 } 753 754 namespace detail { 755 /* Constructors. */ 756 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 757 unsigned int count; 758 759 semantics = ourSemantics; 760 count = partCount(); 761 if (count > 1) 762 significand.parts = new integerPart[count]; 763 } 764 765 void IEEEFloat::freeSignificand() { 766 if (needsCleanup()) 767 delete [] significand.parts; 768 } 769 770 void IEEEFloat::assign(const IEEEFloat &rhs) { 771 assert(semantics == rhs.semantics); 772 773 sign = rhs.sign; 774 category = rhs.category; 775 exponent = rhs.exponent; 776 if (isFiniteNonZero() || category == fcNaN) 777 copySignificand(rhs); 778 } 779 780 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 781 assert(isFiniteNonZero() || category == fcNaN); 782 assert(rhs.partCount() >= partCount()); 783 784 APInt::tcAssign(significandParts(), rhs.significandParts(), 785 partCount()); 786 } 787 788 /* Make this number a NaN, with an arbitrary but deterministic value 789 for the significand. If double or longer, this is a signalling NaN, 790 which may not be ideal. If float, this is QNaN(0). */ 791 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 792 category = fcNaN; 793 sign = Negative; 794 exponent = exponentNaN(); 795 796 integerPart *significand = significandParts(); 797 unsigned numParts = partCount(); 798 799 APInt fill_storage; 800 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 801 // The only NaN representation is where the mantissa is all 1s, which is 802 // non-signalling. 803 SNaN = false; 804 fill_storage = APInt::getAllOnes(semantics->precision - 1); 805 fill = &fill_storage; 806 } 807 808 // Set the significand bits to the fill. 809 if (!fill || fill->getNumWords() < numParts) 810 APInt::tcSet(significand, 0, numParts); 811 if (fill) { 812 APInt::tcAssign(significand, fill->getRawData(), 813 std::min(fill->getNumWords(), numParts)); 814 815 // Zero out the excess bits of the significand. 816 unsigned bitsToPreserve = semantics->precision - 1; 817 unsigned part = bitsToPreserve / 64; 818 bitsToPreserve %= 64; 819 significand[part] &= ((1ULL << bitsToPreserve) - 1); 820 for (part++; part != numParts; ++part) 821 significand[part] = 0; 822 } 823 824 unsigned QNaNBit = semantics->precision - 2; 825 826 if (SNaN) { 827 // We always have to clear the QNaN bit to make it an SNaN. 828 APInt::tcClearBit(significand, QNaNBit); 829 830 // If there are no bits set in the payload, we have to set 831 // *something* to make it a NaN instead of an infinity; 832 // conventionally, this is the next bit down from the QNaN bit. 833 if (APInt::tcIsZero(significand, numParts)) 834 APInt::tcSetBit(significand, QNaNBit - 1); 835 } else { 836 // We always have to set the QNaN bit to make it a QNaN. 837 APInt::tcSetBit(significand, QNaNBit); 838 } 839 840 // For x87 extended precision, we want to make a NaN, not a 841 // pseudo-NaN. Maybe we should expose the ability to make 842 // pseudo-NaNs? 843 if (semantics == &semX87DoubleExtended) 844 APInt::tcSetBit(significand, QNaNBit + 1); 845 } 846 847 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 848 if (this != &rhs) { 849 if (semantics != rhs.semantics) { 850 freeSignificand(); 851 initialize(rhs.semantics); 852 } 853 assign(rhs); 854 } 855 856 return *this; 857 } 858 859 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 860 freeSignificand(); 861 862 semantics = rhs.semantics; 863 significand = rhs.significand; 864 exponent = rhs.exponent; 865 category = rhs.category; 866 sign = rhs.sign; 867 868 rhs.semantics = &semBogus; 869 return *this; 870 } 871 872 bool IEEEFloat::isDenormal() const { 873 return isFiniteNonZero() && (exponent == semantics->minExponent) && 874 (APInt::tcExtractBit(significandParts(), 875 semantics->precision - 1) == 0); 876 } 877 878 bool IEEEFloat::isSmallest() const { 879 // The smallest number by magnitude in our format will be the smallest 880 // denormal, i.e. the floating point number with exponent being minimum 881 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 882 return isFiniteNonZero() && exponent == semantics->minExponent && 883 significandMSB() == 0; 884 } 885 886 bool IEEEFloat::isSmallestNormalized() const { 887 return getCategory() == fcNormal && exponent == semantics->minExponent && 888 isSignificandAllZerosExceptMSB(); 889 } 890 891 bool IEEEFloat::isSignificandAllOnes() const { 892 // Test if the significand excluding the integral bit is all ones. This allows 893 // us to test for binade boundaries. 894 const integerPart *Parts = significandParts(); 895 const unsigned PartCount = partCountForBits(semantics->precision); 896 for (unsigned i = 0; i < PartCount - 1; i++) 897 if (~Parts[i]) 898 return false; 899 900 // Set the unused high bits to all ones when we compare. 901 const unsigned NumHighBits = 902 PartCount*integerPartWidth - semantics->precision + 1; 903 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 904 "Can not have more high bits to fill than integerPartWidth"); 905 const integerPart HighBitFill = 906 ~integerPart(0) << (integerPartWidth - NumHighBits); 907 if (~(Parts[PartCount - 1] | HighBitFill)) 908 return false; 909 910 return true; 911 } 912 913 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 914 // Test if the significand excluding the integral bit is all ones except for 915 // the least significant bit. 916 const integerPart *Parts = significandParts(); 917 918 if (Parts[0] & 1) 919 return false; 920 921 const unsigned PartCount = partCountForBits(semantics->precision); 922 for (unsigned i = 0; i < PartCount - 1; i++) { 923 if (~Parts[i] & ~unsigned{!i}) 924 return false; 925 } 926 927 // Set the unused high bits to all ones when we compare. 928 const unsigned NumHighBits = 929 PartCount * integerPartWidth - semantics->precision + 1; 930 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 931 "Can not have more high bits to fill than integerPartWidth"); 932 const integerPart HighBitFill = ~integerPart(0) 933 << (integerPartWidth - NumHighBits); 934 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 935 return false; 936 937 return true; 938 } 939 940 bool IEEEFloat::isSignificandAllZeros() const { 941 // Test if the significand excluding the integral bit is all zeros. This 942 // allows us to test for binade boundaries. 943 const integerPart *Parts = significandParts(); 944 const unsigned PartCount = partCountForBits(semantics->precision); 945 946 for (unsigned i = 0; i < PartCount - 1; i++) 947 if (Parts[i]) 948 return false; 949 950 // Compute how many bits are used in the final word. 951 const unsigned NumHighBits = 952 PartCount*integerPartWidth - semantics->precision + 1; 953 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 954 "clear than integerPartWidth"); 955 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 956 957 if (Parts[PartCount - 1] & HighBitMask) 958 return false; 959 960 return true; 961 } 962 963 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 964 const integerPart *Parts = significandParts(); 965 const unsigned PartCount = partCountForBits(semantics->precision); 966 967 for (unsigned i = 0; i < PartCount - 1; i++) { 968 if (Parts[i]) 969 return false; 970 } 971 972 const unsigned NumHighBits = 973 PartCount * integerPartWidth - semantics->precision + 1; 974 return Parts[PartCount - 1] == integerPart(1) 975 << (integerPartWidth - NumHighBits); 976 } 977 978 bool IEEEFloat::isLargest() const { 979 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 980 // The largest number by magnitude in our format will be the floating point 981 // number with maximum exponent and with significand that is all ones except 982 // the LSB. 983 return isFiniteNonZero() && exponent == semantics->maxExponent && 984 isSignificandAllOnesExceptLSB(); 985 } else { 986 // The largest number by magnitude in our format will be the floating point 987 // number with maximum exponent and with significand that is all ones. 988 return isFiniteNonZero() && exponent == semantics->maxExponent && 989 isSignificandAllOnes(); 990 } 991 } 992 993 bool IEEEFloat::isInteger() const { 994 // This could be made more efficient; I'm going for obviously correct. 995 if (!isFinite()) return false; 996 IEEEFloat truncated = *this; 997 truncated.roundToIntegral(rmTowardZero); 998 return compare(truncated) == cmpEqual; 999 } 1000 1001 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1002 if (this == &rhs) 1003 return true; 1004 if (semantics != rhs.semantics || 1005 category != rhs.category || 1006 sign != rhs.sign) 1007 return false; 1008 if (category==fcZero || category==fcInfinity) 1009 return true; 1010 1011 if (isFiniteNonZero() && exponent != rhs.exponent) 1012 return false; 1013 1014 return std::equal(significandParts(), significandParts() + partCount(), 1015 rhs.significandParts()); 1016 } 1017 1018 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1019 initialize(&ourSemantics); 1020 sign = 0; 1021 category = fcNormal; 1022 zeroSignificand(); 1023 exponent = ourSemantics.precision - 1; 1024 significandParts()[0] = value; 1025 normalize(rmNearestTiesToEven, lfExactlyZero); 1026 } 1027 1028 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1029 initialize(&ourSemantics); 1030 makeZero(false); 1031 } 1032 1033 // Delegate to the previous constructor, because later copy constructor may 1034 // actually inspects category, which can't be garbage. 1035 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1036 : IEEEFloat(ourSemantics) {} 1037 1038 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1039 initialize(rhs.semantics); 1040 assign(rhs); 1041 } 1042 1043 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1044 *this = std::move(rhs); 1045 } 1046 1047 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1048 1049 unsigned int IEEEFloat::partCount() const { 1050 return partCountForBits(semantics->precision + 1); 1051 } 1052 1053 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 1054 return const_cast<IEEEFloat *>(this)->significandParts(); 1055 } 1056 1057 IEEEFloat::integerPart *IEEEFloat::significandParts() { 1058 if (partCount() > 1) 1059 return significand.parts; 1060 else 1061 return &significand.part; 1062 } 1063 1064 void IEEEFloat::zeroSignificand() { 1065 APInt::tcSet(significandParts(), 0, partCount()); 1066 } 1067 1068 /* Increment an fcNormal floating point number's significand. */ 1069 void IEEEFloat::incrementSignificand() { 1070 integerPart carry; 1071 1072 carry = APInt::tcIncrement(significandParts(), partCount()); 1073 1074 /* Our callers should never cause us to overflow. */ 1075 assert(carry == 0); 1076 (void)carry; 1077 } 1078 1079 /* Add the significand of the RHS. Returns the carry flag. */ 1080 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1081 integerPart *parts; 1082 1083 parts = significandParts(); 1084 1085 assert(semantics == rhs.semantics); 1086 assert(exponent == rhs.exponent); 1087 1088 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1089 } 1090 1091 /* Subtract the significand of the RHS with a borrow flag. Returns 1092 the borrow flag. */ 1093 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 1094 integerPart borrow) { 1095 integerPart *parts; 1096 1097 parts = significandParts(); 1098 1099 assert(semantics == rhs.semantics); 1100 assert(exponent == rhs.exponent); 1101 1102 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 1103 partCount()); 1104 } 1105 1106 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 1107 on to the full-precision result of the multiplication. Returns the 1108 lost fraction. */ 1109 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 1110 IEEEFloat addend) { 1111 unsigned int omsb; // One, not zero, based MSB. 1112 unsigned int partsCount, newPartsCount, precision; 1113 integerPart *lhsSignificand; 1114 integerPart scratch[4]; 1115 integerPart *fullSignificand; 1116 lostFraction lost_fraction; 1117 bool ignored; 1118 1119 assert(semantics == rhs.semantics); 1120 1121 precision = semantics->precision; 1122 1123 // Allocate space for twice as many bits as the original significand, plus one 1124 // extra bit for the addition to overflow into. 1125 newPartsCount = partCountForBits(precision * 2 + 1); 1126 1127 if (newPartsCount > 4) 1128 fullSignificand = new integerPart[newPartsCount]; 1129 else 1130 fullSignificand = scratch; 1131 1132 lhsSignificand = significandParts(); 1133 partsCount = partCount(); 1134 1135 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 1136 rhs.significandParts(), partsCount, partsCount); 1137 1138 lost_fraction = lfExactlyZero; 1139 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1140 exponent += rhs.exponent; 1141 1142 // Assume the operands involved in the multiplication are single-precision 1143 // FP, and the two multiplicants are: 1144 // *this = a23 . a22 ... a0 * 2^e1 1145 // rhs = b23 . b22 ... b0 * 2^e2 1146 // the result of multiplication is: 1147 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 1148 // Note that there are three significant bits at the left-hand side of the 1149 // radix point: two for the multiplication, and an overflow bit for the 1150 // addition (that will always be zero at this point). Move the radix point 1151 // toward left by two bits, and adjust exponent accordingly. 1152 exponent += 2; 1153 1154 if (addend.isNonZero()) { 1155 // The intermediate result of the multiplication has "2 * precision" 1156 // signicant bit; adjust the addend to be consistent with mul result. 1157 // 1158 Significand savedSignificand = significand; 1159 const fltSemantics *savedSemantics = semantics; 1160 fltSemantics extendedSemantics; 1161 opStatus status; 1162 unsigned int extendedPrecision; 1163 1164 // Normalize our MSB to one below the top bit to allow for overflow. 1165 extendedPrecision = 2 * precision + 1; 1166 if (omsb != extendedPrecision - 1) { 1167 assert(extendedPrecision > omsb); 1168 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1169 (extendedPrecision - 1) - omsb); 1170 exponent -= (extendedPrecision - 1) - omsb; 1171 } 1172 1173 /* Create new semantics. */ 1174 extendedSemantics = *semantics; 1175 extendedSemantics.precision = extendedPrecision; 1176 1177 if (newPartsCount == 1) 1178 significand.part = fullSignificand[0]; 1179 else 1180 significand.parts = fullSignificand; 1181 semantics = &extendedSemantics; 1182 1183 // Make a copy so we can convert it to the extended semantics. 1184 // Note that we cannot convert the addend directly, as the extendedSemantics 1185 // is a local variable (which we take a reference to). 1186 IEEEFloat extendedAddend(addend); 1187 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1188 assert(status == opOK); 1189 (void)status; 1190 1191 // Shift the significand of the addend right by one bit. This guarantees 1192 // that the high bit of the significand is zero (same as fullSignificand), 1193 // so the addition will overflow (if it does overflow at all) into the top bit. 1194 lost_fraction = extendedAddend.shiftSignificandRight(1); 1195 assert(lost_fraction == lfExactlyZero && 1196 "Lost precision while shifting addend for fused-multiply-add."); 1197 1198 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1199 1200 /* Restore our state. */ 1201 if (newPartsCount == 1) 1202 fullSignificand[0] = significand.part; 1203 significand = savedSignificand; 1204 semantics = savedSemantics; 1205 1206 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1207 } 1208 1209 // Convert the result having "2 * precision" significant-bits back to the one 1210 // having "precision" significant-bits. First, move the radix point from 1211 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1212 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1213 exponent -= precision + 1; 1214 1215 // In case MSB resides at the left-hand side of radix point, shift the 1216 // mantissa right by some amount to make sure the MSB reside right before 1217 // the radix point (i.e. "MSB . rest-significant-bits"). 1218 // 1219 // Note that the result is not normalized when "omsb < precision". So, the 1220 // caller needs to call IEEEFloat::normalize() if normalized value is 1221 // expected. 1222 if (omsb > precision) { 1223 unsigned int bits, significantParts; 1224 lostFraction lf; 1225 1226 bits = omsb - precision; 1227 significantParts = partCountForBits(omsb); 1228 lf = shiftRight(fullSignificand, significantParts, bits); 1229 lost_fraction = combineLostFractions(lf, lost_fraction); 1230 exponent += bits; 1231 } 1232 1233 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1234 1235 if (newPartsCount > 4) 1236 delete [] fullSignificand; 1237 1238 return lost_fraction; 1239 } 1240 1241 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { 1242 return multiplySignificand(rhs, IEEEFloat(*semantics)); 1243 } 1244 1245 /* Multiply the significands of LHS and RHS to DST. */ 1246 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1247 unsigned int bit, i, partsCount; 1248 const integerPart *rhsSignificand; 1249 integerPart *lhsSignificand, *dividend, *divisor; 1250 integerPart scratch[4]; 1251 lostFraction lost_fraction; 1252 1253 assert(semantics == rhs.semantics); 1254 1255 lhsSignificand = significandParts(); 1256 rhsSignificand = rhs.significandParts(); 1257 partsCount = partCount(); 1258 1259 if (partsCount > 2) 1260 dividend = new integerPart[partsCount * 2]; 1261 else 1262 dividend = scratch; 1263 1264 divisor = dividend + partsCount; 1265 1266 /* Copy the dividend and divisor as they will be modified in-place. */ 1267 for (i = 0; i < partsCount; i++) { 1268 dividend[i] = lhsSignificand[i]; 1269 divisor[i] = rhsSignificand[i]; 1270 lhsSignificand[i] = 0; 1271 } 1272 1273 exponent -= rhs.exponent; 1274 1275 unsigned int precision = semantics->precision; 1276 1277 /* Normalize the divisor. */ 1278 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1279 if (bit) { 1280 exponent += bit; 1281 APInt::tcShiftLeft(divisor, partsCount, bit); 1282 } 1283 1284 /* Normalize the dividend. */ 1285 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1286 if (bit) { 1287 exponent -= bit; 1288 APInt::tcShiftLeft(dividend, partsCount, bit); 1289 } 1290 1291 /* Ensure the dividend >= divisor initially for the loop below. 1292 Incidentally, this means that the division loop below is 1293 guaranteed to set the integer bit to one. */ 1294 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1295 exponent--; 1296 APInt::tcShiftLeft(dividend, partsCount, 1); 1297 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1298 } 1299 1300 /* Long division. */ 1301 for (bit = precision; bit; bit -= 1) { 1302 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1303 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1304 APInt::tcSetBit(lhsSignificand, bit - 1); 1305 } 1306 1307 APInt::tcShiftLeft(dividend, partsCount, 1); 1308 } 1309 1310 /* Figure out the lost fraction. */ 1311 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1312 1313 if (cmp > 0) 1314 lost_fraction = lfMoreThanHalf; 1315 else if (cmp == 0) 1316 lost_fraction = lfExactlyHalf; 1317 else if (APInt::tcIsZero(dividend, partsCount)) 1318 lost_fraction = lfExactlyZero; 1319 else 1320 lost_fraction = lfLessThanHalf; 1321 1322 if (partsCount > 2) 1323 delete [] dividend; 1324 1325 return lost_fraction; 1326 } 1327 1328 unsigned int IEEEFloat::significandMSB() const { 1329 return APInt::tcMSB(significandParts(), partCount()); 1330 } 1331 1332 unsigned int IEEEFloat::significandLSB() const { 1333 return APInt::tcLSB(significandParts(), partCount()); 1334 } 1335 1336 /* Note that a zero result is NOT normalized to fcZero. */ 1337 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1338 /* Our exponent should not overflow. */ 1339 assert((ExponentType) (exponent + bits) >= exponent); 1340 1341 exponent += bits; 1342 1343 return shiftRight(significandParts(), partCount(), bits); 1344 } 1345 1346 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1347 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1348 assert(bits < semantics->precision); 1349 1350 if (bits) { 1351 unsigned int partsCount = partCount(); 1352 1353 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1354 exponent -= bits; 1355 1356 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1357 } 1358 } 1359 1360 IEEEFloat::cmpResult 1361 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1362 int compare; 1363 1364 assert(semantics == rhs.semantics); 1365 assert(isFiniteNonZero()); 1366 assert(rhs.isFiniteNonZero()); 1367 1368 compare = exponent - rhs.exponent; 1369 1370 /* If exponents are equal, do an unsigned bignum comparison of the 1371 significands. */ 1372 if (compare == 0) 1373 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1374 partCount()); 1375 1376 if (compare > 0) 1377 return cmpGreaterThan; 1378 else if (compare < 0) 1379 return cmpLessThan; 1380 else 1381 return cmpEqual; 1382 } 1383 1384 /* Set the least significant BITS bits of a bignum, clear the 1385 rest. */ 1386 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, 1387 unsigned bits) { 1388 unsigned i = 0; 1389 while (bits > APInt::APINT_BITS_PER_WORD) { 1390 dst[i++] = ~(APInt::WordType)0; 1391 bits -= APInt::APINT_BITS_PER_WORD; 1392 } 1393 1394 if (bits) 1395 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits); 1396 1397 while (i < parts) 1398 dst[i++] = 0; 1399 } 1400 1401 /* Handle overflow. Sign is preserved. We either become infinity or 1402 the largest finite number. */ 1403 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1404 /* Infinity? */ 1405 if (rounding_mode == rmNearestTiesToEven || 1406 rounding_mode == rmNearestTiesToAway || 1407 (rounding_mode == rmTowardPositive && !sign) || 1408 (rounding_mode == rmTowardNegative && sign)) { 1409 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1410 makeNaN(false, sign); 1411 else 1412 category = fcInfinity; 1413 return (opStatus) (opOverflow | opInexact); 1414 } 1415 1416 /* Otherwise we become the largest finite number. */ 1417 category = fcNormal; 1418 exponent = semantics->maxExponent; 1419 tcSetLeastSignificantBits(significandParts(), partCount(), 1420 semantics->precision); 1421 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1422 APInt::tcClearBit(significandParts(), 0); 1423 1424 return opInexact; 1425 } 1426 1427 /* Returns TRUE if, when truncating the current number, with BIT the 1428 new LSB, with the given lost fraction and rounding mode, the result 1429 would need to be rounded away from zero (i.e., by increasing the 1430 signficand). This routine must work for fcZero of both signs, and 1431 fcNormal numbers. */ 1432 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1433 lostFraction lost_fraction, 1434 unsigned int bit) const { 1435 /* NaNs and infinities should not have lost fractions. */ 1436 assert(isFiniteNonZero() || category == fcZero); 1437 1438 /* Current callers never pass this so we don't handle it. */ 1439 assert(lost_fraction != lfExactlyZero); 1440 1441 switch (rounding_mode) { 1442 case rmNearestTiesToAway: 1443 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1444 1445 case rmNearestTiesToEven: 1446 if (lost_fraction == lfMoreThanHalf) 1447 return true; 1448 1449 /* Our zeroes don't have a significand to test. */ 1450 if (lost_fraction == lfExactlyHalf && category != fcZero) 1451 return APInt::tcExtractBit(significandParts(), bit); 1452 1453 return false; 1454 1455 case rmTowardZero: 1456 return false; 1457 1458 case rmTowardPositive: 1459 return !sign; 1460 1461 case rmTowardNegative: 1462 return sign; 1463 1464 default: 1465 break; 1466 } 1467 llvm_unreachable("Invalid rounding mode found"); 1468 } 1469 1470 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1471 lostFraction lost_fraction) { 1472 unsigned int omsb; /* One, not zero, based MSB. */ 1473 int exponentChange; 1474 1475 if (!isFiniteNonZero()) 1476 return opOK; 1477 1478 /* Before rounding normalize the exponent of fcNormal numbers. */ 1479 omsb = significandMSB() + 1; 1480 1481 if (omsb) { 1482 /* OMSB is numbered from 1. We want to place it in the integer 1483 bit numbered PRECISION if possible, with a compensating change in 1484 the exponent. */ 1485 exponentChange = omsb - semantics->precision; 1486 1487 /* If the resulting exponent is too high, overflow according to 1488 the rounding mode. */ 1489 if (exponent + exponentChange > semantics->maxExponent) 1490 return handleOverflow(rounding_mode); 1491 1492 /* Subnormal numbers have exponent minExponent, and their MSB 1493 is forced based on that. */ 1494 if (exponent + exponentChange < semantics->minExponent) 1495 exponentChange = semantics->minExponent - exponent; 1496 1497 /* Shifting left is easy as we don't lose precision. */ 1498 if (exponentChange < 0) { 1499 assert(lost_fraction == lfExactlyZero); 1500 1501 shiftSignificandLeft(-exponentChange); 1502 1503 return opOK; 1504 } 1505 1506 if (exponentChange > 0) { 1507 lostFraction lf; 1508 1509 /* Shift right and capture any new lost fraction. */ 1510 lf = shiftSignificandRight(exponentChange); 1511 1512 lost_fraction = combineLostFractions(lf, lost_fraction); 1513 1514 /* Keep OMSB up-to-date. */ 1515 if (omsb > (unsigned) exponentChange) 1516 omsb -= exponentChange; 1517 else 1518 omsb = 0; 1519 } 1520 } 1521 1522 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1523 exponent == semantics->maxExponent && isSignificandAllOnes()) 1524 return handleOverflow(rounding_mode); 1525 1526 /* Now round the number according to rounding_mode given the lost 1527 fraction. */ 1528 1529 /* As specified in IEEE 754, since we do not trap we do not report 1530 underflow for exact results. */ 1531 if (lost_fraction == lfExactlyZero) { 1532 /* Canonicalize zeroes. */ 1533 if (omsb == 0) 1534 category = fcZero; 1535 1536 return opOK; 1537 } 1538 1539 /* Increment the significand if we're rounding away from zero. */ 1540 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1541 if (omsb == 0) 1542 exponent = semantics->minExponent; 1543 1544 incrementSignificand(); 1545 omsb = significandMSB() + 1; 1546 1547 /* Did the significand increment overflow? */ 1548 if (omsb == (unsigned) semantics->precision + 1) { 1549 /* Renormalize by incrementing the exponent and shifting our 1550 significand right one. However if we already have the 1551 maximum exponent we overflow to infinity. */ 1552 if (exponent == semantics->maxExponent) { 1553 category = fcInfinity; 1554 1555 return (opStatus) (opOverflow | opInexact); 1556 } 1557 1558 shiftSignificandRight(1); 1559 1560 return opInexact; 1561 } 1562 1563 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1564 exponent == semantics->maxExponent && isSignificandAllOnes()) 1565 return handleOverflow(rounding_mode); 1566 } 1567 1568 /* The normal case - we were and are not denormal, and any 1569 significand increment above didn't overflow. */ 1570 if (omsb == semantics->precision) 1571 return opInexact; 1572 1573 /* We have a non-zero denormal. */ 1574 assert(omsb < semantics->precision); 1575 1576 /* Canonicalize zeroes. */ 1577 if (omsb == 0) 1578 category = fcZero; 1579 1580 /* The fcZero case is a denormal that underflowed to zero. */ 1581 return (opStatus) (opUnderflow | opInexact); 1582 } 1583 1584 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1585 bool subtract) { 1586 switch (PackCategoriesIntoKey(category, rhs.category)) { 1587 default: 1588 llvm_unreachable(nullptr); 1589 1590 case PackCategoriesIntoKey(fcZero, fcNaN): 1591 case PackCategoriesIntoKey(fcNormal, fcNaN): 1592 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1593 assign(rhs); 1594 [[fallthrough]]; 1595 case PackCategoriesIntoKey(fcNaN, fcZero): 1596 case PackCategoriesIntoKey(fcNaN, fcNormal): 1597 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1598 case PackCategoriesIntoKey(fcNaN, fcNaN): 1599 if (isSignaling()) { 1600 makeQuiet(); 1601 return opInvalidOp; 1602 } 1603 return rhs.isSignaling() ? opInvalidOp : opOK; 1604 1605 case PackCategoriesIntoKey(fcNormal, fcZero): 1606 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1607 case PackCategoriesIntoKey(fcInfinity, fcZero): 1608 return opOK; 1609 1610 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1611 case PackCategoriesIntoKey(fcZero, fcInfinity): 1612 category = fcInfinity; 1613 sign = rhs.sign ^ subtract; 1614 return opOK; 1615 1616 case PackCategoriesIntoKey(fcZero, fcNormal): 1617 assign(rhs); 1618 sign = rhs.sign ^ subtract; 1619 return opOK; 1620 1621 case PackCategoriesIntoKey(fcZero, fcZero): 1622 /* Sign depends on rounding mode; handled by caller. */ 1623 return opOK; 1624 1625 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1626 /* Differently signed infinities can only be validly 1627 subtracted. */ 1628 if (((sign ^ rhs.sign)!=0) != subtract) { 1629 makeNaN(); 1630 return opInvalidOp; 1631 } 1632 1633 return opOK; 1634 1635 case PackCategoriesIntoKey(fcNormal, fcNormal): 1636 return opDivByZero; 1637 } 1638 } 1639 1640 /* Add or subtract two normal numbers. */ 1641 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1642 bool subtract) { 1643 integerPart carry; 1644 lostFraction lost_fraction; 1645 int bits; 1646 1647 /* Determine if the operation on the absolute values is effectively 1648 an addition or subtraction. */ 1649 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1650 1651 /* Are we bigger exponent-wise than the RHS? */ 1652 bits = exponent - rhs.exponent; 1653 1654 /* Subtraction is more subtle than one might naively expect. */ 1655 if (subtract) { 1656 IEEEFloat temp_rhs(rhs); 1657 1658 if (bits == 0) 1659 lost_fraction = lfExactlyZero; 1660 else if (bits > 0) { 1661 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1662 shiftSignificandLeft(1); 1663 } else { 1664 lost_fraction = shiftSignificandRight(-bits - 1); 1665 temp_rhs.shiftSignificandLeft(1); 1666 } 1667 1668 // Should we reverse the subtraction. 1669 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1670 carry = temp_rhs.subtractSignificand 1671 (*this, lost_fraction != lfExactlyZero); 1672 copySignificand(temp_rhs); 1673 sign = !sign; 1674 } else { 1675 carry = subtractSignificand 1676 (temp_rhs, lost_fraction != lfExactlyZero); 1677 } 1678 1679 /* Invert the lost fraction - it was on the RHS and 1680 subtracted. */ 1681 if (lost_fraction == lfLessThanHalf) 1682 lost_fraction = lfMoreThanHalf; 1683 else if (lost_fraction == lfMoreThanHalf) 1684 lost_fraction = lfLessThanHalf; 1685 1686 /* The code above is intended to ensure that no borrow is 1687 necessary. */ 1688 assert(!carry); 1689 (void)carry; 1690 } else { 1691 if (bits > 0) { 1692 IEEEFloat temp_rhs(rhs); 1693 1694 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1695 carry = addSignificand(temp_rhs); 1696 } else { 1697 lost_fraction = shiftSignificandRight(-bits); 1698 carry = addSignificand(rhs); 1699 } 1700 1701 /* We have a guard bit; generating a carry cannot happen. */ 1702 assert(!carry); 1703 (void)carry; 1704 } 1705 1706 return lost_fraction; 1707 } 1708 1709 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1710 switch (PackCategoriesIntoKey(category, rhs.category)) { 1711 default: 1712 llvm_unreachable(nullptr); 1713 1714 case PackCategoriesIntoKey(fcZero, fcNaN): 1715 case PackCategoriesIntoKey(fcNormal, fcNaN): 1716 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1717 assign(rhs); 1718 sign = false; 1719 [[fallthrough]]; 1720 case PackCategoriesIntoKey(fcNaN, fcZero): 1721 case PackCategoriesIntoKey(fcNaN, fcNormal): 1722 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1723 case PackCategoriesIntoKey(fcNaN, fcNaN): 1724 sign ^= rhs.sign; // restore the original sign 1725 if (isSignaling()) { 1726 makeQuiet(); 1727 return opInvalidOp; 1728 } 1729 return rhs.isSignaling() ? opInvalidOp : opOK; 1730 1731 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1732 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1733 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1734 category = fcInfinity; 1735 return opOK; 1736 1737 case PackCategoriesIntoKey(fcZero, fcNormal): 1738 case PackCategoriesIntoKey(fcNormal, fcZero): 1739 case PackCategoriesIntoKey(fcZero, fcZero): 1740 category = fcZero; 1741 return opOK; 1742 1743 case PackCategoriesIntoKey(fcZero, fcInfinity): 1744 case PackCategoriesIntoKey(fcInfinity, fcZero): 1745 makeNaN(); 1746 return opInvalidOp; 1747 1748 case PackCategoriesIntoKey(fcNormal, fcNormal): 1749 return opOK; 1750 } 1751 } 1752 1753 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1754 switch (PackCategoriesIntoKey(category, rhs.category)) { 1755 default: 1756 llvm_unreachable(nullptr); 1757 1758 case PackCategoriesIntoKey(fcZero, fcNaN): 1759 case PackCategoriesIntoKey(fcNormal, fcNaN): 1760 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1761 assign(rhs); 1762 sign = false; 1763 [[fallthrough]]; 1764 case PackCategoriesIntoKey(fcNaN, fcZero): 1765 case PackCategoriesIntoKey(fcNaN, fcNormal): 1766 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1767 case PackCategoriesIntoKey(fcNaN, fcNaN): 1768 sign ^= rhs.sign; // restore the original sign 1769 if (isSignaling()) { 1770 makeQuiet(); 1771 return opInvalidOp; 1772 } 1773 return rhs.isSignaling() ? opInvalidOp : opOK; 1774 1775 case PackCategoriesIntoKey(fcInfinity, fcZero): 1776 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1777 case PackCategoriesIntoKey(fcZero, fcInfinity): 1778 case PackCategoriesIntoKey(fcZero, fcNormal): 1779 return opOK; 1780 1781 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1782 category = fcZero; 1783 return opOK; 1784 1785 case PackCategoriesIntoKey(fcNormal, fcZero): 1786 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1787 makeNaN(false, sign); 1788 else 1789 category = fcInfinity; 1790 return opDivByZero; 1791 1792 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1793 case PackCategoriesIntoKey(fcZero, fcZero): 1794 makeNaN(); 1795 return opInvalidOp; 1796 1797 case PackCategoriesIntoKey(fcNormal, fcNormal): 1798 return opOK; 1799 } 1800 } 1801 1802 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1803 switch (PackCategoriesIntoKey(category, rhs.category)) { 1804 default: 1805 llvm_unreachable(nullptr); 1806 1807 case PackCategoriesIntoKey(fcZero, fcNaN): 1808 case PackCategoriesIntoKey(fcNormal, fcNaN): 1809 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1810 assign(rhs); 1811 [[fallthrough]]; 1812 case PackCategoriesIntoKey(fcNaN, fcZero): 1813 case PackCategoriesIntoKey(fcNaN, fcNormal): 1814 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1815 case PackCategoriesIntoKey(fcNaN, fcNaN): 1816 if (isSignaling()) { 1817 makeQuiet(); 1818 return opInvalidOp; 1819 } 1820 return rhs.isSignaling() ? opInvalidOp : opOK; 1821 1822 case PackCategoriesIntoKey(fcZero, fcInfinity): 1823 case PackCategoriesIntoKey(fcZero, fcNormal): 1824 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1825 return opOK; 1826 1827 case PackCategoriesIntoKey(fcNormal, fcZero): 1828 case PackCategoriesIntoKey(fcInfinity, fcZero): 1829 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1830 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1831 case PackCategoriesIntoKey(fcZero, fcZero): 1832 makeNaN(); 1833 return opInvalidOp; 1834 1835 case PackCategoriesIntoKey(fcNormal, fcNormal): 1836 return opOK; 1837 } 1838 } 1839 1840 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 1841 switch (PackCategoriesIntoKey(category, rhs.category)) { 1842 default: 1843 llvm_unreachable(nullptr); 1844 1845 case PackCategoriesIntoKey(fcZero, fcNaN): 1846 case PackCategoriesIntoKey(fcNormal, fcNaN): 1847 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1848 assign(rhs); 1849 [[fallthrough]]; 1850 case PackCategoriesIntoKey(fcNaN, fcZero): 1851 case PackCategoriesIntoKey(fcNaN, fcNormal): 1852 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1853 case PackCategoriesIntoKey(fcNaN, fcNaN): 1854 if (isSignaling()) { 1855 makeQuiet(); 1856 return opInvalidOp; 1857 } 1858 return rhs.isSignaling() ? opInvalidOp : opOK; 1859 1860 case PackCategoriesIntoKey(fcZero, fcInfinity): 1861 case PackCategoriesIntoKey(fcZero, fcNormal): 1862 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1863 return opOK; 1864 1865 case PackCategoriesIntoKey(fcNormal, fcZero): 1866 case PackCategoriesIntoKey(fcInfinity, fcZero): 1867 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1868 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1869 case PackCategoriesIntoKey(fcZero, fcZero): 1870 makeNaN(); 1871 return opInvalidOp; 1872 1873 case PackCategoriesIntoKey(fcNormal, fcNormal): 1874 return opDivByZero; // fake status, indicating this is not a special case 1875 } 1876 } 1877 1878 /* Change sign. */ 1879 void IEEEFloat::changeSign() { 1880 /* Look mummy, this one's easy. */ 1881 sign = !sign; 1882 } 1883 1884 /* Normalized addition or subtraction. */ 1885 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 1886 roundingMode rounding_mode, 1887 bool subtract) { 1888 opStatus fs; 1889 1890 fs = addOrSubtractSpecials(rhs, subtract); 1891 1892 /* This return code means it was not a simple case. */ 1893 if (fs == opDivByZero) { 1894 lostFraction lost_fraction; 1895 1896 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1897 fs = normalize(rounding_mode, lost_fraction); 1898 1899 /* Can only be zero if we lost no fraction. */ 1900 assert(category != fcZero || lost_fraction == lfExactlyZero); 1901 } 1902 1903 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1904 positive zero unless rounding to minus infinity, except that 1905 adding two like-signed zeroes gives that zero. */ 1906 if (category == fcZero) { 1907 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1908 sign = (rounding_mode == rmTowardNegative); 1909 } 1910 1911 return fs; 1912 } 1913 1914 /* Normalized addition. */ 1915 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 1916 roundingMode rounding_mode) { 1917 return addOrSubtract(rhs, rounding_mode, false); 1918 } 1919 1920 /* Normalized subtraction. */ 1921 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 1922 roundingMode rounding_mode) { 1923 return addOrSubtract(rhs, rounding_mode, true); 1924 } 1925 1926 /* Normalized multiply. */ 1927 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 1928 roundingMode rounding_mode) { 1929 opStatus fs; 1930 1931 sign ^= rhs.sign; 1932 fs = multiplySpecials(rhs); 1933 1934 if (isFiniteNonZero()) { 1935 lostFraction lost_fraction = multiplySignificand(rhs); 1936 fs = normalize(rounding_mode, lost_fraction); 1937 if (lost_fraction != lfExactlyZero) 1938 fs = (opStatus) (fs | opInexact); 1939 } 1940 1941 return fs; 1942 } 1943 1944 /* Normalized divide. */ 1945 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 1946 roundingMode rounding_mode) { 1947 opStatus fs; 1948 1949 sign ^= rhs.sign; 1950 fs = divideSpecials(rhs); 1951 1952 if (isFiniteNonZero()) { 1953 lostFraction lost_fraction = divideSignificand(rhs); 1954 fs = normalize(rounding_mode, lost_fraction); 1955 if (lost_fraction != lfExactlyZero) 1956 fs = (opStatus) (fs | opInexact); 1957 } 1958 1959 return fs; 1960 } 1961 1962 /* Normalized remainder. */ 1963 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 1964 opStatus fs; 1965 unsigned int origSign = sign; 1966 1967 // First handle the special cases. 1968 fs = remainderSpecials(rhs); 1969 if (fs != opDivByZero) 1970 return fs; 1971 1972 fs = opOK; 1973 1974 // Make sure the current value is less than twice the denom. If the addition 1975 // did not succeed (an overflow has happened), which means that the finite 1976 // value we currently posses must be less than twice the denom (as we are 1977 // using the same semantics). 1978 IEEEFloat P2 = rhs; 1979 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 1980 fs = mod(P2); 1981 assert(fs == opOK); 1982 } 1983 1984 // Lets work with absolute numbers. 1985 IEEEFloat P = rhs; 1986 P.sign = false; 1987 sign = false; 1988 1989 // 1990 // To calculate the remainder we use the following scheme. 1991 // 1992 // The remainder is defained as follows: 1993 // 1994 // remainder = numer - rquot * denom = x - r * p 1995 // 1996 // Where r is the result of: x/p, rounded toward the nearest integral value 1997 // (with halfway cases rounded toward the even number). 1998 // 1999 // Currently, (after x mod 2p): 2000 // r is the number of 2p's present inside x, which is inherently, an even 2001 // number of p's. 2002 // 2003 // We may split the remaining calculation into 4 options: 2004 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2005 // - if x == 0.5p then we round to the nearest even number which is 0, and we 2006 // are done as well. 2007 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 2008 // to subtract 1p at least once. 2009 // - if x >= p then we must subtract p at least once, as x must be a 2010 // remainder. 2011 // 2012 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 2013 // 2014 // We can now split the remaining calculation to the following 3 options: 2015 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2016 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 2017 // must round up to the next even number. so we must subtract p once more. 2018 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 2019 // integral, and subtract p once more. 2020 // 2021 2022 // Extend the semantics to prevent an overflow/underflow or inexact result. 2023 bool losesInfo; 2024 fltSemantics extendedSemantics = *semantics; 2025 extendedSemantics.maxExponent++; 2026 extendedSemantics.minExponent--; 2027 extendedSemantics.precision += 2; 2028 2029 IEEEFloat VEx = *this; 2030 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2031 assert(fs == opOK && !losesInfo); 2032 IEEEFloat PEx = P; 2033 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2034 assert(fs == opOK && !losesInfo); 2035 2036 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 2037 // any fraction. 2038 fs = VEx.add(VEx, rmNearestTiesToEven); 2039 assert(fs == opOK); 2040 2041 if (VEx.compare(PEx) == cmpGreaterThan) { 2042 fs = subtract(P, rmNearestTiesToEven); 2043 assert(fs == opOK); 2044 2045 // Make VEx = this.add(this), but because we have different semantics, we do 2046 // not want to `convert` again, so we just subtract PEx twice (which equals 2047 // to the desired value). 2048 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2049 assert(fs == opOK); 2050 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2051 assert(fs == opOK); 2052 2053 cmpResult result = VEx.compare(PEx); 2054 if (result == cmpGreaterThan || result == cmpEqual) { 2055 fs = subtract(P, rmNearestTiesToEven); 2056 assert(fs == opOK); 2057 } 2058 } 2059 2060 if (isZero()) 2061 sign = origSign; // IEEE754 requires this 2062 else 2063 sign ^= origSign; 2064 return fs; 2065 } 2066 2067 /* Normalized llvm frem (C fmod). */ 2068 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 2069 opStatus fs; 2070 fs = modSpecials(rhs); 2071 unsigned int origSign = sign; 2072 2073 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 2074 compareAbsoluteValue(rhs) != cmpLessThan) { 2075 int Exp = ilogb(*this) - ilogb(rhs); 2076 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven); 2077 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly 2078 // check for it. 2079 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan) 2080 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven); 2081 V.sign = sign; 2082 2083 fs = subtract(V, rmNearestTiesToEven); 2084 assert(fs==opOK); 2085 } 2086 if (isZero()) 2087 sign = origSign; // fmod requires this 2088 return fs; 2089 } 2090 2091 /* Normalized fused-multiply-add. */ 2092 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 2093 const IEEEFloat &addend, 2094 roundingMode rounding_mode) { 2095 opStatus fs; 2096 2097 /* Post-multiplication sign, before addition. */ 2098 sign ^= multiplicand.sign; 2099 2100 /* If and only if all arguments are normal do we need to do an 2101 extended-precision calculation. */ 2102 if (isFiniteNonZero() && 2103 multiplicand.isFiniteNonZero() && 2104 addend.isFinite()) { 2105 lostFraction lost_fraction; 2106 2107 lost_fraction = multiplySignificand(multiplicand, addend); 2108 fs = normalize(rounding_mode, lost_fraction); 2109 if (lost_fraction != lfExactlyZero) 2110 fs = (opStatus) (fs | opInexact); 2111 2112 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2113 positive zero unless rounding to minus infinity, except that 2114 adding two like-signed zeroes gives that zero. */ 2115 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 2116 sign = (rounding_mode == rmTowardNegative); 2117 } else { 2118 fs = multiplySpecials(multiplicand); 2119 2120 /* FS can only be opOK or opInvalidOp. There is no more work 2121 to do in the latter case. The IEEE-754R standard says it is 2122 implementation-defined in this case whether, if ADDEND is a 2123 quiet NaN, we raise invalid op; this implementation does so. 2124 2125 If we need to do the addition we can do so with normal 2126 precision. */ 2127 if (fs == opOK) 2128 fs = addOrSubtract(addend, rounding_mode, false); 2129 } 2130 2131 return fs; 2132 } 2133 2134 /* Rounding-mode correct round to integral value. */ 2135 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2136 opStatus fs; 2137 2138 if (isInfinity()) 2139 // [IEEE Std 754-2008 6.1]: 2140 // The behavior of infinity in floating-point arithmetic is derived from the 2141 // limiting cases of real arithmetic with operands of arbitrarily 2142 // large magnitude, when such a limit exists. 2143 // ... 2144 // Operations on infinite operands are usually exact and therefore signal no 2145 // exceptions ... 2146 return opOK; 2147 2148 if (isNaN()) { 2149 if (isSignaling()) { 2150 // [IEEE Std 754-2008 6.2]: 2151 // Under default exception handling, any operation signaling an invalid 2152 // operation exception and for which a floating-point result is to be 2153 // delivered shall deliver a quiet NaN. 2154 makeQuiet(); 2155 // [IEEE Std 754-2008 6.2]: 2156 // Signaling NaNs shall be reserved operands that, under default exception 2157 // handling, signal the invalid operation exception(see 7.2) for every 2158 // general-computational and signaling-computational operation except for 2159 // the conversions described in 5.12. 2160 return opInvalidOp; 2161 } else { 2162 // [IEEE Std 754-2008 6.2]: 2163 // For an operation with quiet NaN inputs, other than maximum and minimum 2164 // operations, if a floating-point result is to be delivered the result 2165 // shall be a quiet NaN which should be one of the input NaNs. 2166 // ... 2167 // Every general-computational and quiet-computational operation involving 2168 // one or more input NaNs, none of them signaling, shall signal no 2169 // exception, except fusedMultiplyAdd might signal the invalid operation 2170 // exception(see 7.2). 2171 return opOK; 2172 } 2173 } 2174 2175 if (isZero()) { 2176 // [IEEE Std 754-2008 6.3]: 2177 // ... the sign of the result of conversions, the quantize operation, the 2178 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2179 // the sign of the first or only operand. 2180 return opOK; 2181 } 2182 2183 // If the exponent is large enough, we know that this value is already 2184 // integral, and the arithmetic below would potentially cause it to saturate 2185 // to +/-Inf. Bail out early instead. 2186 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2187 return opOK; 2188 2189 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2190 // precision of our format, and then subtract it back off again. The choice 2191 // of rounding modes for the addition/subtraction determines the rounding mode 2192 // for our integral rounding as well. 2193 // NOTE: When the input value is negative, we do subtraction followed by 2194 // addition instead. 2195 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2196 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2197 IEEEFloat MagicConstant(*semantics); 2198 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2199 rmNearestTiesToEven); 2200 assert(fs == opOK); 2201 MagicConstant.sign = sign; 2202 2203 // Preserve the input sign so that we can handle the case of zero result 2204 // correctly. 2205 bool inputSign = isNegative(); 2206 2207 fs = add(MagicConstant, rounding_mode); 2208 2209 // Current value and 'MagicConstant' are both integers, so the result of the 2210 // subtraction is always exact according to Sterbenz' lemma. 2211 subtract(MagicConstant, rounding_mode); 2212 2213 // Restore the input sign. 2214 if (inputSign != isNegative()) 2215 changeSign(); 2216 2217 return fs; 2218 } 2219 2220 2221 /* Comparison requires normalized numbers. */ 2222 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2223 cmpResult result; 2224 2225 assert(semantics == rhs.semantics); 2226 2227 switch (PackCategoriesIntoKey(category, rhs.category)) { 2228 default: 2229 llvm_unreachable(nullptr); 2230 2231 case PackCategoriesIntoKey(fcNaN, fcZero): 2232 case PackCategoriesIntoKey(fcNaN, fcNormal): 2233 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2234 case PackCategoriesIntoKey(fcNaN, fcNaN): 2235 case PackCategoriesIntoKey(fcZero, fcNaN): 2236 case PackCategoriesIntoKey(fcNormal, fcNaN): 2237 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2238 return cmpUnordered; 2239 2240 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2241 case PackCategoriesIntoKey(fcInfinity, fcZero): 2242 case PackCategoriesIntoKey(fcNormal, fcZero): 2243 if (sign) 2244 return cmpLessThan; 2245 else 2246 return cmpGreaterThan; 2247 2248 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2249 case PackCategoriesIntoKey(fcZero, fcInfinity): 2250 case PackCategoriesIntoKey(fcZero, fcNormal): 2251 if (rhs.sign) 2252 return cmpGreaterThan; 2253 else 2254 return cmpLessThan; 2255 2256 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2257 if (sign == rhs.sign) 2258 return cmpEqual; 2259 else if (sign) 2260 return cmpLessThan; 2261 else 2262 return cmpGreaterThan; 2263 2264 case PackCategoriesIntoKey(fcZero, fcZero): 2265 return cmpEqual; 2266 2267 case PackCategoriesIntoKey(fcNormal, fcNormal): 2268 break; 2269 } 2270 2271 /* Two normal numbers. Do they have the same sign? */ 2272 if (sign != rhs.sign) { 2273 if (sign) 2274 result = cmpLessThan; 2275 else 2276 result = cmpGreaterThan; 2277 } else { 2278 /* Compare absolute values; invert result if negative. */ 2279 result = compareAbsoluteValue(rhs); 2280 2281 if (sign) { 2282 if (result == cmpLessThan) 2283 result = cmpGreaterThan; 2284 else if (result == cmpGreaterThan) 2285 result = cmpLessThan; 2286 } 2287 } 2288 2289 return result; 2290 } 2291 2292 /// IEEEFloat::convert - convert a value of one floating point type to another. 2293 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2294 /// records whether the transformation lost information, i.e. whether 2295 /// converting the result back to the original type will produce the 2296 /// original value (this is almost the same as return value==fsOK, but there 2297 /// are edge cases where this is not so). 2298 2299 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2300 roundingMode rounding_mode, 2301 bool *losesInfo) { 2302 lostFraction lostFraction; 2303 unsigned int newPartCount, oldPartCount; 2304 opStatus fs; 2305 int shift; 2306 const fltSemantics &fromSemantics = *semantics; 2307 bool is_signaling = isSignaling(); 2308 2309 lostFraction = lfExactlyZero; 2310 newPartCount = partCountForBits(toSemantics.precision + 1); 2311 oldPartCount = partCount(); 2312 shift = toSemantics.precision - fromSemantics.precision; 2313 2314 bool X86SpecialNan = false; 2315 if (&fromSemantics == &semX87DoubleExtended && 2316 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2317 (!(*significandParts() & 0x8000000000000000ULL) || 2318 !(*significandParts() & 0x4000000000000000ULL))) { 2319 // x86 has some unusual NaNs which cannot be represented in any other 2320 // format; note them here. 2321 X86SpecialNan = true; 2322 } 2323 2324 // If this is a truncation of a denormal number, and the target semantics 2325 // has larger exponent range than the source semantics (this can happen 2326 // when truncating from PowerPC double-double to double format), the 2327 // right shift could lose result mantissa bits. Adjust exponent instead 2328 // of performing excessive shift. 2329 // Also do a similar trick in case shifting denormal would produce zero 2330 // significand as this case isn't handled correctly by normalize. 2331 if (shift < 0 && isFiniteNonZero()) { 2332 int omsb = significandMSB() + 1; 2333 int exponentChange = omsb - fromSemantics.precision; 2334 if (exponent + exponentChange < toSemantics.minExponent) 2335 exponentChange = toSemantics.minExponent - exponent; 2336 if (exponentChange < shift) 2337 exponentChange = shift; 2338 if (exponentChange < 0) { 2339 shift -= exponentChange; 2340 exponent += exponentChange; 2341 } else if (omsb <= -shift) { 2342 exponentChange = omsb + shift - 1; // leave at least one bit set 2343 shift -= exponentChange; 2344 exponent += exponentChange; 2345 } 2346 } 2347 2348 // If this is a truncation, perform the shift before we narrow the storage. 2349 if (shift < 0 && (isFiniteNonZero() || 2350 (category == fcNaN && semantics->nonFiniteBehavior != 2351 fltNonfiniteBehavior::NanOnly))) 2352 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2353 2354 // Fix the storage so it can hold to new value. 2355 if (newPartCount > oldPartCount) { 2356 // The new type requires more storage; make it available. 2357 integerPart *newParts; 2358 newParts = new integerPart[newPartCount]; 2359 APInt::tcSet(newParts, 0, newPartCount); 2360 if (isFiniteNonZero() || category==fcNaN) 2361 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2362 freeSignificand(); 2363 significand.parts = newParts; 2364 } else if (newPartCount == 1 && oldPartCount != 1) { 2365 // Switch to built-in storage for a single part. 2366 integerPart newPart = 0; 2367 if (isFiniteNonZero() || category==fcNaN) 2368 newPart = significandParts()[0]; 2369 freeSignificand(); 2370 significand.part = newPart; 2371 } 2372 2373 // Now that we have the right storage, switch the semantics. 2374 semantics = &toSemantics; 2375 2376 // If this is an extension, perform the shift now that the storage is 2377 // available. 2378 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2379 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2380 2381 if (isFiniteNonZero()) { 2382 fs = normalize(rounding_mode, lostFraction); 2383 *losesInfo = (fs != opOK); 2384 } else if (category == fcNaN) { 2385 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2386 *losesInfo = 2387 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly; 2388 makeNaN(false, sign); 2389 return is_signaling ? opInvalidOp : opOK; 2390 } 2391 2392 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2393 2394 // For x87 extended precision, we want to make a NaN, not a special NaN if 2395 // the input wasn't special either. 2396 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2397 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2398 2399 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2400 // This also guarantees that a sNaN does not become Inf on a truncation 2401 // that loses all payload bits. 2402 if (is_signaling) { 2403 makeQuiet(); 2404 fs = opInvalidOp; 2405 } else { 2406 fs = opOK; 2407 } 2408 } else if (category == fcInfinity && 2409 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2410 makeNaN(false, sign); 2411 *losesInfo = true; 2412 fs = opInexact; 2413 } else { 2414 *losesInfo = false; 2415 fs = opOK; 2416 } 2417 2418 return fs; 2419 } 2420 2421 /* Convert a floating point number to an integer according to the 2422 rounding mode. If the rounded integer value is out of range this 2423 returns an invalid operation exception and the contents of the 2424 destination parts are unspecified. If the rounded value is in 2425 range but the floating point number is not the exact integer, the C 2426 standard doesn't require an inexact exception to be raised. IEEE 2427 854 does require it so we do that. 2428 2429 Note that for conversions to integer type the C standard requires 2430 round-to-zero to always be used. */ 2431 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2432 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2433 roundingMode rounding_mode, bool *isExact) const { 2434 lostFraction lost_fraction; 2435 const integerPart *src; 2436 unsigned int dstPartsCount, truncatedBits; 2437 2438 *isExact = false; 2439 2440 /* Handle the three special cases first. */ 2441 if (category == fcInfinity || category == fcNaN) 2442 return opInvalidOp; 2443 2444 dstPartsCount = partCountForBits(width); 2445 assert(dstPartsCount <= parts.size() && "Integer too big"); 2446 2447 if (category == fcZero) { 2448 APInt::tcSet(parts.data(), 0, dstPartsCount); 2449 // Negative zero can't be represented as an int. 2450 *isExact = !sign; 2451 return opOK; 2452 } 2453 2454 src = significandParts(); 2455 2456 /* Step 1: place our absolute value, with any fraction truncated, in 2457 the destination. */ 2458 if (exponent < 0) { 2459 /* Our absolute value is less than one; truncate everything. */ 2460 APInt::tcSet(parts.data(), 0, dstPartsCount); 2461 /* For exponent -1 the integer bit represents .5, look at that. 2462 For smaller exponents leftmost truncated bit is 0. */ 2463 truncatedBits = semantics->precision -1U - exponent; 2464 } else { 2465 /* We want the most significant (exponent + 1) bits; the rest are 2466 truncated. */ 2467 unsigned int bits = exponent + 1U; 2468 2469 /* Hopelessly large in magnitude? */ 2470 if (bits > width) 2471 return opInvalidOp; 2472 2473 if (bits < semantics->precision) { 2474 /* We truncate (semantics->precision - bits) bits. */ 2475 truncatedBits = semantics->precision - bits; 2476 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2477 } else { 2478 /* We want at least as many bits as are available. */ 2479 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2480 0); 2481 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2482 bits - semantics->precision); 2483 truncatedBits = 0; 2484 } 2485 } 2486 2487 /* Step 2: work out any lost fraction, and increment the absolute 2488 value if we would round away from zero. */ 2489 if (truncatedBits) { 2490 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2491 truncatedBits); 2492 if (lost_fraction != lfExactlyZero && 2493 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2494 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2495 return opInvalidOp; /* Overflow. */ 2496 } 2497 } else { 2498 lost_fraction = lfExactlyZero; 2499 } 2500 2501 /* Step 3: check if we fit in the destination. */ 2502 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2503 2504 if (sign) { 2505 if (!isSigned) { 2506 /* Negative numbers cannot be represented as unsigned. */ 2507 if (omsb != 0) 2508 return opInvalidOp; 2509 } else { 2510 /* It takes omsb bits to represent the unsigned integer value. 2511 We lose a bit for the sign, but care is needed as the 2512 maximally negative integer is a special case. */ 2513 if (omsb == width && 2514 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2515 return opInvalidOp; 2516 2517 /* This case can happen because of rounding. */ 2518 if (omsb > width) 2519 return opInvalidOp; 2520 } 2521 2522 APInt::tcNegate (parts.data(), dstPartsCount); 2523 } else { 2524 if (omsb >= width + !isSigned) 2525 return opInvalidOp; 2526 } 2527 2528 if (lost_fraction == lfExactlyZero) { 2529 *isExact = true; 2530 return opOK; 2531 } else 2532 return opInexact; 2533 } 2534 2535 /* Same as convertToSignExtendedInteger, except we provide 2536 deterministic values in case of an invalid operation exception, 2537 namely zero for NaNs and the minimal or maximal value respectively 2538 for underflow or overflow. 2539 The *isExact output tells whether the result is exact, in the sense 2540 that converting it back to the original floating point type produces 2541 the original value. This is almost equivalent to result==opOK, 2542 except for negative zeroes. 2543 */ 2544 IEEEFloat::opStatus 2545 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2546 unsigned int width, bool isSigned, 2547 roundingMode rounding_mode, bool *isExact) const { 2548 opStatus fs; 2549 2550 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2551 isExact); 2552 2553 if (fs == opInvalidOp) { 2554 unsigned int bits, dstPartsCount; 2555 2556 dstPartsCount = partCountForBits(width); 2557 assert(dstPartsCount <= parts.size() && "Integer too big"); 2558 2559 if (category == fcNaN) 2560 bits = 0; 2561 else if (sign) 2562 bits = isSigned; 2563 else 2564 bits = width - isSigned; 2565 2566 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2567 if (sign && isSigned) 2568 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2569 } 2570 2571 return fs; 2572 } 2573 2574 /* Convert an unsigned integer SRC to a floating point number, 2575 rounding according to ROUNDING_MODE. The sign of the floating 2576 point number is not modified. */ 2577 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2578 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2579 unsigned int omsb, precision, dstCount; 2580 integerPart *dst; 2581 lostFraction lost_fraction; 2582 2583 category = fcNormal; 2584 omsb = APInt::tcMSB(src, srcCount) + 1; 2585 dst = significandParts(); 2586 dstCount = partCount(); 2587 precision = semantics->precision; 2588 2589 /* We want the most significant PRECISION bits of SRC. There may not 2590 be that many; extract what we can. */ 2591 if (precision <= omsb) { 2592 exponent = omsb - 1; 2593 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2594 omsb - precision); 2595 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2596 } else { 2597 exponent = precision - 1; 2598 lost_fraction = lfExactlyZero; 2599 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2600 } 2601 2602 return normalize(rounding_mode, lost_fraction); 2603 } 2604 2605 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2606 roundingMode rounding_mode) { 2607 unsigned int partCount = Val.getNumWords(); 2608 APInt api = Val; 2609 2610 sign = false; 2611 if (isSigned && api.isNegative()) { 2612 sign = true; 2613 api = -api; 2614 } 2615 2616 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2617 } 2618 2619 /* Convert a two's complement integer SRC to a floating point number, 2620 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2621 integer is signed, in which case it must be sign-extended. */ 2622 IEEEFloat::opStatus 2623 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2624 unsigned int srcCount, bool isSigned, 2625 roundingMode rounding_mode) { 2626 opStatus status; 2627 2628 if (isSigned && 2629 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2630 integerPart *copy; 2631 2632 /* If we're signed and negative negate a copy. */ 2633 sign = true; 2634 copy = new integerPart[srcCount]; 2635 APInt::tcAssign(copy, src, srcCount); 2636 APInt::tcNegate(copy, srcCount); 2637 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2638 delete [] copy; 2639 } else { 2640 sign = false; 2641 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2642 } 2643 2644 return status; 2645 } 2646 2647 /* FIXME: should this just take a const APInt reference? */ 2648 IEEEFloat::opStatus 2649 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2650 unsigned int width, bool isSigned, 2651 roundingMode rounding_mode) { 2652 unsigned int partCount = partCountForBits(width); 2653 APInt api = APInt(width, ArrayRef(parts, partCount)); 2654 2655 sign = false; 2656 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2657 sign = true; 2658 api = -api; 2659 } 2660 2661 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2662 } 2663 2664 Expected<IEEEFloat::opStatus> 2665 IEEEFloat::convertFromHexadecimalString(StringRef s, 2666 roundingMode rounding_mode) { 2667 lostFraction lost_fraction = lfExactlyZero; 2668 2669 category = fcNormal; 2670 zeroSignificand(); 2671 exponent = 0; 2672 2673 integerPart *significand = significandParts(); 2674 unsigned partsCount = partCount(); 2675 unsigned bitPos = partsCount * integerPartWidth; 2676 bool computedTrailingFraction = false; 2677 2678 // Skip leading zeroes and any (hexa)decimal point. 2679 StringRef::iterator begin = s.begin(); 2680 StringRef::iterator end = s.end(); 2681 StringRef::iterator dot; 2682 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2683 if (!PtrOrErr) 2684 return PtrOrErr.takeError(); 2685 StringRef::iterator p = *PtrOrErr; 2686 StringRef::iterator firstSignificantDigit = p; 2687 2688 while (p != end) { 2689 integerPart hex_value; 2690 2691 if (*p == '.') { 2692 if (dot != end) 2693 return createError("String contains multiple dots"); 2694 dot = p++; 2695 continue; 2696 } 2697 2698 hex_value = hexDigitValue(*p); 2699 if (hex_value == -1U) 2700 break; 2701 2702 p++; 2703 2704 // Store the number while we have space. 2705 if (bitPos) { 2706 bitPos -= 4; 2707 hex_value <<= bitPos % integerPartWidth; 2708 significand[bitPos / integerPartWidth] |= hex_value; 2709 } else if (!computedTrailingFraction) { 2710 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2711 if (!FractOrErr) 2712 return FractOrErr.takeError(); 2713 lost_fraction = *FractOrErr; 2714 computedTrailingFraction = true; 2715 } 2716 } 2717 2718 /* Hex floats require an exponent but not a hexadecimal point. */ 2719 if (p == end) 2720 return createError("Hex strings require an exponent"); 2721 if (*p != 'p' && *p != 'P') 2722 return createError("Invalid character in significand"); 2723 if (p == begin) 2724 return createError("Significand has no digits"); 2725 if (dot != end && p - begin == 1) 2726 return createError("Significand has no digits"); 2727 2728 /* Ignore the exponent if we are zero. */ 2729 if (p != firstSignificantDigit) { 2730 int expAdjustment; 2731 2732 /* Implicit hexadecimal point? */ 2733 if (dot == end) 2734 dot = p; 2735 2736 /* Calculate the exponent adjustment implicit in the number of 2737 significant digits. */ 2738 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2739 if (expAdjustment < 0) 2740 expAdjustment++; 2741 expAdjustment = expAdjustment * 4 - 1; 2742 2743 /* Adjust for writing the significand starting at the most 2744 significant nibble. */ 2745 expAdjustment += semantics->precision; 2746 expAdjustment -= partsCount * integerPartWidth; 2747 2748 /* Adjust for the given exponent. */ 2749 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 2750 if (!ExpOrErr) 2751 return ExpOrErr.takeError(); 2752 exponent = *ExpOrErr; 2753 } 2754 2755 return normalize(rounding_mode, lost_fraction); 2756 } 2757 2758 IEEEFloat::opStatus 2759 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2760 unsigned sigPartCount, int exp, 2761 roundingMode rounding_mode) { 2762 unsigned int parts, pow5PartCount; 2763 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2764 integerPart pow5Parts[maxPowerOfFiveParts]; 2765 bool isNearest; 2766 2767 isNearest = (rounding_mode == rmNearestTiesToEven || 2768 rounding_mode == rmNearestTiesToAway); 2769 2770 parts = partCountForBits(semantics->precision + 11); 2771 2772 /* Calculate pow(5, abs(exp)). */ 2773 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2774 2775 for (;; parts *= 2) { 2776 opStatus sigStatus, powStatus; 2777 unsigned int excessPrecision, truncatedBits; 2778 2779 calcSemantics.precision = parts * integerPartWidth - 1; 2780 excessPrecision = calcSemantics.precision - semantics->precision; 2781 truncatedBits = excessPrecision; 2782 2783 IEEEFloat decSig(calcSemantics, uninitialized); 2784 decSig.makeZero(sign); 2785 IEEEFloat pow5(calcSemantics); 2786 2787 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2788 rmNearestTiesToEven); 2789 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2790 rmNearestTiesToEven); 2791 /* Add exp, as 10^n = 5^n * 2^n. */ 2792 decSig.exponent += exp; 2793 2794 lostFraction calcLostFraction; 2795 integerPart HUerr, HUdistance; 2796 unsigned int powHUerr; 2797 2798 if (exp >= 0) { 2799 /* multiplySignificand leaves the precision-th bit set to 1. */ 2800 calcLostFraction = decSig.multiplySignificand(pow5); 2801 powHUerr = powStatus != opOK; 2802 } else { 2803 calcLostFraction = decSig.divideSignificand(pow5); 2804 /* Denormal numbers have less precision. */ 2805 if (decSig.exponent < semantics->minExponent) { 2806 excessPrecision += (semantics->minExponent - decSig.exponent); 2807 truncatedBits = excessPrecision; 2808 if (excessPrecision > calcSemantics.precision) 2809 excessPrecision = calcSemantics.precision; 2810 } 2811 /* Extra half-ulp lost in reciprocal of exponent. */ 2812 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2813 } 2814 2815 /* Both multiplySignificand and divideSignificand return the 2816 result with the integer bit set. */ 2817 assert(APInt::tcExtractBit 2818 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2819 2820 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2821 powHUerr); 2822 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2823 excessPrecision, isNearest); 2824 2825 /* Are we guaranteed to round correctly if we truncate? */ 2826 if (HUdistance >= HUerr) { 2827 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2828 calcSemantics.precision - excessPrecision, 2829 excessPrecision); 2830 /* Take the exponent of decSig. If we tcExtract-ed less bits 2831 above we must adjust our exponent to compensate for the 2832 implicit right shift. */ 2833 exponent = (decSig.exponent + semantics->precision 2834 - (calcSemantics.precision - excessPrecision)); 2835 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2836 decSig.partCount(), 2837 truncatedBits); 2838 return normalize(rounding_mode, calcLostFraction); 2839 } 2840 } 2841 } 2842 2843 Expected<IEEEFloat::opStatus> 2844 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2845 decimalInfo D; 2846 opStatus fs; 2847 2848 /* Scan the text. */ 2849 StringRef::iterator p = str.begin(); 2850 if (Error Err = interpretDecimal(p, str.end(), &D)) 2851 return std::move(Err); 2852 2853 /* Handle the quick cases. First the case of no significant digits, 2854 i.e. zero, and then exponents that are obviously too large or too 2855 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2856 definitely overflows if 2857 2858 (exp - 1) * L >= maxExponent 2859 2860 and definitely underflows to zero where 2861 2862 (exp + 1) * L <= minExponent - precision 2863 2864 With integer arithmetic the tightest bounds for L are 2865 2866 93/28 < L < 196/59 [ numerator <= 256 ] 2867 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2868 */ 2869 2870 // Test if we have a zero number allowing for strings with no null terminators 2871 // and zero decimals with non-zero exponents. 2872 // 2873 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2874 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2875 // be at most one dot. On the other hand, if we have a zero with a non-zero 2876 // exponent, then we know that D.firstSigDigit will be non-numeric. 2877 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2878 category = fcZero; 2879 fs = opOK; 2880 2881 /* Check whether the normalized exponent is high enough to overflow 2882 max during the log-rebasing in the max-exponent check below. */ 2883 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2884 fs = handleOverflow(rounding_mode); 2885 2886 /* If it wasn't, then it also wasn't high enough to overflow max 2887 during the log-rebasing in the min-exponent check. Check that it 2888 won't overflow min in either check, then perform the min-exponent 2889 check. */ 2890 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2891 (D.normalizedExponent + 1) * 28738 <= 2892 8651 * (semantics->minExponent - (int) semantics->precision)) { 2893 /* Underflow to zero and round. */ 2894 category = fcNormal; 2895 zeroSignificand(); 2896 fs = normalize(rounding_mode, lfLessThanHalf); 2897 2898 /* We can finally safely perform the max-exponent check. */ 2899 } else if ((D.normalizedExponent - 1) * 42039 2900 >= 12655 * semantics->maxExponent) { 2901 /* Overflow and round. */ 2902 fs = handleOverflow(rounding_mode); 2903 } else { 2904 integerPart *decSignificand; 2905 unsigned int partCount; 2906 2907 /* A tight upper bound on number of bits required to hold an 2908 N-digit decimal integer is N * 196 / 59. Allocate enough space 2909 to hold the full significand, and an extra part required by 2910 tcMultiplyPart. */ 2911 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2912 partCount = partCountForBits(1 + 196 * partCount / 59); 2913 decSignificand = new integerPart[partCount + 1]; 2914 partCount = 0; 2915 2916 /* Convert to binary efficiently - we do almost all multiplication 2917 in an integerPart. When this would overflow do we do a single 2918 bignum multiplication, and then revert again to multiplication 2919 in an integerPart. */ 2920 do { 2921 integerPart decValue, val, multiplier; 2922 2923 val = 0; 2924 multiplier = 1; 2925 2926 do { 2927 if (*p == '.') { 2928 p++; 2929 if (p == str.end()) { 2930 break; 2931 } 2932 } 2933 decValue = decDigitValue(*p++); 2934 if (decValue >= 10U) { 2935 delete[] decSignificand; 2936 return createError("Invalid character in significand"); 2937 } 2938 multiplier *= 10; 2939 val = val * 10 + decValue; 2940 /* The maximum number that can be multiplied by ten with any 2941 digit added without overflowing an integerPart. */ 2942 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2943 2944 /* Multiply out the current part. */ 2945 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2946 partCount, partCount + 1, false); 2947 2948 /* If we used another part (likely but not guaranteed), increase 2949 the count. */ 2950 if (decSignificand[partCount]) 2951 partCount++; 2952 } while (p <= D.lastSigDigit); 2953 2954 category = fcNormal; 2955 fs = roundSignificandWithExponent(decSignificand, partCount, 2956 D.exponent, rounding_mode); 2957 2958 delete [] decSignificand; 2959 } 2960 2961 return fs; 2962 } 2963 2964 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 2965 const size_t MIN_NAME_SIZE = 3; 2966 2967 if (str.size() < MIN_NAME_SIZE) 2968 return false; 2969 2970 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { 2971 makeInf(false); 2972 return true; 2973 } 2974 2975 bool IsNegative = str.front() == '-'; 2976 if (IsNegative) { 2977 str = str.drop_front(); 2978 if (str.size() < MIN_NAME_SIZE) 2979 return false; 2980 2981 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) { 2982 makeInf(true); 2983 return true; 2984 } 2985 } 2986 2987 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 2988 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 2989 if (IsSignaling) { 2990 str = str.drop_front(); 2991 if (str.size() < MIN_NAME_SIZE) 2992 return false; 2993 } 2994 2995 if (str.startswith("nan") || str.startswith("NaN")) { 2996 str = str.drop_front(3); 2997 2998 // A NaN without payload. 2999 if (str.empty()) { 3000 makeNaN(IsSignaling, IsNegative); 3001 return true; 3002 } 3003 3004 // Allow the payload to be inside parentheses. 3005 if (str.front() == '(') { 3006 // Parentheses should be balanced (and not empty). 3007 if (str.size() <= 2 || str.back() != ')') 3008 return false; 3009 3010 str = str.slice(1, str.size() - 1); 3011 } 3012 3013 // Determine the payload number's radix. 3014 unsigned Radix = 10; 3015 if (str[0] == '0') { 3016 if (str.size() > 1 && tolower(str[1]) == 'x') { 3017 str = str.drop_front(2); 3018 Radix = 16; 3019 } else 3020 Radix = 8; 3021 } 3022 3023 // Parse the payload and make the NaN. 3024 APInt Payload; 3025 if (!str.getAsInteger(Radix, Payload)) { 3026 makeNaN(IsSignaling, IsNegative, &Payload); 3027 return true; 3028 } 3029 } 3030 3031 return false; 3032 } 3033 3034 Expected<IEEEFloat::opStatus> 3035 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 3036 if (str.empty()) 3037 return createError("Invalid string length"); 3038 3039 // Handle special cases. 3040 if (convertFromStringSpecials(str)) 3041 return opOK; 3042 3043 /* Handle a leading minus sign. */ 3044 StringRef::iterator p = str.begin(); 3045 size_t slen = str.size(); 3046 sign = *p == '-' ? 1 : 0; 3047 if (*p == '-' || *p == '+') { 3048 p++; 3049 slen--; 3050 if (!slen) 3051 return createError("String has no digits"); 3052 } 3053 3054 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 3055 if (slen == 2) 3056 return createError("Invalid string"); 3057 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 3058 rounding_mode); 3059 } 3060 3061 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 3062 } 3063 3064 /* Write out a hexadecimal representation of the floating point value 3065 to DST, which must be of sufficient size, in the C99 form 3066 [-]0xh.hhhhp[+-]d. Return the number of characters written, 3067 excluding the terminating NUL. 3068 3069 If UPPERCASE, the output is in upper case, otherwise in lower case. 3070 3071 HEXDIGITS digits appear altogether, rounding the value if 3072 necessary. If HEXDIGITS is 0, the minimal precision to display the 3073 number precisely is used instead. If nothing would appear after 3074 the decimal point it is suppressed. 3075 3076 The decimal exponent is always printed and has at least one digit. 3077 Zero values display an exponent of zero. Infinities and NaNs 3078 appear as "infinity" or "nan" respectively. 3079 3080 The above rules are as specified by C99. There is ambiguity about 3081 what the leading hexadecimal digit should be. This implementation 3082 uses whatever is necessary so that the exponent is displayed as 3083 stored. This implies the exponent will fall within the IEEE format 3084 range, and the leading hexadecimal digit will be 0 (for denormals), 3085 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 3086 any other digits zero). 3087 */ 3088 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 3089 bool upperCase, 3090 roundingMode rounding_mode) const { 3091 char *p; 3092 3093 p = dst; 3094 if (sign) 3095 *dst++ = '-'; 3096 3097 switch (category) { 3098 case fcInfinity: 3099 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 3100 dst += sizeof infinityL - 1; 3101 break; 3102 3103 case fcNaN: 3104 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 3105 dst += sizeof NaNU - 1; 3106 break; 3107 3108 case fcZero: 3109 *dst++ = '0'; 3110 *dst++ = upperCase ? 'X': 'x'; 3111 *dst++ = '0'; 3112 if (hexDigits > 1) { 3113 *dst++ = '.'; 3114 memset (dst, '0', hexDigits - 1); 3115 dst += hexDigits - 1; 3116 } 3117 *dst++ = upperCase ? 'P': 'p'; 3118 *dst++ = '0'; 3119 break; 3120 3121 case fcNormal: 3122 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 3123 break; 3124 } 3125 3126 *dst = 0; 3127 3128 return static_cast<unsigned int>(dst - p); 3129 } 3130 3131 /* Does the hard work of outputting the correctly rounded hexadecimal 3132 form of a normal floating point number with the specified number of 3133 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3134 digits necessary to print the value precisely is output. */ 3135 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3136 bool upperCase, 3137 roundingMode rounding_mode) const { 3138 unsigned int count, valueBits, shift, partsCount, outputDigits; 3139 const char *hexDigitChars; 3140 const integerPart *significand; 3141 char *p; 3142 bool roundUp; 3143 3144 *dst++ = '0'; 3145 *dst++ = upperCase ? 'X': 'x'; 3146 3147 roundUp = false; 3148 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3149 3150 significand = significandParts(); 3151 partsCount = partCount(); 3152 3153 /* +3 because the first digit only uses the single integer bit, so 3154 we have 3 virtual zero most-significant-bits. */ 3155 valueBits = semantics->precision + 3; 3156 shift = integerPartWidth - valueBits % integerPartWidth; 3157 3158 /* The natural number of digits required ignoring trailing 3159 insignificant zeroes. */ 3160 outputDigits = (valueBits - significandLSB () + 3) / 4; 3161 3162 /* hexDigits of zero means use the required number for the 3163 precision. Otherwise, see if we are truncating. If we are, 3164 find out if we need to round away from zero. */ 3165 if (hexDigits) { 3166 if (hexDigits < outputDigits) { 3167 /* We are dropping non-zero bits, so need to check how to round. 3168 "bits" is the number of dropped bits. */ 3169 unsigned int bits; 3170 lostFraction fraction; 3171 3172 bits = valueBits - hexDigits * 4; 3173 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3174 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3175 } 3176 outputDigits = hexDigits; 3177 } 3178 3179 /* Write the digits consecutively, and start writing in the location 3180 of the hexadecimal point. We move the most significant digit 3181 left and add the hexadecimal point later. */ 3182 p = ++dst; 3183 3184 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3185 3186 while (outputDigits && count) { 3187 integerPart part; 3188 3189 /* Put the most significant integerPartWidth bits in "part". */ 3190 if (--count == partsCount) 3191 part = 0; /* An imaginary higher zero part. */ 3192 else 3193 part = significand[count] << shift; 3194 3195 if (count && shift) 3196 part |= significand[count - 1] >> (integerPartWidth - shift); 3197 3198 /* Convert as much of "part" to hexdigits as we can. */ 3199 unsigned int curDigits = integerPartWidth / 4; 3200 3201 if (curDigits > outputDigits) 3202 curDigits = outputDigits; 3203 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3204 outputDigits -= curDigits; 3205 } 3206 3207 if (roundUp) { 3208 char *q = dst; 3209 3210 /* Note that hexDigitChars has a trailing '0'. */ 3211 do { 3212 q--; 3213 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3214 } while (*q == '0'); 3215 assert(q >= p); 3216 } else { 3217 /* Add trailing zeroes. */ 3218 memset (dst, '0', outputDigits); 3219 dst += outputDigits; 3220 } 3221 3222 /* Move the most significant digit to before the point, and if there 3223 is something after the decimal point add it. This must come 3224 after rounding above. */ 3225 p[-1] = p[0]; 3226 if (dst -1 == p) 3227 dst--; 3228 else 3229 p[0] = '.'; 3230 3231 /* Finally output the exponent. */ 3232 *dst++ = upperCase ? 'P': 'p'; 3233 3234 return writeSignedDecimal (dst, exponent); 3235 } 3236 3237 hash_code hash_value(const IEEEFloat &Arg) { 3238 if (!Arg.isFiniteNonZero()) 3239 return hash_combine((uint8_t)Arg.category, 3240 // NaN has no sign, fix it at zero. 3241 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3242 Arg.semantics->precision); 3243 3244 // Normal floats need their exponent and significand hashed. 3245 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3246 Arg.semantics->precision, Arg.exponent, 3247 hash_combine_range( 3248 Arg.significandParts(), 3249 Arg.significandParts() + Arg.partCount())); 3250 } 3251 3252 // Conversion from APFloat to/from host float/double. It may eventually be 3253 // possible to eliminate these and have everybody deal with APFloats, but that 3254 // will take a while. This approach will not easily extend to long double. 3255 // Current implementation requires integerPartWidth==64, which is correct at 3256 // the moment but could be made more general. 3257 3258 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3259 // the actual IEEE respresentations. We compensate for that here. 3260 3261 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3262 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3263 assert(partCount()==2); 3264 3265 uint64_t myexponent, mysignificand; 3266 3267 if (isFiniteNonZero()) { 3268 myexponent = exponent+16383; //bias 3269 mysignificand = significandParts()[0]; 3270 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3271 myexponent = 0; // denormal 3272 } else if (category==fcZero) { 3273 myexponent = 0; 3274 mysignificand = 0; 3275 } else if (category==fcInfinity) { 3276 myexponent = 0x7fff; 3277 mysignificand = 0x8000000000000000ULL; 3278 } else { 3279 assert(category == fcNaN && "Unknown category"); 3280 myexponent = 0x7fff; 3281 mysignificand = significandParts()[0]; 3282 } 3283 3284 uint64_t words[2]; 3285 words[0] = mysignificand; 3286 words[1] = ((uint64_t)(sign & 1) << 15) | 3287 (myexponent & 0x7fffLL); 3288 return APInt(80, words); 3289 } 3290 3291 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3292 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3293 assert(partCount()==2); 3294 3295 uint64_t words[2]; 3296 opStatus fs; 3297 bool losesInfo; 3298 3299 // Convert number to double. To avoid spurious underflows, we re- 3300 // normalize against the "double" minExponent first, and only *then* 3301 // truncate the mantissa. The result of that second conversion 3302 // may be inexact, but should never underflow. 3303 // Declare fltSemantics before APFloat that uses it (and 3304 // saves pointer to it) to ensure correct destruction order. 3305 fltSemantics extendedSemantics = *semantics; 3306 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3307 IEEEFloat extended(*this); 3308 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3309 assert(fs == opOK && !losesInfo); 3310 (void)fs; 3311 3312 IEEEFloat u(extended); 3313 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3314 assert(fs == opOK || fs == opInexact); 3315 (void)fs; 3316 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3317 3318 // If conversion was exact or resulted in a special case, we're done; 3319 // just set the second double to zero. Otherwise, re-convert back to 3320 // the extended format and compute the difference. This now should 3321 // convert exactly to double. 3322 if (u.isFiniteNonZero() && losesInfo) { 3323 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3324 assert(fs == opOK && !losesInfo); 3325 (void)fs; 3326 3327 IEEEFloat v(extended); 3328 v.subtract(u, rmNearestTiesToEven); 3329 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3330 assert(fs == opOK && !losesInfo); 3331 (void)fs; 3332 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3333 } else { 3334 words[1] = 0; 3335 } 3336 3337 return APInt(128, words); 3338 } 3339 3340 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3341 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); 3342 assert(partCount()==2); 3343 3344 uint64_t myexponent, mysignificand, mysignificand2; 3345 3346 if (isFiniteNonZero()) { 3347 myexponent = exponent+16383; //bias 3348 mysignificand = significandParts()[0]; 3349 mysignificand2 = significandParts()[1]; 3350 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 3351 myexponent = 0; // denormal 3352 } else if (category==fcZero) { 3353 myexponent = 0; 3354 mysignificand = mysignificand2 = 0; 3355 } else if (category==fcInfinity) { 3356 myexponent = 0x7fff; 3357 mysignificand = mysignificand2 = 0; 3358 } else { 3359 assert(category == fcNaN && "Unknown category!"); 3360 myexponent = 0x7fff; 3361 mysignificand = significandParts()[0]; 3362 mysignificand2 = significandParts()[1]; 3363 } 3364 3365 uint64_t words[2]; 3366 words[0] = mysignificand; 3367 words[1] = ((uint64_t)(sign & 1) << 63) | 3368 ((myexponent & 0x7fff) << 48) | 3369 (mysignificand2 & 0xffffffffffffLL); 3370 3371 return APInt(128, words); 3372 } 3373 3374 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3375 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); 3376 assert(partCount()==1); 3377 3378 uint64_t myexponent, mysignificand; 3379 3380 if (isFiniteNonZero()) { 3381 myexponent = exponent+1023; //bias 3382 mysignificand = *significandParts(); 3383 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3384 myexponent = 0; // denormal 3385 } else if (category==fcZero) { 3386 myexponent = 0; 3387 mysignificand = 0; 3388 } else if (category==fcInfinity) { 3389 myexponent = 0x7ff; 3390 mysignificand = 0; 3391 } else { 3392 assert(category == fcNaN && "Unknown category!"); 3393 myexponent = 0x7ff; 3394 mysignificand = *significandParts(); 3395 } 3396 3397 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3398 ((myexponent & 0x7ff) << 52) | 3399 (mysignificand & 0xfffffffffffffLL)))); 3400 } 3401 3402 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3403 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); 3404 assert(partCount()==1); 3405 3406 uint32_t myexponent, mysignificand; 3407 3408 if (isFiniteNonZero()) { 3409 myexponent = exponent+127; //bias 3410 mysignificand = (uint32_t)*significandParts(); 3411 if (myexponent == 1 && !(mysignificand & 0x800000)) 3412 myexponent = 0; // denormal 3413 } else if (category==fcZero) { 3414 myexponent = 0; 3415 mysignificand = 0; 3416 } else if (category==fcInfinity) { 3417 myexponent = 0xff; 3418 mysignificand = 0; 3419 } else { 3420 assert(category == fcNaN && "Unknown category!"); 3421 myexponent = 0xff; 3422 mysignificand = (uint32_t)*significandParts(); 3423 } 3424 3425 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3426 (mysignificand & 0x7fffff))); 3427 } 3428 3429 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3430 assert(semantics == (const llvm::fltSemantics *)&semBFloat); 3431 assert(partCount() == 1); 3432 3433 uint32_t myexponent, mysignificand; 3434 3435 if (isFiniteNonZero()) { 3436 myexponent = exponent + 127; // bias 3437 mysignificand = (uint32_t)*significandParts(); 3438 if (myexponent == 1 && !(mysignificand & 0x80)) 3439 myexponent = 0; // denormal 3440 } else if (category == fcZero) { 3441 myexponent = 0; 3442 mysignificand = 0; 3443 } else if (category == fcInfinity) { 3444 myexponent = 0xff; 3445 mysignificand = 0; 3446 } else { 3447 assert(category == fcNaN && "Unknown category!"); 3448 myexponent = 0xff; 3449 mysignificand = (uint32_t)*significandParts(); 3450 } 3451 3452 return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) | 3453 (mysignificand & 0x7f))); 3454 } 3455 3456 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3457 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); 3458 assert(partCount()==1); 3459 3460 uint32_t myexponent, mysignificand; 3461 3462 if (isFiniteNonZero()) { 3463 myexponent = exponent+15; //bias 3464 mysignificand = (uint32_t)*significandParts(); 3465 if (myexponent == 1 && !(mysignificand & 0x400)) 3466 myexponent = 0; // denormal 3467 } else if (category==fcZero) { 3468 myexponent = 0; 3469 mysignificand = 0; 3470 } else if (category==fcInfinity) { 3471 myexponent = 0x1f; 3472 mysignificand = 0; 3473 } else { 3474 assert(category == fcNaN && "Unknown category!"); 3475 myexponent = 0x1f; 3476 mysignificand = (uint32_t)*significandParts(); 3477 } 3478 3479 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3480 (mysignificand & 0x3ff))); 3481 } 3482 3483 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { 3484 assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2); 3485 assert(partCount() == 1); 3486 3487 uint32_t myexponent, mysignificand; 3488 3489 if (isFiniteNonZero()) { 3490 myexponent = exponent + 15; // bias 3491 mysignificand = (uint32_t)*significandParts(); 3492 if (myexponent == 1 && !(mysignificand & 0x4)) 3493 myexponent = 0; // denormal 3494 } else if (category == fcZero) { 3495 myexponent = 0; 3496 mysignificand = 0; 3497 } else if (category == fcInfinity) { 3498 myexponent = 0x1f; 3499 mysignificand = 0; 3500 } else { 3501 assert(category == fcNaN && "Unknown category!"); 3502 myexponent = 0x1f; 3503 mysignificand = (uint32_t)*significandParts(); 3504 } 3505 3506 return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) | 3507 (mysignificand & 0x3))); 3508 } 3509 3510 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { 3511 assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN); 3512 assert(partCount() == 1); 3513 3514 uint32_t myexponent, mysignificand; 3515 3516 if (isFiniteNonZero()) { 3517 myexponent = exponent + 7; // bias 3518 mysignificand = (uint32_t)*significandParts(); 3519 if (myexponent == 1 && !(mysignificand & 0x8)) 3520 myexponent = 0; // denormal 3521 } else if (category == fcZero) { 3522 myexponent = 0; 3523 mysignificand = 0; 3524 } else if (category == fcInfinity) { 3525 myexponent = 0xf; 3526 mysignificand = 0; 3527 } else { 3528 assert(category == fcNaN && "Unknown category!"); 3529 myexponent = 0xf; 3530 mysignificand = (uint32_t)*significandParts(); 3531 } 3532 3533 return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) | 3534 (mysignificand & 0x7))); 3535 } 3536 3537 // This function creates an APInt that is just a bit map of the floating 3538 // point constant as it would appear in memory. It is not a conversion, 3539 // and treating the result as a normal integer is unlikely to be useful. 3540 3541 APInt IEEEFloat::bitcastToAPInt() const { 3542 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3543 return convertHalfAPFloatToAPInt(); 3544 3545 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3546 return convertBFloatAPFloatToAPInt(); 3547 3548 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3549 return convertFloatAPFloatToAPInt(); 3550 3551 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3552 return convertDoubleAPFloatToAPInt(); 3553 3554 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3555 return convertQuadrupleAPFloatToAPInt(); 3556 3557 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3558 return convertPPCDoubleDoubleAPFloatToAPInt(); 3559 3560 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3561 return convertFloat8E5M2APFloatToAPInt(); 3562 3563 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3564 return convertFloat8E4M3FNAPFloatToAPInt(); 3565 3566 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3567 "unknown format!"); 3568 return convertF80LongDoubleAPFloatToAPInt(); 3569 } 3570 3571 float IEEEFloat::convertToFloat() const { 3572 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3573 "Float semantics are not IEEEsingle"); 3574 APInt api = bitcastToAPInt(); 3575 return api.bitsToFloat(); 3576 } 3577 3578 double IEEEFloat::convertToDouble() const { 3579 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3580 "Float semantics are not IEEEdouble"); 3581 APInt api = bitcastToAPInt(); 3582 return api.bitsToDouble(); 3583 } 3584 3585 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3586 /// does not support these bit patterns: 3587 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3588 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3589 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3590 /// exponent = 0, integer bit 1 ("pseudodenormal") 3591 /// At the moment, the first three are treated as NaNs, the last one as Normal. 3592 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3593 uint64_t i1 = api.getRawData()[0]; 3594 uint64_t i2 = api.getRawData()[1]; 3595 uint64_t myexponent = (i2 & 0x7fff); 3596 uint64_t mysignificand = i1; 3597 uint8_t myintegerbit = mysignificand >> 63; 3598 3599 initialize(&semX87DoubleExtended); 3600 assert(partCount()==2); 3601 3602 sign = static_cast<unsigned int>(i2>>15); 3603 if (myexponent == 0 && mysignificand == 0) { 3604 makeZero(sign); 3605 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3606 makeInf(sign); 3607 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3608 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3609 category = fcNaN; 3610 exponent = exponentNaN(); 3611 significandParts()[0] = mysignificand; 3612 significandParts()[1] = 0; 3613 } else { 3614 category = fcNormal; 3615 exponent = myexponent - 16383; 3616 significandParts()[0] = mysignificand; 3617 significandParts()[1] = 0; 3618 if (myexponent==0) // denormal 3619 exponent = -16382; 3620 } 3621 } 3622 3623 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3624 uint64_t i1 = api.getRawData()[0]; 3625 uint64_t i2 = api.getRawData()[1]; 3626 opStatus fs; 3627 bool losesInfo; 3628 3629 // Get the first double and convert to our format. 3630 initFromDoubleAPInt(APInt(64, i1)); 3631 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3632 assert(fs == opOK && !losesInfo); 3633 (void)fs; 3634 3635 // Unless we have a special case, add in second double. 3636 if (isFiniteNonZero()) { 3637 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3638 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3639 assert(fs == opOK && !losesInfo); 3640 (void)fs; 3641 3642 add(v, rmNearestTiesToEven); 3643 } 3644 } 3645 3646 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3647 uint64_t i1 = api.getRawData()[0]; 3648 uint64_t i2 = api.getRawData()[1]; 3649 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3650 uint64_t mysignificand = i1; 3651 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3652 3653 initialize(&semIEEEquad); 3654 assert(partCount()==2); 3655 3656 sign = static_cast<unsigned int>(i2>>63); 3657 if (myexponent==0 && 3658 (mysignificand==0 && mysignificand2==0)) { 3659 makeZero(sign); 3660 } else if (myexponent==0x7fff && 3661 (mysignificand==0 && mysignificand2==0)) { 3662 makeInf(sign); 3663 } else if (myexponent==0x7fff && 3664 (mysignificand!=0 || mysignificand2 !=0)) { 3665 category = fcNaN; 3666 exponent = exponentNaN(); 3667 significandParts()[0] = mysignificand; 3668 significandParts()[1] = mysignificand2; 3669 } else { 3670 category = fcNormal; 3671 exponent = myexponent - 16383; 3672 significandParts()[0] = mysignificand; 3673 significandParts()[1] = mysignificand2; 3674 if (myexponent==0) // denormal 3675 exponent = -16382; 3676 else 3677 significandParts()[1] |= 0x1000000000000LL; // integer bit 3678 } 3679 } 3680 3681 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3682 uint64_t i = *api.getRawData(); 3683 uint64_t myexponent = (i >> 52) & 0x7ff; 3684 uint64_t mysignificand = i & 0xfffffffffffffLL; 3685 3686 initialize(&semIEEEdouble); 3687 assert(partCount()==1); 3688 3689 sign = static_cast<unsigned int>(i>>63); 3690 if (myexponent==0 && mysignificand==0) { 3691 makeZero(sign); 3692 } else if (myexponent==0x7ff && mysignificand==0) { 3693 makeInf(sign); 3694 } else if (myexponent==0x7ff && mysignificand!=0) { 3695 category = fcNaN; 3696 exponent = exponentNaN(); 3697 *significandParts() = mysignificand; 3698 } else { 3699 category = fcNormal; 3700 exponent = myexponent - 1023; 3701 *significandParts() = mysignificand; 3702 if (myexponent==0) // denormal 3703 exponent = -1022; 3704 else 3705 *significandParts() |= 0x10000000000000LL; // integer bit 3706 } 3707 } 3708 3709 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3710 uint32_t i = (uint32_t)*api.getRawData(); 3711 uint32_t myexponent = (i >> 23) & 0xff; 3712 uint32_t mysignificand = i & 0x7fffff; 3713 3714 initialize(&semIEEEsingle); 3715 assert(partCount()==1); 3716 3717 sign = i >> 31; 3718 if (myexponent==0 && mysignificand==0) { 3719 makeZero(sign); 3720 } else if (myexponent==0xff && mysignificand==0) { 3721 makeInf(sign); 3722 } else if (myexponent==0xff && mysignificand!=0) { 3723 category = fcNaN; 3724 exponent = exponentNaN(); 3725 *significandParts() = mysignificand; 3726 } else { 3727 category = fcNormal; 3728 exponent = myexponent - 127; //bias 3729 *significandParts() = mysignificand; 3730 if (myexponent==0) // denormal 3731 exponent = -126; 3732 else 3733 *significandParts() |= 0x800000; // integer bit 3734 } 3735 } 3736 3737 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 3738 uint32_t i = (uint32_t)*api.getRawData(); 3739 uint32_t myexponent = (i >> 7) & 0xff; 3740 uint32_t mysignificand = i & 0x7f; 3741 3742 initialize(&semBFloat); 3743 assert(partCount() == 1); 3744 3745 sign = i >> 15; 3746 if (myexponent == 0 && mysignificand == 0) { 3747 makeZero(sign); 3748 } else if (myexponent == 0xff && mysignificand == 0) { 3749 makeInf(sign); 3750 } else if (myexponent == 0xff && mysignificand != 0) { 3751 category = fcNaN; 3752 exponent = exponentNaN(); 3753 *significandParts() = mysignificand; 3754 } else { 3755 category = fcNormal; 3756 exponent = myexponent - 127; // bias 3757 *significandParts() = mysignificand; 3758 if (myexponent == 0) // denormal 3759 exponent = -126; 3760 else 3761 *significandParts() |= 0x80; // integer bit 3762 } 3763 } 3764 3765 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3766 uint32_t i = (uint32_t)*api.getRawData(); 3767 uint32_t myexponent = (i >> 10) & 0x1f; 3768 uint32_t mysignificand = i & 0x3ff; 3769 3770 initialize(&semIEEEhalf); 3771 assert(partCount()==1); 3772 3773 sign = i >> 15; 3774 if (myexponent==0 && mysignificand==0) { 3775 makeZero(sign); 3776 } else if (myexponent==0x1f && mysignificand==0) { 3777 makeInf(sign); 3778 } else if (myexponent==0x1f && mysignificand!=0) { 3779 category = fcNaN; 3780 exponent = exponentNaN(); 3781 *significandParts() = mysignificand; 3782 } else { 3783 category = fcNormal; 3784 exponent = myexponent - 15; //bias 3785 *significandParts() = mysignificand; 3786 if (myexponent==0) // denormal 3787 exponent = -14; 3788 else 3789 *significandParts() |= 0x400; // integer bit 3790 } 3791 } 3792 3793 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { 3794 uint32_t i = (uint32_t)*api.getRawData(); 3795 uint32_t myexponent = (i >> 2) & 0x1f; 3796 uint32_t mysignificand = i & 0x3; 3797 3798 initialize(&semFloat8E5M2); 3799 assert(partCount() == 1); 3800 3801 sign = i >> 7; 3802 if (myexponent == 0 && mysignificand == 0) { 3803 makeZero(sign); 3804 } else if (myexponent == 0x1f && mysignificand == 0) { 3805 makeInf(sign); 3806 } else if (myexponent == 0x1f && mysignificand != 0) { 3807 category = fcNaN; 3808 exponent = exponentNaN(); 3809 *significandParts() = mysignificand; 3810 } else { 3811 category = fcNormal; 3812 exponent = myexponent - 15; // bias 3813 *significandParts() = mysignificand; 3814 if (myexponent == 0) // denormal 3815 exponent = -14; 3816 else 3817 *significandParts() |= 0x4; // integer bit 3818 } 3819 } 3820 3821 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { 3822 uint32_t i = (uint32_t)*api.getRawData(); 3823 uint32_t myexponent = (i >> 3) & 0xf; 3824 uint32_t mysignificand = i & 0x7; 3825 3826 initialize(&semFloat8E4M3FN); 3827 assert(partCount() == 1); 3828 3829 sign = i >> 7; 3830 if (myexponent == 0 && mysignificand == 0) { 3831 makeZero(sign); 3832 } else if (myexponent == 0xf && mysignificand == 7) { 3833 category = fcNaN; 3834 exponent = exponentNaN(); 3835 *significandParts() = mysignificand; 3836 } else { 3837 category = fcNormal; 3838 exponent = myexponent - 7; // bias 3839 *significandParts() = mysignificand; 3840 if (myexponent == 0) // denormal 3841 exponent = -6; 3842 else 3843 *significandParts() |= 0x8; // integer bit 3844 } 3845 } 3846 3847 /// Treat api as containing the bits of a floating point number. 3848 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3849 assert(api.getBitWidth() == Sem->sizeInBits); 3850 if (Sem == &semIEEEhalf) 3851 return initFromHalfAPInt(api); 3852 if (Sem == &semBFloat) 3853 return initFromBFloatAPInt(api); 3854 if (Sem == &semIEEEsingle) 3855 return initFromFloatAPInt(api); 3856 if (Sem == &semIEEEdouble) 3857 return initFromDoubleAPInt(api); 3858 if (Sem == &semX87DoubleExtended) 3859 return initFromF80LongDoubleAPInt(api); 3860 if (Sem == &semIEEEquad) 3861 return initFromQuadrupleAPInt(api); 3862 if (Sem == &semPPCDoubleDoubleLegacy) 3863 return initFromPPCDoubleDoubleAPInt(api); 3864 if (Sem == &semFloat8E5M2) 3865 return initFromFloat8E5M2APInt(api); 3866 if (Sem == &semFloat8E4M3FN) 3867 return initFromFloat8E4M3FNAPInt(api); 3868 3869 llvm_unreachable(nullptr); 3870 } 3871 3872 /// Make this number the largest magnitude normal number in the given 3873 /// semantics. 3874 void IEEEFloat::makeLargest(bool Negative) { 3875 // We want (in interchange format): 3876 // sign = {Negative} 3877 // exponent = 1..10 3878 // significand = 1..1 3879 category = fcNormal; 3880 sign = Negative; 3881 exponent = semantics->maxExponent; 3882 3883 // Use memset to set all but the highest integerPart to all ones. 3884 integerPart *significand = significandParts(); 3885 unsigned PartCount = partCount(); 3886 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3887 3888 // Set the high integerPart especially setting all unused top bits for 3889 // internal consistency. 3890 const unsigned NumUnusedHighBits = 3891 PartCount*integerPartWidth - semantics->precision; 3892 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3893 ? (~integerPart(0) >> NumUnusedHighBits) 3894 : 0; 3895 3896 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 3897 significand[0] &= ~integerPart(1); 3898 } 3899 3900 /// Make this number the smallest magnitude denormal number in the given 3901 /// semantics. 3902 void IEEEFloat::makeSmallest(bool Negative) { 3903 // We want (in interchange format): 3904 // sign = {Negative} 3905 // exponent = 0..0 3906 // significand = 0..01 3907 category = fcNormal; 3908 sign = Negative; 3909 exponent = semantics->minExponent; 3910 APInt::tcSet(significandParts(), 1, partCount()); 3911 } 3912 3913 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3914 // We want (in interchange format): 3915 // sign = {Negative} 3916 // exponent = 0..0 3917 // significand = 10..0 3918 3919 category = fcNormal; 3920 zeroSignificand(); 3921 sign = Negative; 3922 exponent = semantics->minExponent; 3923 APInt::tcSetBit(significandParts(), semantics->precision - 1); 3924 } 3925 3926 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3927 initFromAPInt(&Sem, API); 3928 } 3929 3930 IEEEFloat::IEEEFloat(float f) { 3931 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 3932 } 3933 3934 IEEEFloat::IEEEFloat(double d) { 3935 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 3936 } 3937 3938 namespace { 3939 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3940 Buffer.append(Str.begin(), Str.end()); 3941 } 3942 3943 /// Removes data from the given significand until it is no more 3944 /// precise than is required for the desired precision. 3945 void AdjustToPrecision(APInt &significand, 3946 int &exp, unsigned FormatPrecision) { 3947 unsigned bits = significand.getActiveBits(); 3948 3949 // 196/59 is a very slight overestimate of lg_2(10). 3950 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3951 3952 if (bits <= bitsRequired) return; 3953 3954 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3955 if (!tensRemovable) return; 3956 3957 exp += tensRemovable; 3958 3959 APInt divisor(significand.getBitWidth(), 1); 3960 APInt powten(significand.getBitWidth(), 10); 3961 while (true) { 3962 if (tensRemovable & 1) 3963 divisor *= powten; 3964 tensRemovable >>= 1; 3965 if (!tensRemovable) break; 3966 powten *= powten; 3967 } 3968 3969 significand = significand.udiv(divisor); 3970 3971 // Truncate the significand down to its active bit count. 3972 significand = significand.trunc(significand.getActiveBits()); 3973 } 3974 3975 3976 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3977 int &exp, unsigned FormatPrecision) { 3978 unsigned N = buffer.size(); 3979 if (N <= FormatPrecision) return; 3980 3981 // The most significant figures are the last ones in the buffer. 3982 unsigned FirstSignificant = N - FormatPrecision; 3983 3984 // Round. 3985 // FIXME: this probably shouldn't use 'round half up'. 3986 3987 // Rounding down is just a truncation, except we also want to drop 3988 // trailing zeros from the new result. 3989 if (buffer[FirstSignificant - 1] < '5') { 3990 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3991 FirstSignificant++; 3992 3993 exp += FirstSignificant; 3994 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3995 return; 3996 } 3997 3998 // Rounding up requires a decimal add-with-carry. If we continue 3999 // the carry, the newly-introduced zeros will just be truncated. 4000 for (unsigned I = FirstSignificant; I != N; ++I) { 4001 if (buffer[I] == '9') { 4002 FirstSignificant++; 4003 } else { 4004 buffer[I]++; 4005 break; 4006 } 4007 } 4008 4009 // If we carried through, we have exactly one digit of precision. 4010 if (FirstSignificant == N) { 4011 exp += FirstSignificant; 4012 buffer.clear(); 4013 buffer.push_back('1'); 4014 return; 4015 } 4016 4017 exp += FirstSignificant; 4018 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 4019 } 4020 } // namespace 4021 4022 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 4023 unsigned FormatMaxPadding, bool TruncateZero) const { 4024 switch (category) { 4025 case fcInfinity: 4026 if (isNegative()) 4027 return append(Str, "-Inf"); 4028 else 4029 return append(Str, "+Inf"); 4030 4031 case fcNaN: return append(Str, "NaN"); 4032 4033 case fcZero: 4034 if (isNegative()) 4035 Str.push_back('-'); 4036 4037 if (!FormatMaxPadding) { 4038 if (TruncateZero) 4039 append(Str, "0.0E+0"); 4040 else { 4041 append(Str, "0.0"); 4042 if (FormatPrecision > 1) 4043 Str.append(FormatPrecision - 1, '0'); 4044 append(Str, "e+00"); 4045 } 4046 } else 4047 Str.push_back('0'); 4048 return; 4049 4050 case fcNormal: 4051 break; 4052 } 4053 4054 if (isNegative()) 4055 Str.push_back('-'); 4056 4057 // Decompose the number into an APInt and an exponent. 4058 int exp = exponent - ((int) semantics->precision - 1); 4059 APInt significand( 4060 semantics->precision, 4061 ArrayRef(significandParts(), partCountForBits(semantics->precision))); 4062 4063 // Set FormatPrecision if zero. We want to do this before we 4064 // truncate trailing zeros, as those are part of the precision. 4065 if (!FormatPrecision) { 4066 // We use enough digits so the number can be round-tripped back to an 4067 // APFloat. The formula comes from "How to Print Floating-Point Numbers 4068 // Accurately" by Steele and White. 4069 // FIXME: Using a formula based purely on the precision is conservative; 4070 // we can print fewer digits depending on the actual value being printed. 4071 4072 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 4073 FormatPrecision = 2 + semantics->precision * 59 / 196; 4074 } 4075 4076 // Ignore trailing binary zeros. 4077 int trailingZeros = significand.countTrailingZeros(); 4078 exp += trailingZeros; 4079 significand.lshrInPlace(trailingZeros); 4080 4081 // Change the exponent from 2^e to 10^e. 4082 if (exp == 0) { 4083 // Nothing to do. 4084 } else if (exp > 0) { 4085 // Just shift left. 4086 significand = significand.zext(semantics->precision + exp); 4087 significand <<= exp; 4088 exp = 0; 4089 } else { /* exp < 0 */ 4090 int texp = -exp; 4091 4092 // We transform this using the identity: 4093 // (N)(2^-e) == (N)(5^e)(10^-e) 4094 // This means we have to multiply N (the significand) by 5^e. 4095 // To avoid overflow, we have to operate on numbers large 4096 // enough to store N * 5^e: 4097 // log2(N * 5^e) == log2(N) + e * log2(5) 4098 // <= semantics->precision + e * 137 / 59 4099 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 4100 4101 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 4102 4103 // Multiply significand by 5^e. 4104 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 4105 significand = significand.zext(precision); 4106 APInt five_to_the_i(precision, 5); 4107 while (true) { 4108 if (texp & 1) significand *= five_to_the_i; 4109 4110 texp >>= 1; 4111 if (!texp) break; 4112 five_to_the_i *= five_to_the_i; 4113 } 4114 } 4115 4116 AdjustToPrecision(significand, exp, FormatPrecision); 4117 4118 SmallVector<char, 256> buffer; 4119 4120 // Fill the buffer. 4121 unsigned precision = significand.getBitWidth(); 4122 if (precision < 4) { 4123 // We need enough precision to store the value 10. 4124 precision = 4; 4125 significand = significand.zext(precision); 4126 } 4127 APInt ten(precision, 10); 4128 APInt digit(precision, 0); 4129 4130 bool inTrail = true; 4131 while (significand != 0) { 4132 // digit <- significand % 10 4133 // significand <- significand / 10 4134 APInt::udivrem(significand, ten, significand, digit); 4135 4136 unsigned d = digit.getZExtValue(); 4137 4138 // Drop trailing zeros. 4139 if (inTrail && !d) exp++; 4140 else { 4141 buffer.push_back((char) ('0' + d)); 4142 inTrail = false; 4143 } 4144 } 4145 4146 assert(!buffer.empty() && "no characters in buffer!"); 4147 4148 // Drop down to FormatPrecision. 4149 // TODO: don't do more precise calculations above than are required. 4150 AdjustToPrecision(buffer, exp, FormatPrecision); 4151 4152 unsigned NDigits = buffer.size(); 4153 4154 // Check whether we should use scientific notation. 4155 bool FormatScientific; 4156 if (!FormatMaxPadding) 4157 FormatScientific = true; 4158 else { 4159 if (exp >= 0) { 4160 // 765e3 --> 765000 4161 // ^^^ 4162 // But we shouldn't make the number look more precise than it is. 4163 FormatScientific = ((unsigned) exp > FormatMaxPadding || 4164 NDigits + (unsigned) exp > FormatPrecision); 4165 } else { 4166 // Power of the most significant digit. 4167 int MSD = exp + (int) (NDigits - 1); 4168 if (MSD >= 0) { 4169 // 765e-2 == 7.65 4170 FormatScientific = false; 4171 } else { 4172 // 765e-5 == 0.00765 4173 // ^ ^^ 4174 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 4175 } 4176 } 4177 } 4178 4179 // Scientific formatting is pretty straightforward. 4180 if (FormatScientific) { 4181 exp += (NDigits - 1); 4182 4183 Str.push_back(buffer[NDigits-1]); 4184 Str.push_back('.'); 4185 if (NDigits == 1 && TruncateZero) 4186 Str.push_back('0'); 4187 else 4188 for (unsigned I = 1; I != NDigits; ++I) 4189 Str.push_back(buffer[NDigits-1-I]); 4190 // Fill with zeros up to FormatPrecision. 4191 if (!TruncateZero && FormatPrecision > NDigits - 1) 4192 Str.append(FormatPrecision - NDigits + 1, '0'); 4193 // For !TruncateZero we use lower 'e'. 4194 Str.push_back(TruncateZero ? 'E' : 'e'); 4195 4196 Str.push_back(exp >= 0 ? '+' : '-'); 4197 if (exp < 0) exp = -exp; 4198 SmallVector<char, 6> expbuf; 4199 do { 4200 expbuf.push_back((char) ('0' + (exp % 10))); 4201 exp /= 10; 4202 } while (exp); 4203 // Exponent always at least two digits if we do not truncate zeros. 4204 if (!TruncateZero && expbuf.size() < 2) 4205 expbuf.push_back('0'); 4206 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 4207 Str.push_back(expbuf[E-1-I]); 4208 return; 4209 } 4210 4211 // Non-scientific, positive exponents. 4212 if (exp >= 0) { 4213 for (unsigned I = 0; I != NDigits; ++I) 4214 Str.push_back(buffer[NDigits-1-I]); 4215 for (unsigned I = 0; I != (unsigned) exp; ++I) 4216 Str.push_back('0'); 4217 return; 4218 } 4219 4220 // Non-scientific, negative exponents. 4221 4222 // The number of digits to the left of the decimal point. 4223 int NWholeDigits = exp + (int) NDigits; 4224 4225 unsigned I = 0; 4226 if (NWholeDigits > 0) { 4227 for (; I != (unsigned) NWholeDigits; ++I) 4228 Str.push_back(buffer[NDigits-I-1]); 4229 Str.push_back('.'); 4230 } else { 4231 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 4232 4233 Str.push_back('0'); 4234 Str.push_back('.'); 4235 for (unsigned Z = 1; Z != NZeros; ++Z) 4236 Str.push_back('0'); 4237 } 4238 4239 for (; I != NDigits; ++I) 4240 Str.push_back(buffer[NDigits-I-1]); 4241 } 4242 4243 bool IEEEFloat::getExactInverse(APFloat *inv) const { 4244 // Special floats and denormals have no exact inverse. 4245 if (!isFiniteNonZero()) 4246 return false; 4247 4248 // Check that the number is a power of two by making sure that only the 4249 // integer bit is set in the significand. 4250 if (significandLSB() != semantics->precision - 1) 4251 return false; 4252 4253 // Get the inverse. 4254 IEEEFloat reciprocal(*semantics, 1ULL); 4255 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 4256 return false; 4257 4258 // Avoid multiplication with a denormal, it is not safe on all platforms and 4259 // may be slower than a normal division. 4260 if (reciprocal.isDenormal()) 4261 return false; 4262 4263 assert(reciprocal.isFiniteNonZero() && 4264 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 4265 4266 if (inv) 4267 *inv = APFloat(reciprocal, *semantics); 4268 4269 return true; 4270 } 4271 4272 bool IEEEFloat::isSignaling() const { 4273 if (!isNaN()) 4274 return false; 4275 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 4276 return false; 4277 4278 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 4279 // first bit of the trailing significand being 0. 4280 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 4281 } 4282 4283 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 4284 /// 4285 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 4286 /// appropriate sign switching before/after the computation. 4287 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 4288 // If we are performing nextDown, swap sign so we have -x. 4289 if (nextDown) 4290 changeSign(); 4291 4292 // Compute nextUp(x) 4293 opStatus result = opOK; 4294 4295 // Handle each float category separately. 4296 switch (category) { 4297 case fcInfinity: 4298 // nextUp(+inf) = +inf 4299 if (!isNegative()) 4300 break; 4301 // nextUp(-inf) = -getLargest() 4302 makeLargest(true); 4303 break; 4304 case fcNaN: 4305 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 4306 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 4307 // change the payload. 4308 if (isSignaling()) { 4309 result = opInvalidOp; 4310 // For consistency, propagate the sign of the sNaN to the qNaN. 4311 makeNaN(false, isNegative(), nullptr); 4312 } 4313 break; 4314 case fcZero: 4315 // nextUp(pm 0) = +getSmallest() 4316 makeSmallest(false); 4317 break; 4318 case fcNormal: 4319 // nextUp(-getSmallest()) = -0 4320 if (isSmallest() && isNegative()) { 4321 APInt::tcSet(significandParts(), 0, partCount()); 4322 category = fcZero; 4323 exponent = 0; 4324 break; 4325 } 4326 4327 if (isLargest() && !isNegative()) { 4328 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 4329 // nextUp(getLargest()) == NAN 4330 makeNaN(); 4331 break; 4332 } else { 4333 // nextUp(getLargest()) == INFINITY 4334 APInt::tcSet(significandParts(), 0, partCount()); 4335 category = fcInfinity; 4336 exponent = semantics->maxExponent + 1; 4337 break; 4338 } 4339 } 4340 4341 // nextUp(normal) == normal + inc. 4342 if (isNegative()) { 4343 // If we are negative, we need to decrement the significand. 4344 4345 // We only cross a binade boundary that requires adjusting the exponent 4346 // if: 4347 // 1. exponent != semantics->minExponent. This implies we are not in the 4348 // smallest binade or are dealing with denormals. 4349 // 2. Our significand excluding the integral bit is all zeros. 4350 bool WillCrossBinadeBoundary = 4351 exponent != semantics->minExponent && isSignificandAllZeros(); 4352 4353 // Decrement the significand. 4354 // 4355 // We always do this since: 4356 // 1. If we are dealing with a non-binade decrement, by definition we 4357 // just decrement the significand. 4358 // 2. If we are dealing with a normal -> normal binade decrement, since 4359 // we have an explicit integral bit the fact that all bits but the 4360 // integral bit are zero implies that subtracting one will yield a 4361 // significand with 0 integral bit and 1 in all other spots. Thus we 4362 // must just adjust the exponent and set the integral bit to 1. 4363 // 3. If we are dealing with a normal -> denormal binade decrement, 4364 // since we set the integral bit to 0 when we represent denormals, we 4365 // just decrement the significand. 4366 integerPart *Parts = significandParts(); 4367 APInt::tcDecrement(Parts, partCount()); 4368 4369 if (WillCrossBinadeBoundary) { 4370 // Our result is a normal number. Do the following: 4371 // 1. Set the integral bit to 1. 4372 // 2. Decrement the exponent. 4373 APInt::tcSetBit(Parts, semantics->precision - 1); 4374 exponent--; 4375 } 4376 } else { 4377 // If we are positive, we need to increment the significand. 4378 4379 // We only cross a binade boundary that requires adjusting the exponent if 4380 // the input is not a denormal and all of said input's significand bits 4381 // are set. If all of said conditions are true: clear the significand, set 4382 // the integral bit to 1, and increment the exponent. If we have a 4383 // denormal always increment since moving denormals and the numbers in the 4384 // smallest normal binade have the same exponent in our representation. 4385 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 4386 4387 if (WillCrossBinadeBoundary) { 4388 integerPart *Parts = significandParts(); 4389 APInt::tcSet(Parts, 0, partCount()); 4390 APInt::tcSetBit(Parts, semantics->precision - 1); 4391 assert(exponent != semantics->maxExponent && 4392 "We can not increment an exponent beyond the maxExponent allowed" 4393 " by the given floating point semantics."); 4394 exponent++; 4395 } else { 4396 incrementSignificand(); 4397 } 4398 } 4399 break; 4400 } 4401 4402 // If we are performing nextDown, swap sign so we have -nextUp(-x) 4403 if (nextDown) 4404 changeSign(); 4405 4406 return result; 4407 } 4408 4409 APFloatBase::ExponentType IEEEFloat::exponentNaN() const { 4410 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 4411 return semantics->maxExponent; 4412 return semantics->maxExponent + 1; 4413 } 4414 4415 APFloatBase::ExponentType IEEEFloat::exponentInf() const { 4416 return semantics->maxExponent + 1; 4417 } 4418 4419 APFloatBase::ExponentType IEEEFloat::exponentZero() const { 4420 return semantics->minExponent - 1; 4421 } 4422 4423 void IEEEFloat::makeInf(bool Negative) { 4424 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 4425 // There is no Inf, so make NaN instead. 4426 makeNaN(false, Negative); 4427 return; 4428 } 4429 category = fcInfinity; 4430 sign = Negative; 4431 exponent = exponentInf(); 4432 APInt::tcSet(significandParts(), 0, partCount()); 4433 } 4434 4435 void IEEEFloat::makeZero(bool Negative) { 4436 category = fcZero; 4437 sign = Negative; 4438 exponent = exponentZero(); 4439 APInt::tcSet(significandParts(), 0, partCount()); 4440 } 4441 4442 void IEEEFloat::makeQuiet() { 4443 assert(isNaN()); 4444 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly) 4445 APInt::tcSetBit(significandParts(), semantics->precision - 2); 4446 } 4447 4448 int ilogb(const IEEEFloat &Arg) { 4449 if (Arg.isNaN()) 4450 return IEEEFloat::IEK_NaN; 4451 if (Arg.isZero()) 4452 return IEEEFloat::IEK_Zero; 4453 if (Arg.isInfinity()) 4454 return IEEEFloat::IEK_Inf; 4455 if (!Arg.isDenormal()) 4456 return Arg.exponent; 4457 4458 IEEEFloat Normalized(Arg); 4459 int SignificandBits = Arg.getSemantics().precision - 1; 4460 4461 Normalized.exponent += SignificandBits; 4462 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 4463 return Normalized.exponent - SignificandBits; 4464 } 4465 4466 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 4467 auto MaxExp = X.getSemantics().maxExponent; 4468 auto MinExp = X.getSemantics().minExponent; 4469 4470 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 4471 // overflow; clamp it to a safe range before adding, but ensure that the range 4472 // is large enough that the clamp does not change the result. The range we 4473 // need to support is the difference between the largest possible exponent and 4474 // the normalized exponent of half the smallest denormal. 4475 4476 int SignificandBits = X.getSemantics().precision - 1; 4477 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4478 4479 // Clamp to one past the range ends to let normalize handle overlflow. 4480 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); 4481 X.normalize(RoundingMode, lfExactlyZero); 4482 if (X.isNaN()) 4483 X.makeQuiet(); 4484 return X; 4485 } 4486 4487 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 4488 Exp = ilogb(Val); 4489 4490 // Quiet signalling nans. 4491 if (Exp == IEEEFloat::IEK_NaN) { 4492 IEEEFloat Quiet(Val); 4493 Quiet.makeQuiet(); 4494 return Quiet; 4495 } 4496 4497 if (Exp == IEEEFloat::IEK_Inf) 4498 return Val; 4499 4500 // 1 is added because frexp is defined to return a normalized fraction in 4501 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4502 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 4503 return scalbn(Val, -Exp, RM); 4504 } 4505 4506 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4507 : Semantics(&S), 4508 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4509 assert(Semantics == &semPPCDoubleDouble); 4510 } 4511 4512 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4513 : Semantics(&S), 4514 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4515 APFloat(semIEEEdouble, uninitialized)}) { 4516 assert(Semantics == &semPPCDoubleDouble); 4517 } 4518 4519 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4520 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4521 APFloat(semIEEEdouble)}) { 4522 assert(Semantics == &semPPCDoubleDouble); 4523 } 4524 4525 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4526 : Semantics(&S), 4527 Floats(new APFloat[2]{ 4528 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4529 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4530 assert(Semantics == &semPPCDoubleDouble); 4531 } 4532 4533 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4534 APFloat &&Second) 4535 : Semantics(&S), 4536 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4537 assert(Semantics == &semPPCDoubleDouble); 4538 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4539 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4540 } 4541 4542 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4543 : Semantics(RHS.Semantics), 4544 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4545 APFloat(RHS.Floats[1])} 4546 : nullptr) { 4547 assert(Semantics == &semPPCDoubleDouble); 4548 } 4549 4550 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4551 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4552 RHS.Semantics = &semBogus; 4553 assert(Semantics == &semPPCDoubleDouble); 4554 } 4555 4556 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4557 if (Semantics == RHS.Semantics && RHS.Floats) { 4558 Floats[0] = RHS.Floats[0]; 4559 Floats[1] = RHS.Floats[1]; 4560 } else if (this != &RHS) { 4561 this->~DoubleAPFloat(); 4562 new (this) DoubleAPFloat(RHS); 4563 } 4564 return *this; 4565 } 4566 4567 // Implement addition, subtraction, multiplication and division based on: 4568 // "Software for Doubled-Precision Floating-Point Computations", 4569 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 4570 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4571 const APFloat &c, const APFloat &cc, 4572 roundingMode RM) { 4573 int Status = opOK; 4574 APFloat z = a; 4575 Status |= z.add(c, RM); 4576 if (!z.isFinite()) { 4577 if (!z.isInfinity()) { 4578 Floats[0] = std::move(z); 4579 Floats[1].makeZero(/* Neg = */ false); 4580 return (opStatus)Status; 4581 } 4582 Status = opOK; 4583 auto AComparedToC = a.compareAbsoluteValue(c); 4584 z = cc; 4585 Status |= z.add(aa, RM); 4586 if (AComparedToC == APFloat::cmpGreaterThan) { 4587 // z = cc + aa + c + a; 4588 Status |= z.add(c, RM); 4589 Status |= z.add(a, RM); 4590 } else { 4591 // z = cc + aa + a + c; 4592 Status |= z.add(a, RM); 4593 Status |= z.add(c, RM); 4594 } 4595 if (!z.isFinite()) { 4596 Floats[0] = std::move(z); 4597 Floats[1].makeZero(/* Neg = */ false); 4598 return (opStatus)Status; 4599 } 4600 Floats[0] = z; 4601 APFloat zz = aa; 4602 Status |= zz.add(cc, RM); 4603 if (AComparedToC == APFloat::cmpGreaterThan) { 4604 // Floats[1] = a - z + c + zz; 4605 Floats[1] = a; 4606 Status |= Floats[1].subtract(z, RM); 4607 Status |= Floats[1].add(c, RM); 4608 Status |= Floats[1].add(zz, RM); 4609 } else { 4610 // Floats[1] = c - z + a + zz; 4611 Floats[1] = c; 4612 Status |= Floats[1].subtract(z, RM); 4613 Status |= Floats[1].add(a, RM); 4614 Status |= Floats[1].add(zz, RM); 4615 } 4616 } else { 4617 // q = a - z; 4618 APFloat q = a; 4619 Status |= q.subtract(z, RM); 4620 4621 // zz = q + c + (a - (q + z)) + aa + cc; 4622 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 4623 auto zz = q; 4624 Status |= zz.add(c, RM); 4625 Status |= q.add(z, RM); 4626 Status |= q.subtract(a, RM); 4627 q.changeSign(); 4628 Status |= zz.add(q, RM); 4629 Status |= zz.add(aa, RM); 4630 Status |= zz.add(cc, RM); 4631 if (zz.isZero() && !zz.isNegative()) { 4632 Floats[0] = std::move(z); 4633 Floats[1].makeZero(/* Neg = */ false); 4634 return opOK; 4635 } 4636 Floats[0] = z; 4637 Status |= Floats[0].add(zz, RM); 4638 if (!Floats[0].isFinite()) { 4639 Floats[1].makeZero(/* Neg = */ false); 4640 return (opStatus)Status; 4641 } 4642 Floats[1] = std::move(z); 4643 Status |= Floats[1].subtract(Floats[0], RM); 4644 Status |= Floats[1].add(zz, RM); 4645 } 4646 return (opStatus)Status; 4647 } 4648 4649 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4650 const DoubleAPFloat &RHS, 4651 DoubleAPFloat &Out, 4652 roundingMode RM) { 4653 if (LHS.getCategory() == fcNaN) { 4654 Out = LHS; 4655 return opOK; 4656 } 4657 if (RHS.getCategory() == fcNaN) { 4658 Out = RHS; 4659 return opOK; 4660 } 4661 if (LHS.getCategory() == fcZero) { 4662 Out = RHS; 4663 return opOK; 4664 } 4665 if (RHS.getCategory() == fcZero) { 4666 Out = LHS; 4667 return opOK; 4668 } 4669 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4670 LHS.isNegative() != RHS.isNegative()) { 4671 Out.makeNaN(false, Out.isNegative(), nullptr); 4672 return opInvalidOp; 4673 } 4674 if (LHS.getCategory() == fcInfinity) { 4675 Out = LHS; 4676 return opOK; 4677 } 4678 if (RHS.getCategory() == fcInfinity) { 4679 Out = RHS; 4680 return opOK; 4681 } 4682 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4683 4684 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4685 CC(RHS.Floats[1]); 4686 assert(&A.getSemantics() == &semIEEEdouble); 4687 assert(&AA.getSemantics() == &semIEEEdouble); 4688 assert(&C.getSemantics() == &semIEEEdouble); 4689 assert(&CC.getSemantics() == &semIEEEdouble); 4690 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble); 4691 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4692 return Out.addImpl(A, AA, C, CC, RM); 4693 } 4694 4695 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4696 roundingMode RM) { 4697 return addWithSpecial(*this, RHS, *this, RM); 4698 } 4699 4700 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4701 roundingMode RM) { 4702 changeSign(); 4703 auto Ret = add(RHS, RM); 4704 changeSign(); 4705 return Ret; 4706 } 4707 4708 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4709 APFloat::roundingMode RM) { 4710 const auto &LHS = *this; 4711 auto &Out = *this; 4712 /* Interesting observation: For special categories, finding the lowest 4713 common ancestor of the following layered graph gives the correct 4714 return category: 4715 4716 NaN 4717 / \ 4718 Zero Inf 4719 \ / 4720 Normal 4721 4722 e.g. NaN * NaN = NaN 4723 Zero * Inf = NaN 4724 Normal * Zero = Zero 4725 Normal * Inf = Inf 4726 */ 4727 if (LHS.getCategory() == fcNaN) { 4728 Out = LHS; 4729 return opOK; 4730 } 4731 if (RHS.getCategory() == fcNaN) { 4732 Out = RHS; 4733 return opOK; 4734 } 4735 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4736 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4737 Out.makeNaN(false, false, nullptr); 4738 return opOK; 4739 } 4740 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4741 Out = LHS; 4742 return opOK; 4743 } 4744 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4745 Out = RHS; 4746 return opOK; 4747 } 4748 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4749 "Special cases not handled exhaustively"); 4750 4751 int Status = opOK; 4752 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4753 // t = a * c 4754 APFloat T = A; 4755 Status |= T.multiply(C, RM); 4756 if (!T.isFiniteNonZero()) { 4757 Floats[0] = T; 4758 Floats[1].makeZero(/* Neg = */ false); 4759 return (opStatus)Status; 4760 } 4761 4762 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4763 APFloat Tau = A; 4764 T.changeSign(); 4765 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4766 T.changeSign(); 4767 { 4768 // v = a * d 4769 APFloat V = A; 4770 Status |= V.multiply(D, RM); 4771 // w = b * c 4772 APFloat W = B; 4773 Status |= W.multiply(C, RM); 4774 Status |= V.add(W, RM); 4775 // tau += v + w 4776 Status |= Tau.add(V, RM); 4777 } 4778 // u = t + tau 4779 APFloat U = T; 4780 Status |= U.add(Tau, RM); 4781 4782 Floats[0] = U; 4783 if (!U.isFinite()) { 4784 Floats[1].makeZero(/* Neg = */ false); 4785 } else { 4786 // Floats[1] = (t - u) + tau 4787 Status |= T.subtract(U, RM); 4788 Status |= T.add(Tau, RM); 4789 Floats[1] = T; 4790 } 4791 return (opStatus)Status; 4792 } 4793 4794 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4795 APFloat::roundingMode RM) { 4796 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4797 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4798 auto Ret = 4799 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4800 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4801 return Ret; 4802 } 4803 4804 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4805 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4806 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4807 auto Ret = 4808 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4809 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4810 return Ret; 4811 } 4812 4813 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4814 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4815 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4816 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4817 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4818 return Ret; 4819 } 4820 4821 APFloat::opStatus 4822 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4823 const DoubleAPFloat &Addend, 4824 APFloat::roundingMode RM) { 4825 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4826 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4827 auto Ret = Tmp.fusedMultiplyAdd( 4828 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4829 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4830 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4831 return Ret; 4832 } 4833 4834 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4835 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4836 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4837 auto Ret = Tmp.roundToIntegral(RM); 4838 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4839 return Ret; 4840 } 4841 4842 void DoubleAPFloat::changeSign() { 4843 Floats[0].changeSign(); 4844 Floats[1].changeSign(); 4845 } 4846 4847 APFloat::cmpResult 4848 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4849 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4850 if (Result != cmpEqual) 4851 return Result; 4852 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4853 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4854 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4855 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4856 if (Against && !RHSAgainst) 4857 return cmpLessThan; 4858 if (!Against && RHSAgainst) 4859 return cmpGreaterThan; 4860 if (!Against && !RHSAgainst) 4861 return Result; 4862 if (Against && RHSAgainst) 4863 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4864 } 4865 return Result; 4866 } 4867 4868 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4869 return Floats[0].getCategory(); 4870 } 4871 4872 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } 4873 4874 void DoubleAPFloat::makeInf(bool Neg) { 4875 Floats[0].makeInf(Neg); 4876 Floats[1].makeZero(/* Neg = */ false); 4877 } 4878 4879 void DoubleAPFloat::makeZero(bool Neg) { 4880 Floats[0].makeZero(Neg); 4881 Floats[1].makeZero(/* Neg = */ false); 4882 } 4883 4884 void DoubleAPFloat::makeLargest(bool Neg) { 4885 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4886 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 4887 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 4888 if (Neg) 4889 changeSign(); 4890 } 4891 4892 void DoubleAPFloat::makeSmallest(bool Neg) { 4893 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4894 Floats[0].makeSmallest(Neg); 4895 Floats[1].makeZero(/* Neg = */ false); 4896 } 4897 4898 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 4899 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4900 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 4901 if (Neg) 4902 Floats[0].changeSign(); 4903 Floats[1].makeZero(/* Neg = */ false); 4904 } 4905 4906 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4907 Floats[0].makeNaN(SNaN, Neg, fill); 4908 Floats[1].makeZero(/* Neg = */ false); 4909 } 4910 4911 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 4912 auto Result = Floats[0].compare(RHS.Floats[0]); 4913 // |Float[0]| > |Float[1]| 4914 if (Result == APFloat::cmpEqual) 4915 return Floats[1].compare(RHS.Floats[1]); 4916 return Result; 4917 } 4918 4919 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 4920 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 4921 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 4922 } 4923 4924 hash_code hash_value(const DoubleAPFloat &Arg) { 4925 if (Arg.Floats) 4926 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 4927 return hash_combine(Arg.Semantics); 4928 } 4929 4930 APInt DoubleAPFloat::bitcastToAPInt() const { 4931 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4932 uint64_t Data[] = { 4933 Floats[0].bitcastToAPInt().getRawData()[0], 4934 Floats[1].bitcastToAPInt().getRawData()[0], 4935 }; 4936 return APInt(128, 2, Data); 4937 } 4938 4939 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 4940 roundingMode RM) { 4941 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4942 APFloat Tmp(semPPCDoubleDoubleLegacy); 4943 auto Ret = Tmp.convertFromString(S, RM); 4944 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4945 return Ret; 4946 } 4947 4948 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 4949 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4950 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4951 auto Ret = Tmp.next(nextDown); 4952 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4953 return Ret; 4954 } 4955 4956 APFloat::opStatus 4957 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 4958 unsigned int Width, bool IsSigned, 4959 roundingMode RM, bool *IsExact) const { 4960 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4961 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4962 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 4963 } 4964 4965 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 4966 bool IsSigned, 4967 roundingMode RM) { 4968 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4969 APFloat Tmp(semPPCDoubleDoubleLegacy); 4970 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 4971 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4972 return Ret; 4973 } 4974 4975 APFloat::opStatus 4976 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 4977 unsigned int InputSize, 4978 bool IsSigned, roundingMode RM) { 4979 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4980 APFloat Tmp(semPPCDoubleDoubleLegacy); 4981 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 4982 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4983 return Ret; 4984 } 4985 4986 APFloat::opStatus 4987 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 4988 unsigned int InputSize, 4989 bool IsSigned, roundingMode RM) { 4990 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4991 APFloat Tmp(semPPCDoubleDoubleLegacy); 4992 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 4993 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4994 return Ret; 4995 } 4996 4997 unsigned int DoubleAPFloat::convertToHexString(char *DST, 4998 unsigned int HexDigits, 4999 bool UpperCase, 5000 roundingMode RM) const { 5001 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5002 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5003 .convertToHexString(DST, HexDigits, UpperCase, RM); 5004 } 5005 5006 bool DoubleAPFloat::isDenormal() const { 5007 return getCategory() == fcNormal && 5008 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5009 // (double)(Hi + Lo) == Hi defines a normal number. 5010 Floats[0] != Floats[0] + Floats[1]); 5011 } 5012 5013 bool DoubleAPFloat::isSmallest() const { 5014 if (getCategory() != fcNormal) 5015 return false; 5016 DoubleAPFloat Tmp(*this); 5017 Tmp.makeSmallest(this->isNegative()); 5018 return Tmp.compare(*this) == cmpEqual; 5019 } 5020 5021 bool DoubleAPFloat::isSmallestNormalized() const { 5022 if (getCategory() != fcNormal) 5023 return false; 5024 5025 DoubleAPFloat Tmp(*this); 5026 Tmp.makeSmallestNormalized(this->isNegative()); 5027 return Tmp.compare(*this) == cmpEqual; 5028 } 5029 5030 bool DoubleAPFloat::isLargest() const { 5031 if (getCategory() != fcNormal) 5032 return false; 5033 DoubleAPFloat Tmp(*this); 5034 Tmp.makeLargest(this->isNegative()); 5035 return Tmp.compare(*this) == cmpEqual; 5036 } 5037 5038 bool DoubleAPFloat::isInteger() const { 5039 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5040 return Floats[0].isInteger() && Floats[1].isInteger(); 5041 } 5042 5043 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5044 unsigned FormatPrecision, 5045 unsigned FormatMaxPadding, 5046 bool TruncateZero) const { 5047 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5048 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5049 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5050 } 5051 5052 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5053 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5054 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5055 if (!inv) 5056 return Tmp.getExactInverse(nullptr); 5057 APFloat Inv(semPPCDoubleDoubleLegacy); 5058 auto Ret = Tmp.getExactInverse(&Inv); 5059 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5060 return Ret; 5061 } 5062 5063 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp, 5064 APFloat::roundingMode RM) { 5065 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5066 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5067 scalbn(Arg.Floats[1], Exp, RM)); 5068 } 5069 5070 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5071 APFloat::roundingMode RM) { 5072 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5073 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5074 APFloat Second = Arg.Floats[1]; 5075 if (Arg.getCategory() == APFloat::fcNormal) 5076 Second = scalbn(Second, -Exp, RM); 5077 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5078 } 5079 5080 } // namespace detail 5081 5082 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5083 if (usesLayout<IEEEFloat>(Semantics)) { 5084 new (&IEEE) IEEEFloat(std::move(F)); 5085 return; 5086 } 5087 if (usesLayout<DoubleAPFloat>(Semantics)) { 5088 const fltSemantics& S = F.getSemantics(); 5089 new (&Double) 5090 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5091 APFloat(semIEEEdouble)); 5092 return; 5093 } 5094 llvm_unreachable("Unexpected semantics"); 5095 } 5096 5097 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5098 roundingMode RM) { 5099 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5100 } 5101 5102 hash_code hash_value(const APFloat &Arg) { 5103 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5104 return hash_value(Arg.U.IEEE); 5105 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5106 return hash_value(Arg.U.Double); 5107 llvm_unreachable("Unexpected semantics"); 5108 } 5109 5110 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5111 : APFloat(Semantics) { 5112 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5113 assert(StatusOrErr && "Invalid floating point representation"); 5114 consumeError(StatusOrErr.takeError()); 5115 } 5116 5117 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5118 roundingMode RM, bool *losesInfo) { 5119 if (&getSemantics() == &ToSemantics) { 5120 *losesInfo = false; 5121 return opOK; 5122 } 5123 if (usesLayout<IEEEFloat>(getSemantics()) && 5124 usesLayout<IEEEFloat>(ToSemantics)) 5125 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5126 if (usesLayout<IEEEFloat>(getSemantics()) && 5127 usesLayout<DoubleAPFloat>(ToSemantics)) { 5128 assert(&ToSemantics == &semPPCDoubleDouble); 5129 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5130 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5131 return Ret; 5132 } 5133 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5134 usesLayout<IEEEFloat>(ToSemantics)) { 5135 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5136 *this = APFloat(std::move(getIEEE()), ToSemantics); 5137 return Ret; 5138 } 5139 llvm_unreachable("Unexpected semantics"); 5140 } 5141 5142 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5143 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5144 } 5145 5146 void APFloat::print(raw_ostream &OS) const { 5147 SmallVector<char, 16> Buffer; 5148 toString(Buffer); 5149 OS << Buffer << "\n"; 5150 } 5151 5152 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5153 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 5154 #endif 5155 5156 void APFloat::Profile(FoldingSetNodeID &NID) const { 5157 NID.Add(bitcastToAPInt()); 5158 } 5159 5160 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 5161 an APSInt, whose initial bit-width and signed-ness are used to determine the 5162 precision of the conversion. 5163 */ 5164 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5165 roundingMode rounding_mode, 5166 bool *isExact) const { 5167 unsigned bitWidth = result.getBitWidth(); 5168 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5169 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5170 rounding_mode, isExact); 5171 // Keeps the original signed-ness. 5172 result = APInt(bitWidth, parts); 5173 return status; 5174 } 5175 5176 double APFloat::convertToDouble() const { 5177 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5178 return getIEEE().convertToDouble(); 5179 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5180 "Float semantics is not representable by IEEEdouble"); 5181 APFloat Temp = *this; 5182 bool LosesInfo; 5183 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5184 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5185 (void)St; 5186 return Temp.getIEEE().convertToDouble(); 5187 } 5188 5189 float APFloat::convertToFloat() const { 5190 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5191 return getIEEE().convertToFloat(); 5192 assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5193 "Float semantics is not representable by IEEEsingle"); 5194 APFloat Temp = *this; 5195 bool LosesInfo; 5196 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5197 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5198 (void)St; 5199 return Temp.getIEEE().convertToFloat(); 5200 } 5201 5202 } // namespace llvm 5203 5204 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5205