1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision floating 10 // point values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FloatingPointMode.h" 18 #include "llvm/ADT/FoldingSet.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringExtras.h" 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/Config/llvm-config.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/Error.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cstring> 29 #include <limits.h> 30 31 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 32 do { \ 33 if (usesLayout<IEEEFloat>(getSemantics())) \ 34 return U.IEEE.METHOD_CALL; \ 35 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 36 return U.Double.METHOD_CALL; \ 37 llvm_unreachable("Unexpected semantics"); \ 38 } while (false) 39 40 using namespace llvm; 41 42 /// A macro used to combine two fcCategory enums into one key which can be used 43 /// in a switch statement to classify how the interaction of two APFloat's 44 /// categories affects an operation. 45 /// 46 /// TODO: If clang source code is ever allowed to use constexpr in its own 47 /// codebase, change this into a static inline function. 48 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 49 50 /* Assumed in hexadecimal significand parsing, and conversion to 51 hexadecimal strings. */ 52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 53 54 namespace llvm { 55 56 // How the nonfinite values Inf and NaN are represented. 57 enum class fltNonfiniteBehavior { 58 // Represents standard IEEE 754 behavior. A value is nonfinite if the 59 // exponent field is all 1s. In such cases, a value is Inf if the 60 // significand bits are all zero, and NaN otherwise 61 IEEE754, 62 63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN, 64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no 65 // representation for Inf, and operations that would ordinarily produce Inf 66 // produce NaN instead. 67 // The details of the NaN representation(s) in this form are determined by the 68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available 69 // encodings do not distinguish between signalling and quiet NaN. 70 NanOnly, 71 }; 72 73 // How NaN values are represented. This is curently only used in combination 74 // with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE 75 // while having IEEE non-finite behavior is liable to lead to unexpected 76 // results. 77 enum class fltNanEncoding { 78 // Represents the standard IEEE behavior where a value is NaN if its 79 // exponent is all 1s and the significand is non-zero. 80 IEEE, 81 82 // Represents the behavior in the Float8E4M3 floating point type where NaN is 83 // represented by having the exponent and mantissa set to all 1s. 84 // This behavior matches the FP8 E4M3 type described in 85 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs 86 // as non-signalling, although the paper does not state whether the NaN 87 // values are signalling or not. 88 AllOnes, 89 90 // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types 91 // where NaN is represented by a sign bit of 1 and all 0s in the exponent 92 // and mantissa (i.e. the negative zero encoding in a IEEE float). Since 93 // there is only one NaN value, it is treated as quiet NaN. This matches the 94 // behavior described in https://arxiv.org/abs/2206.02915 . 95 NegativeZero, 96 }; 97 98 /* Represents floating point arithmetic semantics. */ 99 struct fltSemantics { 100 /* The largest E such that 2^E is representable; this matches the 101 definition of IEEE 754. */ 102 APFloatBase::ExponentType maxExponent; 103 104 /* The smallest E such that 2^E is a normalized number; this 105 matches the definition of IEEE 754. */ 106 APFloatBase::ExponentType minExponent; 107 108 /* Number of bits in the significand. This includes the integer 109 bit. */ 110 unsigned int precision; 111 112 /* Number of bits actually used in the semantics. */ 113 unsigned int sizeInBits; 114 115 fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; 116 117 fltNanEncoding nanEncoding = fltNanEncoding::IEEE; 118 // Returns true if any number described by this semantics can be precisely 119 // represented by the specified semantics. Does not take into account 120 // the value of fltNonfiniteBehavior. 121 bool isRepresentableBy(const fltSemantics &S) const { 122 return maxExponent <= S.maxExponent && minExponent >= S.minExponent && 123 precision <= S.precision; 124 } 125 }; 126 127 static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16}; 128 static constexpr fltSemantics semBFloat = {127, -126, 8, 16}; 129 static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32}; 130 static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; 131 static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128}; 132 static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; 133 static constexpr fltSemantics semFloat8E5M2FNUZ = { 134 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; 135 static constexpr fltSemantics semFloat8E4M3FN = { 136 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes}; 137 static constexpr fltSemantics semFloat8E4M3FNUZ = { 138 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; 139 static constexpr fltSemantics semFloat8E4M3B11FNUZ = { 140 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; 141 static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; 142 static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 143 static constexpr fltSemantics semBogus = {0, 0, 0, 0}; 144 145 /* The IBM double-double semantics. Such a number consists of a pair of IEEE 146 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, 147 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. 148 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent 149 to each other, and two 11-bit exponents. 150 151 Note: we need to make the value different from semBogus as otherwise 152 an unsafe optimization may collapse both values to a single address, 153 and we heavily rely on them having distinct addresses. */ 154 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; 155 156 /* These are legacy semantics for the fallback, inaccrurate implementation of 157 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the 158 operation. It's equivalent to having an IEEE number with consecutive 106 159 bits of mantissa and 11 bits of exponent. 160 161 It's not equivalent to IBM double-double. For example, a legit IBM 162 double-double, 1 + epsilon: 163 164 1 + epsilon = 1 + (1 >> 1076) 165 166 is not representable by a consecutive 106 bits of mantissa. 167 168 Currently, these semantics are used in the following way: 169 170 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> 171 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> 172 semPPCDoubleDoubleLegacy -> IEEE operations 173 174 We use bitcastToAPInt() to get the bit representation (in APInt) of the 175 underlying IEEEdouble, then use the APInt constructor to construct the 176 legacy IEEE float. 177 178 TODO: Implement all operations in semPPCDoubleDouble, and delete these 179 semantics. */ 180 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 181 53 + 53, 128}; 182 183 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 184 switch (S) { 185 case S_IEEEhalf: 186 return IEEEhalf(); 187 case S_BFloat: 188 return BFloat(); 189 case S_IEEEsingle: 190 return IEEEsingle(); 191 case S_IEEEdouble: 192 return IEEEdouble(); 193 case S_IEEEquad: 194 return IEEEquad(); 195 case S_PPCDoubleDouble: 196 return PPCDoubleDouble(); 197 case S_Float8E5M2: 198 return Float8E5M2(); 199 case S_Float8E5M2FNUZ: 200 return Float8E5M2FNUZ(); 201 case S_Float8E4M3FN: 202 return Float8E4M3FN(); 203 case S_Float8E4M3FNUZ: 204 return Float8E4M3FNUZ(); 205 case S_Float8E4M3B11FNUZ: 206 return Float8E4M3B11FNUZ(); 207 case S_FloatTF32: 208 return FloatTF32(); 209 case S_x87DoubleExtended: 210 return x87DoubleExtended(); 211 } 212 llvm_unreachable("Unrecognised floating semantics"); 213 } 214 215 APFloatBase::Semantics 216 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 217 if (&Sem == &llvm::APFloat::IEEEhalf()) 218 return S_IEEEhalf; 219 else if (&Sem == &llvm::APFloat::BFloat()) 220 return S_BFloat; 221 else if (&Sem == &llvm::APFloat::IEEEsingle()) 222 return S_IEEEsingle; 223 else if (&Sem == &llvm::APFloat::IEEEdouble()) 224 return S_IEEEdouble; 225 else if (&Sem == &llvm::APFloat::IEEEquad()) 226 return S_IEEEquad; 227 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 228 return S_PPCDoubleDouble; 229 else if (&Sem == &llvm::APFloat::Float8E5M2()) 230 return S_Float8E5M2; 231 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 232 return S_Float8E5M2FNUZ; 233 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 234 return S_Float8E4M3FN; 235 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 236 return S_Float8E4M3FNUZ; 237 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 238 return S_Float8E4M3B11FNUZ; 239 else if (&Sem == &llvm::APFloat::FloatTF32()) 240 return S_FloatTF32; 241 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 242 return S_x87DoubleExtended; 243 else 244 llvm_unreachable("Unknown floating semantics"); 245 } 246 247 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 248 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 249 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 250 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 251 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 252 const fltSemantics &APFloatBase::PPCDoubleDouble() { 253 return semPPCDoubleDouble; 254 } 255 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 256 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 257 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 258 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 259 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 260 return semFloat8E4M3B11FNUZ; 261 } 262 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } 263 const fltSemantics &APFloatBase::x87DoubleExtended() { 264 return semX87DoubleExtended; 265 } 266 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 267 268 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 269 constexpr RoundingMode APFloatBase::rmTowardPositive; 270 constexpr RoundingMode APFloatBase::rmTowardNegative; 271 constexpr RoundingMode APFloatBase::rmTowardZero; 272 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 273 274 /* A tight upper bound on number of parts required to hold the value 275 pow(5, power) is 276 277 power * 815 / (351 * integerPartWidth) + 1 278 279 However, whilst the result may require only this many parts, 280 because we are multiplying two values to get it, the 281 multiplication may require an extra part with the excess part 282 being zero (consider the trivial case of 1 * 1, tcFullMultiply 283 requires two parts to hold the single-part result). So we add an 284 extra one to guarantee enough space whilst multiplying. */ 285 const unsigned int maxExponent = 16383; 286 const unsigned int maxPrecision = 113; 287 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 288 const unsigned int maxPowerOfFiveParts = 289 2 + 290 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 291 292 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 293 return semantics.precision; 294 } 295 APFloatBase::ExponentType 296 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 297 return semantics.maxExponent; 298 } 299 APFloatBase::ExponentType 300 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 301 return semantics.minExponent; 302 } 303 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 304 return semantics.sizeInBits; 305 } 306 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 307 bool isSigned) { 308 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 309 // at least one more bit than the MaxExponent to hold the max FP value. 310 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 311 // Extra sign bit needed. 312 if (isSigned) 313 ++MinBitWidth; 314 return MinBitWidth; 315 } 316 317 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 318 const fltSemantics &Dst) { 319 // Exponent range must be larger. 320 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 321 return false; 322 323 // If the mantissa is long enough, the result value could still be denormal 324 // with a larger exponent range. 325 // 326 // FIXME: This condition is probably not accurate but also shouldn't be a 327 // practical concern with existing types. 328 return Dst.precision >= Src.precision; 329 } 330 331 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 332 return Sem.sizeInBits; 333 } 334 335 static constexpr APFloatBase::ExponentType 336 exponentZero(const fltSemantics &semantics) { 337 return semantics.minExponent - 1; 338 } 339 340 static constexpr APFloatBase::ExponentType 341 exponentInf(const fltSemantics &semantics) { 342 return semantics.maxExponent + 1; 343 } 344 345 static constexpr APFloatBase::ExponentType 346 exponentNaN(const fltSemantics &semantics) { 347 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 348 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 349 return exponentZero(semantics); 350 return semantics.maxExponent; 351 } 352 return semantics.maxExponent + 1; 353 } 354 355 /* A bunch of private, handy routines. */ 356 357 static inline Error createError(const Twine &Err) { 358 return make_error<StringError>(Err, inconvertibleErrorCode()); 359 } 360 361 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 362 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 363 } 364 365 /* Returns 0U-9U. Return values >= 10U are not digits. */ 366 static inline unsigned int 367 decDigitValue(unsigned int c) 368 { 369 return c - '0'; 370 } 371 372 /* Return the value of a decimal exponent of the form 373 [+-]ddddddd. 374 375 If the exponent overflows, returns a large exponent with the 376 appropriate sign. */ 377 static Expected<int> readExponent(StringRef::iterator begin, 378 StringRef::iterator end) { 379 bool isNegative; 380 unsigned int absExponent; 381 const unsigned int overlargeExponent = 24000; /* FIXME. */ 382 StringRef::iterator p = begin; 383 384 // Treat no exponent as 0 to match binutils 385 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 386 return 0; 387 } 388 389 isNegative = (*p == '-'); 390 if (*p == '-' || *p == '+') { 391 p++; 392 if (p == end) 393 return createError("Exponent has no digits"); 394 } 395 396 absExponent = decDigitValue(*p++); 397 if (absExponent >= 10U) 398 return createError("Invalid character in exponent"); 399 400 for (; p != end; ++p) { 401 unsigned int value; 402 403 value = decDigitValue(*p); 404 if (value >= 10U) 405 return createError("Invalid character in exponent"); 406 407 absExponent = absExponent * 10U + value; 408 if (absExponent >= overlargeExponent) { 409 absExponent = overlargeExponent; 410 break; 411 } 412 } 413 414 if (isNegative) 415 return -(int) absExponent; 416 else 417 return (int) absExponent; 418 } 419 420 /* This is ugly and needs cleaning up, but I don't immediately see 421 how whilst remaining safe. */ 422 static Expected<int> totalExponent(StringRef::iterator p, 423 StringRef::iterator end, 424 int exponentAdjustment) { 425 int unsignedExponent; 426 bool negative, overflow; 427 int exponent = 0; 428 429 if (p == end) 430 return createError("Exponent has no digits"); 431 432 negative = *p == '-'; 433 if (*p == '-' || *p == '+') { 434 p++; 435 if (p == end) 436 return createError("Exponent has no digits"); 437 } 438 439 unsignedExponent = 0; 440 overflow = false; 441 for (; p != end; ++p) { 442 unsigned int value; 443 444 value = decDigitValue(*p); 445 if (value >= 10U) 446 return createError("Invalid character in exponent"); 447 448 unsignedExponent = unsignedExponent * 10 + value; 449 if (unsignedExponent > 32767) { 450 overflow = true; 451 break; 452 } 453 } 454 455 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 456 overflow = true; 457 458 if (!overflow) { 459 exponent = unsignedExponent; 460 if (negative) 461 exponent = -exponent; 462 exponent += exponentAdjustment; 463 if (exponent > 32767 || exponent < -32768) 464 overflow = true; 465 } 466 467 if (overflow) 468 exponent = negative ? -32768: 32767; 469 470 return exponent; 471 } 472 473 static Expected<StringRef::iterator> 474 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 475 StringRef::iterator *dot) { 476 StringRef::iterator p = begin; 477 *dot = end; 478 while (p != end && *p == '0') 479 p++; 480 481 if (p != end && *p == '.') { 482 *dot = p++; 483 484 if (end - begin == 1) 485 return createError("Significand has no digits"); 486 487 while (p != end && *p == '0') 488 p++; 489 } 490 491 return p; 492 } 493 494 /* Given a normal decimal floating point number of the form 495 496 dddd.dddd[eE][+-]ddd 497 498 where the decimal point and exponent are optional, fill out the 499 structure D. Exponent is appropriate if the significand is 500 treated as an integer, and normalizedExponent if the significand 501 is taken to have the decimal point after a single leading 502 non-zero digit. 503 504 If the value is zero, V->firstSigDigit points to a non-digit, and 505 the return exponent is zero. 506 */ 507 struct decimalInfo { 508 const char *firstSigDigit; 509 const char *lastSigDigit; 510 int exponent; 511 int normalizedExponent; 512 }; 513 514 static Error interpretDecimal(StringRef::iterator begin, 515 StringRef::iterator end, decimalInfo *D) { 516 StringRef::iterator dot = end; 517 518 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 519 if (!PtrOrErr) 520 return PtrOrErr.takeError(); 521 StringRef::iterator p = *PtrOrErr; 522 523 D->firstSigDigit = p; 524 D->exponent = 0; 525 D->normalizedExponent = 0; 526 527 for (; p != end; ++p) { 528 if (*p == '.') { 529 if (dot != end) 530 return createError("String contains multiple dots"); 531 dot = p++; 532 if (p == end) 533 break; 534 } 535 if (decDigitValue(*p) >= 10U) 536 break; 537 } 538 539 if (p != end) { 540 if (*p != 'e' && *p != 'E') 541 return createError("Invalid character in significand"); 542 if (p == begin) 543 return createError("Significand has no digits"); 544 if (dot != end && p - begin == 1) 545 return createError("Significand has no digits"); 546 547 /* p points to the first non-digit in the string */ 548 auto ExpOrErr = readExponent(p + 1, end); 549 if (!ExpOrErr) 550 return ExpOrErr.takeError(); 551 D->exponent = *ExpOrErr; 552 553 /* Implied decimal point? */ 554 if (dot == end) 555 dot = p; 556 } 557 558 /* If number is all zeroes accept any exponent. */ 559 if (p != D->firstSigDigit) { 560 /* Drop insignificant trailing zeroes. */ 561 if (p != begin) { 562 do 563 do 564 p--; 565 while (p != begin && *p == '0'); 566 while (p != begin && *p == '.'); 567 } 568 569 /* Adjust the exponents for any decimal point. */ 570 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 571 D->normalizedExponent = (D->exponent + 572 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 573 - (dot > D->firstSigDigit && dot < p))); 574 } 575 576 D->lastSigDigit = p; 577 return Error::success(); 578 } 579 580 /* Return the trailing fraction of a hexadecimal number. 581 DIGITVALUE is the first hex digit of the fraction, P points to 582 the next digit. */ 583 static Expected<lostFraction> 584 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 585 unsigned int digitValue) { 586 unsigned int hexDigit; 587 588 /* If the first trailing digit isn't 0 or 8 we can work out the 589 fraction immediately. */ 590 if (digitValue > 8) 591 return lfMoreThanHalf; 592 else if (digitValue < 8 && digitValue > 0) 593 return lfLessThanHalf; 594 595 // Otherwise we need to find the first non-zero digit. 596 while (p != end && (*p == '0' || *p == '.')) 597 p++; 598 599 if (p == end) 600 return createError("Invalid trailing hexadecimal fraction!"); 601 602 hexDigit = hexDigitValue(*p); 603 604 /* If we ran off the end it is exactly zero or one-half, otherwise 605 a little more. */ 606 if (hexDigit == UINT_MAX) 607 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 608 else 609 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 610 } 611 612 /* Return the fraction lost were a bignum truncated losing the least 613 significant BITS bits. */ 614 static lostFraction 615 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 616 unsigned int partCount, 617 unsigned int bits) 618 { 619 unsigned int lsb; 620 621 lsb = APInt::tcLSB(parts, partCount); 622 623 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ 624 if (bits <= lsb) 625 return lfExactlyZero; 626 if (bits == lsb + 1) 627 return lfExactlyHalf; 628 if (bits <= partCount * APFloatBase::integerPartWidth && 629 APInt::tcExtractBit(parts, bits - 1)) 630 return lfMoreThanHalf; 631 632 return lfLessThanHalf; 633 } 634 635 /* Shift DST right BITS bits noting lost fraction. */ 636 static lostFraction 637 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 638 { 639 lostFraction lost_fraction; 640 641 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 642 643 APInt::tcShiftRight(dst, parts, bits); 644 645 return lost_fraction; 646 } 647 648 /* Combine the effect of two lost fractions. */ 649 static lostFraction 650 combineLostFractions(lostFraction moreSignificant, 651 lostFraction lessSignificant) 652 { 653 if (lessSignificant != lfExactlyZero) { 654 if (moreSignificant == lfExactlyZero) 655 moreSignificant = lfLessThanHalf; 656 else if (moreSignificant == lfExactlyHalf) 657 moreSignificant = lfMoreThanHalf; 658 } 659 660 return moreSignificant; 661 } 662 663 /* The error from the true value, in half-ulps, on multiplying two 664 floating point numbers, which differ from the value they 665 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 666 than the returned value. 667 668 See "How to Read Floating Point Numbers Accurately" by William D 669 Clinger. */ 670 static unsigned int 671 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 672 { 673 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 674 675 if (HUerr1 + HUerr2 == 0) 676 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 677 else 678 return inexactMultiply + 2 * (HUerr1 + HUerr2); 679 } 680 681 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 682 when the least significant BITS are truncated. BITS cannot be 683 zero. */ 684 static APFloatBase::integerPart 685 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, 686 bool isNearest) { 687 unsigned int count, partBits; 688 APFloatBase::integerPart part, boundary; 689 690 assert(bits != 0); 691 692 bits--; 693 count = bits / APFloatBase::integerPartWidth; 694 partBits = bits % APFloatBase::integerPartWidth + 1; 695 696 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); 697 698 if (isNearest) 699 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); 700 else 701 boundary = 0; 702 703 if (count == 0) { 704 if (part - boundary <= boundary - part) 705 return part - boundary; 706 else 707 return boundary - part; 708 } 709 710 if (part == boundary) { 711 while (--count) 712 if (parts[count]) 713 return ~(APFloatBase::integerPart) 0; /* A lot. */ 714 715 return parts[0]; 716 } else if (part == boundary - 1) { 717 while (--count) 718 if (~parts[count]) 719 return ~(APFloatBase::integerPart) 0; /* A lot. */ 720 721 return -parts[0]; 722 } 723 724 return ~(APFloatBase::integerPart) 0; /* A lot. */ 725 } 726 727 /* Place pow(5, power) in DST, and return the number of parts used. 728 DST must be at least one part larger than size of the answer. */ 729 static unsigned int 730 powerOf5(APFloatBase::integerPart *dst, unsigned int power) { 731 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; 732 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 733 pow5s[0] = 78125 * 5; 734 735 unsigned int partsCount[16] = { 1 }; 736 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 737 unsigned int result; 738 assert(power <= maxExponent); 739 740 p1 = dst; 741 p2 = scratch; 742 743 *p1 = firstEightPowers[power & 7]; 744 power >>= 3; 745 746 result = 1; 747 pow5 = pow5s; 748 749 for (unsigned int n = 0; power; power >>= 1, n++) { 750 unsigned int pc; 751 752 pc = partsCount[n]; 753 754 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 755 if (pc == 0) { 756 pc = partsCount[n - 1]; 757 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 758 pc *= 2; 759 if (pow5[pc - 1] == 0) 760 pc--; 761 partsCount[n] = pc; 762 } 763 764 if (power & 1) { 765 APFloatBase::integerPart *tmp; 766 767 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 768 result += pc; 769 if (p2[result - 1] == 0) 770 result--; 771 772 /* Now result is in p1 with partsCount parts and p2 is scratch 773 space. */ 774 tmp = p1; 775 p1 = p2; 776 p2 = tmp; 777 } 778 779 pow5 += pc; 780 } 781 782 if (p1 != dst) 783 APInt::tcAssign(dst, p1, result); 784 785 return result; 786 } 787 788 /* Zero at the end to avoid modular arithmetic when adding one; used 789 when rounding up during hexadecimal output. */ 790 static const char hexDigitsLower[] = "0123456789abcdef0"; 791 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 792 static const char infinityL[] = "infinity"; 793 static const char infinityU[] = "INFINITY"; 794 static const char NaNL[] = "nan"; 795 static const char NaNU[] = "NAN"; 796 797 /* Write out an integerPart in hexadecimal, starting with the most 798 significant nibble. Write out exactly COUNT hexdigits, return 799 COUNT. */ 800 static unsigned int 801 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, 802 const char *hexDigitChars) 803 { 804 unsigned int result = count; 805 806 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); 807 808 part >>= (APFloatBase::integerPartWidth - 4 * count); 809 while (count--) { 810 dst[count] = hexDigitChars[part & 0xf]; 811 part >>= 4; 812 } 813 814 return result; 815 } 816 817 /* Write out an unsigned decimal integer. */ 818 static char * 819 writeUnsignedDecimal (char *dst, unsigned int n) 820 { 821 char buff[40], *p; 822 823 p = buff; 824 do 825 *p++ = '0' + n % 10; 826 while (n /= 10); 827 828 do 829 *dst++ = *--p; 830 while (p != buff); 831 832 return dst; 833 } 834 835 /* Write out a signed decimal integer. */ 836 static char * 837 writeSignedDecimal (char *dst, int value) 838 { 839 if (value < 0) { 840 *dst++ = '-'; 841 dst = writeUnsignedDecimal(dst, -(unsigned) value); 842 } else 843 dst = writeUnsignedDecimal(dst, value); 844 845 return dst; 846 } 847 848 namespace detail { 849 /* Constructors. */ 850 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 851 unsigned int count; 852 853 semantics = ourSemantics; 854 count = partCount(); 855 if (count > 1) 856 significand.parts = new integerPart[count]; 857 } 858 859 void IEEEFloat::freeSignificand() { 860 if (needsCleanup()) 861 delete [] significand.parts; 862 } 863 864 void IEEEFloat::assign(const IEEEFloat &rhs) { 865 assert(semantics == rhs.semantics); 866 867 sign = rhs.sign; 868 category = rhs.category; 869 exponent = rhs.exponent; 870 if (isFiniteNonZero() || category == fcNaN) 871 copySignificand(rhs); 872 } 873 874 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 875 assert(isFiniteNonZero() || category == fcNaN); 876 assert(rhs.partCount() >= partCount()); 877 878 APInt::tcAssign(significandParts(), rhs.significandParts(), 879 partCount()); 880 } 881 882 /* Make this number a NaN, with an arbitrary but deterministic value 883 for the significand. If double or longer, this is a signalling NaN, 884 which may not be ideal. If float, this is QNaN(0). */ 885 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 886 category = fcNaN; 887 sign = Negative; 888 exponent = exponentNaN(); 889 890 integerPart *significand = significandParts(); 891 unsigned numParts = partCount(); 892 893 APInt fill_storage; 894 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 895 // Finite-only types do not distinguish signalling and quiet NaN, so 896 // make them all signalling. 897 SNaN = false; 898 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 899 sign = true; 900 fill_storage = APInt::getZero(semantics->precision - 1); 901 } else { 902 fill_storage = APInt::getAllOnes(semantics->precision - 1); 903 } 904 fill = &fill_storage; 905 } 906 907 // Set the significand bits to the fill. 908 if (!fill || fill->getNumWords() < numParts) 909 APInt::tcSet(significand, 0, numParts); 910 if (fill) { 911 APInt::tcAssign(significand, fill->getRawData(), 912 std::min(fill->getNumWords(), numParts)); 913 914 // Zero out the excess bits of the significand. 915 unsigned bitsToPreserve = semantics->precision - 1; 916 unsigned part = bitsToPreserve / 64; 917 bitsToPreserve %= 64; 918 significand[part] &= ((1ULL << bitsToPreserve) - 1); 919 for (part++; part != numParts; ++part) 920 significand[part] = 0; 921 } 922 923 unsigned QNaNBit = semantics->precision - 2; 924 925 if (SNaN) { 926 // We always have to clear the QNaN bit to make it an SNaN. 927 APInt::tcClearBit(significand, QNaNBit); 928 929 // If there are no bits set in the payload, we have to set 930 // *something* to make it a NaN instead of an infinity; 931 // conventionally, this is the next bit down from the QNaN bit. 932 if (APInt::tcIsZero(significand, numParts)) 933 APInt::tcSetBit(significand, QNaNBit - 1); 934 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 935 // The only NaN is a quiet NaN, and it has no bits sets in the significand. 936 // Do nothing. 937 } else { 938 // We always have to set the QNaN bit to make it a QNaN. 939 APInt::tcSetBit(significand, QNaNBit); 940 } 941 942 // For x87 extended precision, we want to make a NaN, not a 943 // pseudo-NaN. Maybe we should expose the ability to make 944 // pseudo-NaNs? 945 if (semantics == &semX87DoubleExtended) 946 APInt::tcSetBit(significand, QNaNBit + 1); 947 } 948 949 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 950 if (this != &rhs) { 951 if (semantics != rhs.semantics) { 952 freeSignificand(); 953 initialize(rhs.semantics); 954 } 955 assign(rhs); 956 } 957 958 return *this; 959 } 960 961 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 962 freeSignificand(); 963 964 semantics = rhs.semantics; 965 significand = rhs.significand; 966 exponent = rhs.exponent; 967 category = rhs.category; 968 sign = rhs.sign; 969 970 rhs.semantics = &semBogus; 971 return *this; 972 } 973 974 bool IEEEFloat::isDenormal() const { 975 return isFiniteNonZero() && (exponent == semantics->minExponent) && 976 (APInt::tcExtractBit(significandParts(), 977 semantics->precision - 1) == 0); 978 } 979 980 bool IEEEFloat::isSmallest() const { 981 // The smallest number by magnitude in our format will be the smallest 982 // denormal, i.e. the floating point number with exponent being minimum 983 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 984 return isFiniteNonZero() && exponent == semantics->minExponent && 985 significandMSB() == 0; 986 } 987 988 bool IEEEFloat::isSmallestNormalized() const { 989 return getCategory() == fcNormal && exponent == semantics->minExponent && 990 isSignificandAllZerosExceptMSB(); 991 } 992 993 bool IEEEFloat::isSignificandAllOnes() const { 994 // Test if the significand excluding the integral bit is all ones. This allows 995 // us to test for binade boundaries. 996 const integerPart *Parts = significandParts(); 997 const unsigned PartCount = partCountForBits(semantics->precision); 998 for (unsigned i = 0; i < PartCount - 1; i++) 999 if (~Parts[i]) 1000 return false; 1001 1002 // Set the unused high bits to all ones when we compare. 1003 const unsigned NumHighBits = 1004 PartCount*integerPartWidth - semantics->precision + 1; 1005 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1006 "Can not have more high bits to fill than integerPartWidth"); 1007 const integerPart HighBitFill = 1008 ~integerPart(0) << (integerPartWidth - NumHighBits); 1009 if (~(Parts[PartCount - 1] | HighBitFill)) 1010 return false; 1011 1012 return true; 1013 } 1014 1015 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 1016 // Test if the significand excluding the integral bit is all ones except for 1017 // the least significant bit. 1018 const integerPart *Parts = significandParts(); 1019 1020 if (Parts[0] & 1) 1021 return false; 1022 1023 const unsigned PartCount = partCountForBits(semantics->precision); 1024 for (unsigned i = 0; i < PartCount - 1; i++) { 1025 if (~Parts[i] & ~unsigned{!i}) 1026 return false; 1027 } 1028 1029 // Set the unused high bits to all ones when we compare. 1030 const unsigned NumHighBits = 1031 PartCount * integerPartWidth - semantics->precision + 1; 1032 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1033 "Can not have more high bits to fill than integerPartWidth"); 1034 const integerPart HighBitFill = ~integerPart(0) 1035 << (integerPartWidth - NumHighBits); 1036 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1037 return false; 1038 1039 return true; 1040 } 1041 1042 bool IEEEFloat::isSignificandAllZeros() const { 1043 // Test if the significand excluding the integral bit is all zeros. This 1044 // allows us to test for binade boundaries. 1045 const integerPart *Parts = significandParts(); 1046 const unsigned PartCount = partCountForBits(semantics->precision); 1047 1048 for (unsigned i = 0; i < PartCount - 1; i++) 1049 if (Parts[i]) 1050 return false; 1051 1052 // Compute how many bits are used in the final word. 1053 const unsigned NumHighBits = 1054 PartCount*integerPartWidth - semantics->precision + 1; 1055 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1056 "clear than integerPartWidth"); 1057 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1058 1059 if (Parts[PartCount - 1] & HighBitMask) 1060 return false; 1061 1062 return true; 1063 } 1064 1065 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1066 const integerPart *Parts = significandParts(); 1067 const unsigned PartCount = partCountForBits(semantics->precision); 1068 1069 for (unsigned i = 0; i < PartCount - 1; i++) { 1070 if (Parts[i]) 1071 return false; 1072 } 1073 1074 const unsigned NumHighBits = 1075 PartCount * integerPartWidth - semantics->precision + 1; 1076 return Parts[PartCount - 1] == integerPart(1) 1077 << (integerPartWidth - NumHighBits); 1078 } 1079 1080 bool IEEEFloat::isLargest() const { 1081 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1082 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1083 // The largest number by magnitude in our format will be the floating point 1084 // number with maximum exponent and with significand that is all ones except 1085 // the LSB. 1086 return isFiniteNonZero() && exponent == semantics->maxExponent && 1087 isSignificandAllOnesExceptLSB(); 1088 } else { 1089 // The largest number by magnitude in our format will be the floating point 1090 // number with maximum exponent and with significand that is all ones. 1091 return isFiniteNonZero() && exponent == semantics->maxExponent && 1092 isSignificandAllOnes(); 1093 } 1094 } 1095 1096 bool IEEEFloat::isInteger() const { 1097 // This could be made more efficient; I'm going for obviously correct. 1098 if (!isFinite()) return false; 1099 IEEEFloat truncated = *this; 1100 truncated.roundToIntegral(rmTowardZero); 1101 return compare(truncated) == cmpEqual; 1102 } 1103 1104 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1105 if (this == &rhs) 1106 return true; 1107 if (semantics != rhs.semantics || 1108 category != rhs.category || 1109 sign != rhs.sign) 1110 return false; 1111 if (category==fcZero || category==fcInfinity) 1112 return true; 1113 1114 if (isFiniteNonZero() && exponent != rhs.exponent) 1115 return false; 1116 1117 return std::equal(significandParts(), significandParts() + partCount(), 1118 rhs.significandParts()); 1119 } 1120 1121 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1122 initialize(&ourSemantics); 1123 sign = 0; 1124 category = fcNormal; 1125 zeroSignificand(); 1126 exponent = ourSemantics.precision - 1; 1127 significandParts()[0] = value; 1128 normalize(rmNearestTiesToEven, lfExactlyZero); 1129 } 1130 1131 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1132 initialize(&ourSemantics); 1133 makeZero(false); 1134 } 1135 1136 // Delegate to the previous constructor, because later copy constructor may 1137 // actually inspects category, which can't be garbage. 1138 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1139 : IEEEFloat(ourSemantics) {} 1140 1141 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1142 initialize(rhs.semantics); 1143 assign(rhs); 1144 } 1145 1146 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1147 *this = std::move(rhs); 1148 } 1149 1150 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1151 1152 unsigned int IEEEFloat::partCount() const { 1153 return partCountForBits(semantics->precision + 1); 1154 } 1155 1156 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 1157 return const_cast<IEEEFloat *>(this)->significandParts(); 1158 } 1159 1160 IEEEFloat::integerPart *IEEEFloat::significandParts() { 1161 if (partCount() > 1) 1162 return significand.parts; 1163 else 1164 return &significand.part; 1165 } 1166 1167 void IEEEFloat::zeroSignificand() { 1168 APInt::tcSet(significandParts(), 0, partCount()); 1169 } 1170 1171 /* Increment an fcNormal floating point number's significand. */ 1172 void IEEEFloat::incrementSignificand() { 1173 integerPart carry; 1174 1175 carry = APInt::tcIncrement(significandParts(), partCount()); 1176 1177 /* Our callers should never cause us to overflow. */ 1178 assert(carry == 0); 1179 (void)carry; 1180 } 1181 1182 /* Add the significand of the RHS. Returns the carry flag. */ 1183 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1184 integerPart *parts; 1185 1186 parts = significandParts(); 1187 1188 assert(semantics == rhs.semantics); 1189 assert(exponent == rhs.exponent); 1190 1191 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1192 } 1193 1194 /* Subtract the significand of the RHS with a borrow flag. Returns 1195 the borrow flag. */ 1196 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 1197 integerPart borrow) { 1198 integerPart *parts; 1199 1200 parts = significandParts(); 1201 1202 assert(semantics == rhs.semantics); 1203 assert(exponent == rhs.exponent); 1204 1205 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 1206 partCount()); 1207 } 1208 1209 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 1210 on to the full-precision result of the multiplication. Returns the 1211 lost fraction. */ 1212 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 1213 IEEEFloat addend) { 1214 unsigned int omsb; // One, not zero, based MSB. 1215 unsigned int partsCount, newPartsCount, precision; 1216 integerPart *lhsSignificand; 1217 integerPart scratch[4]; 1218 integerPart *fullSignificand; 1219 lostFraction lost_fraction; 1220 bool ignored; 1221 1222 assert(semantics == rhs.semantics); 1223 1224 precision = semantics->precision; 1225 1226 // Allocate space for twice as many bits as the original significand, plus one 1227 // extra bit for the addition to overflow into. 1228 newPartsCount = partCountForBits(precision * 2 + 1); 1229 1230 if (newPartsCount > 4) 1231 fullSignificand = new integerPart[newPartsCount]; 1232 else 1233 fullSignificand = scratch; 1234 1235 lhsSignificand = significandParts(); 1236 partsCount = partCount(); 1237 1238 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 1239 rhs.significandParts(), partsCount, partsCount); 1240 1241 lost_fraction = lfExactlyZero; 1242 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1243 exponent += rhs.exponent; 1244 1245 // Assume the operands involved in the multiplication are single-precision 1246 // FP, and the two multiplicants are: 1247 // *this = a23 . a22 ... a0 * 2^e1 1248 // rhs = b23 . b22 ... b0 * 2^e2 1249 // the result of multiplication is: 1250 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 1251 // Note that there are three significant bits at the left-hand side of the 1252 // radix point: two for the multiplication, and an overflow bit for the 1253 // addition (that will always be zero at this point). Move the radix point 1254 // toward left by two bits, and adjust exponent accordingly. 1255 exponent += 2; 1256 1257 if (addend.isNonZero()) { 1258 // The intermediate result of the multiplication has "2 * precision" 1259 // signicant bit; adjust the addend to be consistent with mul result. 1260 // 1261 Significand savedSignificand = significand; 1262 const fltSemantics *savedSemantics = semantics; 1263 fltSemantics extendedSemantics; 1264 opStatus status; 1265 unsigned int extendedPrecision; 1266 1267 // Normalize our MSB to one below the top bit to allow for overflow. 1268 extendedPrecision = 2 * precision + 1; 1269 if (omsb != extendedPrecision - 1) { 1270 assert(extendedPrecision > omsb); 1271 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1272 (extendedPrecision - 1) - omsb); 1273 exponent -= (extendedPrecision - 1) - omsb; 1274 } 1275 1276 /* Create new semantics. */ 1277 extendedSemantics = *semantics; 1278 extendedSemantics.precision = extendedPrecision; 1279 1280 if (newPartsCount == 1) 1281 significand.part = fullSignificand[0]; 1282 else 1283 significand.parts = fullSignificand; 1284 semantics = &extendedSemantics; 1285 1286 // Make a copy so we can convert it to the extended semantics. 1287 // Note that we cannot convert the addend directly, as the extendedSemantics 1288 // is a local variable (which we take a reference to). 1289 IEEEFloat extendedAddend(addend); 1290 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1291 assert(status == opOK); 1292 (void)status; 1293 1294 // Shift the significand of the addend right by one bit. This guarantees 1295 // that the high bit of the significand is zero (same as fullSignificand), 1296 // so the addition will overflow (if it does overflow at all) into the top bit. 1297 lost_fraction = extendedAddend.shiftSignificandRight(1); 1298 assert(lost_fraction == lfExactlyZero && 1299 "Lost precision while shifting addend for fused-multiply-add."); 1300 1301 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1302 1303 /* Restore our state. */ 1304 if (newPartsCount == 1) 1305 fullSignificand[0] = significand.part; 1306 significand = savedSignificand; 1307 semantics = savedSemantics; 1308 1309 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1310 } 1311 1312 // Convert the result having "2 * precision" significant-bits back to the one 1313 // having "precision" significant-bits. First, move the radix point from 1314 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1315 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1316 exponent -= precision + 1; 1317 1318 // In case MSB resides at the left-hand side of radix point, shift the 1319 // mantissa right by some amount to make sure the MSB reside right before 1320 // the radix point (i.e. "MSB . rest-significant-bits"). 1321 // 1322 // Note that the result is not normalized when "omsb < precision". So, the 1323 // caller needs to call IEEEFloat::normalize() if normalized value is 1324 // expected. 1325 if (omsb > precision) { 1326 unsigned int bits, significantParts; 1327 lostFraction lf; 1328 1329 bits = omsb - precision; 1330 significantParts = partCountForBits(omsb); 1331 lf = shiftRight(fullSignificand, significantParts, bits); 1332 lost_fraction = combineLostFractions(lf, lost_fraction); 1333 exponent += bits; 1334 } 1335 1336 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1337 1338 if (newPartsCount > 4) 1339 delete [] fullSignificand; 1340 1341 return lost_fraction; 1342 } 1343 1344 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { 1345 return multiplySignificand(rhs, IEEEFloat(*semantics)); 1346 } 1347 1348 /* Multiply the significands of LHS and RHS to DST. */ 1349 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1350 unsigned int bit, i, partsCount; 1351 const integerPart *rhsSignificand; 1352 integerPart *lhsSignificand, *dividend, *divisor; 1353 integerPart scratch[4]; 1354 lostFraction lost_fraction; 1355 1356 assert(semantics == rhs.semantics); 1357 1358 lhsSignificand = significandParts(); 1359 rhsSignificand = rhs.significandParts(); 1360 partsCount = partCount(); 1361 1362 if (partsCount > 2) 1363 dividend = new integerPart[partsCount * 2]; 1364 else 1365 dividend = scratch; 1366 1367 divisor = dividend + partsCount; 1368 1369 /* Copy the dividend and divisor as they will be modified in-place. */ 1370 for (i = 0; i < partsCount; i++) { 1371 dividend[i] = lhsSignificand[i]; 1372 divisor[i] = rhsSignificand[i]; 1373 lhsSignificand[i] = 0; 1374 } 1375 1376 exponent -= rhs.exponent; 1377 1378 unsigned int precision = semantics->precision; 1379 1380 /* Normalize the divisor. */ 1381 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1382 if (bit) { 1383 exponent += bit; 1384 APInt::tcShiftLeft(divisor, partsCount, bit); 1385 } 1386 1387 /* Normalize the dividend. */ 1388 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1389 if (bit) { 1390 exponent -= bit; 1391 APInt::tcShiftLeft(dividend, partsCount, bit); 1392 } 1393 1394 /* Ensure the dividend >= divisor initially for the loop below. 1395 Incidentally, this means that the division loop below is 1396 guaranteed to set the integer bit to one. */ 1397 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1398 exponent--; 1399 APInt::tcShiftLeft(dividend, partsCount, 1); 1400 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1401 } 1402 1403 /* Long division. */ 1404 for (bit = precision; bit; bit -= 1) { 1405 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1406 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1407 APInt::tcSetBit(lhsSignificand, bit - 1); 1408 } 1409 1410 APInt::tcShiftLeft(dividend, partsCount, 1); 1411 } 1412 1413 /* Figure out the lost fraction. */ 1414 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1415 1416 if (cmp > 0) 1417 lost_fraction = lfMoreThanHalf; 1418 else if (cmp == 0) 1419 lost_fraction = lfExactlyHalf; 1420 else if (APInt::tcIsZero(dividend, partsCount)) 1421 lost_fraction = lfExactlyZero; 1422 else 1423 lost_fraction = lfLessThanHalf; 1424 1425 if (partsCount > 2) 1426 delete [] dividend; 1427 1428 return lost_fraction; 1429 } 1430 1431 unsigned int IEEEFloat::significandMSB() const { 1432 return APInt::tcMSB(significandParts(), partCount()); 1433 } 1434 1435 unsigned int IEEEFloat::significandLSB() const { 1436 return APInt::tcLSB(significandParts(), partCount()); 1437 } 1438 1439 /* Note that a zero result is NOT normalized to fcZero. */ 1440 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1441 /* Our exponent should not overflow. */ 1442 assert((ExponentType) (exponent + bits) >= exponent); 1443 1444 exponent += bits; 1445 1446 return shiftRight(significandParts(), partCount(), bits); 1447 } 1448 1449 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1450 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1451 assert(bits < semantics->precision); 1452 1453 if (bits) { 1454 unsigned int partsCount = partCount(); 1455 1456 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1457 exponent -= bits; 1458 1459 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1460 } 1461 } 1462 1463 IEEEFloat::cmpResult 1464 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1465 int compare; 1466 1467 assert(semantics == rhs.semantics); 1468 assert(isFiniteNonZero()); 1469 assert(rhs.isFiniteNonZero()); 1470 1471 compare = exponent - rhs.exponent; 1472 1473 /* If exponents are equal, do an unsigned bignum comparison of the 1474 significands. */ 1475 if (compare == 0) 1476 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1477 partCount()); 1478 1479 if (compare > 0) 1480 return cmpGreaterThan; 1481 else if (compare < 0) 1482 return cmpLessThan; 1483 else 1484 return cmpEqual; 1485 } 1486 1487 /* Set the least significant BITS bits of a bignum, clear the 1488 rest. */ 1489 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, 1490 unsigned bits) { 1491 unsigned i = 0; 1492 while (bits > APInt::APINT_BITS_PER_WORD) { 1493 dst[i++] = ~(APInt::WordType)0; 1494 bits -= APInt::APINT_BITS_PER_WORD; 1495 } 1496 1497 if (bits) 1498 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits); 1499 1500 while (i < parts) 1501 dst[i++] = 0; 1502 } 1503 1504 /* Handle overflow. Sign is preserved. We either become infinity or 1505 the largest finite number. */ 1506 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1507 /* Infinity? */ 1508 if (rounding_mode == rmNearestTiesToEven || 1509 rounding_mode == rmNearestTiesToAway || 1510 (rounding_mode == rmTowardPositive && !sign) || 1511 (rounding_mode == rmTowardNegative && sign)) { 1512 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1513 makeNaN(false, sign); 1514 else 1515 category = fcInfinity; 1516 return (opStatus) (opOverflow | opInexact); 1517 } 1518 1519 /* Otherwise we become the largest finite number. */ 1520 category = fcNormal; 1521 exponent = semantics->maxExponent; 1522 tcSetLeastSignificantBits(significandParts(), partCount(), 1523 semantics->precision); 1524 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1525 semantics->nanEncoding == fltNanEncoding::AllOnes) 1526 APInt::tcClearBit(significandParts(), 0); 1527 1528 return opInexact; 1529 } 1530 1531 /* Returns TRUE if, when truncating the current number, with BIT the 1532 new LSB, with the given lost fraction and rounding mode, the result 1533 would need to be rounded away from zero (i.e., by increasing the 1534 signficand). This routine must work for fcZero of both signs, and 1535 fcNormal numbers. */ 1536 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1537 lostFraction lost_fraction, 1538 unsigned int bit) const { 1539 /* NaNs and infinities should not have lost fractions. */ 1540 assert(isFiniteNonZero() || category == fcZero); 1541 1542 /* Current callers never pass this so we don't handle it. */ 1543 assert(lost_fraction != lfExactlyZero); 1544 1545 switch (rounding_mode) { 1546 case rmNearestTiesToAway: 1547 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1548 1549 case rmNearestTiesToEven: 1550 if (lost_fraction == lfMoreThanHalf) 1551 return true; 1552 1553 /* Our zeroes don't have a significand to test. */ 1554 if (lost_fraction == lfExactlyHalf && category != fcZero) 1555 return APInt::tcExtractBit(significandParts(), bit); 1556 1557 return false; 1558 1559 case rmTowardZero: 1560 return false; 1561 1562 case rmTowardPositive: 1563 return !sign; 1564 1565 case rmTowardNegative: 1566 return sign; 1567 1568 default: 1569 break; 1570 } 1571 llvm_unreachable("Invalid rounding mode found"); 1572 } 1573 1574 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1575 lostFraction lost_fraction) { 1576 unsigned int omsb; /* One, not zero, based MSB. */ 1577 int exponentChange; 1578 1579 if (!isFiniteNonZero()) 1580 return opOK; 1581 1582 /* Before rounding normalize the exponent of fcNormal numbers. */ 1583 omsb = significandMSB() + 1; 1584 1585 if (omsb) { 1586 /* OMSB is numbered from 1. We want to place it in the integer 1587 bit numbered PRECISION if possible, with a compensating change in 1588 the exponent. */ 1589 exponentChange = omsb - semantics->precision; 1590 1591 /* If the resulting exponent is too high, overflow according to 1592 the rounding mode. */ 1593 if (exponent + exponentChange > semantics->maxExponent) 1594 return handleOverflow(rounding_mode); 1595 1596 /* Subnormal numbers have exponent minExponent, and their MSB 1597 is forced based on that. */ 1598 if (exponent + exponentChange < semantics->minExponent) 1599 exponentChange = semantics->minExponent - exponent; 1600 1601 /* Shifting left is easy as we don't lose precision. */ 1602 if (exponentChange < 0) { 1603 assert(lost_fraction == lfExactlyZero); 1604 1605 shiftSignificandLeft(-exponentChange); 1606 1607 return opOK; 1608 } 1609 1610 if (exponentChange > 0) { 1611 lostFraction lf; 1612 1613 /* Shift right and capture any new lost fraction. */ 1614 lf = shiftSignificandRight(exponentChange); 1615 1616 lost_fraction = combineLostFractions(lf, lost_fraction); 1617 1618 /* Keep OMSB up-to-date. */ 1619 if (omsb > (unsigned) exponentChange) 1620 omsb -= exponentChange; 1621 else 1622 omsb = 0; 1623 } 1624 } 1625 1626 // The all-ones values is an overflow if NaN is all ones. If NaN is 1627 // represented by negative zero, then it is a valid finite value. 1628 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1629 semantics->nanEncoding == fltNanEncoding::AllOnes && 1630 exponent == semantics->maxExponent && isSignificandAllOnes()) 1631 return handleOverflow(rounding_mode); 1632 1633 /* Now round the number according to rounding_mode given the lost 1634 fraction. */ 1635 1636 /* As specified in IEEE 754, since we do not trap we do not report 1637 underflow for exact results. */ 1638 if (lost_fraction == lfExactlyZero) { 1639 /* Canonicalize zeroes. */ 1640 if (omsb == 0) { 1641 category = fcZero; 1642 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 1643 sign = false; 1644 } 1645 1646 return opOK; 1647 } 1648 1649 /* Increment the significand if we're rounding away from zero. */ 1650 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1651 if (omsb == 0) 1652 exponent = semantics->minExponent; 1653 1654 incrementSignificand(); 1655 omsb = significandMSB() + 1; 1656 1657 /* Did the significand increment overflow? */ 1658 if (omsb == (unsigned) semantics->precision + 1) { 1659 /* Renormalize by incrementing the exponent and shifting our 1660 significand right one. However if we already have the 1661 maximum exponent we overflow to infinity. */ 1662 if (exponent == semantics->maxExponent) 1663 // Invoke overflow handling with a rounding mode that will guarantee 1664 // that the result gets turned into the correct infinity representation. 1665 // This is needed instead of just setting the category to infinity to 1666 // account for 8-bit floating point types that have no inf, only NaN. 1667 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive); 1668 1669 shiftSignificandRight(1); 1670 1671 return opInexact; 1672 } 1673 1674 // The all-ones values is an overflow if NaN is all ones. If NaN is 1675 // represented by negative zero, then it is a valid finite value. 1676 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1677 semantics->nanEncoding == fltNanEncoding::AllOnes && 1678 exponent == semantics->maxExponent && isSignificandAllOnes()) 1679 return handleOverflow(rounding_mode); 1680 } 1681 1682 /* The normal case - we were and are not denormal, and any 1683 significand increment above didn't overflow. */ 1684 if (omsb == semantics->precision) 1685 return opInexact; 1686 1687 /* We have a non-zero denormal. */ 1688 assert(omsb < semantics->precision); 1689 1690 /* Canonicalize zeroes. */ 1691 if (omsb == 0) { 1692 category = fcZero; 1693 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 1694 sign = false; 1695 } 1696 1697 /* The fcZero case is a denormal that underflowed to zero. */ 1698 return (opStatus) (opUnderflow | opInexact); 1699 } 1700 1701 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1702 bool subtract) { 1703 switch (PackCategoriesIntoKey(category, rhs.category)) { 1704 default: 1705 llvm_unreachable(nullptr); 1706 1707 case PackCategoriesIntoKey(fcZero, fcNaN): 1708 case PackCategoriesIntoKey(fcNormal, fcNaN): 1709 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1710 assign(rhs); 1711 [[fallthrough]]; 1712 case PackCategoriesIntoKey(fcNaN, fcZero): 1713 case PackCategoriesIntoKey(fcNaN, fcNormal): 1714 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1715 case PackCategoriesIntoKey(fcNaN, fcNaN): 1716 if (isSignaling()) { 1717 makeQuiet(); 1718 return opInvalidOp; 1719 } 1720 return rhs.isSignaling() ? opInvalidOp : opOK; 1721 1722 case PackCategoriesIntoKey(fcNormal, fcZero): 1723 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1724 case PackCategoriesIntoKey(fcInfinity, fcZero): 1725 return opOK; 1726 1727 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1728 case PackCategoriesIntoKey(fcZero, fcInfinity): 1729 category = fcInfinity; 1730 sign = rhs.sign ^ subtract; 1731 return opOK; 1732 1733 case PackCategoriesIntoKey(fcZero, fcNormal): 1734 assign(rhs); 1735 sign = rhs.sign ^ subtract; 1736 return opOK; 1737 1738 case PackCategoriesIntoKey(fcZero, fcZero): 1739 /* Sign depends on rounding mode; handled by caller. */ 1740 return opOK; 1741 1742 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1743 /* Differently signed infinities can only be validly 1744 subtracted. */ 1745 if (((sign ^ rhs.sign)!=0) != subtract) { 1746 makeNaN(); 1747 return opInvalidOp; 1748 } 1749 1750 return opOK; 1751 1752 case PackCategoriesIntoKey(fcNormal, fcNormal): 1753 return opDivByZero; 1754 } 1755 } 1756 1757 /* Add or subtract two normal numbers. */ 1758 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1759 bool subtract) { 1760 integerPart carry; 1761 lostFraction lost_fraction; 1762 int bits; 1763 1764 /* Determine if the operation on the absolute values is effectively 1765 an addition or subtraction. */ 1766 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1767 1768 /* Are we bigger exponent-wise than the RHS? */ 1769 bits = exponent - rhs.exponent; 1770 1771 /* Subtraction is more subtle than one might naively expect. */ 1772 if (subtract) { 1773 IEEEFloat temp_rhs(rhs); 1774 1775 if (bits == 0) 1776 lost_fraction = lfExactlyZero; 1777 else if (bits > 0) { 1778 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1779 shiftSignificandLeft(1); 1780 } else { 1781 lost_fraction = shiftSignificandRight(-bits - 1); 1782 temp_rhs.shiftSignificandLeft(1); 1783 } 1784 1785 // Should we reverse the subtraction. 1786 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1787 carry = temp_rhs.subtractSignificand 1788 (*this, lost_fraction != lfExactlyZero); 1789 copySignificand(temp_rhs); 1790 sign = !sign; 1791 } else { 1792 carry = subtractSignificand 1793 (temp_rhs, lost_fraction != lfExactlyZero); 1794 } 1795 1796 /* Invert the lost fraction - it was on the RHS and 1797 subtracted. */ 1798 if (lost_fraction == lfLessThanHalf) 1799 lost_fraction = lfMoreThanHalf; 1800 else if (lost_fraction == lfMoreThanHalf) 1801 lost_fraction = lfLessThanHalf; 1802 1803 /* The code above is intended to ensure that no borrow is 1804 necessary. */ 1805 assert(!carry); 1806 (void)carry; 1807 } else { 1808 if (bits > 0) { 1809 IEEEFloat temp_rhs(rhs); 1810 1811 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1812 carry = addSignificand(temp_rhs); 1813 } else { 1814 lost_fraction = shiftSignificandRight(-bits); 1815 carry = addSignificand(rhs); 1816 } 1817 1818 /* We have a guard bit; generating a carry cannot happen. */ 1819 assert(!carry); 1820 (void)carry; 1821 } 1822 1823 return lost_fraction; 1824 } 1825 1826 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1827 switch (PackCategoriesIntoKey(category, rhs.category)) { 1828 default: 1829 llvm_unreachable(nullptr); 1830 1831 case PackCategoriesIntoKey(fcZero, fcNaN): 1832 case PackCategoriesIntoKey(fcNormal, fcNaN): 1833 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1834 assign(rhs); 1835 sign = false; 1836 [[fallthrough]]; 1837 case PackCategoriesIntoKey(fcNaN, fcZero): 1838 case PackCategoriesIntoKey(fcNaN, fcNormal): 1839 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1840 case PackCategoriesIntoKey(fcNaN, fcNaN): 1841 sign ^= rhs.sign; // restore the original sign 1842 if (isSignaling()) { 1843 makeQuiet(); 1844 return opInvalidOp; 1845 } 1846 return rhs.isSignaling() ? opInvalidOp : opOK; 1847 1848 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1849 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1850 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1851 category = fcInfinity; 1852 return opOK; 1853 1854 case PackCategoriesIntoKey(fcZero, fcNormal): 1855 case PackCategoriesIntoKey(fcNormal, fcZero): 1856 case PackCategoriesIntoKey(fcZero, fcZero): 1857 category = fcZero; 1858 return opOK; 1859 1860 case PackCategoriesIntoKey(fcZero, fcInfinity): 1861 case PackCategoriesIntoKey(fcInfinity, fcZero): 1862 makeNaN(); 1863 return opInvalidOp; 1864 1865 case PackCategoriesIntoKey(fcNormal, fcNormal): 1866 return opOK; 1867 } 1868 } 1869 1870 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1871 switch (PackCategoriesIntoKey(category, rhs.category)) { 1872 default: 1873 llvm_unreachable(nullptr); 1874 1875 case PackCategoriesIntoKey(fcZero, fcNaN): 1876 case PackCategoriesIntoKey(fcNormal, fcNaN): 1877 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1878 assign(rhs); 1879 sign = false; 1880 [[fallthrough]]; 1881 case PackCategoriesIntoKey(fcNaN, fcZero): 1882 case PackCategoriesIntoKey(fcNaN, fcNormal): 1883 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1884 case PackCategoriesIntoKey(fcNaN, fcNaN): 1885 sign ^= rhs.sign; // restore the original sign 1886 if (isSignaling()) { 1887 makeQuiet(); 1888 return opInvalidOp; 1889 } 1890 return rhs.isSignaling() ? opInvalidOp : opOK; 1891 1892 case PackCategoriesIntoKey(fcInfinity, fcZero): 1893 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1894 case PackCategoriesIntoKey(fcZero, fcInfinity): 1895 case PackCategoriesIntoKey(fcZero, fcNormal): 1896 return opOK; 1897 1898 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1899 category = fcZero; 1900 return opOK; 1901 1902 case PackCategoriesIntoKey(fcNormal, fcZero): 1903 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1904 makeNaN(false, sign); 1905 else 1906 category = fcInfinity; 1907 return opDivByZero; 1908 1909 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1910 case PackCategoriesIntoKey(fcZero, fcZero): 1911 makeNaN(); 1912 return opInvalidOp; 1913 1914 case PackCategoriesIntoKey(fcNormal, fcNormal): 1915 return opOK; 1916 } 1917 } 1918 1919 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1920 switch (PackCategoriesIntoKey(category, rhs.category)) { 1921 default: 1922 llvm_unreachable(nullptr); 1923 1924 case PackCategoriesIntoKey(fcZero, fcNaN): 1925 case PackCategoriesIntoKey(fcNormal, fcNaN): 1926 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1927 assign(rhs); 1928 [[fallthrough]]; 1929 case PackCategoriesIntoKey(fcNaN, fcZero): 1930 case PackCategoriesIntoKey(fcNaN, fcNormal): 1931 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1932 case PackCategoriesIntoKey(fcNaN, fcNaN): 1933 if (isSignaling()) { 1934 makeQuiet(); 1935 return opInvalidOp; 1936 } 1937 return rhs.isSignaling() ? opInvalidOp : opOK; 1938 1939 case PackCategoriesIntoKey(fcZero, fcInfinity): 1940 case PackCategoriesIntoKey(fcZero, fcNormal): 1941 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1942 return opOK; 1943 1944 case PackCategoriesIntoKey(fcNormal, fcZero): 1945 case PackCategoriesIntoKey(fcInfinity, fcZero): 1946 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1947 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1948 case PackCategoriesIntoKey(fcZero, fcZero): 1949 makeNaN(); 1950 return opInvalidOp; 1951 1952 case PackCategoriesIntoKey(fcNormal, fcNormal): 1953 return opOK; 1954 } 1955 } 1956 1957 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 1958 switch (PackCategoriesIntoKey(category, rhs.category)) { 1959 default: 1960 llvm_unreachable(nullptr); 1961 1962 case PackCategoriesIntoKey(fcZero, fcNaN): 1963 case PackCategoriesIntoKey(fcNormal, fcNaN): 1964 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1965 assign(rhs); 1966 [[fallthrough]]; 1967 case PackCategoriesIntoKey(fcNaN, fcZero): 1968 case PackCategoriesIntoKey(fcNaN, fcNormal): 1969 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1970 case PackCategoriesIntoKey(fcNaN, fcNaN): 1971 if (isSignaling()) { 1972 makeQuiet(); 1973 return opInvalidOp; 1974 } 1975 return rhs.isSignaling() ? opInvalidOp : opOK; 1976 1977 case PackCategoriesIntoKey(fcZero, fcInfinity): 1978 case PackCategoriesIntoKey(fcZero, fcNormal): 1979 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1980 return opOK; 1981 1982 case PackCategoriesIntoKey(fcNormal, fcZero): 1983 case PackCategoriesIntoKey(fcInfinity, fcZero): 1984 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1985 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1986 case PackCategoriesIntoKey(fcZero, fcZero): 1987 makeNaN(); 1988 return opInvalidOp; 1989 1990 case PackCategoriesIntoKey(fcNormal, fcNormal): 1991 return opDivByZero; // fake status, indicating this is not a special case 1992 } 1993 } 1994 1995 /* Change sign. */ 1996 void IEEEFloat::changeSign() { 1997 // With NaN-as-negative-zero, neither NaN or negative zero can change 1998 // their signs. 1999 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 2000 (isZero() || isNaN())) 2001 return; 2002 /* Look mummy, this one's easy. */ 2003 sign = !sign; 2004 } 2005 2006 /* Normalized addition or subtraction. */ 2007 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2008 roundingMode rounding_mode, 2009 bool subtract) { 2010 opStatus fs; 2011 2012 fs = addOrSubtractSpecials(rhs, subtract); 2013 2014 /* This return code means it was not a simple case. */ 2015 if (fs == opDivByZero) { 2016 lostFraction lost_fraction; 2017 2018 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2019 fs = normalize(rounding_mode, lost_fraction); 2020 2021 /* Can only be zero if we lost no fraction. */ 2022 assert(category != fcZero || lost_fraction == lfExactlyZero); 2023 } 2024 2025 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2026 positive zero unless rounding to minus infinity, except that 2027 adding two like-signed zeroes gives that zero. */ 2028 if (category == fcZero) { 2029 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2030 sign = (rounding_mode == rmTowardNegative); 2031 // NaN-in-negative-zero means zeros need to be normalized to +0. 2032 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2033 sign = false; 2034 } 2035 2036 return fs; 2037 } 2038 2039 /* Normalized addition. */ 2040 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2041 roundingMode rounding_mode) { 2042 return addOrSubtract(rhs, rounding_mode, false); 2043 } 2044 2045 /* Normalized subtraction. */ 2046 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2047 roundingMode rounding_mode) { 2048 return addOrSubtract(rhs, rounding_mode, true); 2049 } 2050 2051 /* Normalized multiply. */ 2052 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 2053 roundingMode rounding_mode) { 2054 opStatus fs; 2055 2056 sign ^= rhs.sign; 2057 fs = multiplySpecials(rhs); 2058 2059 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2060 sign = false; 2061 if (isFiniteNonZero()) { 2062 lostFraction lost_fraction = multiplySignificand(rhs); 2063 fs = normalize(rounding_mode, lost_fraction); 2064 if (lost_fraction != lfExactlyZero) 2065 fs = (opStatus) (fs | opInexact); 2066 } 2067 2068 return fs; 2069 } 2070 2071 /* Normalized divide. */ 2072 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 2073 roundingMode rounding_mode) { 2074 opStatus fs; 2075 2076 sign ^= rhs.sign; 2077 fs = divideSpecials(rhs); 2078 2079 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2080 sign = false; 2081 if (isFiniteNonZero()) { 2082 lostFraction lost_fraction = divideSignificand(rhs); 2083 fs = normalize(rounding_mode, lost_fraction); 2084 if (lost_fraction != lfExactlyZero) 2085 fs = (opStatus) (fs | opInexact); 2086 } 2087 2088 return fs; 2089 } 2090 2091 /* Normalized remainder. */ 2092 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 2093 opStatus fs; 2094 unsigned int origSign = sign; 2095 2096 // First handle the special cases. 2097 fs = remainderSpecials(rhs); 2098 if (fs != opDivByZero) 2099 return fs; 2100 2101 fs = opOK; 2102 2103 // Make sure the current value is less than twice the denom. If the addition 2104 // did not succeed (an overflow has happened), which means that the finite 2105 // value we currently posses must be less than twice the denom (as we are 2106 // using the same semantics). 2107 IEEEFloat P2 = rhs; 2108 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 2109 fs = mod(P2); 2110 assert(fs == opOK); 2111 } 2112 2113 // Lets work with absolute numbers. 2114 IEEEFloat P = rhs; 2115 P.sign = false; 2116 sign = false; 2117 2118 // 2119 // To calculate the remainder we use the following scheme. 2120 // 2121 // The remainder is defained as follows: 2122 // 2123 // remainder = numer - rquot * denom = x - r * p 2124 // 2125 // Where r is the result of: x/p, rounded toward the nearest integral value 2126 // (with halfway cases rounded toward the even number). 2127 // 2128 // Currently, (after x mod 2p): 2129 // r is the number of 2p's present inside x, which is inherently, an even 2130 // number of p's. 2131 // 2132 // We may split the remaining calculation into 4 options: 2133 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2134 // - if x == 0.5p then we round to the nearest even number which is 0, and we 2135 // are done as well. 2136 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 2137 // to subtract 1p at least once. 2138 // - if x >= p then we must subtract p at least once, as x must be a 2139 // remainder. 2140 // 2141 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 2142 // 2143 // We can now split the remaining calculation to the following 3 options: 2144 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2145 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 2146 // must round up to the next even number. so we must subtract p once more. 2147 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 2148 // integral, and subtract p once more. 2149 // 2150 2151 // Extend the semantics to prevent an overflow/underflow or inexact result. 2152 bool losesInfo; 2153 fltSemantics extendedSemantics = *semantics; 2154 extendedSemantics.maxExponent++; 2155 extendedSemantics.minExponent--; 2156 extendedSemantics.precision += 2; 2157 2158 IEEEFloat VEx = *this; 2159 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2160 assert(fs == opOK && !losesInfo); 2161 IEEEFloat PEx = P; 2162 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2163 assert(fs == opOK && !losesInfo); 2164 2165 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 2166 // any fraction. 2167 fs = VEx.add(VEx, rmNearestTiesToEven); 2168 assert(fs == opOK); 2169 2170 if (VEx.compare(PEx) == cmpGreaterThan) { 2171 fs = subtract(P, rmNearestTiesToEven); 2172 assert(fs == opOK); 2173 2174 // Make VEx = this.add(this), but because we have different semantics, we do 2175 // not want to `convert` again, so we just subtract PEx twice (which equals 2176 // to the desired value). 2177 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2178 assert(fs == opOK); 2179 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2180 assert(fs == opOK); 2181 2182 cmpResult result = VEx.compare(PEx); 2183 if (result == cmpGreaterThan || result == cmpEqual) { 2184 fs = subtract(P, rmNearestTiesToEven); 2185 assert(fs == opOK); 2186 } 2187 } 2188 2189 if (isZero()) { 2190 sign = origSign; // IEEE754 requires this 2191 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2192 // But some 8-bit floats only have positive 0. 2193 sign = false; 2194 } 2195 2196 else 2197 sign ^= origSign; 2198 return fs; 2199 } 2200 2201 /* Normalized llvm frem (C fmod). */ 2202 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 2203 opStatus fs; 2204 fs = modSpecials(rhs); 2205 unsigned int origSign = sign; 2206 2207 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 2208 compareAbsoluteValue(rhs) != cmpLessThan) { 2209 int Exp = ilogb(*this) - ilogb(rhs); 2210 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven); 2211 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly 2212 // check for it. 2213 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan) 2214 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven); 2215 V.sign = sign; 2216 2217 fs = subtract(V, rmNearestTiesToEven); 2218 assert(fs==opOK); 2219 } 2220 if (isZero()) { 2221 sign = origSign; // fmod requires this 2222 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2223 sign = false; 2224 } 2225 return fs; 2226 } 2227 2228 /* Normalized fused-multiply-add. */ 2229 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 2230 const IEEEFloat &addend, 2231 roundingMode rounding_mode) { 2232 opStatus fs; 2233 2234 /* Post-multiplication sign, before addition. */ 2235 sign ^= multiplicand.sign; 2236 2237 /* If and only if all arguments are normal do we need to do an 2238 extended-precision calculation. */ 2239 if (isFiniteNonZero() && 2240 multiplicand.isFiniteNonZero() && 2241 addend.isFinite()) { 2242 lostFraction lost_fraction; 2243 2244 lost_fraction = multiplySignificand(multiplicand, addend); 2245 fs = normalize(rounding_mode, lost_fraction); 2246 if (lost_fraction != lfExactlyZero) 2247 fs = (opStatus) (fs | opInexact); 2248 2249 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2250 positive zero unless rounding to minus infinity, except that 2251 adding two like-signed zeroes gives that zero. */ 2252 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { 2253 sign = (rounding_mode == rmTowardNegative); 2254 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2255 sign = false; 2256 } 2257 } else { 2258 fs = multiplySpecials(multiplicand); 2259 2260 /* FS can only be opOK or opInvalidOp. There is no more work 2261 to do in the latter case. The IEEE-754R standard says it is 2262 implementation-defined in this case whether, if ADDEND is a 2263 quiet NaN, we raise invalid op; this implementation does so. 2264 2265 If we need to do the addition we can do so with normal 2266 precision. */ 2267 if (fs == opOK) 2268 fs = addOrSubtract(addend, rounding_mode, false); 2269 } 2270 2271 return fs; 2272 } 2273 2274 /* Rounding-mode correct round to integral value. */ 2275 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2276 opStatus fs; 2277 2278 if (isInfinity()) 2279 // [IEEE Std 754-2008 6.1]: 2280 // The behavior of infinity in floating-point arithmetic is derived from the 2281 // limiting cases of real arithmetic with operands of arbitrarily 2282 // large magnitude, when such a limit exists. 2283 // ... 2284 // Operations on infinite operands are usually exact and therefore signal no 2285 // exceptions ... 2286 return opOK; 2287 2288 if (isNaN()) { 2289 if (isSignaling()) { 2290 // [IEEE Std 754-2008 6.2]: 2291 // Under default exception handling, any operation signaling an invalid 2292 // operation exception and for which a floating-point result is to be 2293 // delivered shall deliver a quiet NaN. 2294 makeQuiet(); 2295 // [IEEE Std 754-2008 6.2]: 2296 // Signaling NaNs shall be reserved operands that, under default exception 2297 // handling, signal the invalid operation exception(see 7.2) for every 2298 // general-computational and signaling-computational operation except for 2299 // the conversions described in 5.12. 2300 return opInvalidOp; 2301 } else { 2302 // [IEEE Std 754-2008 6.2]: 2303 // For an operation with quiet NaN inputs, other than maximum and minimum 2304 // operations, if a floating-point result is to be delivered the result 2305 // shall be a quiet NaN which should be one of the input NaNs. 2306 // ... 2307 // Every general-computational and quiet-computational operation involving 2308 // one or more input NaNs, none of them signaling, shall signal no 2309 // exception, except fusedMultiplyAdd might signal the invalid operation 2310 // exception(see 7.2). 2311 return opOK; 2312 } 2313 } 2314 2315 if (isZero()) { 2316 // [IEEE Std 754-2008 6.3]: 2317 // ... the sign of the result of conversions, the quantize operation, the 2318 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2319 // the sign of the first or only operand. 2320 return opOK; 2321 } 2322 2323 // If the exponent is large enough, we know that this value is already 2324 // integral, and the arithmetic below would potentially cause it to saturate 2325 // to +/-Inf. Bail out early instead. 2326 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2327 return opOK; 2328 2329 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2330 // precision of our format, and then subtract it back off again. The choice 2331 // of rounding modes for the addition/subtraction determines the rounding mode 2332 // for our integral rounding as well. 2333 // NOTE: When the input value is negative, we do subtraction followed by 2334 // addition instead. 2335 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2336 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2337 IEEEFloat MagicConstant(*semantics); 2338 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2339 rmNearestTiesToEven); 2340 assert(fs == opOK); 2341 MagicConstant.sign = sign; 2342 2343 // Preserve the input sign so that we can handle the case of zero result 2344 // correctly. 2345 bool inputSign = isNegative(); 2346 2347 fs = add(MagicConstant, rounding_mode); 2348 2349 // Current value and 'MagicConstant' are both integers, so the result of the 2350 // subtraction is always exact according to Sterbenz' lemma. 2351 subtract(MagicConstant, rounding_mode); 2352 2353 // Restore the input sign. 2354 if (inputSign != isNegative()) 2355 changeSign(); 2356 2357 return fs; 2358 } 2359 2360 2361 /* Comparison requires normalized numbers. */ 2362 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2363 cmpResult result; 2364 2365 assert(semantics == rhs.semantics); 2366 2367 switch (PackCategoriesIntoKey(category, rhs.category)) { 2368 default: 2369 llvm_unreachable(nullptr); 2370 2371 case PackCategoriesIntoKey(fcNaN, fcZero): 2372 case PackCategoriesIntoKey(fcNaN, fcNormal): 2373 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2374 case PackCategoriesIntoKey(fcNaN, fcNaN): 2375 case PackCategoriesIntoKey(fcZero, fcNaN): 2376 case PackCategoriesIntoKey(fcNormal, fcNaN): 2377 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2378 return cmpUnordered; 2379 2380 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2381 case PackCategoriesIntoKey(fcInfinity, fcZero): 2382 case PackCategoriesIntoKey(fcNormal, fcZero): 2383 if (sign) 2384 return cmpLessThan; 2385 else 2386 return cmpGreaterThan; 2387 2388 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2389 case PackCategoriesIntoKey(fcZero, fcInfinity): 2390 case PackCategoriesIntoKey(fcZero, fcNormal): 2391 if (rhs.sign) 2392 return cmpGreaterThan; 2393 else 2394 return cmpLessThan; 2395 2396 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2397 if (sign == rhs.sign) 2398 return cmpEqual; 2399 else if (sign) 2400 return cmpLessThan; 2401 else 2402 return cmpGreaterThan; 2403 2404 case PackCategoriesIntoKey(fcZero, fcZero): 2405 return cmpEqual; 2406 2407 case PackCategoriesIntoKey(fcNormal, fcNormal): 2408 break; 2409 } 2410 2411 /* Two normal numbers. Do they have the same sign? */ 2412 if (sign != rhs.sign) { 2413 if (sign) 2414 result = cmpLessThan; 2415 else 2416 result = cmpGreaterThan; 2417 } else { 2418 /* Compare absolute values; invert result if negative. */ 2419 result = compareAbsoluteValue(rhs); 2420 2421 if (sign) { 2422 if (result == cmpLessThan) 2423 result = cmpGreaterThan; 2424 else if (result == cmpGreaterThan) 2425 result = cmpLessThan; 2426 } 2427 } 2428 2429 return result; 2430 } 2431 2432 /// IEEEFloat::convert - convert a value of one floating point type to another. 2433 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2434 /// records whether the transformation lost information, i.e. whether 2435 /// converting the result back to the original type will produce the 2436 /// original value (this is almost the same as return value==fsOK, but there 2437 /// are edge cases where this is not so). 2438 2439 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2440 roundingMode rounding_mode, 2441 bool *losesInfo) { 2442 lostFraction lostFraction; 2443 unsigned int newPartCount, oldPartCount; 2444 opStatus fs; 2445 int shift; 2446 const fltSemantics &fromSemantics = *semantics; 2447 bool is_signaling = isSignaling(); 2448 2449 lostFraction = lfExactlyZero; 2450 newPartCount = partCountForBits(toSemantics.precision + 1); 2451 oldPartCount = partCount(); 2452 shift = toSemantics.precision - fromSemantics.precision; 2453 2454 bool X86SpecialNan = false; 2455 if (&fromSemantics == &semX87DoubleExtended && 2456 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2457 (!(*significandParts() & 0x8000000000000000ULL) || 2458 !(*significandParts() & 0x4000000000000000ULL))) { 2459 // x86 has some unusual NaNs which cannot be represented in any other 2460 // format; note them here. 2461 X86SpecialNan = true; 2462 } 2463 2464 // If this is a truncation of a denormal number, and the target semantics 2465 // has larger exponent range than the source semantics (this can happen 2466 // when truncating from PowerPC double-double to double format), the 2467 // right shift could lose result mantissa bits. Adjust exponent instead 2468 // of performing excessive shift. 2469 // Also do a similar trick in case shifting denormal would produce zero 2470 // significand as this case isn't handled correctly by normalize. 2471 if (shift < 0 && isFiniteNonZero()) { 2472 int omsb = significandMSB() + 1; 2473 int exponentChange = omsb - fromSemantics.precision; 2474 if (exponent + exponentChange < toSemantics.minExponent) 2475 exponentChange = toSemantics.minExponent - exponent; 2476 if (exponentChange < shift) 2477 exponentChange = shift; 2478 if (exponentChange < 0) { 2479 shift -= exponentChange; 2480 exponent += exponentChange; 2481 } else if (omsb <= -shift) { 2482 exponentChange = omsb + shift - 1; // leave at least one bit set 2483 shift -= exponentChange; 2484 exponent += exponentChange; 2485 } 2486 } 2487 2488 // If this is a truncation, perform the shift before we narrow the storage. 2489 if (shift < 0 && (isFiniteNonZero() || 2490 (category == fcNaN && semantics->nonFiniteBehavior != 2491 fltNonfiniteBehavior::NanOnly))) 2492 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2493 2494 // Fix the storage so it can hold to new value. 2495 if (newPartCount > oldPartCount) { 2496 // The new type requires more storage; make it available. 2497 integerPart *newParts; 2498 newParts = new integerPart[newPartCount]; 2499 APInt::tcSet(newParts, 0, newPartCount); 2500 if (isFiniteNonZero() || category==fcNaN) 2501 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2502 freeSignificand(); 2503 significand.parts = newParts; 2504 } else if (newPartCount == 1 && oldPartCount != 1) { 2505 // Switch to built-in storage for a single part. 2506 integerPart newPart = 0; 2507 if (isFiniteNonZero() || category==fcNaN) 2508 newPart = significandParts()[0]; 2509 freeSignificand(); 2510 significand.part = newPart; 2511 } 2512 2513 // Now that we have the right storage, switch the semantics. 2514 semantics = &toSemantics; 2515 2516 // If this is an extension, perform the shift now that the storage is 2517 // available. 2518 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2519 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2520 2521 if (isFiniteNonZero()) { 2522 fs = normalize(rounding_mode, lostFraction); 2523 *losesInfo = (fs != opOK); 2524 } else if (category == fcNaN) { 2525 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2526 *losesInfo = 2527 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly; 2528 makeNaN(false, sign); 2529 return is_signaling ? opInvalidOp : opOK; 2530 } 2531 2532 // If NaN is negative zero, we need to create a new NaN to avoid converting 2533 // NaN to -Inf. 2534 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero && 2535 semantics->nanEncoding != fltNanEncoding::NegativeZero) 2536 makeNaN(false, false); 2537 2538 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2539 2540 // For x87 extended precision, we want to make a NaN, not a special NaN if 2541 // the input wasn't special either. 2542 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2543 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2544 2545 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2546 // This also guarantees that a sNaN does not become Inf on a truncation 2547 // that loses all payload bits. 2548 if (is_signaling) { 2549 makeQuiet(); 2550 fs = opInvalidOp; 2551 } else { 2552 fs = opOK; 2553 } 2554 } else if (category == fcInfinity && 2555 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2556 makeNaN(false, sign); 2557 *losesInfo = true; 2558 fs = opInexact; 2559 } else if (category == fcZero && 2560 semantics->nanEncoding == fltNanEncoding::NegativeZero) { 2561 // Negative zero loses info, but positive zero doesn't. 2562 *losesInfo = 2563 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign; 2564 fs = *losesInfo ? opInexact : opOK; 2565 // NaN is negative zero means -0 -> +0, which can lose information 2566 sign = false; 2567 } else { 2568 *losesInfo = false; 2569 fs = opOK; 2570 } 2571 2572 return fs; 2573 } 2574 2575 /* Convert a floating point number to an integer according to the 2576 rounding mode. If the rounded integer value is out of range this 2577 returns an invalid operation exception and the contents of the 2578 destination parts are unspecified. If the rounded value is in 2579 range but the floating point number is not the exact integer, the C 2580 standard doesn't require an inexact exception to be raised. IEEE 2581 854 does require it so we do that. 2582 2583 Note that for conversions to integer type the C standard requires 2584 round-to-zero to always be used. */ 2585 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2586 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2587 roundingMode rounding_mode, bool *isExact) const { 2588 lostFraction lost_fraction; 2589 const integerPart *src; 2590 unsigned int dstPartsCount, truncatedBits; 2591 2592 *isExact = false; 2593 2594 /* Handle the three special cases first. */ 2595 if (category == fcInfinity || category == fcNaN) 2596 return opInvalidOp; 2597 2598 dstPartsCount = partCountForBits(width); 2599 assert(dstPartsCount <= parts.size() && "Integer too big"); 2600 2601 if (category == fcZero) { 2602 APInt::tcSet(parts.data(), 0, dstPartsCount); 2603 // Negative zero can't be represented as an int. 2604 *isExact = !sign; 2605 return opOK; 2606 } 2607 2608 src = significandParts(); 2609 2610 /* Step 1: place our absolute value, with any fraction truncated, in 2611 the destination. */ 2612 if (exponent < 0) { 2613 /* Our absolute value is less than one; truncate everything. */ 2614 APInt::tcSet(parts.data(), 0, dstPartsCount); 2615 /* For exponent -1 the integer bit represents .5, look at that. 2616 For smaller exponents leftmost truncated bit is 0. */ 2617 truncatedBits = semantics->precision -1U - exponent; 2618 } else { 2619 /* We want the most significant (exponent + 1) bits; the rest are 2620 truncated. */ 2621 unsigned int bits = exponent + 1U; 2622 2623 /* Hopelessly large in magnitude? */ 2624 if (bits > width) 2625 return opInvalidOp; 2626 2627 if (bits < semantics->precision) { 2628 /* We truncate (semantics->precision - bits) bits. */ 2629 truncatedBits = semantics->precision - bits; 2630 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2631 } else { 2632 /* We want at least as many bits as are available. */ 2633 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2634 0); 2635 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2636 bits - semantics->precision); 2637 truncatedBits = 0; 2638 } 2639 } 2640 2641 /* Step 2: work out any lost fraction, and increment the absolute 2642 value if we would round away from zero. */ 2643 if (truncatedBits) { 2644 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2645 truncatedBits); 2646 if (lost_fraction != lfExactlyZero && 2647 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2648 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2649 return opInvalidOp; /* Overflow. */ 2650 } 2651 } else { 2652 lost_fraction = lfExactlyZero; 2653 } 2654 2655 /* Step 3: check if we fit in the destination. */ 2656 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2657 2658 if (sign) { 2659 if (!isSigned) { 2660 /* Negative numbers cannot be represented as unsigned. */ 2661 if (omsb != 0) 2662 return opInvalidOp; 2663 } else { 2664 /* It takes omsb bits to represent the unsigned integer value. 2665 We lose a bit for the sign, but care is needed as the 2666 maximally negative integer is a special case. */ 2667 if (omsb == width && 2668 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2669 return opInvalidOp; 2670 2671 /* This case can happen because of rounding. */ 2672 if (omsb > width) 2673 return opInvalidOp; 2674 } 2675 2676 APInt::tcNegate (parts.data(), dstPartsCount); 2677 } else { 2678 if (omsb >= width + !isSigned) 2679 return opInvalidOp; 2680 } 2681 2682 if (lost_fraction == lfExactlyZero) { 2683 *isExact = true; 2684 return opOK; 2685 } else 2686 return opInexact; 2687 } 2688 2689 /* Same as convertToSignExtendedInteger, except we provide 2690 deterministic values in case of an invalid operation exception, 2691 namely zero for NaNs and the minimal or maximal value respectively 2692 for underflow or overflow. 2693 The *isExact output tells whether the result is exact, in the sense 2694 that converting it back to the original floating point type produces 2695 the original value. This is almost equivalent to result==opOK, 2696 except for negative zeroes. 2697 */ 2698 IEEEFloat::opStatus 2699 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2700 unsigned int width, bool isSigned, 2701 roundingMode rounding_mode, bool *isExact) const { 2702 opStatus fs; 2703 2704 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2705 isExact); 2706 2707 if (fs == opInvalidOp) { 2708 unsigned int bits, dstPartsCount; 2709 2710 dstPartsCount = partCountForBits(width); 2711 assert(dstPartsCount <= parts.size() && "Integer too big"); 2712 2713 if (category == fcNaN) 2714 bits = 0; 2715 else if (sign) 2716 bits = isSigned; 2717 else 2718 bits = width - isSigned; 2719 2720 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2721 if (sign && isSigned) 2722 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2723 } 2724 2725 return fs; 2726 } 2727 2728 /* Convert an unsigned integer SRC to a floating point number, 2729 rounding according to ROUNDING_MODE. The sign of the floating 2730 point number is not modified. */ 2731 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2732 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2733 unsigned int omsb, precision, dstCount; 2734 integerPart *dst; 2735 lostFraction lost_fraction; 2736 2737 category = fcNormal; 2738 omsb = APInt::tcMSB(src, srcCount) + 1; 2739 dst = significandParts(); 2740 dstCount = partCount(); 2741 precision = semantics->precision; 2742 2743 /* We want the most significant PRECISION bits of SRC. There may not 2744 be that many; extract what we can. */ 2745 if (precision <= omsb) { 2746 exponent = omsb - 1; 2747 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2748 omsb - precision); 2749 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2750 } else { 2751 exponent = precision - 1; 2752 lost_fraction = lfExactlyZero; 2753 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2754 } 2755 2756 return normalize(rounding_mode, lost_fraction); 2757 } 2758 2759 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2760 roundingMode rounding_mode) { 2761 unsigned int partCount = Val.getNumWords(); 2762 APInt api = Val; 2763 2764 sign = false; 2765 if (isSigned && api.isNegative()) { 2766 sign = true; 2767 api = -api; 2768 } 2769 2770 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2771 } 2772 2773 /* Convert a two's complement integer SRC to a floating point number, 2774 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2775 integer is signed, in which case it must be sign-extended. */ 2776 IEEEFloat::opStatus 2777 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2778 unsigned int srcCount, bool isSigned, 2779 roundingMode rounding_mode) { 2780 opStatus status; 2781 2782 if (isSigned && 2783 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2784 integerPart *copy; 2785 2786 /* If we're signed and negative negate a copy. */ 2787 sign = true; 2788 copy = new integerPart[srcCount]; 2789 APInt::tcAssign(copy, src, srcCount); 2790 APInt::tcNegate(copy, srcCount); 2791 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2792 delete [] copy; 2793 } else { 2794 sign = false; 2795 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2796 } 2797 2798 return status; 2799 } 2800 2801 /* FIXME: should this just take a const APInt reference? */ 2802 IEEEFloat::opStatus 2803 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2804 unsigned int width, bool isSigned, 2805 roundingMode rounding_mode) { 2806 unsigned int partCount = partCountForBits(width); 2807 APInt api = APInt(width, ArrayRef(parts, partCount)); 2808 2809 sign = false; 2810 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2811 sign = true; 2812 api = -api; 2813 } 2814 2815 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2816 } 2817 2818 Expected<IEEEFloat::opStatus> 2819 IEEEFloat::convertFromHexadecimalString(StringRef s, 2820 roundingMode rounding_mode) { 2821 lostFraction lost_fraction = lfExactlyZero; 2822 2823 category = fcNormal; 2824 zeroSignificand(); 2825 exponent = 0; 2826 2827 integerPart *significand = significandParts(); 2828 unsigned partsCount = partCount(); 2829 unsigned bitPos = partsCount * integerPartWidth; 2830 bool computedTrailingFraction = false; 2831 2832 // Skip leading zeroes and any (hexa)decimal point. 2833 StringRef::iterator begin = s.begin(); 2834 StringRef::iterator end = s.end(); 2835 StringRef::iterator dot; 2836 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2837 if (!PtrOrErr) 2838 return PtrOrErr.takeError(); 2839 StringRef::iterator p = *PtrOrErr; 2840 StringRef::iterator firstSignificantDigit = p; 2841 2842 while (p != end) { 2843 integerPart hex_value; 2844 2845 if (*p == '.') { 2846 if (dot != end) 2847 return createError("String contains multiple dots"); 2848 dot = p++; 2849 continue; 2850 } 2851 2852 hex_value = hexDigitValue(*p); 2853 if (hex_value == UINT_MAX) 2854 break; 2855 2856 p++; 2857 2858 // Store the number while we have space. 2859 if (bitPos) { 2860 bitPos -= 4; 2861 hex_value <<= bitPos % integerPartWidth; 2862 significand[bitPos / integerPartWidth] |= hex_value; 2863 } else if (!computedTrailingFraction) { 2864 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2865 if (!FractOrErr) 2866 return FractOrErr.takeError(); 2867 lost_fraction = *FractOrErr; 2868 computedTrailingFraction = true; 2869 } 2870 } 2871 2872 /* Hex floats require an exponent but not a hexadecimal point. */ 2873 if (p == end) 2874 return createError("Hex strings require an exponent"); 2875 if (*p != 'p' && *p != 'P') 2876 return createError("Invalid character in significand"); 2877 if (p == begin) 2878 return createError("Significand has no digits"); 2879 if (dot != end && p - begin == 1) 2880 return createError("Significand has no digits"); 2881 2882 /* Ignore the exponent if we are zero. */ 2883 if (p != firstSignificantDigit) { 2884 int expAdjustment; 2885 2886 /* Implicit hexadecimal point? */ 2887 if (dot == end) 2888 dot = p; 2889 2890 /* Calculate the exponent adjustment implicit in the number of 2891 significant digits. */ 2892 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2893 if (expAdjustment < 0) 2894 expAdjustment++; 2895 expAdjustment = expAdjustment * 4 - 1; 2896 2897 /* Adjust for writing the significand starting at the most 2898 significant nibble. */ 2899 expAdjustment += semantics->precision; 2900 expAdjustment -= partsCount * integerPartWidth; 2901 2902 /* Adjust for the given exponent. */ 2903 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 2904 if (!ExpOrErr) 2905 return ExpOrErr.takeError(); 2906 exponent = *ExpOrErr; 2907 } 2908 2909 return normalize(rounding_mode, lost_fraction); 2910 } 2911 2912 IEEEFloat::opStatus 2913 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2914 unsigned sigPartCount, int exp, 2915 roundingMode rounding_mode) { 2916 unsigned int parts, pow5PartCount; 2917 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2918 integerPart pow5Parts[maxPowerOfFiveParts]; 2919 bool isNearest; 2920 2921 isNearest = (rounding_mode == rmNearestTiesToEven || 2922 rounding_mode == rmNearestTiesToAway); 2923 2924 parts = partCountForBits(semantics->precision + 11); 2925 2926 /* Calculate pow(5, abs(exp)). */ 2927 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2928 2929 for (;; parts *= 2) { 2930 opStatus sigStatus, powStatus; 2931 unsigned int excessPrecision, truncatedBits; 2932 2933 calcSemantics.precision = parts * integerPartWidth - 1; 2934 excessPrecision = calcSemantics.precision - semantics->precision; 2935 truncatedBits = excessPrecision; 2936 2937 IEEEFloat decSig(calcSemantics, uninitialized); 2938 decSig.makeZero(sign); 2939 IEEEFloat pow5(calcSemantics); 2940 2941 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2942 rmNearestTiesToEven); 2943 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2944 rmNearestTiesToEven); 2945 /* Add exp, as 10^n = 5^n * 2^n. */ 2946 decSig.exponent += exp; 2947 2948 lostFraction calcLostFraction; 2949 integerPart HUerr, HUdistance; 2950 unsigned int powHUerr; 2951 2952 if (exp >= 0) { 2953 /* multiplySignificand leaves the precision-th bit set to 1. */ 2954 calcLostFraction = decSig.multiplySignificand(pow5); 2955 powHUerr = powStatus != opOK; 2956 } else { 2957 calcLostFraction = decSig.divideSignificand(pow5); 2958 /* Denormal numbers have less precision. */ 2959 if (decSig.exponent < semantics->minExponent) { 2960 excessPrecision += (semantics->minExponent - decSig.exponent); 2961 truncatedBits = excessPrecision; 2962 if (excessPrecision > calcSemantics.precision) 2963 excessPrecision = calcSemantics.precision; 2964 } 2965 /* Extra half-ulp lost in reciprocal of exponent. */ 2966 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2967 } 2968 2969 /* Both multiplySignificand and divideSignificand return the 2970 result with the integer bit set. */ 2971 assert(APInt::tcExtractBit 2972 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2973 2974 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2975 powHUerr); 2976 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2977 excessPrecision, isNearest); 2978 2979 /* Are we guaranteed to round correctly if we truncate? */ 2980 if (HUdistance >= HUerr) { 2981 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2982 calcSemantics.precision - excessPrecision, 2983 excessPrecision); 2984 /* Take the exponent of decSig. If we tcExtract-ed less bits 2985 above we must adjust our exponent to compensate for the 2986 implicit right shift. */ 2987 exponent = (decSig.exponent + semantics->precision 2988 - (calcSemantics.precision - excessPrecision)); 2989 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2990 decSig.partCount(), 2991 truncatedBits); 2992 return normalize(rounding_mode, calcLostFraction); 2993 } 2994 } 2995 } 2996 2997 Expected<IEEEFloat::opStatus> 2998 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2999 decimalInfo D; 3000 opStatus fs; 3001 3002 /* Scan the text. */ 3003 StringRef::iterator p = str.begin(); 3004 if (Error Err = interpretDecimal(p, str.end(), &D)) 3005 return std::move(Err); 3006 3007 /* Handle the quick cases. First the case of no significant digits, 3008 i.e. zero, and then exponents that are obviously too large or too 3009 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 3010 definitely overflows if 3011 3012 (exp - 1) * L >= maxExponent 3013 3014 and definitely underflows to zero where 3015 3016 (exp + 1) * L <= minExponent - precision 3017 3018 With integer arithmetic the tightest bounds for L are 3019 3020 93/28 < L < 196/59 [ numerator <= 256 ] 3021 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 3022 */ 3023 3024 // Test if we have a zero number allowing for strings with no null terminators 3025 // and zero decimals with non-zero exponents. 3026 // 3027 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 3028 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 3029 // be at most one dot. On the other hand, if we have a zero with a non-zero 3030 // exponent, then we know that D.firstSigDigit will be non-numeric. 3031 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 3032 category = fcZero; 3033 fs = opOK; 3034 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 3035 sign = false; 3036 3037 /* Check whether the normalized exponent is high enough to overflow 3038 max during the log-rebasing in the max-exponent check below. */ 3039 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 3040 fs = handleOverflow(rounding_mode); 3041 3042 /* If it wasn't, then it also wasn't high enough to overflow max 3043 during the log-rebasing in the min-exponent check. Check that it 3044 won't overflow min in either check, then perform the min-exponent 3045 check. */ 3046 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 3047 (D.normalizedExponent + 1) * 28738 <= 3048 8651 * (semantics->minExponent - (int) semantics->precision)) { 3049 /* Underflow to zero and round. */ 3050 category = fcNormal; 3051 zeroSignificand(); 3052 fs = normalize(rounding_mode, lfLessThanHalf); 3053 3054 /* We can finally safely perform the max-exponent check. */ 3055 } else if ((D.normalizedExponent - 1) * 42039 3056 >= 12655 * semantics->maxExponent) { 3057 /* Overflow and round. */ 3058 fs = handleOverflow(rounding_mode); 3059 } else { 3060 integerPart *decSignificand; 3061 unsigned int partCount; 3062 3063 /* A tight upper bound on number of bits required to hold an 3064 N-digit decimal integer is N * 196 / 59. Allocate enough space 3065 to hold the full significand, and an extra part required by 3066 tcMultiplyPart. */ 3067 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 3068 partCount = partCountForBits(1 + 196 * partCount / 59); 3069 decSignificand = new integerPart[partCount + 1]; 3070 partCount = 0; 3071 3072 /* Convert to binary efficiently - we do almost all multiplication 3073 in an integerPart. When this would overflow do we do a single 3074 bignum multiplication, and then revert again to multiplication 3075 in an integerPart. */ 3076 do { 3077 integerPart decValue, val, multiplier; 3078 3079 val = 0; 3080 multiplier = 1; 3081 3082 do { 3083 if (*p == '.') { 3084 p++; 3085 if (p == str.end()) { 3086 break; 3087 } 3088 } 3089 decValue = decDigitValue(*p++); 3090 if (decValue >= 10U) { 3091 delete[] decSignificand; 3092 return createError("Invalid character in significand"); 3093 } 3094 multiplier *= 10; 3095 val = val * 10 + decValue; 3096 /* The maximum number that can be multiplied by ten with any 3097 digit added without overflowing an integerPart. */ 3098 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 3099 3100 /* Multiply out the current part. */ 3101 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 3102 partCount, partCount + 1, false); 3103 3104 /* If we used another part (likely but not guaranteed), increase 3105 the count. */ 3106 if (decSignificand[partCount]) 3107 partCount++; 3108 } while (p <= D.lastSigDigit); 3109 3110 category = fcNormal; 3111 fs = roundSignificandWithExponent(decSignificand, partCount, 3112 D.exponent, rounding_mode); 3113 3114 delete [] decSignificand; 3115 } 3116 3117 return fs; 3118 } 3119 3120 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 3121 const size_t MIN_NAME_SIZE = 3; 3122 3123 if (str.size() < MIN_NAME_SIZE) 3124 return false; 3125 3126 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { 3127 makeInf(false); 3128 return true; 3129 } 3130 3131 bool IsNegative = str.front() == '-'; 3132 if (IsNegative) { 3133 str = str.drop_front(); 3134 if (str.size() < MIN_NAME_SIZE) 3135 return false; 3136 3137 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) { 3138 makeInf(true); 3139 return true; 3140 } 3141 } 3142 3143 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 3144 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 3145 if (IsSignaling) { 3146 str = str.drop_front(); 3147 if (str.size() < MIN_NAME_SIZE) 3148 return false; 3149 } 3150 3151 if (str.startswith("nan") || str.startswith("NaN")) { 3152 str = str.drop_front(3); 3153 3154 // A NaN without payload. 3155 if (str.empty()) { 3156 makeNaN(IsSignaling, IsNegative); 3157 return true; 3158 } 3159 3160 // Allow the payload to be inside parentheses. 3161 if (str.front() == '(') { 3162 // Parentheses should be balanced (and not empty). 3163 if (str.size() <= 2 || str.back() != ')') 3164 return false; 3165 3166 str = str.slice(1, str.size() - 1); 3167 } 3168 3169 // Determine the payload number's radix. 3170 unsigned Radix = 10; 3171 if (str[0] == '0') { 3172 if (str.size() > 1 && tolower(str[1]) == 'x') { 3173 str = str.drop_front(2); 3174 Radix = 16; 3175 } else 3176 Radix = 8; 3177 } 3178 3179 // Parse the payload and make the NaN. 3180 APInt Payload; 3181 if (!str.getAsInteger(Radix, Payload)) { 3182 makeNaN(IsSignaling, IsNegative, &Payload); 3183 return true; 3184 } 3185 } 3186 3187 return false; 3188 } 3189 3190 Expected<IEEEFloat::opStatus> 3191 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 3192 if (str.empty()) 3193 return createError("Invalid string length"); 3194 3195 // Handle special cases. 3196 if (convertFromStringSpecials(str)) 3197 return opOK; 3198 3199 /* Handle a leading minus sign. */ 3200 StringRef::iterator p = str.begin(); 3201 size_t slen = str.size(); 3202 sign = *p == '-' ? 1 : 0; 3203 if (*p == '-' || *p == '+') { 3204 p++; 3205 slen--; 3206 if (!slen) 3207 return createError("String has no digits"); 3208 } 3209 3210 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 3211 if (slen == 2) 3212 return createError("Invalid string"); 3213 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 3214 rounding_mode); 3215 } 3216 3217 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 3218 } 3219 3220 /* Write out a hexadecimal representation of the floating point value 3221 to DST, which must be of sufficient size, in the C99 form 3222 [-]0xh.hhhhp[+-]d. Return the number of characters written, 3223 excluding the terminating NUL. 3224 3225 If UPPERCASE, the output is in upper case, otherwise in lower case. 3226 3227 HEXDIGITS digits appear altogether, rounding the value if 3228 necessary. If HEXDIGITS is 0, the minimal precision to display the 3229 number precisely is used instead. If nothing would appear after 3230 the decimal point it is suppressed. 3231 3232 The decimal exponent is always printed and has at least one digit. 3233 Zero values display an exponent of zero. Infinities and NaNs 3234 appear as "infinity" or "nan" respectively. 3235 3236 The above rules are as specified by C99. There is ambiguity about 3237 what the leading hexadecimal digit should be. This implementation 3238 uses whatever is necessary so that the exponent is displayed as 3239 stored. This implies the exponent will fall within the IEEE format 3240 range, and the leading hexadecimal digit will be 0 (for denormals), 3241 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 3242 any other digits zero). 3243 */ 3244 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 3245 bool upperCase, 3246 roundingMode rounding_mode) const { 3247 char *p; 3248 3249 p = dst; 3250 if (sign) 3251 *dst++ = '-'; 3252 3253 switch (category) { 3254 case fcInfinity: 3255 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 3256 dst += sizeof infinityL - 1; 3257 break; 3258 3259 case fcNaN: 3260 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 3261 dst += sizeof NaNU - 1; 3262 break; 3263 3264 case fcZero: 3265 *dst++ = '0'; 3266 *dst++ = upperCase ? 'X': 'x'; 3267 *dst++ = '0'; 3268 if (hexDigits > 1) { 3269 *dst++ = '.'; 3270 memset (dst, '0', hexDigits - 1); 3271 dst += hexDigits - 1; 3272 } 3273 *dst++ = upperCase ? 'P': 'p'; 3274 *dst++ = '0'; 3275 break; 3276 3277 case fcNormal: 3278 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 3279 break; 3280 } 3281 3282 *dst = 0; 3283 3284 return static_cast<unsigned int>(dst - p); 3285 } 3286 3287 /* Does the hard work of outputting the correctly rounded hexadecimal 3288 form of a normal floating point number with the specified number of 3289 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3290 digits necessary to print the value precisely is output. */ 3291 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3292 bool upperCase, 3293 roundingMode rounding_mode) const { 3294 unsigned int count, valueBits, shift, partsCount, outputDigits; 3295 const char *hexDigitChars; 3296 const integerPart *significand; 3297 char *p; 3298 bool roundUp; 3299 3300 *dst++ = '0'; 3301 *dst++ = upperCase ? 'X': 'x'; 3302 3303 roundUp = false; 3304 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3305 3306 significand = significandParts(); 3307 partsCount = partCount(); 3308 3309 /* +3 because the first digit only uses the single integer bit, so 3310 we have 3 virtual zero most-significant-bits. */ 3311 valueBits = semantics->precision + 3; 3312 shift = integerPartWidth - valueBits % integerPartWidth; 3313 3314 /* The natural number of digits required ignoring trailing 3315 insignificant zeroes. */ 3316 outputDigits = (valueBits - significandLSB () + 3) / 4; 3317 3318 /* hexDigits of zero means use the required number for the 3319 precision. Otherwise, see if we are truncating. If we are, 3320 find out if we need to round away from zero. */ 3321 if (hexDigits) { 3322 if (hexDigits < outputDigits) { 3323 /* We are dropping non-zero bits, so need to check how to round. 3324 "bits" is the number of dropped bits. */ 3325 unsigned int bits; 3326 lostFraction fraction; 3327 3328 bits = valueBits - hexDigits * 4; 3329 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3330 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3331 } 3332 outputDigits = hexDigits; 3333 } 3334 3335 /* Write the digits consecutively, and start writing in the location 3336 of the hexadecimal point. We move the most significant digit 3337 left and add the hexadecimal point later. */ 3338 p = ++dst; 3339 3340 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3341 3342 while (outputDigits && count) { 3343 integerPart part; 3344 3345 /* Put the most significant integerPartWidth bits in "part". */ 3346 if (--count == partsCount) 3347 part = 0; /* An imaginary higher zero part. */ 3348 else 3349 part = significand[count] << shift; 3350 3351 if (count && shift) 3352 part |= significand[count - 1] >> (integerPartWidth - shift); 3353 3354 /* Convert as much of "part" to hexdigits as we can. */ 3355 unsigned int curDigits = integerPartWidth / 4; 3356 3357 if (curDigits > outputDigits) 3358 curDigits = outputDigits; 3359 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3360 outputDigits -= curDigits; 3361 } 3362 3363 if (roundUp) { 3364 char *q = dst; 3365 3366 /* Note that hexDigitChars has a trailing '0'. */ 3367 do { 3368 q--; 3369 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3370 } while (*q == '0'); 3371 assert(q >= p); 3372 } else { 3373 /* Add trailing zeroes. */ 3374 memset (dst, '0', outputDigits); 3375 dst += outputDigits; 3376 } 3377 3378 /* Move the most significant digit to before the point, and if there 3379 is something after the decimal point add it. This must come 3380 after rounding above. */ 3381 p[-1] = p[0]; 3382 if (dst -1 == p) 3383 dst--; 3384 else 3385 p[0] = '.'; 3386 3387 /* Finally output the exponent. */ 3388 *dst++ = upperCase ? 'P': 'p'; 3389 3390 return writeSignedDecimal (dst, exponent); 3391 } 3392 3393 hash_code hash_value(const IEEEFloat &Arg) { 3394 if (!Arg.isFiniteNonZero()) 3395 return hash_combine((uint8_t)Arg.category, 3396 // NaN has no sign, fix it at zero. 3397 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3398 Arg.semantics->precision); 3399 3400 // Normal floats need their exponent and significand hashed. 3401 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3402 Arg.semantics->precision, Arg.exponent, 3403 hash_combine_range( 3404 Arg.significandParts(), 3405 Arg.significandParts() + Arg.partCount())); 3406 } 3407 3408 // Conversion from APFloat to/from host float/double. It may eventually be 3409 // possible to eliminate these and have everybody deal with APFloats, but that 3410 // will take a while. This approach will not easily extend to long double. 3411 // Current implementation requires integerPartWidth==64, which is correct at 3412 // the moment but could be made more general. 3413 3414 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3415 // the actual IEEE respresentations. We compensate for that here. 3416 3417 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3418 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3419 assert(partCount()==2); 3420 3421 uint64_t myexponent, mysignificand; 3422 3423 if (isFiniteNonZero()) { 3424 myexponent = exponent+16383; //bias 3425 mysignificand = significandParts()[0]; 3426 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3427 myexponent = 0; // denormal 3428 } else if (category==fcZero) { 3429 myexponent = 0; 3430 mysignificand = 0; 3431 } else if (category==fcInfinity) { 3432 myexponent = 0x7fff; 3433 mysignificand = 0x8000000000000000ULL; 3434 } else { 3435 assert(category == fcNaN && "Unknown category"); 3436 myexponent = 0x7fff; 3437 mysignificand = significandParts()[0]; 3438 } 3439 3440 uint64_t words[2]; 3441 words[0] = mysignificand; 3442 words[1] = ((uint64_t)(sign & 1) << 15) | 3443 (myexponent & 0x7fffLL); 3444 return APInt(80, words); 3445 } 3446 3447 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3448 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3449 assert(partCount()==2); 3450 3451 uint64_t words[2]; 3452 opStatus fs; 3453 bool losesInfo; 3454 3455 // Convert number to double. To avoid spurious underflows, we re- 3456 // normalize against the "double" minExponent first, and only *then* 3457 // truncate the mantissa. The result of that second conversion 3458 // may be inexact, but should never underflow. 3459 // Declare fltSemantics before APFloat that uses it (and 3460 // saves pointer to it) to ensure correct destruction order. 3461 fltSemantics extendedSemantics = *semantics; 3462 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3463 IEEEFloat extended(*this); 3464 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3465 assert(fs == opOK && !losesInfo); 3466 (void)fs; 3467 3468 IEEEFloat u(extended); 3469 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3470 assert(fs == opOK || fs == opInexact); 3471 (void)fs; 3472 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3473 3474 // If conversion was exact or resulted in a special case, we're done; 3475 // just set the second double to zero. Otherwise, re-convert back to 3476 // the extended format and compute the difference. This now should 3477 // convert exactly to double. 3478 if (u.isFiniteNonZero() && losesInfo) { 3479 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3480 assert(fs == opOK && !losesInfo); 3481 (void)fs; 3482 3483 IEEEFloat v(extended); 3484 v.subtract(u, rmNearestTiesToEven); 3485 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3486 assert(fs == opOK && !losesInfo); 3487 (void)fs; 3488 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3489 } else { 3490 words[1] = 0; 3491 } 3492 3493 return APInt(128, words); 3494 } 3495 3496 template <const fltSemantics &S> 3497 APInt IEEEFloat::convertIEEEFloatToAPInt() const { 3498 assert(semantics == &S); 3499 3500 constexpr int bias = -(S.minExponent - 1); 3501 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3502 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; 3503 constexpr integerPart integer_bit = 3504 integerPart{1} << (trailing_significand_bits % integerPartWidth); 3505 constexpr uint64_t significand_mask = integer_bit - 1; 3506 constexpr unsigned int exponent_bits = 3507 S.sizeInBits - 1 - trailing_significand_bits; 3508 static_assert(exponent_bits < 64); 3509 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; 3510 3511 uint64_t myexponent; 3512 std::array<integerPart, partCountForBits(trailing_significand_bits)> 3513 mysignificand; 3514 3515 if (isFiniteNonZero()) { 3516 myexponent = exponent + bias; 3517 std::copy_n(significandParts(), mysignificand.size(), 3518 mysignificand.begin()); 3519 if (myexponent == 1 && 3520 !(significandParts()[integer_bit_part] & integer_bit)) 3521 myexponent = 0; // denormal 3522 } else if (category == fcZero) { 3523 myexponent = ::exponentZero(S) + bias; 3524 mysignificand.fill(0); 3525 } else if (category == fcInfinity) { 3526 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 3527 llvm_unreachable("semantics don't support inf!"); 3528 } 3529 myexponent = ::exponentInf(S) + bias; 3530 mysignificand.fill(0); 3531 } else { 3532 assert(category == fcNaN && "Unknown category!"); 3533 myexponent = ::exponentNaN(S) + bias; 3534 std::copy_n(significandParts(), mysignificand.size(), 3535 mysignificand.begin()); 3536 } 3537 std::array<uint64_t, (S.sizeInBits + 63) / 64> words; 3538 auto words_iter = 3539 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin()); 3540 if constexpr (significand_mask != 0) { 3541 // Clear the integer bit. 3542 words[mysignificand.size() - 1] &= significand_mask; 3543 } 3544 std::fill(words_iter, words.end(), uint64_t{0}); 3545 constexpr size_t last_word = words.size() - 1; 3546 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1) 3547 << ((S.sizeInBits - 1) % 64); 3548 words[last_word] |= shifted_sign; 3549 uint64_t shifted_exponent = (myexponent & exponent_mask) 3550 << (trailing_significand_bits % 64); 3551 words[last_word] |= shifted_exponent; 3552 if constexpr (last_word == 0) { 3553 return APInt(S.sizeInBits, words[0]); 3554 } 3555 return APInt(S.sizeInBits, words); 3556 } 3557 3558 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3559 assert(partCount() == 2); 3560 return convertIEEEFloatToAPInt<semIEEEquad>(); 3561 } 3562 3563 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3564 assert(partCount()==1); 3565 return convertIEEEFloatToAPInt<semIEEEdouble>(); 3566 } 3567 3568 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3569 assert(partCount()==1); 3570 return convertIEEEFloatToAPInt<semIEEEsingle>(); 3571 } 3572 3573 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3574 assert(partCount() == 1); 3575 return convertIEEEFloatToAPInt<semBFloat>(); 3576 } 3577 3578 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3579 assert(partCount()==1); 3580 return convertIEEEFloatToAPInt<semIEEEhalf>(); 3581 } 3582 3583 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { 3584 assert(partCount() == 1); 3585 return convertIEEEFloatToAPInt<semFloat8E5M2>(); 3586 } 3587 3588 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const { 3589 assert(partCount() == 1); 3590 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); 3591 } 3592 3593 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { 3594 assert(partCount() == 1); 3595 return convertIEEEFloatToAPInt<semFloat8E4M3FN>(); 3596 } 3597 3598 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const { 3599 assert(partCount() == 1); 3600 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); 3601 } 3602 3603 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const { 3604 assert(partCount() == 1); 3605 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); 3606 } 3607 3608 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { 3609 assert(partCount() == 1); 3610 return convertIEEEFloatToAPInt<semFloatTF32>(); 3611 } 3612 3613 // This function creates an APInt that is just a bit map of the floating 3614 // point constant as it would appear in memory. It is not a conversion, 3615 // and treating the result as a normal integer is unlikely to be useful. 3616 3617 APInt IEEEFloat::bitcastToAPInt() const { 3618 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3619 return convertHalfAPFloatToAPInt(); 3620 3621 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3622 return convertBFloatAPFloatToAPInt(); 3623 3624 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3625 return convertFloatAPFloatToAPInt(); 3626 3627 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3628 return convertDoubleAPFloatToAPInt(); 3629 3630 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3631 return convertQuadrupleAPFloatToAPInt(); 3632 3633 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3634 return convertPPCDoubleDoubleAPFloatToAPInt(); 3635 3636 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3637 return convertFloat8E5M2APFloatToAPInt(); 3638 3639 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) 3640 return convertFloat8E5M2FNUZAPFloatToAPInt(); 3641 3642 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3643 return convertFloat8E4M3FNAPFloatToAPInt(); 3644 3645 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) 3646 return convertFloat8E4M3FNUZAPFloatToAPInt(); 3647 3648 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) 3649 return convertFloat8E4M3B11FNUZAPFloatToAPInt(); 3650 3651 if (semantics == (const llvm::fltSemantics *)&semFloatTF32) 3652 return convertFloatTF32APFloatToAPInt(); 3653 3654 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3655 "unknown format!"); 3656 return convertF80LongDoubleAPFloatToAPInt(); 3657 } 3658 3659 float IEEEFloat::convertToFloat() const { 3660 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3661 "Float semantics are not IEEEsingle"); 3662 APInt api = bitcastToAPInt(); 3663 return api.bitsToFloat(); 3664 } 3665 3666 double IEEEFloat::convertToDouble() const { 3667 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3668 "Float semantics are not IEEEdouble"); 3669 APInt api = bitcastToAPInt(); 3670 return api.bitsToDouble(); 3671 } 3672 3673 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3674 /// does not support these bit patterns: 3675 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3676 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3677 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3678 /// exponent = 0, integer bit 1 ("pseudodenormal") 3679 /// At the moment, the first three are treated as NaNs, the last one as Normal. 3680 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3681 uint64_t i1 = api.getRawData()[0]; 3682 uint64_t i2 = api.getRawData()[1]; 3683 uint64_t myexponent = (i2 & 0x7fff); 3684 uint64_t mysignificand = i1; 3685 uint8_t myintegerbit = mysignificand >> 63; 3686 3687 initialize(&semX87DoubleExtended); 3688 assert(partCount()==2); 3689 3690 sign = static_cast<unsigned int>(i2>>15); 3691 if (myexponent == 0 && mysignificand == 0) { 3692 makeZero(sign); 3693 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3694 makeInf(sign); 3695 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3696 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3697 category = fcNaN; 3698 exponent = exponentNaN(); 3699 significandParts()[0] = mysignificand; 3700 significandParts()[1] = 0; 3701 } else { 3702 category = fcNormal; 3703 exponent = myexponent - 16383; 3704 significandParts()[0] = mysignificand; 3705 significandParts()[1] = 0; 3706 if (myexponent==0) // denormal 3707 exponent = -16382; 3708 } 3709 } 3710 3711 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3712 uint64_t i1 = api.getRawData()[0]; 3713 uint64_t i2 = api.getRawData()[1]; 3714 opStatus fs; 3715 bool losesInfo; 3716 3717 // Get the first double and convert to our format. 3718 initFromDoubleAPInt(APInt(64, i1)); 3719 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3720 assert(fs == opOK && !losesInfo); 3721 (void)fs; 3722 3723 // Unless we have a special case, add in second double. 3724 if (isFiniteNonZero()) { 3725 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3726 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3727 assert(fs == opOK && !losesInfo); 3728 (void)fs; 3729 3730 add(v, rmNearestTiesToEven); 3731 } 3732 } 3733 3734 template <const fltSemantics &S> 3735 void IEEEFloat::initFromIEEEAPInt(const APInt &api) { 3736 assert(api.getBitWidth() == S.sizeInBits); 3737 constexpr integerPart integer_bit = integerPart{1} 3738 << ((S.precision - 1) % integerPartWidth); 3739 constexpr uint64_t significand_mask = integer_bit - 1; 3740 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3741 constexpr unsigned int stored_significand_parts = 3742 partCountForBits(trailing_significand_bits); 3743 constexpr unsigned int exponent_bits = 3744 S.sizeInBits - 1 - trailing_significand_bits; 3745 static_assert(exponent_bits < 64); 3746 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; 3747 constexpr int bias = -(S.minExponent - 1); 3748 3749 // Copy the bits of the significand. We need to clear out the exponent and 3750 // sign bit in the last word. 3751 std::array<integerPart, stored_significand_parts> mysignificand; 3752 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin()); 3753 if constexpr (significand_mask != 0) { 3754 mysignificand[mysignificand.size() - 1] &= significand_mask; 3755 } 3756 3757 // We assume the last word holds the sign bit, the exponent, and potentially 3758 // some of the trailing significand field. 3759 uint64_t last_word = api.getRawData()[api.getNumWords() - 1]; 3760 uint64_t myexponent = 3761 (last_word >> (trailing_significand_bits % 64)) & exponent_mask; 3762 3763 initialize(&S); 3764 assert(partCount() == mysignificand.size()); 3765 3766 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64)); 3767 3768 bool all_zero_significand = 3769 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; }); 3770 3771 bool is_zero = myexponent == 0 && all_zero_significand; 3772 3773 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) { 3774 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) { 3775 makeInf(sign); 3776 return; 3777 } 3778 } 3779 3780 bool is_nan = false; 3781 3782 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) { 3783 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand; 3784 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) { 3785 bool all_ones_significand = 3786 std::all_of(mysignificand.begin(), mysignificand.end() - 1, 3787 [](integerPart bits) { return bits == ~integerPart{0}; }) && 3788 (!significand_mask || 3789 mysignificand[mysignificand.size() - 1] == significand_mask); 3790 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand; 3791 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) { 3792 is_nan = is_zero && sign; 3793 } 3794 3795 if (is_nan) { 3796 category = fcNaN; 3797 exponent = ::exponentNaN(S); 3798 std::copy_n(mysignificand.begin(), mysignificand.size(), 3799 significandParts()); 3800 return; 3801 } 3802 3803 if (is_zero) { 3804 makeZero(sign); 3805 return; 3806 } 3807 3808 category = fcNormal; 3809 exponent = myexponent - bias; 3810 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts()); 3811 if (myexponent == 0) // denormal 3812 exponent = S.minExponent; 3813 else 3814 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit 3815 } 3816 3817 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3818 initFromIEEEAPInt<semIEEEquad>(api); 3819 } 3820 3821 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3822 initFromIEEEAPInt<semIEEEdouble>(api); 3823 } 3824 3825 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3826 initFromIEEEAPInt<semIEEEsingle>(api); 3827 } 3828 3829 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 3830 initFromIEEEAPInt<semBFloat>(api); 3831 } 3832 3833 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3834 initFromIEEEAPInt<semIEEEhalf>(api); 3835 } 3836 3837 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { 3838 initFromIEEEAPInt<semFloat8E5M2>(api); 3839 } 3840 3841 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) { 3842 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api); 3843 } 3844 3845 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { 3846 initFromIEEEAPInt<semFloat8E4M3FN>(api); 3847 } 3848 3849 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) { 3850 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api); 3851 } 3852 3853 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) { 3854 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api); 3855 } 3856 3857 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { 3858 initFromIEEEAPInt<semFloatTF32>(api); 3859 } 3860 3861 /// Treat api as containing the bits of a floating point number. 3862 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3863 assert(api.getBitWidth() == Sem->sizeInBits); 3864 if (Sem == &semIEEEhalf) 3865 return initFromHalfAPInt(api); 3866 if (Sem == &semBFloat) 3867 return initFromBFloatAPInt(api); 3868 if (Sem == &semIEEEsingle) 3869 return initFromFloatAPInt(api); 3870 if (Sem == &semIEEEdouble) 3871 return initFromDoubleAPInt(api); 3872 if (Sem == &semX87DoubleExtended) 3873 return initFromF80LongDoubleAPInt(api); 3874 if (Sem == &semIEEEquad) 3875 return initFromQuadrupleAPInt(api); 3876 if (Sem == &semPPCDoubleDoubleLegacy) 3877 return initFromPPCDoubleDoubleAPInt(api); 3878 if (Sem == &semFloat8E5M2) 3879 return initFromFloat8E5M2APInt(api); 3880 if (Sem == &semFloat8E5M2FNUZ) 3881 return initFromFloat8E5M2FNUZAPInt(api); 3882 if (Sem == &semFloat8E4M3FN) 3883 return initFromFloat8E4M3FNAPInt(api); 3884 if (Sem == &semFloat8E4M3FNUZ) 3885 return initFromFloat8E4M3FNUZAPInt(api); 3886 if (Sem == &semFloat8E4M3B11FNUZ) 3887 return initFromFloat8E4M3B11FNUZAPInt(api); 3888 if (Sem == &semFloatTF32) 3889 return initFromFloatTF32APInt(api); 3890 3891 llvm_unreachable(nullptr); 3892 } 3893 3894 /// Make this number the largest magnitude normal number in the given 3895 /// semantics. 3896 void IEEEFloat::makeLargest(bool Negative) { 3897 // We want (in interchange format): 3898 // sign = {Negative} 3899 // exponent = 1..10 3900 // significand = 1..1 3901 category = fcNormal; 3902 sign = Negative; 3903 exponent = semantics->maxExponent; 3904 3905 // Use memset to set all but the highest integerPart to all ones. 3906 integerPart *significand = significandParts(); 3907 unsigned PartCount = partCount(); 3908 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3909 3910 // Set the high integerPart especially setting all unused top bits for 3911 // internal consistency. 3912 const unsigned NumUnusedHighBits = 3913 PartCount*integerPartWidth - semantics->precision; 3914 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3915 ? (~integerPart(0) >> NumUnusedHighBits) 3916 : 0; 3917 3918 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 3919 semantics->nanEncoding == fltNanEncoding::AllOnes) 3920 significand[0] &= ~integerPart(1); 3921 } 3922 3923 /// Make this number the smallest magnitude denormal number in the given 3924 /// semantics. 3925 void IEEEFloat::makeSmallest(bool Negative) { 3926 // We want (in interchange format): 3927 // sign = {Negative} 3928 // exponent = 0..0 3929 // significand = 0..01 3930 category = fcNormal; 3931 sign = Negative; 3932 exponent = semantics->minExponent; 3933 APInt::tcSet(significandParts(), 1, partCount()); 3934 } 3935 3936 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3937 // We want (in interchange format): 3938 // sign = {Negative} 3939 // exponent = 0..0 3940 // significand = 10..0 3941 3942 category = fcNormal; 3943 zeroSignificand(); 3944 sign = Negative; 3945 exponent = semantics->minExponent; 3946 APInt::tcSetBit(significandParts(), semantics->precision - 1); 3947 } 3948 3949 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3950 initFromAPInt(&Sem, API); 3951 } 3952 3953 IEEEFloat::IEEEFloat(float f) { 3954 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 3955 } 3956 3957 IEEEFloat::IEEEFloat(double d) { 3958 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 3959 } 3960 3961 namespace { 3962 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3963 Buffer.append(Str.begin(), Str.end()); 3964 } 3965 3966 /// Removes data from the given significand until it is no more 3967 /// precise than is required for the desired precision. 3968 void AdjustToPrecision(APInt &significand, 3969 int &exp, unsigned FormatPrecision) { 3970 unsigned bits = significand.getActiveBits(); 3971 3972 // 196/59 is a very slight overestimate of lg_2(10). 3973 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3974 3975 if (bits <= bitsRequired) return; 3976 3977 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3978 if (!tensRemovable) return; 3979 3980 exp += tensRemovable; 3981 3982 APInt divisor(significand.getBitWidth(), 1); 3983 APInt powten(significand.getBitWidth(), 10); 3984 while (true) { 3985 if (tensRemovable & 1) 3986 divisor *= powten; 3987 tensRemovable >>= 1; 3988 if (!tensRemovable) break; 3989 powten *= powten; 3990 } 3991 3992 significand = significand.udiv(divisor); 3993 3994 // Truncate the significand down to its active bit count. 3995 significand = significand.trunc(significand.getActiveBits()); 3996 } 3997 3998 3999 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 4000 int &exp, unsigned FormatPrecision) { 4001 unsigned N = buffer.size(); 4002 if (N <= FormatPrecision) return; 4003 4004 // The most significant figures are the last ones in the buffer. 4005 unsigned FirstSignificant = N - FormatPrecision; 4006 4007 // Round. 4008 // FIXME: this probably shouldn't use 'round half up'. 4009 4010 // Rounding down is just a truncation, except we also want to drop 4011 // trailing zeros from the new result. 4012 if (buffer[FirstSignificant - 1] < '5') { 4013 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 4014 FirstSignificant++; 4015 4016 exp += FirstSignificant; 4017 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 4018 return; 4019 } 4020 4021 // Rounding up requires a decimal add-with-carry. If we continue 4022 // the carry, the newly-introduced zeros will just be truncated. 4023 for (unsigned I = FirstSignificant; I != N; ++I) { 4024 if (buffer[I] == '9') { 4025 FirstSignificant++; 4026 } else { 4027 buffer[I]++; 4028 break; 4029 } 4030 } 4031 4032 // If we carried through, we have exactly one digit of precision. 4033 if (FirstSignificant == N) { 4034 exp += FirstSignificant; 4035 buffer.clear(); 4036 buffer.push_back('1'); 4037 return; 4038 } 4039 4040 exp += FirstSignificant; 4041 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 4042 } 4043 } // namespace 4044 4045 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 4046 unsigned FormatMaxPadding, bool TruncateZero) const { 4047 switch (category) { 4048 case fcInfinity: 4049 if (isNegative()) 4050 return append(Str, "-Inf"); 4051 else 4052 return append(Str, "+Inf"); 4053 4054 case fcNaN: return append(Str, "NaN"); 4055 4056 case fcZero: 4057 if (isNegative()) 4058 Str.push_back('-'); 4059 4060 if (!FormatMaxPadding) { 4061 if (TruncateZero) 4062 append(Str, "0.0E+0"); 4063 else { 4064 append(Str, "0.0"); 4065 if (FormatPrecision > 1) 4066 Str.append(FormatPrecision - 1, '0'); 4067 append(Str, "e+00"); 4068 } 4069 } else 4070 Str.push_back('0'); 4071 return; 4072 4073 case fcNormal: 4074 break; 4075 } 4076 4077 if (isNegative()) 4078 Str.push_back('-'); 4079 4080 // Decompose the number into an APInt and an exponent. 4081 int exp = exponent - ((int) semantics->precision - 1); 4082 APInt significand( 4083 semantics->precision, 4084 ArrayRef(significandParts(), partCountForBits(semantics->precision))); 4085 4086 // Set FormatPrecision if zero. We want to do this before we 4087 // truncate trailing zeros, as those are part of the precision. 4088 if (!FormatPrecision) { 4089 // We use enough digits so the number can be round-tripped back to an 4090 // APFloat. The formula comes from "How to Print Floating-Point Numbers 4091 // Accurately" by Steele and White. 4092 // FIXME: Using a formula based purely on the precision is conservative; 4093 // we can print fewer digits depending on the actual value being printed. 4094 4095 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 4096 FormatPrecision = 2 + semantics->precision * 59 / 196; 4097 } 4098 4099 // Ignore trailing binary zeros. 4100 int trailingZeros = significand.countr_zero(); 4101 exp += trailingZeros; 4102 significand.lshrInPlace(trailingZeros); 4103 4104 // Change the exponent from 2^e to 10^e. 4105 if (exp == 0) { 4106 // Nothing to do. 4107 } else if (exp > 0) { 4108 // Just shift left. 4109 significand = significand.zext(semantics->precision + exp); 4110 significand <<= exp; 4111 exp = 0; 4112 } else { /* exp < 0 */ 4113 int texp = -exp; 4114 4115 // We transform this using the identity: 4116 // (N)(2^-e) == (N)(5^e)(10^-e) 4117 // This means we have to multiply N (the significand) by 5^e. 4118 // To avoid overflow, we have to operate on numbers large 4119 // enough to store N * 5^e: 4120 // log2(N * 5^e) == log2(N) + e * log2(5) 4121 // <= semantics->precision + e * 137 / 59 4122 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 4123 4124 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 4125 4126 // Multiply significand by 5^e. 4127 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 4128 significand = significand.zext(precision); 4129 APInt five_to_the_i(precision, 5); 4130 while (true) { 4131 if (texp & 1) significand *= five_to_the_i; 4132 4133 texp >>= 1; 4134 if (!texp) break; 4135 five_to_the_i *= five_to_the_i; 4136 } 4137 } 4138 4139 AdjustToPrecision(significand, exp, FormatPrecision); 4140 4141 SmallVector<char, 256> buffer; 4142 4143 // Fill the buffer. 4144 unsigned precision = significand.getBitWidth(); 4145 if (precision < 4) { 4146 // We need enough precision to store the value 10. 4147 precision = 4; 4148 significand = significand.zext(precision); 4149 } 4150 APInt ten(precision, 10); 4151 APInt digit(precision, 0); 4152 4153 bool inTrail = true; 4154 while (significand != 0) { 4155 // digit <- significand % 10 4156 // significand <- significand / 10 4157 APInt::udivrem(significand, ten, significand, digit); 4158 4159 unsigned d = digit.getZExtValue(); 4160 4161 // Drop trailing zeros. 4162 if (inTrail && !d) exp++; 4163 else { 4164 buffer.push_back((char) ('0' + d)); 4165 inTrail = false; 4166 } 4167 } 4168 4169 assert(!buffer.empty() && "no characters in buffer!"); 4170 4171 // Drop down to FormatPrecision. 4172 // TODO: don't do more precise calculations above than are required. 4173 AdjustToPrecision(buffer, exp, FormatPrecision); 4174 4175 unsigned NDigits = buffer.size(); 4176 4177 // Check whether we should use scientific notation. 4178 bool FormatScientific; 4179 if (!FormatMaxPadding) 4180 FormatScientific = true; 4181 else { 4182 if (exp >= 0) { 4183 // 765e3 --> 765000 4184 // ^^^ 4185 // But we shouldn't make the number look more precise than it is. 4186 FormatScientific = ((unsigned) exp > FormatMaxPadding || 4187 NDigits + (unsigned) exp > FormatPrecision); 4188 } else { 4189 // Power of the most significant digit. 4190 int MSD = exp + (int) (NDigits - 1); 4191 if (MSD >= 0) { 4192 // 765e-2 == 7.65 4193 FormatScientific = false; 4194 } else { 4195 // 765e-5 == 0.00765 4196 // ^ ^^ 4197 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 4198 } 4199 } 4200 } 4201 4202 // Scientific formatting is pretty straightforward. 4203 if (FormatScientific) { 4204 exp += (NDigits - 1); 4205 4206 Str.push_back(buffer[NDigits-1]); 4207 Str.push_back('.'); 4208 if (NDigits == 1 && TruncateZero) 4209 Str.push_back('0'); 4210 else 4211 for (unsigned I = 1; I != NDigits; ++I) 4212 Str.push_back(buffer[NDigits-1-I]); 4213 // Fill with zeros up to FormatPrecision. 4214 if (!TruncateZero && FormatPrecision > NDigits - 1) 4215 Str.append(FormatPrecision - NDigits + 1, '0'); 4216 // For !TruncateZero we use lower 'e'. 4217 Str.push_back(TruncateZero ? 'E' : 'e'); 4218 4219 Str.push_back(exp >= 0 ? '+' : '-'); 4220 if (exp < 0) exp = -exp; 4221 SmallVector<char, 6> expbuf; 4222 do { 4223 expbuf.push_back((char) ('0' + (exp % 10))); 4224 exp /= 10; 4225 } while (exp); 4226 // Exponent always at least two digits if we do not truncate zeros. 4227 if (!TruncateZero && expbuf.size() < 2) 4228 expbuf.push_back('0'); 4229 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 4230 Str.push_back(expbuf[E-1-I]); 4231 return; 4232 } 4233 4234 // Non-scientific, positive exponents. 4235 if (exp >= 0) { 4236 for (unsigned I = 0; I != NDigits; ++I) 4237 Str.push_back(buffer[NDigits-1-I]); 4238 for (unsigned I = 0; I != (unsigned) exp; ++I) 4239 Str.push_back('0'); 4240 return; 4241 } 4242 4243 // Non-scientific, negative exponents. 4244 4245 // The number of digits to the left of the decimal point. 4246 int NWholeDigits = exp + (int) NDigits; 4247 4248 unsigned I = 0; 4249 if (NWholeDigits > 0) { 4250 for (; I != (unsigned) NWholeDigits; ++I) 4251 Str.push_back(buffer[NDigits-I-1]); 4252 Str.push_back('.'); 4253 } else { 4254 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 4255 4256 Str.push_back('0'); 4257 Str.push_back('.'); 4258 for (unsigned Z = 1; Z != NZeros; ++Z) 4259 Str.push_back('0'); 4260 } 4261 4262 for (; I != NDigits; ++I) 4263 Str.push_back(buffer[NDigits-I-1]); 4264 } 4265 4266 bool IEEEFloat::getExactInverse(APFloat *inv) const { 4267 // Special floats and denormals have no exact inverse. 4268 if (!isFiniteNonZero()) 4269 return false; 4270 4271 // Check that the number is a power of two by making sure that only the 4272 // integer bit is set in the significand. 4273 if (significandLSB() != semantics->precision - 1) 4274 return false; 4275 4276 // Get the inverse. 4277 IEEEFloat reciprocal(*semantics, 1ULL); 4278 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 4279 return false; 4280 4281 // Avoid multiplication with a denormal, it is not safe on all platforms and 4282 // may be slower than a normal division. 4283 if (reciprocal.isDenormal()) 4284 return false; 4285 4286 assert(reciprocal.isFiniteNonZero() && 4287 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 4288 4289 if (inv) 4290 *inv = APFloat(reciprocal, *semantics); 4291 4292 return true; 4293 } 4294 4295 bool IEEEFloat::isSignaling() const { 4296 if (!isNaN()) 4297 return false; 4298 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 4299 return false; 4300 4301 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 4302 // first bit of the trailing significand being 0. 4303 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 4304 } 4305 4306 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 4307 /// 4308 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 4309 /// appropriate sign switching before/after the computation. 4310 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 4311 // If we are performing nextDown, swap sign so we have -x. 4312 if (nextDown) 4313 changeSign(); 4314 4315 // Compute nextUp(x) 4316 opStatus result = opOK; 4317 4318 // Handle each float category separately. 4319 switch (category) { 4320 case fcInfinity: 4321 // nextUp(+inf) = +inf 4322 if (!isNegative()) 4323 break; 4324 // nextUp(-inf) = -getLargest() 4325 makeLargest(true); 4326 break; 4327 case fcNaN: 4328 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 4329 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 4330 // change the payload. 4331 if (isSignaling()) { 4332 result = opInvalidOp; 4333 // For consistency, propagate the sign of the sNaN to the qNaN. 4334 makeNaN(false, isNegative(), nullptr); 4335 } 4336 break; 4337 case fcZero: 4338 // nextUp(pm 0) = +getSmallest() 4339 makeSmallest(false); 4340 break; 4341 case fcNormal: 4342 // nextUp(-getSmallest()) = -0 4343 if (isSmallest() && isNegative()) { 4344 APInt::tcSet(significandParts(), 0, partCount()); 4345 category = fcZero; 4346 exponent = 0; 4347 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 4348 sign = false; 4349 break; 4350 } 4351 4352 if (isLargest() && !isNegative()) { 4353 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 4354 // nextUp(getLargest()) == NAN 4355 makeNaN(); 4356 break; 4357 } else { 4358 // nextUp(getLargest()) == INFINITY 4359 APInt::tcSet(significandParts(), 0, partCount()); 4360 category = fcInfinity; 4361 exponent = semantics->maxExponent + 1; 4362 break; 4363 } 4364 } 4365 4366 // nextUp(normal) == normal + inc. 4367 if (isNegative()) { 4368 // If we are negative, we need to decrement the significand. 4369 4370 // We only cross a binade boundary that requires adjusting the exponent 4371 // if: 4372 // 1. exponent != semantics->minExponent. This implies we are not in the 4373 // smallest binade or are dealing with denormals. 4374 // 2. Our significand excluding the integral bit is all zeros. 4375 bool WillCrossBinadeBoundary = 4376 exponent != semantics->minExponent && isSignificandAllZeros(); 4377 4378 // Decrement the significand. 4379 // 4380 // We always do this since: 4381 // 1. If we are dealing with a non-binade decrement, by definition we 4382 // just decrement the significand. 4383 // 2. If we are dealing with a normal -> normal binade decrement, since 4384 // we have an explicit integral bit the fact that all bits but the 4385 // integral bit are zero implies that subtracting one will yield a 4386 // significand with 0 integral bit and 1 in all other spots. Thus we 4387 // must just adjust the exponent and set the integral bit to 1. 4388 // 3. If we are dealing with a normal -> denormal binade decrement, 4389 // since we set the integral bit to 0 when we represent denormals, we 4390 // just decrement the significand. 4391 integerPart *Parts = significandParts(); 4392 APInt::tcDecrement(Parts, partCount()); 4393 4394 if (WillCrossBinadeBoundary) { 4395 // Our result is a normal number. Do the following: 4396 // 1. Set the integral bit to 1. 4397 // 2. Decrement the exponent. 4398 APInt::tcSetBit(Parts, semantics->precision - 1); 4399 exponent--; 4400 } 4401 } else { 4402 // If we are positive, we need to increment the significand. 4403 4404 // We only cross a binade boundary that requires adjusting the exponent if 4405 // the input is not a denormal and all of said input's significand bits 4406 // are set. If all of said conditions are true: clear the significand, set 4407 // the integral bit to 1, and increment the exponent. If we have a 4408 // denormal always increment since moving denormals and the numbers in the 4409 // smallest normal binade have the same exponent in our representation. 4410 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 4411 4412 if (WillCrossBinadeBoundary) { 4413 integerPart *Parts = significandParts(); 4414 APInt::tcSet(Parts, 0, partCount()); 4415 APInt::tcSetBit(Parts, semantics->precision - 1); 4416 assert(exponent != semantics->maxExponent && 4417 "We can not increment an exponent beyond the maxExponent allowed" 4418 " by the given floating point semantics."); 4419 exponent++; 4420 } else { 4421 incrementSignificand(); 4422 } 4423 } 4424 break; 4425 } 4426 4427 // If we are performing nextDown, swap sign so we have -nextUp(-x) 4428 if (nextDown) 4429 changeSign(); 4430 4431 return result; 4432 } 4433 4434 APFloatBase::ExponentType IEEEFloat::exponentNaN() const { 4435 return ::exponentNaN(*semantics); 4436 } 4437 4438 APFloatBase::ExponentType IEEEFloat::exponentInf() const { 4439 return ::exponentInf(*semantics); 4440 } 4441 4442 APFloatBase::ExponentType IEEEFloat::exponentZero() const { 4443 return ::exponentZero(*semantics); 4444 } 4445 4446 void IEEEFloat::makeInf(bool Negative) { 4447 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 4448 // There is no Inf, so make NaN instead. 4449 makeNaN(false, Negative); 4450 return; 4451 } 4452 category = fcInfinity; 4453 sign = Negative; 4454 exponent = exponentInf(); 4455 APInt::tcSet(significandParts(), 0, partCount()); 4456 } 4457 4458 void IEEEFloat::makeZero(bool Negative) { 4459 category = fcZero; 4460 sign = Negative; 4461 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 4462 // Merge negative zero to positive because 0b10000...000 is used for NaN 4463 sign = false; 4464 } 4465 exponent = exponentZero(); 4466 APInt::tcSet(significandParts(), 0, partCount()); 4467 } 4468 4469 void IEEEFloat::makeQuiet() { 4470 assert(isNaN()); 4471 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly) 4472 APInt::tcSetBit(significandParts(), semantics->precision - 2); 4473 } 4474 4475 int ilogb(const IEEEFloat &Arg) { 4476 if (Arg.isNaN()) 4477 return IEEEFloat::IEK_NaN; 4478 if (Arg.isZero()) 4479 return IEEEFloat::IEK_Zero; 4480 if (Arg.isInfinity()) 4481 return IEEEFloat::IEK_Inf; 4482 if (!Arg.isDenormal()) 4483 return Arg.exponent; 4484 4485 IEEEFloat Normalized(Arg); 4486 int SignificandBits = Arg.getSemantics().precision - 1; 4487 4488 Normalized.exponent += SignificandBits; 4489 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 4490 return Normalized.exponent - SignificandBits; 4491 } 4492 4493 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 4494 auto MaxExp = X.getSemantics().maxExponent; 4495 auto MinExp = X.getSemantics().minExponent; 4496 4497 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 4498 // overflow; clamp it to a safe range before adding, but ensure that the range 4499 // is large enough that the clamp does not change the result. The range we 4500 // need to support is the difference between the largest possible exponent and 4501 // the normalized exponent of half the smallest denormal. 4502 4503 int SignificandBits = X.getSemantics().precision - 1; 4504 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4505 4506 // Clamp to one past the range ends to let normalize handle overlflow. 4507 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement); 4508 X.normalize(RoundingMode, lfExactlyZero); 4509 if (X.isNaN()) 4510 X.makeQuiet(); 4511 return X; 4512 } 4513 4514 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 4515 Exp = ilogb(Val); 4516 4517 // Quiet signalling nans. 4518 if (Exp == IEEEFloat::IEK_NaN) { 4519 IEEEFloat Quiet(Val); 4520 Quiet.makeQuiet(); 4521 return Quiet; 4522 } 4523 4524 if (Exp == IEEEFloat::IEK_Inf) 4525 return Val; 4526 4527 // 1 is added because frexp is defined to return a normalized fraction in 4528 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4529 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 4530 return scalbn(Val, -Exp, RM); 4531 } 4532 4533 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4534 : Semantics(&S), 4535 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4536 assert(Semantics == &semPPCDoubleDouble); 4537 } 4538 4539 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4540 : Semantics(&S), 4541 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4542 APFloat(semIEEEdouble, uninitialized)}) { 4543 assert(Semantics == &semPPCDoubleDouble); 4544 } 4545 4546 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4547 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4548 APFloat(semIEEEdouble)}) { 4549 assert(Semantics == &semPPCDoubleDouble); 4550 } 4551 4552 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4553 : Semantics(&S), 4554 Floats(new APFloat[2]{ 4555 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4556 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4557 assert(Semantics == &semPPCDoubleDouble); 4558 } 4559 4560 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4561 APFloat &&Second) 4562 : Semantics(&S), 4563 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4564 assert(Semantics == &semPPCDoubleDouble); 4565 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4566 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4567 } 4568 4569 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4570 : Semantics(RHS.Semantics), 4571 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4572 APFloat(RHS.Floats[1])} 4573 : nullptr) { 4574 assert(Semantics == &semPPCDoubleDouble); 4575 } 4576 4577 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4578 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4579 RHS.Semantics = &semBogus; 4580 assert(Semantics == &semPPCDoubleDouble); 4581 } 4582 4583 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4584 if (Semantics == RHS.Semantics && RHS.Floats) { 4585 Floats[0] = RHS.Floats[0]; 4586 Floats[1] = RHS.Floats[1]; 4587 } else if (this != &RHS) { 4588 this->~DoubleAPFloat(); 4589 new (this) DoubleAPFloat(RHS); 4590 } 4591 return *this; 4592 } 4593 4594 // Implement addition, subtraction, multiplication and division based on: 4595 // "Software for Doubled-Precision Floating-Point Computations", 4596 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 4597 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4598 const APFloat &c, const APFloat &cc, 4599 roundingMode RM) { 4600 int Status = opOK; 4601 APFloat z = a; 4602 Status |= z.add(c, RM); 4603 if (!z.isFinite()) { 4604 if (!z.isInfinity()) { 4605 Floats[0] = std::move(z); 4606 Floats[1].makeZero(/* Neg = */ false); 4607 return (opStatus)Status; 4608 } 4609 Status = opOK; 4610 auto AComparedToC = a.compareAbsoluteValue(c); 4611 z = cc; 4612 Status |= z.add(aa, RM); 4613 if (AComparedToC == APFloat::cmpGreaterThan) { 4614 // z = cc + aa + c + a; 4615 Status |= z.add(c, RM); 4616 Status |= z.add(a, RM); 4617 } else { 4618 // z = cc + aa + a + c; 4619 Status |= z.add(a, RM); 4620 Status |= z.add(c, RM); 4621 } 4622 if (!z.isFinite()) { 4623 Floats[0] = std::move(z); 4624 Floats[1].makeZero(/* Neg = */ false); 4625 return (opStatus)Status; 4626 } 4627 Floats[0] = z; 4628 APFloat zz = aa; 4629 Status |= zz.add(cc, RM); 4630 if (AComparedToC == APFloat::cmpGreaterThan) { 4631 // Floats[1] = a - z + c + zz; 4632 Floats[1] = a; 4633 Status |= Floats[1].subtract(z, RM); 4634 Status |= Floats[1].add(c, RM); 4635 Status |= Floats[1].add(zz, RM); 4636 } else { 4637 // Floats[1] = c - z + a + zz; 4638 Floats[1] = c; 4639 Status |= Floats[1].subtract(z, RM); 4640 Status |= Floats[1].add(a, RM); 4641 Status |= Floats[1].add(zz, RM); 4642 } 4643 } else { 4644 // q = a - z; 4645 APFloat q = a; 4646 Status |= q.subtract(z, RM); 4647 4648 // zz = q + c + (a - (q + z)) + aa + cc; 4649 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 4650 auto zz = q; 4651 Status |= zz.add(c, RM); 4652 Status |= q.add(z, RM); 4653 Status |= q.subtract(a, RM); 4654 q.changeSign(); 4655 Status |= zz.add(q, RM); 4656 Status |= zz.add(aa, RM); 4657 Status |= zz.add(cc, RM); 4658 if (zz.isZero() && !zz.isNegative()) { 4659 Floats[0] = std::move(z); 4660 Floats[1].makeZero(/* Neg = */ false); 4661 return opOK; 4662 } 4663 Floats[0] = z; 4664 Status |= Floats[0].add(zz, RM); 4665 if (!Floats[0].isFinite()) { 4666 Floats[1].makeZero(/* Neg = */ false); 4667 return (opStatus)Status; 4668 } 4669 Floats[1] = std::move(z); 4670 Status |= Floats[1].subtract(Floats[0], RM); 4671 Status |= Floats[1].add(zz, RM); 4672 } 4673 return (opStatus)Status; 4674 } 4675 4676 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4677 const DoubleAPFloat &RHS, 4678 DoubleAPFloat &Out, 4679 roundingMode RM) { 4680 if (LHS.getCategory() == fcNaN) { 4681 Out = LHS; 4682 return opOK; 4683 } 4684 if (RHS.getCategory() == fcNaN) { 4685 Out = RHS; 4686 return opOK; 4687 } 4688 if (LHS.getCategory() == fcZero) { 4689 Out = RHS; 4690 return opOK; 4691 } 4692 if (RHS.getCategory() == fcZero) { 4693 Out = LHS; 4694 return opOK; 4695 } 4696 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4697 LHS.isNegative() != RHS.isNegative()) { 4698 Out.makeNaN(false, Out.isNegative(), nullptr); 4699 return opInvalidOp; 4700 } 4701 if (LHS.getCategory() == fcInfinity) { 4702 Out = LHS; 4703 return opOK; 4704 } 4705 if (RHS.getCategory() == fcInfinity) { 4706 Out = RHS; 4707 return opOK; 4708 } 4709 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4710 4711 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4712 CC(RHS.Floats[1]); 4713 assert(&A.getSemantics() == &semIEEEdouble); 4714 assert(&AA.getSemantics() == &semIEEEdouble); 4715 assert(&C.getSemantics() == &semIEEEdouble); 4716 assert(&CC.getSemantics() == &semIEEEdouble); 4717 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble); 4718 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4719 return Out.addImpl(A, AA, C, CC, RM); 4720 } 4721 4722 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4723 roundingMode RM) { 4724 return addWithSpecial(*this, RHS, *this, RM); 4725 } 4726 4727 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4728 roundingMode RM) { 4729 changeSign(); 4730 auto Ret = add(RHS, RM); 4731 changeSign(); 4732 return Ret; 4733 } 4734 4735 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4736 APFloat::roundingMode RM) { 4737 const auto &LHS = *this; 4738 auto &Out = *this; 4739 /* Interesting observation: For special categories, finding the lowest 4740 common ancestor of the following layered graph gives the correct 4741 return category: 4742 4743 NaN 4744 / \ 4745 Zero Inf 4746 \ / 4747 Normal 4748 4749 e.g. NaN * NaN = NaN 4750 Zero * Inf = NaN 4751 Normal * Zero = Zero 4752 Normal * Inf = Inf 4753 */ 4754 if (LHS.getCategory() == fcNaN) { 4755 Out = LHS; 4756 return opOK; 4757 } 4758 if (RHS.getCategory() == fcNaN) { 4759 Out = RHS; 4760 return opOK; 4761 } 4762 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4763 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4764 Out.makeNaN(false, false, nullptr); 4765 return opOK; 4766 } 4767 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4768 Out = LHS; 4769 return opOK; 4770 } 4771 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4772 Out = RHS; 4773 return opOK; 4774 } 4775 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4776 "Special cases not handled exhaustively"); 4777 4778 int Status = opOK; 4779 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4780 // t = a * c 4781 APFloat T = A; 4782 Status |= T.multiply(C, RM); 4783 if (!T.isFiniteNonZero()) { 4784 Floats[0] = T; 4785 Floats[1].makeZero(/* Neg = */ false); 4786 return (opStatus)Status; 4787 } 4788 4789 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4790 APFloat Tau = A; 4791 T.changeSign(); 4792 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4793 T.changeSign(); 4794 { 4795 // v = a * d 4796 APFloat V = A; 4797 Status |= V.multiply(D, RM); 4798 // w = b * c 4799 APFloat W = B; 4800 Status |= W.multiply(C, RM); 4801 Status |= V.add(W, RM); 4802 // tau += v + w 4803 Status |= Tau.add(V, RM); 4804 } 4805 // u = t + tau 4806 APFloat U = T; 4807 Status |= U.add(Tau, RM); 4808 4809 Floats[0] = U; 4810 if (!U.isFinite()) { 4811 Floats[1].makeZero(/* Neg = */ false); 4812 } else { 4813 // Floats[1] = (t - u) + tau 4814 Status |= T.subtract(U, RM); 4815 Status |= T.add(Tau, RM); 4816 Floats[1] = T; 4817 } 4818 return (opStatus)Status; 4819 } 4820 4821 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4822 APFloat::roundingMode RM) { 4823 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4824 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4825 auto Ret = 4826 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4827 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4828 return Ret; 4829 } 4830 4831 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4832 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4833 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4834 auto Ret = 4835 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4836 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4837 return Ret; 4838 } 4839 4840 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4841 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4842 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4843 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4844 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4845 return Ret; 4846 } 4847 4848 APFloat::opStatus 4849 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4850 const DoubleAPFloat &Addend, 4851 APFloat::roundingMode RM) { 4852 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4853 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4854 auto Ret = Tmp.fusedMultiplyAdd( 4855 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4856 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4857 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4858 return Ret; 4859 } 4860 4861 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4862 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4863 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4864 auto Ret = Tmp.roundToIntegral(RM); 4865 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4866 return Ret; 4867 } 4868 4869 void DoubleAPFloat::changeSign() { 4870 Floats[0].changeSign(); 4871 Floats[1].changeSign(); 4872 } 4873 4874 APFloat::cmpResult 4875 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4876 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4877 if (Result != cmpEqual) 4878 return Result; 4879 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4880 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4881 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4882 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4883 if (Against && !RHSAgainst) 4884 return cmpLessThan; 4885 if (!Against && RHSAgainst) 4886 return cmpGreaterThan; 4887 if (!Against && !RHSAgainst) 4888 return Result; 4889 if (Against && RHSAgainst) 4890 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4891 } 4892 return Result; 4893 } 4894 4895 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4896 return Floats[0].getCategory(); 4897 } 4898 4899 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } 4900 4901 void DoubleAPFloat::makeInf(bool Neg) { 4902 Floats[0].makeInf(Neg); 4903 Floats[1].makeZero(/* Neg = */ false); 4904 } 4905 4906 void DoubleAPFloat::makeZero(bool Neg) { 4907 Floats[0].makeZero(Neg); 4908 Floats[1].makeZero(/* Neg = */ false); 4909 } 4910 4911 void DoubleAPFloat::makeLargest(bool Neg) { 4912 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4913 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 4914 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 4915 if (Neg) 4916 changeSign(); 4917 } 4918 4919 void DoubleAPFloat::makeSmallest(bool Neg) { 4920 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4921 Floats[0].makeSmallest(Neg); 4922 Floats[1].makeZero(/* Neg = */ false); 4923 } 4924 4925 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 4926 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4927 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 4928 if (Neg) 4929 Floats[0].changeSign(); 4930 Floats[1].makeZero(/* Neg = */ false); 4931 } 4932 4933 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4934 Floats[0].makeNaN(SNaN, Neg, fill); 4935 Floats[1].makeZero(/* Neg = */ false); 4936 } 4937 4938 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 4939 auto Result = Floats[0].compare(RHS.Floats[0]); 4940 // |Float[0]| > |Float[1]| 4941 if (Result == APFloat::cmpEqual) 4942 return Floats[1].compare(RHS.Floats[1]); 4943 return Result; 4944 } 4945 4946 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 4947 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 4948 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 4949 } 4950 4951 hash_code hash_value(const DoubleAPFloat &Arg) { 4952 if (Arg.Floats) 4953 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 4954 return hash_combine(Arg.Semantics); 4955 } 4956 4957 APInt DoubleAPFloat::bitcastToAPInt() const { 4958 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4959 uint64_t Data[] = { 4960 Floats[0].bitcastToAPInt().getRawData()[0], 4961 Floats[1].bitcastToAPInt().getRawData()[0], 4962 }; 4963 return APInt(128, 2, Data); 4964 } 4965 4966 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 4967 roundingMode RM) { 4968 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4969 APFloat Tmp(semPPCDoubleDoubleLegacy); 4970 auto Ret = Tmp.convertFromString(S, RM); 4971 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4972 return Ret; 4973 } 4974 4975 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 4976 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4977 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4978 auto Ret = Tmp.next(nextDown); 4979 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4980 return Ret; 4981 } 4982 4983 APFloat::opStatus 4984 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 4985 unsigned int Width, bool IsSigned, 4986 roundingMode RM, bool *IsExact) const { 4987 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4988 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 4989 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 4990 } 4991 4992 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 4993 bool IsSigned, 4994 roundingMode RM) { 4995 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4996 APFloat Tmp(semPPCDoubleDoubleLegacy); 4997 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 4998 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4999 return Ret; 5000 } 5001 5002 APFloat::opStatus 5003 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5004 unsigned int InputSize, 5005 bool IsSigned, roundingMode RM) { 5006 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5007 APFloat Tmp(semPPCDoubleDoubleLegacy); 5008 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5009 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5010 return Ret; 5011 } 5012 5013 APFloat::opStatus 5014 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5015 unsigned int InputSize, 5016 bool IsSigned, roundingMode RM) { 5017 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5018 APFloat Tmp(semPPCDoubleDoubleLegacy); 5019 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5020 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5021 return Ret; 5022 } 5023 5024 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5025 unsigned int HexDigits, 5026 bool UpperCase, 5027 roundingMode RM) const { 5028 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5029 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5030 .convertToHexString(DST, HexDigits, UpperCase, RM); 5031 } 5032 5033 bool DoubleAPFloat::isDenormal() const { 5034 return getCategory() == fcNormal && 5035 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5036 // (double)(Hi + Lo) == Hi defines a normal number. 5037 Floats[0] != Floats[0] + Floats[1]); 5038 } 5039 5040 bool DoubleAPFloat::isSmallest() const { 5041 if (getCategory() != fcNormal) 5042 return false; 5043 DoubleAPFloat Tmp(*this); 5044 Tmp.makeSmallest(this->isNegative()); 5045 return Tmp.compare(*this) == cmpEqual; 5046 } 5047 5048 bool DoubleAPFloat::isSmallestNormalized() const { 5049 if (getCategory() != fcNormal) 5050 return false; 5051 5052 DoubleAPFloat Tmp(*this); 5053 Tmp.makeSmallestNormalized(this->isNegative()); 5054 return Tmp.compare(*this) == cmpEqual; 5055 } 5056 5057 bool DoubleAPFloat::isLargest() const { 5058 if (getCategory() != fcNormal) 5059 return false; 5060 DoubleAPFloat Tmp(*this); 5061 Tmp.makeLargest(this->isNegative()); 5062 return Tmp.compare(*this) == cmpEqual; 5063 } 5064 5065 bool DoubleAPFloat::isInteger() const { 5066 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5067 return Floats[0].isInteger() && Floats[1].isInteger(); 5068 } 5069 5070 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5071 unsigned FormatPrecision, 5072 unsigned FormatMaxPadding, 5073 bool TruncateZero) const { 5074 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5075 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5076 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5077 } 5078 5079 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5080 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5081 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5082 if (!inv) 5083 return Tmp.getExactInverse(nullptr); 5084 APFloat Inv(semPPCDoubleDoubleLegacy); 5085 auto Ret = Tmp.getExactInverse(&Inv); 5086 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5087 return Ret; 5088 } 5089 5090 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp, 5091 APFloat::roundingMode RM) { 5092 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5093 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5094 scalbn(Arg.Floats[1], Exp, RM)); 5095 } 5096 5097 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5098 APFloat::roundingMode RM) { 5099 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5100 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5101 APFloat Second = Arg.Floats[1]; 5102 if (Arg.getCategory() == APFloat::fcNormal) 5103 Second = scalbn(Second, -Exp, RM); 5104 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5105 } 5106 5107 } // namespace detail 5108 5109 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5110 if (usesLayout<IEEEFloat>(Semantics)) { 5111 new (&IEEE) IEEEFloat(std::move(F)); 5112 return; 5113 } 5114 if (usesLayout<DoubleAPFloat>(Semantics)) { 5115 const fltSemantics& S = F.getSemantics(); 5116 new (&Double) 5117 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5118 APFloat(semIEEEdouble)); 5119 return; 5120 } 5121 llvm_unreachable("Unexpected semantics"); 5122 } 5123 5124 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5125 roundingMode RM) { 5126 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5127 } 5128 5129 hash_code hash_value(const APFloat &Arg) { 5130 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5131 return hash_value(Arg.U.IEEE); 5132 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5133 return hash_value(Arg.U.Double); 5134 llvm_unreachable("Unexpected semantics"); 5135 } 5136 5137 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5138 : APFloat(Semantics) { 5139 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5140 assert(StatusOrErr && "Invalid floating point representation"); 5141 consumeError(StatusOrErr.takeError()); 5142 } 5143 5144 FPClassTest APFloat::classify() const { 5145 if (isZero()) 5146 return isNegative() ? fcNegZero : fcPosZero; 5147 if (isNormal()) 5148 return isNegative() ? fcNegNormal : fcPosNormal; 5149 if (isDenormal()) 5150 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5151 if (isInfinity()) 5152 return isNegative() ? fcNegInf : fcPosInf; 5153 assert(isNaN() && "Other class of FP constant"); 5154 return isSignaling() ? fcSNan : fcQNan; 5155 } 5156 5157 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5158 roundingMode RM, bool *losesInfo) { 5159 if (&getSemantics() == &ToSemantics) { 5160 *losesInfo = false; 5161 return opOK; 5162 } 5163 if (usesLayout<IEEEFloat>(getSemantics()) && 5164 usesLayout<IEEEFloat>(ToSemantics)) 5165 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5166 if (usesLayout<IEEEFloat>(getSemantics()) && 5167 usesLayout<DoubleAPFloat>(ToSemantics)) { 5168 assert(&ToSemantics == &semPPCDoubleDouble); 5169 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5170 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5171 return Ret; 5172 } 5173 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5174 usesLayout<IEEEFloat>(ToSemantics)) { 5175 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5176 *this = APFloat(std::move(getIEEE()), ToSemantics); 5177 return Ret; 5178 } 5179 llvm_unreachable("Unexpected semantics"); 5180 } 5181 5182 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5183 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5184 } 5185 5186 void APFloat::print(raw_ostream &OS) const { 5187 SmallVector<char, 16> Buffer; 5188 toString(Buffer); 5189 OS << Buffer << "\n"; 5190 } 5191 5192 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5193 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 5194 #endif 5195 5196 void APFloat::Profile(FoldingSetNodeID &NID) const { 5197 NID.Add(bitcastToAPInt()); 5198 } 5199 5200 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 5201 an APSInt, whose initial bit-width and signed-ness are used to determine the 5202 precision of the conversion. 5203 */ 5204 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5205 roundingMode rounding_mode, 5206 bool *isExact) const { 5207 unsigned bitWidth = result.getBitWidth(); 5208 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5209 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5210 rounding_mode, isExact); 5211 // Keeps the original signed-ness. 5212 result = APInt(bitWidth, parts); 5213 return status; 5214 } 5215 5216 double APFloat::convertToDouble() const { 5217 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5218 return getIEEE().convertToDouble(); 5219 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5220 "Float semantics is not representable by IEEEdouble"); 5221 APFloat Temp = *this; 5222 bool LosesInfo; 5223 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5224 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5225 (void)St; 5226 return Temp.getIEEE().convertToDouble(); 5227 } 5228 5229 float APFloat::convertToFloat() const { 5230 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5231 return getIEEE().convertToFloat(); 5232 assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5233 "Float semantics is not representable by IEEEsingle"); 5234 APFloat Temp = *this; 5235 bool LosesInfo; 5236 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5237 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5238 (void)St; 5239 return Temp.getIEEE().convertToFloat(); 5240 } 5241 5242 } // namespace llvm 5243 5244 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5245