1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision floating 10 // point values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FloatingPointMode.h" 18 #include "llvm/ADT/FoldingSet.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringExtras.h" 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/Config/llvm-config.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/Error.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cstring> 29 #include <limits.h> 30 31 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 32 do { \ 33 if (usesLayout<IEEEFloat>(getSemantics())) \ 34 return U.IEEE.METHOD_CALL; \ 35 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 36 return U.Double.METHOD_CALL; \ 37 llvm_unreachable("Unexpected semantics"); \ 38 } while (false) 39 40 using namespace llvm; 41 42 /// A macro used to combine two fcCategory enums into one key which can be used 43 /// in a switch statement to classify how the interaction of two APFloat's 44 /// categories affects an operation. 45 /// 46 /// TODO: If clang source code is ever allowed to use constexpr in its own 47 /// codebase, change this into a static inline function. 
48 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 49 50 /* Assumed in hexadecimal significand parsing, and conversion to 51 hexadecimal strings. */ 52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 53 54 namespace llvm { 55 56 // How the nonfinite values Inf and NaN are represented. 57 enum class fltNonfiniteBehavior { 58 // Represents standard IEEE 754 behavior. A value is nonfinite if the 59 // exponent field is all 1s. In such cases, a value is Inf if the 60 // significand bits are all zero, and NaN otherwise 61 IEEE754, 62 63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN, 64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no 65 // representation for Inf, and operations that would ordinarily produce Inf 66 // produce NaN instead. 67 // The details of the NaN representation(s) in this form are determined by the 68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available 69 // encodings do not distinguish between signalling and quiet NaN. 70 NanOnly, 71 72 // This behavior is present in Float6E3M2FN, Float6E2M3FN, and 73 // Float4E2M1FN types, which do not support Inf or NaN values. 74 FiniteOnly, 75 }; 76 77 // How NaN values are represented. This is curently only used in combination 78 // with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE 79 // while having IEEE non-finite behavior is liable to lead to unexpected 80 // results. 81 enum class fltNanEncoding { 82 // Represents the standard IEEE behavior where a value is NaN if its 83 // exponent is all 1s and the significand is non-zero. 84 IEEE, 85 86 // Represents the behavior in the Float8E4M3FN floating point type where NaN 87 // is represented by having the exponent and mantissa set to all 1s. 88 // This behavior matches the FP8 E4M3 type described in 89 // https://arxiv.org/abs/2209.05433. 
We treat both signed and unsigned NaNs 90 // as non-signalling, although the paper does not state whether the NaN 91 // values are signalling or not. 92 AllOnes, 93 94 // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types 95 // where NaN is represented by a sign bit of 1 and all 0s in the exponent 96 // and mantissa (i.e. the negative zero encoding in a IEEE float). Since 97 // there is only one NaN value, it is treated as quiet NaN. This matches the 98 // behavior described in https://arxiv.org/abs/2206.02915 . 99 NegativeZero, 100 }; 101 102 /* Represents floating point arithmetic semantics. */ 103 struct fltSemantics { 104 /* The largest E such that 2^E is representable; this matches the 105 definition of IEEE 754. */ 106 APFloatBase::ExponentType maxExponent; 107 108 /* The smallest E such that 2^E is a normalized number; this 109 matches the definition of IEEE 754. */ 110 APFloatBase::ExponentType minExponent; 111 112 /* Number of bits in the significand. This includes the integer 113 bit. */ 114 unsigned int precision; 115 116 /* Number of bits actually used in the semantics. */ 117 unsigned int sizeInBits; 118 119 fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; 120 121 fltNanEncoding nanEncoding = fltNanEncoding::IEEE; 122 // Returns true if any number described by this semantics can be precisely 123 // represented by the specified semantics. Does not take into account 124 // the value of fltNonfiniteBehavior. 
125 bool isRepresentableBy(const fltSemantics &S) const { 126 return maxExponent <= S.maxExponent && minExponent >= S.minExponent && 127 precision <= S.precision; 128 } 129 }; 130 131 static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16}; 132 static constexpr fltSemantics semBFloat = {127, -126, 8, 16}; 133 static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32}; 134 static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; 135 static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128}; 136 static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; 137 static constexpr fltSemantics semFloat8E5M2FNUZ = { 138 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; 139 static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8}; 140 static constexpr fltSemantics semFloat8E4M3FN = { 141 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes}; 142 static constexpr fltSemantics semFloat8E4M3FNUZ = { 143 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; 144 static constexpr fltSemantics semFloat8E4M3B11FNUZ = { 145 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; 146 static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; 147 static constexpr fltSemantics semFloat6E3M2FN = { 148 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly}; 149 static constexpr fltSemantics semFloat6E2M3FN = { 150 2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly}; 151 static constexpr fltSemantics semFloat4E2M1FN = { 152 2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly}; 153 static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 154 static constexpr fltSemantics semBogus = {0, 0, 0, 0}; 155 156 /* The IBM double-double semantics. Such a number consists of a pair of IEEE 157 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, 158 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. 
159 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent 160 to each other, and two 11-bit exponents. 161 162 Note: we need to make the value different from semBogus as otherwise 163 an unsafe optimization may collapse both values to a single address, 164 and we heavily rely on them having distinct addresses. */ 165 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; 166 167 /* These are legacy semantics for the fallback, inaccrurate implementation of 168 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the 169 operation. It's equivalent to having an IEEE number with consecutive 106 170 bits of mantissa and 11 bits of exponent. 171 172 It's not equivalent to IBM double-double. For example, a legit IBM 173 double-double, 1 + epsilon: 174 175 1 + epsilon = 1 + (1 >> 1076) 176 177 is not representable by a consecutive 106 bits of mantissa. 178 179 Currently, these semantics are used in the following way: 180 181 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> 182 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> 183 semPPCDoubleDoubleLegacy -> IEEE operations 184 185 We use bitcastToAPInt() to get the bit representation (in APInt) of the 186 underlying IEEEdouble, then use the APInt constructor to construct the 187 legacy IEEE float. 188 189 TODO: Implement all operations in semPPCDoubleDouble, and delete these 190 semantics. 
*/ 191 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 192 53 + 53, 128}; 193 194 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 195 switch (S) { 196 case S_IEEEhalf: 197 return IEEEhalf(); 198 case S_BFloat: 199 return BFloat(); 200 case S_IEEEsingle: 201 return IEEEsingle(); 202 case S_IEEEdouble: 203 return IEEEdouble(); 204 case S_IEEEquad: 205 return IEEEquad(); 206 case S_PPCDoubleDouble: 207 return PPCDoubleDouble(); 208 case S_Float8E5M2: 209 return Float8E5M2(); 210 case S_Float8E5M2FNUZ: 211 return Float8E5M2FNUZ(); 212 case S_Float8E4M3: 213 return Float8E4M3(); 214 case S_Float8E4M3FN: 215 return Float8E4M3FN(); 216 case S_Float8E4M3FNUZ: 217 return Float8E4M3FNUZ(); 218 case S_Float8E4M3B11FNUZ: 219 return Float8E4M3B11FNUZ(); 220 case S_FloatTF32: 221 return FloatTF32(); 222 case S_Float6E3M2FN: 223 return Float6E3M2FN(); 224 case S_Float6E2M3FN: 225 return Float6E2M3FN(); 226 case S_Float4E2M1FN: 227 return Float4E2M1FN(); 228 case S_x87DoubleExtended: 229 return x87DoubleExtended(); 230 } 231 llvm_unreachable("Unrecognised floating semantics"); 232 } 233 234 APFloatBase::Semantics 235 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 236 if (&Sem == &llvm::APFloat::IEEEhalf()) 237 return S_IEEEhalf; 238 else if (&Sem == &llvm::APFloat::BFloat()) 239 return S_BFloat; 240 else if (&Sem == &llvm::APFloat::IEEEsingle()) 241 return S_IEEEsingle; 242 else if (&Sem == &llvm::APFloat::IEEEdouble()) 243 return S_IEEEdouble; 244 else if (&Sem == &llvm::APFloat::IEEEquad()) 245 return S_IEEEquad; 246 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 247 return S_PPCDoubleDouble; 248 else if (&Sem == &llvm::APFloat::Float8E5M2()) 249 return S_Float8E5M2; 250 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 251 return S_Float8E5M2FNUZ; 252 else if (&Sem == &llvm::APFloat::Float8E4M3()) 253 return S_Float8E4M3; 254 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 255 return S_Float8E4M3FN; 256 
else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 257 return S_Float8E4M3FNUZ; 258 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 259 return S_Float8E4M3B11FNUZ; 260 else if (&Sem == &llvm::APFloat::FloatTF32()) 261 return S_FloatTF32; 262 else if (&Sem == &llvm::APFloat::Float6E3M2FN()) 263 return S_Float6E3M2FN; 264 else if (&Sem == &llvm::APFloat::Float6E2M3FN()) 265 return S_Float6E2M3FN; 266 else if (&Sem == &llvm::APFloat::Float4E2M1FN()) 267 return S_Float4E2M1FN; 268 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 269 return S_x87DoubleExtended; 270 else 271 llvm_unreachable("Unknown floating semantics"); 272 } 273 274 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 275 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 276 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 277 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 278 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 279 const fltSemantics &APFloatBase::PPCDoubleDouble() { 280 return semPPCDoubleDouble; 281 } 282 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 283 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 284 const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; } 285 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 286 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 287 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 288 return semFloat8E4M3B11FNUZ; 289 } 290 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } 291 const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } 292 const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } 293 const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; } 294 const fltSemantics &APFloatBase::x87DoubleExtended() { 295 return 
semX87DoubleExtended; 296 } 297 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 298 299 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 300 constexpr RoundingMode APFloatBase::rmTowardPositive; 301 constexpr RoundingMode APFloatBase::rmTowardNegative; 302 constexpr RoundingMode APFloatBase::rmTowardZero; 303 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 304 305 /* A tight upper bound on number of parts required to hold the value 306 pow(5, power) is 307 308 power * 815 / (351 * integerPartWidth) + 1 309 310 However, whilst the result may require only this many parts, 311 because we are multiplying two values to get it, the 312 multiplication may require an extra part with the excess part 313 being zero (consider the trivial case of 1 * 1, tcFullMultiply 314 requires two parts to hold the single-part result). So we add an 315 extra one to guarantee enough space whilst multiplying. */ 316 const unsigned int maxExponent = 16383; 317 const unsigned int maxPrecision = 113; 318 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 319 const unsigned int maxPowerOfFiveParts = 320 2 + 321 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 322 323 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 324 return semantics.precision; 325 } 326 APFloatBase::ExponentType 327 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 328 return semantics.maxExponent; 329 } 330 APFloatBase::ExponentType 331 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 332 return semantics.minExponent; 333 } 334 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 335 return semantics.sizeInBits; 336 } 337 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 338 bool isSigned) { 339 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 340 // at least one more bit than the MaxExponent to hold 
the max FP value. 341 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 342 // Extra sign bit needed. 343 if (isSigned) 344 ++MinBitWidth; 345 return MinBitWidth; 346 } 347 348 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 349 const fltSemantics &Dst) { 350 // Exponent range must be larger. 351 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 352 return false; 353 354 // If the mantissa is long enough, the result value could still be denormal 355 // with a larger exponent range. 356 // 357 // FIXME: This condition is probably not accurate but also shouldn't be a 358 // practical concern with existing types. 359 return Dst.precision >= Src.precision; 360 } 361 362 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 363 return Sem.sizeInBits; 364 } 365 366 static constexpr APFloatBase::ExponentType 367 exponentZero(const fltSemantics &semantics) { 368 return semantics.minExponent - 1; 369 } 370 371 static constexpr APFloatBase::ExponentType 372 exponentInf(const fltSemantics &semantics) { 373 return semantics.maxExponent + 1; 374 } 375 376 static constexpr APFloatBase::ExponentType 377 exponentNaN(const fltSemantics &semantics) { 378 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 379 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 380 return exponentZero(semantics); 381 return semantics.maxExponent; 382 } 383 return semantics.maxExponent + 1; 384 } 385 386 /* A bunch of private, handy routines. */ 387 388 static inline Error createError(const Twine &Err) { 389 return make_error<StringError>(Err, inconvertibleErrorCode()); 390 } 391 392 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 393 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 394 } 395 396 /* Returns 0U-9U. Return values >= 10U are not digits. 
*/ 397 static inline unsigned int 398 decDigitValue(unsigned int c) 399 { 400 return c - '0'; 401 } 402 403 /* Return the value of a decimal exponent of the form 404 [+-]ddddddd. 405 406 If the exponent overflows, returns a large exponent with the 407 appropriate sign. */ 408 static Expected<int> readExponent(StringRef::iterator begin, 409 StringRef::iterator end) { 410 bool isNegative; 411 unsigned int absExponent; 412 const unsigned int overlargeExponent = 24000; /* FIXME. */ 413 StringRef::iterator p = begin; 414 415 // Treat no exponent as 0 to match binutils 416 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 417 return 0; 418 } 419 420 isNegative = (*p == '-'); 421 if (*p == '-' || *p == '+') { 422 p++; 423 if (p == end) 424 return createError("Exponent has no digits"); 425 } 426 427 absExponent = decDigitValue(*p++); 428 if (absExponent >= 10U) 429 return createError("Invalid character in exponent"); 430 431 for (; p != end; ++p) { 432 unsigned int value; 433 434 value = decDigitValue(*p); 435 if (value >= 10U) 436 return createError("Invalid character in exponent"); 437 438 absExponent = absExponent * 10U + value; 439 if (absExponent >= overlargeExponent) { 440 absExponent = overlargeExponent; 441 break; 442 } 443 } 444 445 if (isNegative) 446 return -(int) absExponent; 447 else 448 return (int) absExponent; 449 } 450 451 /* This is ugly and needs cleaning up, but I don't immediately see 452 how whilst remaining safe. 
*/ 453 static Expected<int> totalExponent(StringRef::iterator p, 454 StringRef::iterator end, 455 int exponentAdjustment) { 456 int unsignedExponent; 457 bool negative, overflow; 458 int exponent = 0; 459 460 if (p == end) 461 return createError("Exponent has no digits"); 462 463 negative = *p == '-'; 464 if (*p == '-' || *p == '+') { 465 p++; 466 if (p == end) 467 return createError("Exponent has no digits"); 468 } 469 470 unsignedExponent = 0; 471 overflow = false; 472 for (; p != end; ++p) { 473 unsigned int value; 474 475 value = decDigitValue(*p); 476 if (value >= 10U) 477 return createError("Invalid character in exponent"); 478 479 unsignedExponent = unsignedExponent * 10 + value; 480 if (unsignedExponent > 32767) { 481 overflow = true; 482 break; 483 } 484 } 485 486 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 487 overflow = true; 488 489 if (!overflow) { 490 exponent = unsignedExponent; 491 if (negative) 492 exponent = -exponent; 493 exponent += exponentAdjustment; 494 if (exponent > 32767 || exponent < -32768) 495 overflow = true; 496 } 497 498 if (overflow) 499 exponent = negative ? -32768: 32767; 500 501 return exponent; 502 } 503 504 static Expected<StringRef::iterator> 505 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 506 StringRef::iterator *dot) { 507 StringRef::iterator p = begin; 508 *dot = end; 509 while (p != end && *p == '0') 510 p++; 511 512 if (p != end && *p == '.') { 513 *dot = p++; 514 515 if (end - begin == 1) 516 return createError("Significand has no digits"); 517 518 while (p != end && *p == '0') 519 p++; 520 } 521 522 return p; 523 } 524 525 /* Given a normal decimal floating point number of the form 526 527 dddd.dddd[eE][+-]ddd 528 529 where the decimal point and exponent are optional, fill out the 530 structure D. 
Exponent is appropriate if the significand is 531 treated as an integer, and normalizedExponent if the significand 532 is taken to have the decimal point after a single leading 533 non-zero digit. 534 535 If the value is zero, V->firstSigDigit points to a non-digit, and 536 the return exponent is zero. 537 */ 538 struct decimalInfo { 539 const char *firstSigDigit; 540 const char *lastSigDigit; 541 int exponent; 542 int normalizedExponent; 543 }; 544 545 static Error interpretDecimal(StringRef::iterator begin, 546 StringRef::iterator end, decimalInfo *D) { 547 StringRef::iterator dot = end; 548 549 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 550 if (!PtrOrErr) 551 return PtrOrErr.takeError(); 552 StringRef::iterator p = *PtrOrErr; 553 554 D->firstSigDigit = p; 555 D->exponent = 0; 556 D->normalizedExponent = 0; 557 558 for (; p != end; ++p) { 559 if (*p == '.') { 560 if (dot != end) 561 return createError("String contains multiple dots"); 562 dot = p++; 563 if (p == end) 564 break; 565 } 566 if (decDigitValue(*p) >= 10U) 567 break; 568 } 569 570 if (p != end) { 571 if (*p != 'e' && *p != 'E') 572 return createError("Invalid character in significand"); 573 if (p == begin) 574 return createError("Significand has no digits"); 575 if (dot != end && p - begin == 1) 576 return createError("Significand has no digits"); 577 578 /* p points to the first non-digit in the string */ 579 auto ExpOrErr = readExponent(p + 1, end); 580 if (!ExpOrErr) 581 return ExpOrErr.takeError(); 582 D->exponent = *ExpOrErr; 583 584 /* Implied decimal point? */ 585 if (dot == end) 586 dot = p; 587 } 588 589 /* If number is all zeroes accept any exponent. */ 590 if (p != D->firstSigDigit) { 591 /* Drop insignificant trailing zeroes. */ 592 if (p != begin) { 593 do 594 do 595 p--; 596 while (p != begin && *p == '0'); 597 while (p != begin && *p == '.'); 598 } 599 600 /* Adjust the exponents for any decimal point. 
*/ 601 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 602 D->normalizedExponent = (D->exponent + 603 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 604 - (dot > D->firstSigDigit && dot < p))); 605 } 606 607 D->lastSigDigit = p; 608 return Error::success(); 609 } 610 611 /* Return the trailing fraction of a hexadecimal number. 612 DIGITVALUE is the first hex digit of the fraction, P points to 613 the next digit. */ 614 static Expected<lostFraction> 615 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 616 unsigned int digitValue) { 617 unsigned int hexDigit; 618 619 /* If the first trailing digit isn't 0 or 8 we can work out the 620 fraction immediately. */ 621 if (digitValue > 8) 622 return lfMoreThanHalf; 623 else if (digitValue < 8 && digitValue > 0) 624 return lfLessThanHalf; 625 626 // Otherwise we need to find the first non-zero digit. 627 while (p != end && (*p == '0' || *p == '.')) 628 p++; 629 630 if (p == end) 631 return createError("Invalid trailing hexadecimal fraction!"); 632 633 hexDigit = hexDigitValue(*p); 634 635 /* If we ran off the end it is exactly zero or one-half, otherwise 636 a little more. */ 637 if (hexDigit == UINT_MAX) 638 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 639 else 640 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 641 } 642 643 /* Return the fraction lost were a bignum truncated losing the least 644 significant BITS bits. */ 645 static lostFraction 646 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 647 unsigned int partCount, 648 unsigned int bits) 649 { 650 unsigned int lsb; 651 652 lsb = APInt::tcLSB(parts, partCount); 653 654 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. 
*/ 655 if (bits <= lsb) 656 return lfExactlyZero; 657 if (bits == lsb + 1) 658 return lfExactlyHalf; 659 if (bits <= partCount * APFloatBase::integerPartWidth && 660 APInt::tcExtractBit(parts, bits - 1)) 661 return lfMoreThanHalf; 662 663 return lfLessThanHalf; 664 } 665 666 /* Shift DST right BITS bits noting lost fraction. */ 667 static lostFraction 668 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 669 { 670 lostFraction lost_fraction; 671 672 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 673 674 APInt::tcShiftRight(dst, parts, bits); 675 676 return lost_fraction; 677 } 678 679 /* Combine the effect of two lost fractions. */ 680 static lostFraction 681 combineLostFractions(lostFraction moreSignificant, 682 lostFraction lessSignificant) 683 { 684 if (lessSignificant != lfExactlyZero) { 685 if (moreSignificant == lfExactlyZero) 686 moreSignificant = lfLessThanHalf; 687 else if (moreSignificant == lfExactlyHalf) 688 moreSignificant = lfMoreThanHalf; 689 } 690 691 return moreSignificant; 692 } 693 694 /* The error from the true value, in half-ulps, on multiplying two 695 floating point numbers, which differ from the value they 696 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 697 than the returned value. 698 699 See "How to Read Floating Point Numbers Accurately" by William D 700 Clinger. */ 701 static unsigned int 702 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 703 { 704 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 705 706 if (HUerr1 + HUerr2 == 0) 707 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 708 else 709 return inexactMultiply + 2 * (HUerr1 + HUerr2); 710 } 711 712 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 713 when the least significant BITS are truncated. BITS cannot be 714 zero. 
*/ 715 static APFloatBase::integerPart 716 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, 717 bool isNearest) { 718 unsigned int count, partBits; 719 APFloatBase::integerPart part, boundary; 720 721 assert(bits != 0); 722 723 bits--; 724 count = bits / APFloatBase::integerPartWidth; 725 partBits = bits % APFloatBase::integerPartWidth + 1; 726 727 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); 728 729 if (isNearest) 730 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); 731 else 732 boundary = 0; 733 734 if (count == 0) { 735 if (part - boundary <= boundary - part) 736 return part - boundary; 737 else 738 return boundary - part; 739 } 740 741 if (part == boundary) { 742 while (--count) 743 if (parts[count]) 744 return ~(APFloatBase::integerPart) 0; /* A lot. */ 745 746 return parts[0]; 747 } else if (part == boundary - 1) { 748 while (--count) 749 if (~parts[count]) 750 return ~(APFloatBase::integerPart) 0; /* A lot. */ 751 752 return -parts[0]; 753 } 754 755 return ~(APFloatBase::integerPart) 0; /* A lot. */ 756 } 757 758 /* Place pow(5, power) in DST, and return the number of parts used. 759 DST must be at least one part larger than size of the answer. */ 760 static unsigned int 761 powerOf5(APFloatBase::integerPart *dst, unsigned int power) { 762 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; 763 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 764 pow5s[0] = 78125 * 5; 765 766 unsigned int partsCount = 1; 767 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 768 unsigned int result; 769 assert(power <= maxExponent); 770 771 p1 = dst; 772 p2 = scratch; 773 774 *p1 = firstEightPowers[power & 7]; 775 power >>= 3; 776 777 result = 1; 778 pow5 = pow5s; 779 780 for (unsigned int n = 0; power; power >>= 1, n++) { 781 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. 
*/ 782 if (n != 0) { 783 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount, 784 partsCount, partsCount); 785 partsCount *= 2; 786 if (pow5[partsCount - 1] == 0) 787 partsCount--; 788 } 789 790 if (power & 1) { 791 APFloatBase::integerPart *tmp; 792 793 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount); 794 result += partsCount; 795 if (p2[result - 1] == 0) 796 result--; 797 798 /* Now result is in p1 with partsCount parts and p2 is scratch 799 space. */ 800 tmp = p1; 801 p1 = p2; 802 p2 = tmp; 803 } 804 805 pow5 += partsCount; 806 } 807 808 if (p1 != dst) 809 APInt::tcAssign(dst, p1, result); 810 811 return result; 812 } 813 814 /* Zero at the end to avoid modular arithmetic when adding one; used 815 when rounding up during hexadecimal output. */ 816 static const char hexDigitsLower[] = "0123456789abcdef0"; 817 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 818 static const char infinityL[] = "infinity"; 819 static const char infinityU[] = "INFINITY"; 820 static const char NaNL[] = "nan"; 821 static const char NaNU[] = "NAN"; 822 823 /* Write out an integerPart in hexadecimal, starting with the most 824 significant nibble. Write out exactly COUNT hexdigits, return 825 COUNT. */ 826 static unsigned int 827 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, 828 const char *hexDigitChars) 829 { 830 unsigned int result = count; 831 832 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); 833 834 part >>= (APFloatBase::integerPartWidth - 4 * count); 835 while (count--) { 836 dst[count] = hexDigitChars[part & 0xf]; 837 part >>= 4; 838 } 839 840 return result; 841 } 842 843 /* Write out an unsigned decimal integer. 
*/ 844 static char * 845 writeUnsignedDecimal (char *dst, unsigned int n) 846 { 847 char buff[40], *p; 848 849 p = buff; 850 do 851 *p++ = '0' + n % 10; 852 while (n /= 10); 853 854 do 855 *dst++ = *--p; 856 while (p != buff); 857 858 return dst; 859 } 860 861 /* Write out a signed decimal integer. */ 862 static char * 863 writeSignedDecimal (char *dst, int value) 864 { 865 if (value < 0) { 866 *dst++ = '-'; 867 dst = writeUnsignedDecimal(dst, -(unsigned) value); 868 } else 869 dst = writeUnsignedDecimal(dst, value); 870 871 return dst; 872 } 873 874 namespace detail { 875 /* Constructors. */ 876 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 877 unsigned int count; 878 879 semantics = ourSemantics; 880 count = partCount(); 881 if (count > 1) 882 significand.parts = new integerPart[count]; 883 } 884 885 void IEEEFloat::freeSignificand() { 886 if (needsCleanup()) 887 delete [] significand.parts; 888 } 889 890 void IEEEFloat::assign(const IEEEFloat &rhs) { 891 assert(semantics == rhs.semantics); 892 893 sign = rhs.sign; 894 category = rhs.category; 895 exponent = rhs.exponent; 896 if (isFiniteNonZero() || category == fcNaN) 897 copySignificand(rhs); 898 } 899 900 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 901 assert(isFiniteNonZero() || category == fcNaN); 902 assert(rhs.partCount() >= partCount()); 903 904 APInt::tcAssign(significandParts(), rhs.significandParts(), 905 partCount()); 906 } 907 908 /* Make this number a NaN, with an arbitrary but deterministic value 909 for the significand. If double or longer, this is a signalling NaN, 910 which may not be ideal. If float, this is QNaN(0). 
*/ 911 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 912 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 913 llvm_unreachable("This floating point format does not support NaN"); 914 915 category = fcNaN; 916 sign = Negative; 917 exponent = exponentNaN(); 918 919 integerPart *significand = significandParts(); 920 unsigned numParts = partCount(); 921 922 APInt fill_storage; 923 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 924 // Finite-only types do not distinguish signalling and quiet NaN, so 925 // make them all signalling. 926 SNaN = false; 927 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 928 sign = true; 929 fill_storage = APInt::getZero(semantics->precision - 1); 930 } else { 931 fill_storage = APInt::getAllOnes(semantics->precision - 1); 932 } 933 fill = &fill_storage; 934 } 935 936 // Set the significand bits to the fill. 937 if (!fill || fill->getNumWords() < numParts) 938 APInt::tcSet(significand, 0, numParts); 939 if (fill) { 940 APInt::tcAssign(significand, fill->getRawData(), 941 std::min(fill->getNumWords(), numParts)); 942 943 // Zero out the excess bits of the significand. 944 unsigned bitsToPreserve = semantics->precision - 1; 945 unsigned part = bitsToPreserve / 64; 946 bitsToPreserve %= 64; 947 significand[part] &= ((1ULL << bitsToPreserve) - 1); 948 for (part++; part != numParts; ++part) 949 significand[part] = 0; 950 } 951 952 unsigned QNaNBit = semantics->precision - 2; 953 954 if (SNaN) { 955 // We always have to clear the QNaN bit to make it an SNaN. 956 APInt::tcClearBit(significand, QNaNBit); 957 958 // If there are no bits set in the payload, we have to set 959 // *something* to make it a NaN instead of an infinity; 960 // conventionally, this is the next bit down from the QNaN bit. 
961 if (APInt::tcIsZero(significand, numParts)) 962 APInt::tcSetBit(significand, QNaNBit - 1); 963 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 964 // The only NaN is a quiet NaN, and it has no bits sets in the significand. 965 // Do nothing. 966 } else { 967 // We always have to set the QNaN bit to make it a QNaN. 968 APInt::tcSetBit(significand, QNaNBit); 969 } 970 971 // For x87 extended precision, we want to make a NaN, not a 972 // pseudo-NaN. Maybe we should expose the ability to make 973 // pseudo-NaNs? 974 if (semantics == &semX87DoubleExtended) 975 APInt::tcSetBit(significand, QNaNBit + 1); 976 } 977 978 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 979 if (this != &rhs) { 980 if (semantics != rhs.semantics) { 981 freeSignificand(); 982 initialize(rhs.semantics); 983 } 984 assign(rhs); 985 } 986 987 return *this; 988 } 989 990 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 991 freeSignificand(); 992 993 semantics = rhs.semantics; 994 significand = rhs.significand; 995 exponent = rhs.exponent; 996 category = rhs.category; 997 sign = rhs.sign; 998 999 rhs.semantics = &semBogus; 1000 return *this; 1001 } 1002 1003 bool IEEEFloat::isDenormal() const { 1004 return isFiniteNonZero() && (exponent == semantics->minExponent) && 1005 (APInt::tcExtractBit(significandParts(), 1006 semantics->precision - 1) == 0); 1007 } 1008 1009 bool IEEEFloat::isSmallest() const { 1010 // The smallest number by magnitude in our format will be the smallest 1011 // denormal, i.e. the floating point number with exponent being minimum 1012 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 
1013 return isFiniteNonZero() && exponent == semantics->minExponent && 1014 significandMSB() == 0; 1015 } 1016 1017 bool IEEEFloat::isSmallestNormalized() const { 1018 return getCategory() == fcNormal && exponent == semantics->minExponent && 1019 isSignificandAllZerosExceptMSB(); 1020 } 1021 1022 bool IEEEFloat::isSignificandAllOnes() const { 1023 // Test if the significand excluding the integral bit is all ones. This allows 1024 // us to test for binade boundaries. 1025 const integerPart *Parts = significandParts(); 1026 const unsigned PartCount = partCountForBits(semantics->precision); 1027 for (unsigned i = 0; i < PartCount - 1; i++) 1028 if (~Parts[i]) 1029 return false; 1030 1031 // Set the unused high bits to all ones when we compare. 1032 const unsigned NumHighBits = 1033 PartCount*integerPartWidth - semantics->precision + 1; 1034 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1035 "Can not have more high bits to fill than integerPartWidth"); 1036 const integerPart HighBitFill = 1037 ~integerPart(0) << (integerPartWidth - NumHighBits); 1038 if (~(Parts[PartCount - 1] | HighBitFill)) 1039 return false; 1040 1041 return true; 1042 } 1043 1044 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 1045 // Test if the significand excluding the integral bit is all ones except for 1046 // the least significant bit. 1047 const integerPart *Parts = significandParts(); 1048 1049 if (Parts[0] & 1) 1050 return false; 1051 1052 const unsigned PartCount = partCountForBits(semantics->precision); 1053 for (unsigned i = 0; i < PartCount - 1; i++) { 1054 if (~Parts[i] & ~unsigned{!i}) 1055 return false; 1056 } 1057 1058 // Set the unused high bits to all ones when we compare. 
1059 const unsigned NumHighBits = 1060 PartCount * integerPartWidth - semantics->precision + 1; 1061 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1062 "Can not have more high bits to fill than integerPartWidth"); 1063 const integerPart HighBitFill = ~integerPart(0) 1064 << (integerPartWidth - NumHighBits); 1065 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1066 return false; 1067 1068 return true; 1069 } 1070 1071 bool IEEEFloat::isSignificandAllZeros() const { 1072 // Test if the significand excluding the integral bit is all zeros. This 1073 // allows us to test for binade boundaries. 1074 const integerPart *Parts = significandParts(); 1075 const unsigned PartCount = partCountForBits(semantics->precision); 1076 1077 for (unsigned i = 0; i < PartCount - 1; i++) 1078 if (Parts[i]) 1079 return false; 1080 1081 // Compute how many bits are used in the final word. 1082 const unsigned NumHighBits = 1083 PartCount*integerPartWidth - semantics->precision + 1; 1084 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1085 "clear than integerPartWidth"); 1086 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1087 1088 if (Parts[PartCount - 1] & HighBitMask) 1089 return false; 1090 1091 return true; 1092 } 1093 1094 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1095 const integerPart *Parts = significandParts(); 1096 const unsigned PartCount = partCountForBits(semantics->precision); 1097 1098 for (unsigned i = 0; i < PartCount - 1; i++) { 1099 if (Parts[i]) 1100 return false; 1101 } 1102 1103 const unsigned NumHighBits = 1104 PartCount * integerPartWidth - semantics->precision + 1; 1105 return Parts[PartCount - 1] == integerPart(1) 1106 << (integerPartWidth - NumHighBits); 1107 } 1108 1109 bool IEEEFloat::isLargest() const { 1110 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1111 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1112 // The largest number by magnitude in our format 
will be the floating point 1113 // number with maximum exponent and with significand that is all ones except 1114 // the LSB. 1115 return isFiniteNonZero() && exponent == semantics->maxExponent && 1116 isSignificandAllOnesExceptLSB(); 1117 } else { 1118 // The largest number by magnitude in our format will be the floating point 1119 // number with maximum exponent and with significand that is all ones. 1120 return isFiniteNonZero() && exponent == semantics->maxExponent && 1121 isSignificandAllOnes(); 1122 } 1123 } 1124 1125 bool IEEEFloat::isInteger() const { 1126 // This could be made more efficient; I'm going for obviously correct. 1127 if (!isFinite()) return false; 1128 IEEEFloat truncated = *this; 1129 truncated.roundToIntegral(rmTowardZero); 1130 return compare(truncated) == cmpEqual; 1131 } 1132 1133 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1134 if (this == &rhs) 1135 return true; 1136 if (semantics != rhs.semantics || 1137 category != rhs.category || 1138 sign != rhs.sign) 1139 return false; 1140 if (category==fcZero || category==fcInfinity) 1141 return true; 1142 1143 if (isFiniteNonZero() && exponent != rhs.exponent) 1144 return false; 1145 1146 return std::equal(significandParts(), significandParts() + partCount(), 1147 rhs.significandParts()); 1148 } 1149 1150 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1151 initialize(&ourSemantics); 1152 sign = 0; 1153 category = fcNormal; 1154 zeroSignificand(); 1155 exponent = ourSemantics.precision - 1; 1156 significandParts()[0] = value; 1157 normalize(rmNearestTiesToEven, lfExactlyZero); 1158 } 1159 1160 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1161 initialize(&ourSemantics); 1162 makeZero(false); 1163 } 1164 1165 // Delegate to the previous constructor, because later copy constructor may 1166 // actually inspects category, which can't be garbage. 
1167 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1168 : IEEEFloat(ourSemantics) {} 1169 1170 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1171 initialize(rhs.semantics); 1172 assign(rhs); 1173 } 1174 1175 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1176 *this = std::move(rhs); 1177 } 1178 1179 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1180 1181 unsigned int IEEEFloat::partCount() const { 1182 return partCountForBits(semantics->precision + 1); 1183 } 1184 1185 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 1186 return const_cast<IEEEFloat *>(this)->significandParts(); 1187 } 1188 1189 IEEEFloat::integerPart *IEEEFloat::significandParts() { 1190 if (partCount() > 1) 1191 return significand.parts; 1192 else 1193 return &significand.part; 1194 } 1195 1196 void IEEEFloat::zeroSignificand() { 1197 APInt::tcSet(significandParts(), 0, partCount()); 1198 } 1199 1200 /* Increment an fcNormal floating point number's significand. */ 1201 void IEEEFloat::incrementSignificand() { 1202 integerPart carry; 1203 1204 carry = APInt::tcIncrement(significandParts(), partCount()); 1205 1206 /* Our callers should never cause us to overflow. */ 1207 assert(carry == 0); 1208 (void)carry; 1209 } 1210 1211 /* Add the significand of the RHS. Returns the carry flag. */ 1212 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1213 integerPart *parts; 1214 1215 parts = significandParts(); 1216 1217 assert(semantics == rhs.semantics); 1218 assert(exponent == rhs.exponent); 1219 1220 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1221 } 1222 1223 /* Subtract the significand of the RHS with a borrow flag. Returns 1224 the borrow flag. 
*/ 1225 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 1226 integerPart borrow) { 1227 integerPart *parts; 1228 1229 parts = significandParts(); 1230 1231 assert(semantics == rhs.semantics); 1232 assert(exponent == rhs.exponent); 1233 1234 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 1235 partCount()); 1236 } 1237 1238 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 1239 on to the full-precision result of the multiplication. Returns the 1240 lost fraction. */ 1241 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 1242 IEEEFloat addend) { 1243 unsigned int omsb; // One, not zero, based MSB. 1244 unsigned int partsCount, newPartsCount, precision; 1245 integerPart *lhsSignificand; 1246 integerPart scratch[4]; 1247 integerPart *fullSignificand; 1248 lostFraction lost_fraction; 1249 bool ignored; 1250 1251 assert(semantics == rhs.semantics); 1252 1253 precision = semantics->precision; 1254 1255 // Allocate space for twice as many bits as the original significand, plus one 1256 // extra bit for the addition to overflow into. 1257 newPartsCount = partCountForBits(precision * 2 + 1); 1258 1259 if (newPartsCount > 4) 1260 fullSignificand = new integerPart[newPartsCount]; 1261 else 1262 fullSignificand = scratch; 1263 1264 lhsSignificand = significandParts(); 1265 partsCount = partCount(); 1266 1267 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 1268 rhs.significandParts(), partsCount, partsCount); 1269 1270 lost_fraction = lfExactlyZero; 1271 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1272 exponent += rhs.exponent; 1273 1274 // Assume the operands involved in the multiplication are single-precision 1275 // FP, and the two multiplicants are: 1276 // *this = a23 . a22 ... a0 * 2^e1 1277 // rhs = b23 . b22 ... b0 * 2^e2 1278 // the result of multiplication is: 1279 // *this = c48 c47 c46 . c45 ... 
c0 * 2^(e1+e2) 1280 // Note that there are three significant bits at the left-hand side of the 1281 // radix point: two for the multiplication, and an overflow bit for the 1282 // addition (that will always be zero at this point). Move the radix point 1283 // toward left by two bits, and adjust exponent accordingly. 1284 exponent += 2; 1285 1286 if (addend.isNonZero()) { 1287 // The intermediate result of the multiplication has "2 * precision" 1288 // signicant bit; adjust the addend to be consistent with mul result. 1289 // 1290 Significand savedSignificand = significand; 1291 const fltSemantics *savedSemantics = semantics; 1292 fltSemantics extendedSemantics; 1293 opStatus status; 1294 unsigned int extendedPrecision; 1295 1296 // Normalize our MSB to one below the top bit to allow for overflow. 1297 extendedPrecision = 2 * precision + 1; 1298 if (omsb != extendedPrecision - 1) { 1299 assert(extendedPrecision > omsb); 1300 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1301 (extendedPrecision - 1) - omsb); 1302 exponent -= (extendedPrecision - 1) - omsb; 1303 } 1304 1305 /* Create new semantics. */ 1306 extendedSemantics = *semantics; 1307 extendedSemantics.precision = extendedPrecision; 1308 1309 if (newPartsCount == 1) 1310 significand.part = fullSignificand[0]; 1311 else 1312 significand.parts = fullSignificand; 1313 semantics = &extendedSemantics; 1314 1315 // Make a copy so we can convert it to the extended semantics. 1316 // Note that we cannot convert the addend directly, as the extendedSemantics 1317 // is a local variable (which we take a reference to). 1318 IEEEFloat extendedAddend(addend); 1319 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1320 assert(status == opOK); 1321 (void)status; 1322 1323 // Shift the significand of the addend right by one bit. 
This guarantees 1324 // that the high bit of the significand is zero (same as fullSignificand), 1325 // so the addition will overflow (if it does overflow at all) into the top bit. 1326 lost_fraction = extendedAddend.shiftSignificandRight(1); 1327 assert(lost_fraction == lfExactlyZero && 1328 "Lost precision while shifting addend for fused-multiply-add."); 1329 1330 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1331 1332 /* Restore our state. */ 1333 if (newPartsCount == 1) 1334 fullSignificand[0] = significand.part; 1335 significand = savedSignificand; 1336 semantics = savedSemantics; 1337 1338 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1339 } 1340 1341 // Convert the result having "2 * precision" significant-bits back to the one 1342 // having "precision" significant-bits. First, move the radix point from 1343 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1344 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1345 exponent -= precision + 1; 1346 1347 // In case MSB resides at the left-hand side of radix point, shift the 1348 // mantissa right by some amount to make sure the MSB reside right before 1349 // the radix point (i.e. "MSB . rest-significant-bits"). 1350 // 1351 // Note that the result is not normalized when "omsb < precision". So, the 1352 // caller needs to call IEEEFloat::normalize() if normalized value is 1353 // expected. 
1354 if (omsb > precision) { 1355 unsigned int bits, significantParts; 1356 lostFraction lf; 1357 1358 bits = omsb - precision; 1359 significantParts = partCountForBits(omsb); 1360 lf = shiftRight(fullSignificand, significantParts, bits); 1361 lost_fraction = combineLostFractions(lf, lost_fraction); 1362 exponent += bits; 1363 } 1364 1365 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1366 1367 if (newPartsCount > 4) 1368 delete [] fullSignificand; 1369 1370 return lost_fraction; 1371 } 1372 1373 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { 1374 return multiplySignificand(rhs, IEEEFloat(*semantics)); 1375 } 1376 1377 /* Multiply the significands of LHS and RHS to DST. */ 1378 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1379 unsigned int bit, i, partsCount; 1380 const integerPart *rhsSignificand; 1381 integerPart *lhsSignificand, *dividend, *divisor; 1382 integerPart scratch[4]; 1383 lostFraction lost_fraction; 1384 1385 assert(semantics == rhs.semantics); 1386 1387 lhsSignificand = significandParts(); 1388 rhsSignificand = rhs.significandParts(); 1389 partsCount = partCount(); 1390 1391 if (partsCount > 2) 1392 dividend = new integerPart[partsCount * 2]; 1393 else 1394 dividend = scratch; 1395 1396 divisor = dividend + partsCount; 1397 1398 /* Copy the dividend and divisor as they will be modified in-place. */ 1399 for (i = 0; i < partsCount; i++) { 1400 dividend[i] = lhsSignificand[i]; 1401 divisor[i] = rhsSignificand[i]; 1402 lhsSignificand[i] = 0; 1403 } 1404 1405 exponent -= rhs.exponent; 1406 1407 unsigned int precision = semantics->precision; 1408 1409 /* Normalize the divisor. */ 1410 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1411 if (bit) { 1412 exponent += bit; 1413 APInt::tcShiftLeft(divisor, partsCount, bit); 1414 } 1415 1416 /* Normalize the dividend. 
*/ 1417 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1418 if (bit) { 1419 exponent -= bit; 1420 APInt::tcShiftLeft(dividend, partsCount, bit); 1421 } 1422 1423 /* Ensure the dividend >= divisor initially for the loop below. 1424 Incidentally, this means that the division loop below is 1425 guaranteed to set the integer bit to one. */ 1426 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1427 exponent--; 1428 APInt::tcShiftLeft(dividend, partsCount, 1); 1429 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1430 } 1431 1432 /* Long division. */ 1433 for (bit = precision; bit; bit -= 1) { 1434 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1435 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1436 APInt::tcSetBit(lhsSignificand, bit - 1); 1437 } 1438 1439 APInt::tcShiftLeft(dividend, partsCount, 1); 1440 } 1441 1442 /* Figure out the lost fraction. */ 1443 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1444 1445 if (cmp > 0) 1446 lost_fraction = lfMoreThanHalf; 1447 else if (cmp == 0) 1448 lost_fraction = lfExactlyHalf; 1449 else if (APInt::tcIsZero(dividend, partsCount)) 1450 lost_fraction = lfExactlyZero; 1451 else 1452 lost_fraction = lfLessThanHalf; 1453 1454 if (partsCount > 2) 1455 delete [] dividend; 1456 1457 return lost_fraction; 1458 } 1459 1460 unsigned int IEEEFloat::significandMSB() const { 1461 return APInt::tcMSB(significandParts(), partCount()); 1462 } 1463 1464 unsigned int IEEEFloat::significandLSB() const { 1465 return APInt::tcLSB(significandParts(), partCount()); 1466 } 1467 1468 /* Note that a zero result is NOT normalized to fcZero. */ 1469 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1470 /* Our exponent should not overflow. 
*/ 1471 assert((ExponentType) (exponent + bits) >= exponent); 1472 1473 exponent += bits; 1474 1475 return shiftRight(significandParts(), partCount(), bits); 1476 } 1477 1478 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1479 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1480 assert(bits < semantics->precision); 1481 1482 if (bits) { 1483 unsigned int partsCount = partCount(); 1484 1485 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1486 exponent -= bits; 1487 1488 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1489 } 1490 } 1491 1492 IEEEFloat::cmpResult 1493 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1494 int compare; 1495 1496 assert(semantics == rhs.semantics); 1497 assert(isFiniteNonZero()); 1498 assert(rhs.isFiniteNonZero()); 1499 1500 compare = exponent - rhs.exponent; 1501 1502 /* If exponents are equal, do an unsigned bignum comparison of the 1503 significands. */ 1504 if (compare == 0) 1505 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1506 partCount()); 1507 1508 if (compare > 0) 1509 return cmpGreaterThan; 1510 else if (compare < 0) 1511 return cmpLessThan; 1512 else 1513 return cmpEqual; 1514 } 1515 1516 /* Set the least significant BITS bits of a bignum, clear the 1517 rest. */ 1518 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, 1519 unsigned bits) { 1520 unsigned i = 0; 1521 while (bits > APInt::APINT_BITS_PER_WORD) { 1522 dst[i++] = ~(APInt::WordType)0; 1523 bits -= APInt::APINT_BITS_PER_WORD; 1524 } 1525 1526 if (bits) 1527 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits); 1528 1529 while (i < parts) 1530 dst[i++] = 0; 1531 } 1532 1533 /* Handle overflow. Sign is preserved. We either become infinity or 1534 the largest finite number. 
*/ 1535 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1536 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) { 1537 /* Infinity? */ 1538 if (rounding_mode == rmNearestTiesToEven || 1539 rounding_mode == rmNearestTiesToAway || 1540 (rounding_mode == rmTowardPositive && !sign) || 1541 (rounding_mode == rmTowardNegative && sign)) { 1542 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1543 makeNaN(false, sign); 1544 else 1545 category = fcInfinity; 1546 return static_cast<opStatus>(opOverflow | opInexact); 1547 } 1548 } 1549 1550 /* Otherwise we become the largest finite number. */ 1551 category = fcNormal; 1552 exponent = semantics->maxExponent; 1553 tcSetLeastSignificantBits(significandParts(), partCount(), 1554 semantics->precision); 1555 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1556 semantics->nanEncoding == fltNanEncoding::AllOnes) 1557 APInt::tcClearBit(significandParts(), 0); 1558 1559 return opInexact; 1560 } 1561 1562 /* Returns TRUE if, when truncating the current number, with BIT the 1563 new LSB, with the given lost fraction and rounding mode, the result 1564 would need to be rounded away from zero (i.e., by increasing the 1565 signficand). This routine must work for fcZero of both signs, and 1566 fcNormal numbers. */ 1567 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1568 lostFraction lost_fraction, 1569 unsigned int bit) const { 1570 /* NaNs and infinities should not have lost fractions. */ 1571 assert(isFiniteNonZero() || category == fcZero); 1572 1573 /* Current callers never pass this so we don't handle it. 
*/ 1574 assert(lost_fraction != lfExactlyZero); 1575 1576 switch (rounding_mode) { 1577 case rmNearestTiesToAway: 1578 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1579 1580 case rmNearestTiesToEven: 1581 if (lost_fraction == lfMoreThanHalf) 1582 return true; 1583 1584 /* Our zeroes don't have a significand to test. */ 1585 if (lost_fraction == lfExactlyHalf && category != fcZero) 1586 return APInt::tcExtractBit(significandParts(), bit); 1587 1588 return false; 1589 1590 case rmTowardZero: 1591 return false; 1592 1593 case rmTowardPositive: 1594 return !sign; 1595 1596 case rmTowardNegative: 1597 return sign; 1598 1599 default: 1600 break; 1601 } 1602 llvm_unreachable("Invalid rounding mode found"); 1603 } 1604 1605 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1606 lostFraction lost_fraction) { 1607 unsigned int omsb; /* One, not zero, based MSB. */ 1608 int exponentChange; 1609 1610 if (!isFiniteNonZero()) 1611 return opOK; 1612 1613 /* Before rounding normalize the exponent of fcNormal numbers. */ 1614 omsb = significandMSB() + 1; 1615 1616 if (omsb) { 1617 /* OMSB is numbered from 1. We want to place it in the integer 1618 bit numbered PRECISION if possible, with a compensating change in 1619 the exponent. */ 1620 exponentChange = omsb - semantics->precision; 1621 1622 /* If the resulting exponent is too high, overflow according to 1623 the rounding mode. */ 1624 if (exponent + exponentChange > semantics->maxExponent) 1625 return handleOverflow(rounding_mode); 1626 1627 /* Subnormal numbers have exponent minExponent, and their MSB 1628 is forced based on that. */ 1629 if (exponent + exponentChange < semantics->minExponent) 1630 exponentChange = semantics->minExponent - exponent; 1631 1632 /* Shifting left is easy as we don't lose precision. 
*/ 1633 if (exponentChange < 0) { 1634 assert(lost_fraction == lfExactlyZero); 1635 1636 shiftSignificandLeft(-exponentChange); 1637 1638 return opOK; 1639 } 1640 1641 if (exponentChange > 0) { 1642 lostFraction lf; 1643 1644 /* Shift right and capture any new lost fraction. */ 1645 lf = shiftSignificandRight(exponentChange); 1646 1647 lost_fraction = combineLostFractions(lf, lost_fraction); 1648 1649 /* Keep OMSB up-to-date. */ 1650 if (omsb > (unsigned) exponentChange) 1651 omsb -= exponentChange; 1652 else 1653 omsb = 0; 1654 } 1655 } 1656 1657 // The all-ones values is an overflow if NaN is all ones. If NaN is 1658 // represented by negative zero, then it is a valid finite value. 1659 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1660 semantics->nanEncoding == fltNanEncoding::AllOnes && 1661 exponent == semantics->maxExponent && isSignificandAllOnes()) 1662 return handleOverflow(rounding_mode); 1663 1664 /* Now round the number according to rounding_mode given the lost 1665 fraction. */ 1666 1667 /* As specified in IEEE 754, since we do not trap we do not report 1668 underflow for exact results. */ 1669 if (lost_fraction == lfExactlyZero) { 1670 /* Canonicalize zeroes. */ 1671 if (omsb == 0) { 1672 category = fcZero; 1673 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 1674 sign = false; 1675 } 1676 1677 return opOK; 1678 } 1679 1680 /* Increment the significand if we're rounding away from zero. */ 1681 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1682 if (omsb == 0) 1683 exponent = semantics->minExponent; 1684 1685 incrementSignificand(); 1686 omsb = significandMSB() + 1; 1687 1688 /* Did the significand increment overflow? */ 1689 if (omsb == (unsigned) semantics->precision + 1) { 1690 /* Renormalize by incrementing the exponent and shifting our 1691 significand right one. However if we already have the 1692 maximum exponent we overflow to infinity. 
*/ 1693 if (exponent == semantics->maxExponent) 1694 // Invoke overflow handling with a rounding mode that will guarantee 1695 // that the result gets turned into the correct infinity representation. 1696 // This is needed instead of just setting the category to infinity to 1697 // account for 8-bit floating point types that have no inf, only NaN. 1698 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive); 1699 1700 shiftSignificandRight(1); 1701 1702 return opInexact; 1703 } 1704 1705 // The all-ones values is an overflow if NaN is all ones. If NaN is 1706 // represented by negative zero, then it is a valid finite value. 1707 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1708 semantics->nanEncoding == fltNanEncoding::AllOnes && 1709 exponent == semantics->maxExponent && isSignificandAllOnes()) 1710 return handleOverflow(rounding_mode); 1711 } 1712 1713 /* The normal case - we were and are not denormal, and any 1714 significand increment above didn't overflow. */ 1715 if (omsb == semantics->precision) 1716 return opInexact; 1717 1718 /* We have a non-zero denormal. */ 1719 assert(omsb < semantics->precision); 1720 1721 /* Canonicalize zeroes. */ 1722 if (omsb == 0) { 1723 category = fcZero; 1724 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 1725 sign = false; 1726 } 1727 1728 /* The fcZero case is a denormal that underflowed to zero. 
*/ 1729 return (opStatus) (opUnderflow | opInexact); 1730 } 1731 1732 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1733 bool subtract) { 1734 switch (PackCategoriesIntoKey(category, rhs.category)) { 1735 default: 1736 llvm_unreachable(nullptr); 1737 1738 case PackCategoriesIntoKey(fcZero, fcNaN): 1739 case PackCategoriesIntoKey(fcNormal, fcNaN): 1740 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1741 assign(rhs); 1742 [[fallthrough]]; 1743 case PackCategoriesIntoKey(fcNaN, fcZero): 1744 case PackCategoriesIntoKey(fcNaN, fcNormal): 1745 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1746 case PackCategoriesIntoKey(fcNaN, fcNaN): 1747 if (isSignaling()) { 1748 makeQuiet(); 1749 return opInvalidOp; 1750 } 1751 return rhs.isSignaling() ? opInvalidOp : opOK; 1752 1753 case PackCategoriesIntoKey(fcNormal, fcZero): 1754 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1755 case PackCategoriesIntoKey(fcInfinity, fcZero): 1756 return opOK; 1757 1758 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1759 case PackCategoriesIntoKey(fcZero, fcInfinity): 1760 category = fcInfinity; 1761 sign = rhs.sign ^ subtract; 1762 return opOK; 1763 1764 case PackCategoriesIntoKey(fcZero, fcNormal): 1765 assign(rhs); 1766 sign = rhs.sign ^ subtract; 1767 return opOK; 1768 1769 case PackCategoriesIntoKey(fcZero, fcZero): 1770 /* Sign depends on rounding mode; handled by caller. */ 1771 return opOK; 1772 1773 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1774 /* Differently signed infinities can only be validly 1775 subtracted. */ 1776 if (((sign ^ rhs.sign)!=0) != subtract) { 1777 makeNaN(); 1778 return opInvalidOp; 1779 } 1780 1781 return opOK; 1782 1783 case PackCategoriesIntoKey(fcNormal, fcNormal): 1784 return opDivByZero; 1785 } 1786 } 1787 1788 /* Add or subtract two normal numbers. 
*/ 1789 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1790 bool subtract) { 1791 integerPart carry; 1792 lostFraction lost_fraction; 1793 int bits; 1794 1795 /* Determine if the operation on the absolute values is effectively 1796 an addition or subtraction. */ 1797 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1798 1799 /* Are we bigger exponent-wise than the RHS? */ 1800 bits = exponent - rhs.exponent; 1801 1802 /* Subtraction is more subtle than one might naively expect. */ 1803 if (subtract) { 1804 IEEEFloat temp_rhs(rhs); 1805 1806 if (bits == 0) 1807 lost_fraction = lfExactlyZero; 1808 else if (bits > 0) { 1809 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1810 shiftSignificandLeft(1); 1811 } else { 1812 lost_fraction = shiftSignificandRight(-bits - 1); 1813 temp_rhs.shiftSignificandLeft(1); 1814 } 1815 1816 // Should we reverse the subtraction. 1817 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1818 carry = temp_rhs.subtractSignificand 1819 (*this, lost_fraction != lfExactlyZero); 1820 copySignificand(temp_rhs); 1821 sign = !sign; 1822 } else { 1823 carry = subtractSignificand 1824 (temp_rhs, lost_fraction != lfExactlyZero); 1825 } 1826 1827 /* Invert the lost fraction - it was on the RHS and 1828 subtracted. */ 1829 if (lost_fraction == lfLessThanHalf) 1830 lost_fraction = lfMoreThanHalf; 1831 else if (lost_fraction == lfMoreThanHalf) 1832 lost_fraction = lfLessThanHalf; 1833 1834 /* The code above is intended to ensure that no borrow is 1835 necessary. */ 1836 assert(!carry); 1837 (void)carry; 1838 } else { 1839 if (bits > 0) { 1840 IEEEFloat temp_rhs(rhs); 1841 1842 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1843 carry = addSignificand(temp_rhs); 1844 } else { 1845 lost_fraction = shiftSignificandRight(-bits); 1846 carry = addSignificand(rhs); 1847 } 1848 1849 /* We have a guard bit; generating a carry cannot happen. 
*/ 1850 assert(!carry); 1851 (void)carry; 1852 } 1853 1854 return lost_fraction; 1855 } 1856 1857 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1858 switch (PackCategoriesIntoKey(category, rhs.category)) { 1859 default: 1860 llvm_unreachable(nullptr); 1861 1862 case PackCategoriesIntoKey(fcZero, fcNaN): 1863 case PackCategoriesIntoKey(fcNormal, fcNaN): 1864 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1865 assign(rhs); 1866 sign = false; 1867 [[fallthrough]]; 1868 case PackCategoriesIntoKey(fcNaN, fcZero): 1869 case PackCategoriesIntoKey(fcNaN, fcNormal): 1870 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1871 case PackCategoriesIntoKey(fcNaN, fcNaN): 1872 sign ^= rhs.sign; // restore the original sign 1873 if (isSignaling()) { 1874 makeQuiet(); 1875 return opInvalidOp; 1876 } 1877 return rhs.isSignaling() ? opInvalidOp : opOK; 1878 1879 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1880 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1881 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1882 category = fcInfinity; 1883 return opOK; 1884 1885 case PackCategoriesIntoKey(fcZero, fcNormal): 1886 case PackCategoriesIntoKey(fcNormal, fcZero): 1887 case PackCategoriesIntoKey(fcZero, fcZero): 1888 category = fcZero; 1889 return opOK; 1890 1891 case PackCategoriesIntoKey(fcZero, fcInfinity): 1892 case PackCategoriesIntoKey(fcInfinity, fcZero): 1893 makeNaN(); 1894 return opInvalidOp; 1895 1896 case PackCategoriesIntoKey(fcNormal, fcNormal): 1897 return opOK; 1898 } 1899 } 1900 1901 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1902 switch (PackCategoriesIntoKey(category, rhs.category)) { 1903 default: 1904 llvm_unreachable(nullptr); 1905 1906 case PackCategoriesIntoKey(fcZero, fcNaN): 1907 case PackCategoriesIntoKey(fcNormal, fcNaN): 1908 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1909 assign(rhs); 1910 sign = false; 1911 [[fallthrough]]; 1912 case PackCategoriesIntoKey(fcNaN, fcZero): 1913 case 
PackCategoriesIntoKey(fcNaN, fcNormal): 1914 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1915 case PackCategoriesIntoKey(fcNaN, fcNaN): 1916 sign ^= rhs.sign; // restore the original sign 1917 if (isSignaling()) { 1918 makeQuiet(); 1919 return opInvalidOp; 1920 } 1921 return rhs.isSignaling() ? opInvalidOp : opOK; 1922 1923 case PackCategoriesIntoKey(fcInfinity, fcZero): 1924 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1925 case PackCategoriesIntoKey(fcZero, fcInfinity): 1926 case PackCategoriesIntoKey(fcZero, fcNormal): 1927 return opOK; 1928 1929 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1930 category = fcZero; 1931 return opOK; 1932 1933 case PackCategoriesIntoKey(fcNormal, fcZero): 1934 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1935 makeNaN(false, sign); 1936 else 1937 category = fcInfinity; 1938 return opDivByZero; 1939 1940 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1941 case PackCategoriesIntoKey(fcZero, fcZero): 1942 makeNaN(); 1943 return opInvalidOp; 1944 1945 case PackCategoriesIntoKey(fcNormal, fcNormal): 1946 return opOK; 1947 } 1948 } 1949 1950 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1951 switch (PackCategoriesIntoKey(category, rhs.category)) { 1952 default: 1953 llvm_unreachable(nullptr); 1954 1955 case PackCategoriesIntoKey(fcZero, fcNaN): 1956 case PackCategoriesIntoKey(fcNormal, fcNaN): 1957 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1958 assign(rhs); 1959 [[fallthrough]]; 1960 case PackCategoriesIntoKey(fcNaN, fcZero): 1961 case PackCategoriesIntoKey(fcNaN, fcNormal): 1962 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1963 case PackCategoriesIntoKey(fcNaN, fcNaN): 1964 if (isSignaling()) { 1965 makeQuiet(); 1966 return opInvalidOp; 1967 } 1968 return rhs.isSignaling() ? 
opInvalidOp : opOK; 1969 1970 case PackCategoriesIntoKey(fcZero, fcInfinity): 1971 case PackCategoriesIntoKey(fcZero, fcNormal): 1972 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1973 return opOK; 1974 1975 case PackCategoriesIntoKey(fcNormal, fcZero): 1976 case PackCategoriesIntoKey(fcInfinity, fcZero): 1977 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1978 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1979 case PackCategoriesIntoKey(fcZero, fcZero): 1980 makeNaN(); 1981 return opInvalidOp; 1982 1983 case PackCategoriesIntoKey(fcNormal, fcNormal): 1984 return opOK; 1985 } 1986 } 1987 1988 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 1989 switch (PackCategoriesIntoKey(category, rhs.category)) { 1990 default: 1991 llvm_unreachable(nullptr); 1992 1993 case PackCategoriesIntoKey(fcZero, fcNaN): 1994 case PackCategoriesIntoKey(fcNormal, fcNaN): 1995 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1996 assign(rhs); 1997 [[fallthrough]]; 1998 case PackCategoriesIntoKey(fcNaN, fcZero): 1999 case PackCategoriesIntoKey(fcNaN, fcNormal): 2000 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2001 case PackCategoriesIntoKey(fcNaN, fcNaN): 2002 if (isSignaling()) { 2003 makeQuiet(); 2004 return opInvalidOp; 2005 } 2006 return rhs.isSignaling() ? opInvalidOp : opOK; 2007 2008 case PackCategoriesIntoKey(fcZero, fcInfinity): 2009 case PackCategoriesIntoKey(fcZero, fcNormal): 2010 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2011 return opOK; 2012 2013 case PackCategoriesIntoKey(fcNormal, fcZero): 2014 case PackCategoriesIntoKey(fcInfinity, fcZero): 2015 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2016 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2017 case PackCategoriesIntoKey(fcZero, fcZero): 2018 makeNaN(); 2019 return opInvalidOp; 2020 2021 case PackCategoriesIntoKey(fcNormal, fcNormal): 2022 return opDivByZero; // fake status, indicating this is not a special case 2023 } 2024 } 2025 2026 /* Change sign. 
*/ 2027 void IEEEFloat::changeSign() { 2028 // With NaN-as-negative-zero, neither NaN or negative zero can change 2029 // their signs. 2030 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 2031 (isZero() || isNaN())) 2032 return; 2033 /* Look mummy, this one's easy. */ 2034 sign = !sign; 2035 } 2036 2037 /* Normalized addition or subtraction. */ 2038 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2039 roundingMode rounding_mode, 2040 bool subtract) { 2041 opStatus fs; 2042 2043 fs = addOrSubtractSpecials(rhs, subtract); 2044 2045 /* This return code means it was not a simple case. */ 2046 if (fs == opDivByZero) { 2047 lostFraction lost_fraction; 2048 2049 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2050 fs = normalize(rounding_mode, lost_fraction); 2051 2052 /* Can only be zero if we lost no fraction. */ 2053 assert(category != fcZero || lost_fraction == lfExactlyZero); 2054 } 2055 2056 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2057 positive zero unless rounding to minus infinity, except that 2058 adding two like-signed zeroes gives that zero. */ 2059 if (category == fcZero) { 2060 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2061 sign = (rounding_mode == rmTowardNegative); 2062 // NaN-in-negative-zero means zeros need to be normalized to +0. 2063 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2064 sign = false; 2065 } 2066 2067 return fs; 2068 } 2069 2070 /* Normalized addition. */ 2071 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2072 roundingMode rounding_mode) { 2073 return addOrSubtract(rhs, rounding_mode, false); 2074 } 2075 2076 /* Normalized subtraction. */ 2077 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2078 roundingMode rounding_mode) { 2079 return addOrSubtract(rhs, rounding_mode, true); 2080 } 2081 2082 /* Normalized multiply. 
*/ 2083 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 2084 roundingMode rounding_mode) { 2085 opStatus fs; 2086 2087 sign ^= rhs.sign; 2088 fs = multiplySpecials(rhs); 2089 2090 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2091 sign = false; 2092 if (isFiniteNonZero()) { 2093 lostFraction lost_fraction = multiplySignificand(rhs); 2094 fs = normalize(rounding_mode, lost_fraction); 2095 if (lost_fraction != lfExactlyZero) 2096 fs = (opStatus) (fs | opInexact); 2097 } 2098 2099 return fs; 2100 } 2101 2102 /* Normalized divide. */ 2103 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 2104 roundingMode rounding_mode) { 2105 opStatus fs; 2106 2107 sign ^= rhs.sign; 2108 fs = divideSpecials(rhs); 2109 2110 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2111 sign = false; 2112 if (isFiniteNonZero()) { 2113 lostFraction lost_fraction = divideSignificand(rhs); 2114 fs = normalize(rounding_mode, lost_fraction); 2115 if (lost_fraction != lfExactlyZero) 2116 fs = (opStatus) (fs | opInexact); 2117 } 2118 2119 return fs; 2120 } 2121 2122 /* Normalized remainder. */ 2123 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 2124 opStatus fs; 2125 unsigned int origSign = sign; 2126 2127 // First handle the special cases. 2128 fs = remainderSpecials(rhs); 2129 if (fs != opDivByZero) 2130 return fs; 2131 2132 fs = opOK; 2133 2134 // Make sure the current value is less than twice the denom. If the addition 2135 // did not succeed (an overflow has happened), which means that the finite 2136 // value we currently posses must be less than twice the denom (as we are 2137 // using the same semantics). 2138 IEEEFloat P2 = rhs; 2139 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 2140 fs = mod(P2); 2141 assert(fs == opOK); 2142 } 2143 2144 // Lets work with absolute numbers. 
2145 IEEEFloat P = rhs; 2146 P.sign = false; 2147 sign = false; 2148 2149 // 2150 // To calculate the remainder we use the following scheme. 2151 // 2152 // The remainder is defained as follows: 2153 // 2154 // remainder = numer - rquot * denom = x - r * p 2155 // 2156 // Where r is the result of: x/p, rounded toward the nearest integral value 2157 // (with halfway cases rounded toward the even number). 2158 // 2159 // Currently, (after x mod 2p): 2160 // r is the number of 2p's present inside x, which is inherently, an even 2161 // number of p's. 2162 // 2163 // We may split the remaining calculation into 4 options: 2164 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2165 // - if x == 0.5p then we round to the nearest even number which is 0, and we 2166 // are done as well. 2167 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 2168 // to subtract 1p at least once. 2169 // - if x >= p then we must subtract p at least once, as x must be a 2170 // remainder. 2171 // 2172 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 2173 // 2174 // We can now split the remaining calculation to the following 3 options: 2175 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2176 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 2177 // must round up to the next even number. so we must subtract p once more. 2178 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 2179 // integral, and subtract p once more. 2180 // 2181 2182 // Extend the semantics to prevent an overflow/underflow or inexact result. 
2183 bool losesInfo; 2184 fltSemantics extendedSemantics = *semantics; 2185 extendedSemantics.maxExponent++; 2186 extendedSemantics.minExponent--; 2187 extendedSemantics.precision += 2; 2188 2189 IEEEFloat VEx = *this; 2190 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2191 assert(fs == opOK && !losesInfo); 2192 IEEEFloat PEx = P; 2193 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2194 assert(fs == opOK && !losesInfo); 2195 2196 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 2197 // any fraction. 2198 fs = VEx.add(VEx, rmNearestTiesToEven); 2199 assert(fs == opOK); 2200 2201 if (VEx.compare(PEx) == cmpGreaterThan) { 2202 fs = subtract(P, rmNearestTiesToEven); 2203 assert(fs == opOK); 2204 2205 // Make VEx = this.add(this), but because we have different semantics, we do 2206 // not want to `convert` again, so we just subtract PEx twice (which equals 2207 // to the desired value). 2208 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2209 assert(fs == opOK); 2210 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2211 assert(fs == opOK); 2212 2213 cmpResult result = VEx.compare(PEx); 2214 if (result == cmpGreaterThan || result == cmpEqual) { 2215 fs = subtract(P, rmNearestTiesToEven); 2216 assert(fs == opOK); 2217 } 2218 } 2219 2220 if (isZero()) { 2221 sign = origSign; // IEEE754 requires this 2222 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2223 // But some 8-bit floats only have positive 0. 2224 sign = false; 2225 } 2226 2227 else 2228 sign ^= origSign; 2229 return fs; 2230 } 2231 2232 /* Normalized llvm frem (C fmod). 
*/
IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
  opStatus fs;
  // Special category combinations (NaN, infinity, zero) are resolved here;
  // afterwards the loop below only runs when both values are finite and
  // non-zero.
  fs = modSpecials(rhs);
  unsigned int origSign = sign;

  // Repeatedly subtract a power-of-two multiple of RHS that does not exceed
  // our magnitude until our magnitude drops below that of RHS.
  while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
         compareAbsoluteValue(rhs) != cmpLessThan) {
    int Exp = ilogb(*this) - ilogb(rhs);
    IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
    // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
    // check for it.
    if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
      V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
    // Give V our sign so that the subtraction reduces our magnitude.
    V.sign = sign;

    fs = subtract(V, rmNearestTiesToEven);
    assert(fs==opOK);
  }
  if (isZero()) {
    sign = origSign; // fmod requires this
    // Formats that encode NaN as negative zero only have a positive zero.
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }
  return fs;
}

/* Normalized fused-multiply-add.  Computes *this * MULTIPLICAND + ADDEND
   in place and returns the status of the operation.  */
IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
                                                const IEEEFloat &addend,
                                                roundingMode rounding_mode) {
  opStatus fs;

  /* Post-multiplication sign, before addition.  */
  sign ^= multiplicand.sign;

  /* If and only if all arguments are normal do we need to do an
     extended-precision calculation.  */
  if (isFiniteNonZero() &&
      multiplicand.isFiniteNonZero() &&
      addend.isFinite()) {
    lostFraction lost_fraction;

    lost_fraction = multiplySignificand(multiplicand, addend);
    fs = normalize(rounding_mode, lost_fraction);
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);

    /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
       positive zero unless rounding to minus infinity, except that
       adding two like-signed zeroes gives that zero.  */
    if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
      sign = (rounding_mode == rmTowardNegative);
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }
  } else {
    fs = multiplySpecials(multiplicand);

    /* FS can only be opOK or opInvalidOp.  There is no more work
       to do in the latter case.  The IEEE-754R standard says it is
       implementation-defined in this case whether, if ADDEND is a
       quiet NaN, we raise invalid op; this implementation does so.

       If we need to do the addition we can do so with normal
       precision.  */
    if (fs == opOK)
      fs = addOrSubtract(addend, rounding_mode, false);
  }

  return fs;
}

/* Rounding-mode correct round to integral value.  Rounds *this in place
   according to ROUNDING_MODE and returns the status of the operation.  */
IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
  opStatus fs;

  if (isInfinity())
    // [IEEE Std 754-2008 6.1]:
    // The behavior of infinity in floating-point arithmetic is derived from the
    // limiting cases of real arithmetic with operands of arbitrarily
    // large magnitude, when such a limit exists.
    // ...
    // Operations on infinite operands are usually exact and therefore signal no
    // exceptions ...
    return opOK;

  if (isNaN()) {
    if (isSignaling()) {
      // [IEEE Std 754-2008 6.2]:
      // Under default exception handling, any operation signaling an invalid
      // operation exception and for which a floating-point result is to be
      // delivered shall deliver a quiet NaN.
      makeQuiet();
      // [IEEE Std 754-2008 6.2]:
      // Signaling NaNs shall be reserved operands that, under default exception
      // handling, signal the invalid operation exception(see 7.2) for every
      // general-computational and signaling-computational operation except for
      // the conversions described in 5.12.
      return opInvalidOp;
    } else {
      // [IEEE Std 754-2008 6.2]:
      // For an operation with quiet NaN inputs, other than maximum and minimum
      // operations, if a floating-point result is to be delivered the result
      // shall be a quiet NaN which should be one of the input NaNs.
      // ...
      // Every general-computational and quiet-computational operation involving
      // one or more input NaNs, none of them signaling, shall signal no
      // exception, except fusedMultiplyAdd might signal the invalid operation
      // exception(see 7.2).
      return opOK;
    }
  }

  if (isZero()) {
    // [IEEE Std 754-2008 6.3]:
    // ... the sign of the result of conversions, the quantize operation, the
    // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
    // the sign of the first or only operand.
    return opOK;
  }

  // If the exponent is large enough, we know that this value is already
  // integral, and the arithmetic below would potentially cause it to saturate
  // to +/-Inf.  Bail out early instead.
  if (exponent+1 >= (int)semanticsPrecision(*semantics))
    return opOK;

  // The algorithm here is quite simple: we add 2^(p-1), where p is the
  // precision of our format, and then subtract it back off again.  The choice
  // of rounding modes for the addition/subtraction determines the rounding mode
  // for our integral rounding as well.
  // NOTE: When the input value is negative, we do subtraction followed by
  // addition instead.
  APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
  IntegerConstant <<= semanticsPrecision(*semantics)-1;
  IEEEFloat MagicConstant(*semantics);
  fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
                                      rmNearestTiesToEven);
  assert(fs == opOK);
  // Giving the constant our sign is what turns the add/subtract pair into
  // subtract/add for negative inputs.
  MagicConstant.sign = sign;

  // Preserve the input sign so that we can handle the case of zero result
  // correctly.
  bool inputSign = isNegative();

  // The status of this addition is the one reported to the caller.
  fs = add(MagicConstant, rounding_mode);

  // Current value and 'MagicConstant' are both integers, so the result of the
  // subtraction is always exact according to Sterbenz' lemma.
  subtract(MagicConstant, rounding_mode);

  // Restore the input sign.
  if (inputSign != isNegative())
    changeSign();

  return fs;
}


/* Comparison requires normalized numbers.  Both operands must use the
   same semantics.  Returns cmpUnordered if either operand is a NaN;
   zeroes compare equal regardless of sign.  */
IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
  cmpResult result;

  assert(semantics == rhs.semantics);

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    return cmpUnordered;

  /* Our category has the larger magnitude: our sign alone decides.  */
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcNormal, fcZero):
    if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  /* RHS's category has the larger magnitude: its sign alone decides.  */
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    if (rhs.sign)
      return cmpGreaterThan;
    else
      return cmpLessThan;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    if (sign == rhs.sign)
      return cmpEqual;
    else if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  case PackCategoriesIntoKey(fcZero, fcZero):
    return cmpEqual;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    break;
  }

  /* Two normal numbers.  Do they have the same sign?  */
  if (sign != rhs.sign) {
    if (sign)
      result = cmpLessThan;
    else
      result = cmpGreaterThan;
  } else {
    /* Compare absolute values; invert result if negative.  */
    result = compareAbsoluteValue(rhs);

    if (sign) {
      if (result == cmpLessThan)
        result = cmpGreaterThan;
      else if (result == cmpGreaterThan)
        result = cmpLessThan;
    }
  }

  return result;
}

/// IEEEFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==opOK, but there
/// are edge cases where this is not so).

IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
                                       roundingMode rounding_mode,
                                       bool *losesInfo) {
  lostFraction lostFraction;
  unsigned int newPartCount, oldPartCount;
  opStatus fs;
  int shift;
  const fltSemantics &fromSemantics = *semantics;
  // Sampled up front, before the significand is shifted or re-stored below.
  bool is_signaling = isSignaling();

  lostFraction = lfExactlyZero;
  newPartCount = partCountForBits(toSemantics.precision + 1);
  oldPartCount = partCount();
  // Positive for an extension, negative for a truncation of the significand.
  shift = toSemantics.precision - fromSemantics.precision;

  bool X86SpecialNan = false;
  if (&fromSemantics == &semX87DoubleExtended &&
      &toSemantics != &semX87DoubleExtended && category == fcNaN &&
      (!(*significandParts() & 0x8000000000000000ULL) ||
       !(*significandParts() & 0x4000000000000000ULL))) {
    // x86 has some unusual NaNs which cannot be represented in any other
    // format; note them here.
    X86SpecialNan = true;
  }

  // If this is a truncation of a denormal number, and the target semantics
  // has larger exponent range than the source semantics (this can happen
  // when truncating from PowerPC double-double to double format), the
  // right shift could lose result mantissa bits.  Adjust exponent instead
  // of performing excessive shift.
  // Also do a similar trick in case shifting denormal would produce zero
  // significand as this case isn't handled correctly by normalize.
  if (shift < 0 && isFiniteNonZero()) {
    int omsb = significandMSB() + 1;
    int exponentChange = omsb - fromSemantics.precision;
    if (exponent + exponentChange < toSemantics.minExponent)
      exponentChange = toSemantics.minExponent - exponent;
    if (exponentChange < shift)
      exponentChange = shift;
    if (exponentChange < 0) {
      shift -= exponentChange;
      exponent += exponentChange;
    } else if (omsb <= -shift) {
      exponentChange = omsb + shift - 1; // leave at least one bit set
      shift -= exponentChange;
      exponent += exponentChange;
    }
  }

  // If this is a truncation, perform the shift before we narrow the storage.
  if (shift < 0 && (isFiniteNonZero() ||
                    (category == fcNaN && semantics->nonFiniteBehavior !=
                                              fltNonfiniteBehavior::NanOnly)))
    lostFraction = shiftRight(significandParts(), oldPartCount, -shift);

  // Fix the storage so it can hold the new value.
  if (newPartCount > oldPartCount) {
    // The new type requires more storage; make it available.
    integerPart *newParts;
    newParts = new integerPart[newPartCount];
    APInt::tcSet(newParts, 0, newPartCount);
    if (isFiniteNonZero() || category==fcNaN)
      APInt::tcAssign(newParts, significandParts(), oldPartCount);
    freeSignificand();
    significand.parts = newParts;
  } else if (newPartCount == 1 && oldPartCount != 1) {
    // Switch to built-in storage for a single part.
    integerPart newPart = 0;
    if (isFiniteNonZero() || category==fcNaN)
      newPart = significandParts()[0];
    freeSignificand();
    significand.part = newPart;
  }

  // Now that we have the right storage, switch the semantics.
  semantics = &toSemantics;

  // If this is an extension, perform the shift now that the storage is
  // available.
  if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
    APInt::tcShiftLeft(significandParts(), newPartCount, shift);

  if (isFiniteNonZero()) {
    fs = normalize(rounding_mode, lostFraction);
    *losesInfo = (fs != opOK);
  } else if (category == fcNaN) {
    // The target format only has NaN as a non-finite value: build a fresh
    // NaN with the same sign and return without the payload handling below.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
      *losesInfo =
          fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
      makeNaN(false, sign);
      return is_signaling ? opInvalidOp : opOK;
    }

    // If NaN is negative zero, we need to create a new NaN to avoid converting
    // NaN to -Inf.
    if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
        semantics->nanEncoding != fltNanEncoding::NegativeZero)
      makeNaN(false, false);

    *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;

    // For x87 extended precision, we want to make a NaN, not a special NaN if
    // the input wasn't special either.
    if (!X86SpecialNan && semantics == &semX87DoubleExtended)
      APInt::tcSetBit(significandParts(), semantics->precision - 1);

    // Convert of sNaN creates qNaN and raises an exception (invalid op).
    // This also guarantees that a sNaN does not become Inf on a truncation
    // that loses all payload bits.
    if (is_signaling) {
      makeQuiet();
      fs = opInvalidOp;
    } else {
      fs = opOK;
    }
  } else if (category == fcInfinity &&
             semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // The target format has no infinity; the result becomes a NaN.
    makeNaN(false, sign);
    *losesInfo = true;
    fs = opInexact;
  } else if (category == fcZero &&
             semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Negative zero loses info, but positive zero doesn't.
    *losesInfo =
        fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
    fs = *losesInfo ? opInexact : opOK;
    // NaN is negative zero means -0 -> +0, which can lose information
    sign = false;
  } else {
    *losesInfo = false;
    fs = opOK;
  }

  return fs;
}

/* Convert a floating point number to an integer according to the
   rounding mode.  If the rounded integer value is out of range this
   returns an invalid operation exception and the contents of the
   destination parts are unspecified.  If the rounded value is in
   range but the floating point number is not the exact integer, the C
   standard doesn't require an inexact exception to be raised.  IEEE
   854 does require it so we do that.

   Note that for conversions to integer type the C standard requires
   round-to-zero to always be used.

   *ISEXACT is set to true only when the conversion lost no fraction
   and the value is not a negative zero.  */
IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
    MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
    roundingMode rounding_mode, bool *isExact) const {
  lostFraction lost_fraction;
  const integerPart *src;
  unsigned int dstPartsCount, truncatedBits;

  *isExact = false;

  /* Handle the three special cases first.  */
  if (category == fcInfinity || category == fcNaN)
    return opInvalidOp;

  dstPartsCount = partCountForBits(width);
  assert(dstPartsCount <= parts.size() && "Integer too big");

  if (category == fcZero) {
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    // Negative zero can't be represented as an int.
    *isExact = !sign;
    return opOK;
  }

  src = significandParts();

  /* Step 1: place our absolute value, with any fraction truncated, in
     the destination.  */
  if (exponent < 0) {
    /* Our absolute value is less than one; truncate everything.  */
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    /* For exponent -1 the integer bit represents .5, look at that.
       For smaller exponents leftmost truncated bit is 0. */
    truncatedBits = semantics->precision -1U - exponent;
  } else {
    /* We want the most significant (exponent + 1) bits; the rest are
       truncated.  */
    unsigned int bits = exponent + 1U;

    /* Hopelessly large in magnitude?  */
    if (bits > width)
      return opInvalidOp;

    if (bits < semantics->precision) {
      /* We truncate (semantics->precision - bits) bits.  */
      truncatedBits = semantics->precision - bits;
      APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
    } else {
      /* We want at least as many bits as are available.  */
      APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
                       0);
      APInt::tcShiftLeft(parts.data(), dstPartsCount,
                         bits - semantics->precision);
      truncatedBits = 0;
    }
  }

  /* Step 2: work out any lost fraction, and increment the absolute
     value if we would round away from zero.  */
  if (truncatedBits) {
    lost_fraction = lostFractionThroughTruncation(src, partCount(),
                                                  truncatedBits);
    if (lost_fraction != lfExactlyZero &&
        roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
      if (APInt::tcIncrement(parts.data(), dstPartsCount))
        return opInvalidOp;     /* Overflow.  */
    }
  } else {
    lost_fraction = lfExactlyZero;
  }

  /* Step 3: check if we fit in the destination.  */
  unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;

  if (sign) {
    if (!isSigned) {
      /* Negative numbers cannot be represented as unsigned.  */
      if (omsb != 0)
        return opInvalidOp;
    } else {
      /* It takes omsb bits to represent the unsigned integer value.
         We lose a bit for the sign, but care is needed as the
         maximally negative integer is a special case.  */
      if (omsb == width &&
          APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
        return opInvalidOp;

      /* This case can happen because of rounding.  */
      if (omsb > width)
        return opInvalidOp;
    }

    APInt::tcNegate (parts.data(), dstPartsCount);
  } else {
    if (omsb >= width + !isSigned)
      return opInvalidOp;
  }

  if (lost_fraction == lfExactlyZero) {
    *isExact = true;
    return opOK;
  } else
    return opInexact;
}

/* Same as convertToSignExtendedInteger, except we provide
   deterministic values in case of an invalid operation exception,
   namely zero for NaNs and the minimal or maximal value respectively
   for underflow or overflow.
   The *isExact output tells whether the result is exact, in the sense
   that converting it back to the original floating point type produces
   the original value.  This is almost equivalent to result==opOK,
   except for negative zeroes.
2728 */ 2729 IEEEFloat::opStatus 2730 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2731 unsigned int width, bool isSigned, 2732 roundingMode rounding_mode, bool *isExact) const { 2733 opStatus fs; 2734 2735 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2736 isExact); 2737 2738 if (fs == opInvalidOp) { 2739 unsigned int bits, dstPartsCount; 2740 2741 dstPartsCount = partCountForBits(width); 2742 assert(dstPartsCount <= parts.size() && "Integer too big"); 2743 2744 if (category == fcNaN) 2745 bits = 0; 2746 else if (sign) 2747 bits = isSigned; 2748 else 2749 bits = width - isSigned; 2750 2751 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2752 if (sign && isSigned) 2753 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2754 } 2755 2756 return fs; 2757 } 2758 2759 /* Convert an unsigned integer SRC to a floating point number, 2760 rounding according to ROUNDING_MODE. The sign of the floating 2761 point number is not modified. */ 2762 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2763 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2764 unsigned int omsb, precision, dstCount; 2765 integerPart *dst; 2766 lostFraction lost_fraction; 2767 2768 category = fcNormal; 2769 omsb = APInt::tcMSB(src, srcCount) + 1; 2770 dst = significandParts(); 2771 dstCount = partCount(); 2772 precision = semantics->precision; 2773 2774 /* We want the most significant PRECISION bits of SRC. There may not 2775 be that many; extract what we can. 
*/ 2776 if (precision <= omsb) { 2777 exponent = omsb - 1; 2778 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2779 omsb - precision); 2780 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2781 } else { 2782 exponent = precision - 1; 2783 lost_fraction = lfExactlyZero; 2784 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2785 } 2786 2787 return normalize(rounding_mode, lost_fraction); 2788 } 2789 2790 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2791 roundingMode rounding_mode) { 2792 unsigned int partCount = Val.getNumWords(); 2793 APInt api = Val; 2794 2795 sign = false; 2796 if (isSigned && api.isNegative()) { 2797 sign = true; 2798 api = -api; 2799 } 2800 2801 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2802 } 2803 2804 /* Convert a two's complement integer SRC to a floating point number, 2805 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2806 integer is signed, in which case it must be sign-extended. */ 2807 IEEEFloat::opStatus 2808 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2809 unsigned int srcCount, bool isSigned, 2810 roundingMode rounding_mode) { 2811 opStatus status; 2812 2813 if (isSigned && 2814 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2815 integerPart *copy; 2816 2817 /* If we're signed and negative negate a copy. */ 2818 sign = true; 2819 copy = new integerPart[srcCount]; 2820 APInt::tcAssign(copy, src, srcCount); 2821 APInt::tcNegate(copy, srcCount); 2822 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2823 delete [] copy; 2824 } else { 2825 sign = false; 2826 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2827 } 2828 2829 return status; 2830 } 2831 2832 /* FIXME: should this just take a const APInt reference? 
*/ 2833 IEEEFloat::opStatus 2834 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2835 unsigned int width, bool isSigned, 2836 roundingMode rounding_mode) { 2837 unsigned int partCount = partCountForBits(width); 2838 APInt api = APInt(width, ArrayRef(parts, partCount)); 2839 2840 sign = false; 2841 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2842 sign = true; 2843 api = -api; 2844 } 2845 2846 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2847 } 2848 2849 Expected<IEEEFloat::opStatus> 2850 IEEEFloat::convertFromHexadecimalString(StringRef s, 2851 roundingMode rounding_mode) { 2852 lostFraction lost_fraction = lfExactlyZero; 2853 2854 category = fcNormal; 2855 zeroSignificand(); 2856 exponent = 0; 2857 2858 integerPart *significand = significandParts(); 2859 unsigned partsCount = partCount(); 2860 unsigned bitPos = partsCount * integerPartWidth; 2861 bool computedTrailingFraction = false; 2862 2863 // Skip leading zeroes and any (hexa)decimal point. 2864 StringRef::iterator begin = s.begin(); 2865 StringRef::iterator end = s.end(); 2866 StringRef::iterator dot; 2867 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2868 if (!PtrOrErr) 2869 return PtrOrErr.takeError(); 2870 StringRef::iterator p = *PtrOrErr; 2871 StringRef::iterator firstSignificantDigit = p; 2872 2873 while (p != end) { 2874 integerPart hex_value; 2875 2876 if (*p == '.') { 2877 if (dot != end) 2878 return createError("String contains multiple dots"); 2879 dot = p++; 2880 continue; 2881 } 2882 2883 hex_value = hexDigitValue(*p); 2884 if (hex_value == UINT_MAX) 2885 break; 2886 2887 p++; 2888 2889 // Store the number while we have space. 
2890 if (bitPos) { 2891 bitPos -= 4; 2892 hex_value <<= bitPos % integerPartWidth; 2893 significand[bitPos / integerPartWidth] |= hex_value; 2894 } else if (!computedTrailingFraction) { 2895 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2896 if (!FractOrErr) 2897 return FractOrErr.takeError(); 2898 lost_fraction = *FractOrErr; 2899 computedTrailingFraction = true; 2900 } 2901 } 2902 2903 /* Hex floats require an exponent but not a hexadecimal point. */ 2904 if (p == end) 2905 return createError("Hex strings require an exponent"); 2906 if (*p != 'p' && *p != 'P') 2907 return createError("Invalid character in significand"); 2908 if (p == begin) 2909 return createError("Significand has no digits"); 2910 if (dot != end && p - begin == 1) 2911 return createError("Significand has no digits"); 2912 2913 /* Ignore the exponent if we are zero. */ 2914 if (p != firstSignificantDigit) { 2915 int expAdjustment; 2916 2917 /* Implicit hexadecimal point? */ 2918 if (dot == end) 2919 dot = p; 2920 2921 /* Calculate the exponent adjustment implicit in the number of 2922 significant digits. */ 2923 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2924 if (expAdjustment < 0) 2925 expAdjustment++; 2926 expAdjustment = expAdjustment * 4 - 1; 2927 2928 /* Adjust for writing the significand starting at the most 2929 significant nibble. */ 2930 expAdjustment += semantics->precision; 2931 expAdjustment -= partsCount * integerPartWidth; 2932 2933 /* Adjust for the given exponent. 
*/ 2934 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 2935 if (!ExpOrErr) 2936 return ExpOrErr.takeError(); 2937 exponent = *ExpOrErr; 2938 } 2939 2940 return normalize(rounding_mode, lost_fraction); 2941 } 2942 2943 IEEEFloat::opStatus 2944 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2945 unsigned sigPartCount, int exp, 2946 roundingMode rounding_mode) { 2947 unsigned int parts, pow5PartCount; 2948 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2949 integerPart pow5Parts[maxPowerOfFiveParts]; 2950 bool isNearest; 2951 2952 isNearest = (rounding_mode == rmNearestTiesToEven || 2953 rounding_mode == rmNearestTiesToAway); 2954 2955 parts = partCountForBits(semantics->precision + 11); 2956 2957 /* Calculate pow(5, abs(exp)). */ 2958 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2959 2960 for (;; parts *= 2) { 2961 opStatus sigStatus, powStatus; 2962 unsigned int excessPrecision, truncatedBits; 2963 2964 calcSemantics.precision = parts * integerPartWidth - 1; 2965 excessPrecision = calcSemantics.precision - semantics->precision; 2966 truncatedBits = excessPrecision; 2967 2968 IEEEFloat decSig(calcSemantics, uninitialized); 2969 decSig.makeZero(sign); 2970 IEEEFloat pow5(calcSemantics); 2971 2972 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2973 rmNearestTiesToEven); 2974 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2975 rmNearestTiesToEven); 2976 /* Add exp, as 10^n = 5^n * 2^n. */ 2977 decSig.exponent += exp; 2978 2979 lostFraction calcLostFraction; 2980 integerPart HUerr, HUdistance; 2981 unsigned int powHUerr; 2982 2983 if (exp >= 0) { 2984 /* multiplySignificand leaves the precision-th bit set to 1. */ 2985 calcLostFraction = decSig.multiplySignificand(pow5); 2986 powHUerr = powStatus != opOK; 2987 } else { 2988 calcLostFraction = decSig.divideSignificand(pow5); 2989 /* Denormal numbers have less precision. 
*/ 2990 if (decSig.exponent < semantics->minExponent) { 2991 excessPrecision += (semantics->minExponent - decSig.exponent); 2992 truncatedBits = excessPrecision; 2993 if (excessPrecision > calcSemantics.precision) 2994 excessPrecision = calcSemantics.precision; 2995 } 2996 /* Extra half-ulp lost in reciprocal of exponent. */ 2997 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2998 } 2999 3000 /* Both multiplySignificand and divideSignificand return the 3001 result with the integer bit set. */ 3002 assert(APInt::tcExtractBit 3003 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 3004 3005 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 3006 powHUerr); 3007 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 3008 excessPrecision, isNearest); 3009 3010 /* Are we guaranteed to round correctly if we truncate? */ 3011 if (HUdistance >= HUerr) { 3012 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 3013 calcSemantics.precision - excessPrecision, 3014 excessPrecision); 3015 /* Take the exponent of decSig. If we tcExtract-ed less bits 3016 above we must adjust our exponent to compensate for the 3017 implicit right shift. */ 3018 exponent = (decSig.exponent + semantics->precision 3019 - (calcSemantics.precision - excessPrecision)); 3020 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 3021 decSig.partCount(), 3022 truncatedBits); 3023 return normalize(rounding_mode, calcLostFraction); 3024 } 3025 } 3026 } 3027 3028 Expected<IEEEFloat::opStatus> 3029 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 3030 decimalInfo D; 3031 opStatus fs; 3032 3033 /* Scan the text. */ 3034 StringRef::iterator p = str.begin(); 3035 if (Error Err = interpretDecimal(p, str.end(), &D)) 3036 return std::move(Err); 3037 3038 /* Handle the quick cases. First the case of no significant digits, 3039 i.e. 
zero, and then exponents that are obviously too large or too 3040 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 3041 definitely overflows if 3042 3043 (exp - 1) * L >= maxExponent 3044 3045 and definitely underflows to zero where 3046 3047 (exp + 1) * L <= minExponent - precision 3048 3049 With integer arithmetic the tightest bounds for L are 3050 3051 93/28 < L < 196/59 [ numerator <= 256 ] 3052 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 3053 */ 3054 3055 // Test if we have a zero number allowing for strings with no null terminators 3056 // and zero decimals with non-zero exponents. 3057 // 3058 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 3059 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 3060 // be at most one dot. On the other hand, if we have a zero with a non-zero 3061 // exponent, then we know that D.firstSigDigit will be non-numeric. 3062 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 3063 category = fcZero; 3064 fs = opOK; 3065 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 3066 sign = false; 3067 3068 /* Check whether the normalized exponent is high enough to overflow 3069 max during the log-rebasing in the max-exponent check below. */ 3070 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 3071 fs = handleOverflow(rounding_mode); 3072 3073 /* If it wasn't, then it also wasn't high enough to overflow max 3074 during the log-rebasing in the min-exponent check. Check that it 3075 won't overflow min in either check, then perform the min-exponent 3076 check. */ 3077 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 3078 (D.normalizedExponent + 1) * 28738 <= 3079 8651 * (semantics->minExponent - (int) semantics->precision)) { 3080 /* Underflow to zero and round. 
*/ 3081 category = fcNormal; 3082 zeroSignificand(); 3083 fs = normalize(rounding_mode, lfLessThanHalf); 3084 3085 /* We can finally safely perform the max-exponent check. */ 3086 } else if ((D.normalizedExponent - 1) * 42039 3087 >= 12655 * semantics->maxExponent) { 3088 /* Overflow and round. */ 3089 fs = handleOverflow(rounding_mode); 3090 } else { 3091 integerPart *decSignificand; 3092 unsigned int partCount; 3093 3094 /* A tight upper bound on number of bits required to hold an 3095 N-digit decimal integer is N * 196 / 59. Allocate enough space 3096 to hold the full significand, and an extra part required by 3097 tcMultiplyPart. */ 3098 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 3099 partCount = partCountForBits(1 + 196 * partCount / 59); 3100 decSignificand = new integerPart[partCount + 1]; 3101 partCount = 0; 3102 3103 /* Convert to binary efficiently - we do almost all multiplication 3104 in an integerPart. When this would overflow do we do a single 3105 bignum multiplication, and then revert again to multiplication 3106 in an integerPart. */ 3107 do { 3108 integerPart decValue, val, multiplier; 3109 3110 val = 0; 3111 multiplier = 1; 3112 3113 do { 3114 if (*p == '.') { 3115 p++; 3116 if (p == str.end()) { 3117 break; 3118 } 3119 } 3120 decValue = decDigitValue(*p++); 3121 if (decValue >= 10U) { 3122 delete[] decSignificand; 3123 return createError("Invalid character in significand"); 3124 } 3125 multiplier *= 10; 3126 val = val * 10 + decValue; 3127 /* The maximum number that can be multiplied by ten with any 3128 digit added without overflowing an integerPart. */ 3129 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 3130 3131 /* Multiply out the current part. */ 3132 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 3133 partCount, partCount + 1, false); 3134 3135 /* If we used another part (likely but not guaranteed), increase 3136 the count. 
*/ 3137 if (decSignificand[partCount]) 3138 partCount++; 3139 } while (p <= D.lastSigDigit); 3140 3141 category = fcNormal; 3142 fs = roundSignificandWithExponent(decSignificand, partCount, 3143 D.exponent, rounding_mode); 3144 3145 delete [] decSignificand; 3146 } 3147 3148 return fs; 3149 } 3150 3151 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 3152 const size_t MIN_NAME_SIZE = 3; 3153 3154 if (str.size() < MIN_NAME_SIZE) 3155 return false; 3156 3157 if (str == "inf" || str == "INFINITY" || str == "+Inf") { 3158 makeInf(false); 3159 return true; 3160 } 3161 3162 bool IsNegative = str.front() == '-'; 3163 if (IsNegative) { 3164 str = str.drop_front(); 3165 if (str.size() < MIN_NAME_SIZE) 3166 return false; 3167 3168 if (str == "inf" || str == "INFINITY" || str == "Inf") { 3169 makeInf(true); 3170 return true; 3171 } 3172 } 3173 3174 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 3175 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 3176 if (IsSignaling) { 3177 str = str.drop_front(); 3178 if (str.size() < MIN_NAME_SIZE) 3179 return false; 3180 } 3181 3182 if (str.starts_with("nan") || str.starts_with("NaN")) { 3183 str = str.drop_front(3); 3184 3185 // A NaN without payload. 3186 if (str.empty()) { 3187 makeNaN(IsSignaling, IsNegative); 3188 return true; 3189 } 3190 3191 // Allow the payload to be inside parentheses. 3192 if (str.front() == '(') { 3193 // Parentheses should be balanced (and not empty). 3194 if (str.size() <= 2 || str.back() != ')') 3195 return false; 3196 3197 str = str.slice(1, str.size() - 1); 3198 } 3199 3200 // Determine the payload number's radix. 3201 unsigned Radix = 10; 3202 if (str[0] == '0') { 3203 if (str.size() > 1 && tolower(str[1]) == 'x') { 3204 str = str.drop_front(2); 3205 Radix = 16; 3206 } else 3207 Radix = 8; 3208 } 3209 3210 // Parse the payload and make the NaN. 
3211 APInt Payload; 3212 if (!str.getAsInteger(Radix, Payload)) { 3213 makeNaN(IsSignaling, IsNegative, &Payload); 3214 return true; 3215 } 3216 } 3217 3218 return false; 3219 } 3220 3221 Expected<IEEEFloat::opStatus> 3222 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 3223 if (str.empty()) 3224 return createError("Invalid string length"); 3225 3226 // Handle special cases. 3227 if (convertFromStringSpecials(str)) 3228 return opOK; 3229 3230 /* Handle a leading minus sign. */ 3231 StringRef::iterator p = str.begin(); 3232 size_t slen = str.size(); 3233 sign = *p == '-' ? 1 : 0; 3234 if (*p == '-' || *p == '+') { 3235 p++; 3236 slen--; 3237 if (!slen) 3238 return createError("String has no digits"); 3239 } 3240 3241 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 3242 if (slen == 2) 3243 return createError("Invalid string"); 3244 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 3245 rounding_mode); 3246 } 3247 3248 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 3249 } 3250 3251 /* Write out a hexadecimal representation of the floating point value 3252 to DST, which must be of sufficient size, in the C99 form 3253 [-]0xh.hhhhp[+-]d. Return the number of characters written, 3254 excluding the terminating NUL. 3255 3256 If UPPERCASE, the output is in upper case, otherwise in lower case. 3257 3258 HEXDIGITS digits appear altogether, rounding the value if 3259 necessary. If HEXDIGITS is 0, the minimal precision to display the 3260 number precisely is used instead. If nothing would appear after 3261 the decimal point it is suppressed. 3262 3263 The decimal exponent is always printed and has at least one digit. 3264 Zero values display an exponent of zero. Infinities and NaNs 3265 appear as "infinity" or "nan" respectively. 3266 3267 The above rules are as specified by C99. There is ambiguity about 3268 what the leading hexadecimal digit should be. 
This implementation
   uses whatever is necessary so that the exponent is displayed as
   stored.  This implies the exponent will fall within the IEEE format
   range, and the leading hexadecimal digit will be 0 (for denormals),
   1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
   any other digits zero).
*/
unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
                                           bool upperCase,
                                           roundingMode rounding_mode) const {
  char *p;

  /* Remember where the output starts so the length can be returned.  */
  p = dst;
  if (sign)
    *dst++ = '-';

  switch (category) {
  case fcInfinity:
    /* Copy the name without its terminating NUL; one is appended below.  */
    memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
    dst += sizeof infinityL - 1;
    break;

  case fcNaN:
    memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
    dst += sizeof NaNU - 1;
    break;

  case fcZero:
    /* "0x0", then HEXDIGITS - 1 fractional zeroes if more than one
       digit was requested, then a zero exponent.  */
    *dst++ = '0';
    *dst++ = upperCase ? 'X': 'x';
    *dst++ = '0';
    if (hexDigits > 1) {
      *dst++ = '.';
      memset (dst, '0', hexDigits - 1);
      dst += hexDigits - 1;
    }
    *dst++ = upperCase ? 'P': 'p';
    *dst++ = '0';
    break;

  case fcNormal:
    dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
    break;
  }

  *dst = 0;

  return static_cast<unsigned int>(dst - p);
}

/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.  Returns
   the pointer produced by writeSignedDecimal for the exponent; no NUL
   is written here.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  /* Number of integerPart-sized chunks needed to cover valueBits.  */
  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    /* Fill the low bits of "part" from the next lower part.  */
    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Increment the digits already written, right to left, for as
       long as a digit wraps round to '0'.  */
    /* Note that hexDigitChars has a trailing '0'.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}

// Hash an IEEEFloat.  Values that are not finite and non-zero hash only their
// category, sign and precision; finite non-zero values additionally hash the
// exponent and every significand part.
hash_code hash_value(const IEEEFloat &Arg) {
  if (!Arg.isFiniteNonZero())
    return hash_combine((uint8_t)Arg.category,
                        // NaN has no sign, fix it at zero.
                        Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
                        Arg.semantics->precision);

  // Normal floats need their exponent and significand hashed.
  return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
                      Arg.semantics->precision, Arg.exponent,
                      hash_combine_range(
                        Arg.significandParts(),
                        Arg.significandParts() + Arg.partCount()));
}

// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// Current implementation requires integerPartWidth==64, which is correct at
// the moment but could be made more general.
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.

// Pack an x87 double-extended value into an 80-bit APInt: word 0 holds the
// 64-bit significand (this format stores the integer bit explicitly), word 1
// holds the 15-bit biased exponent with the sign in bit 15.
APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
  assert(partCount()==2);

  uint64_t myexponent, mysignificand;

  if (isFiniteNonZero()) {
    myexponent = exponent+16383; //bias
    mysignificand = significandParts()[0];
    // A biased exponent of 1 with the integer bit clear is a denormal, which
    // is encoded with an exponent field of 0.
    if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
      myexponent = 0;   // denormal
  } else if (category==fcZero) {
    myexponent = 0;
    mysignificand = 0;
  } else if (category==fcInfinity) {
    myexponent = 0x7fff;
    mysignificand = 0x8000000000000000ULL;
  } else {
    assert(category == fcNaN && "Unknown category");
    myexponent = 0x7fff;
    mysignificand = significandParts()[0];
  }

  uint64_t words[2];
  words[0] = mysignificand;
  words[1] =  ((uint64_t)(sign & 1) << 15) |
              (myexponent & 0x7fffLL);
  return APInt(80, words);
}

// Pack a legacy PPC double-double value into a 128-bit APInt made of two IEEE
// double bit patterns: word 0 is this value rounded to double, word 1 is the
// remainder after subtracting that double (zero when nothing was lost or the
// rounded value is not finite and non-zero).
APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
  assert(partCount()==2);

  uint64_t words[2];
  opStatus fs;
  bool losesInfo;

  // Convert number to double.  To avoid spurious underflows, we re-
  // normalize against the "double" minExponent first, and only *then*
  // truncate the mantissa.  The result of that second conversion
  // may be inexact, but should never underflow.
  // Declare fltSemantics before APFloat that uses it (and
  // saves pointer to it) to ensure correct destruction order.
  fltSemantics extendedSemantics = *semantics;
  extendedSemantics.minExponent = semIEEEdouble.minExponent;
  IEEEFloat extended(*this);
  fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  (void)fs;

  IEEEFloat u(extended);
  fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK || fs == opInexact);
  (void)fs;
  words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();

  // If conversion was exact or resulted in a special case, we're done;
  // just set the second double to zero.  Otherwise, re-convert back to
  // the extended format and compute the difference.  This now should
  // convert exactly to double.
  if (u.isFiniteNonZero() && losesInfo) {
    fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    IEEEFloat v(extended);
    v.subtract(u, rmNearestTiesToEven);
    fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;
    words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
  } else {
    words[1] = 0;
  }

  return APInt(128, words);
}

// Pack a value whose format S stores sign, biased exponent and trailing
// significand (no stored integer bit) into an APInt of S.sizeInBits bits.  All
// layout quantities are derived from S at compile time.
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
  assert(semantics == &S);

  constexpr int bias = -(S.minExponent - 1);
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  // Index of the significand part containing the integer bit, and that bit's
  // mask within the part.
  constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
  constexpr integerPart integer_bit =
      integerPart{1} << (trailing_significand_bits % integerPartWidth);
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;

  uint64_t myexponent;
  std::array<integerPart, partCountForBits(trailing_significand_bits)>
      mysignificand;

  if (isFiniteNonZero()) {
    myexponent = exponent + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
    // A biased exponent of 1 with the integer bit clear is a denormal, which
    // is encoded with an exponent field of 0.
    if (myexponent == 1 &&
        !(significandParts()[integer_bit_part] & integer_bit))
      myexponent = 0; // denormal
  } else if (category == fcZero) {
    myexponent = ::exponentZero(S) + bias;
    mysignificand.fill(0);
  } else if (category == fcInfinity) {
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
        S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support inf!");
    myexponent = ::exponentInf(S) + bias;
    mysignificand.fill(0);
  } else {
    assert(category == fcNaN && "Unknown category!");
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support NaN!");
    myexponent = ::exponentNaN(S) + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
  }
  // Assemble the output words: the significand parts first, any remaining
  // words zeroed, then the sign and exponent OR-ed into the last word.
  std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
  auto words_iter =
      std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
  if constexpr (significand_mask != 0) {
    // Clear the integer bit.
    words[mysignificand.size() - 1] &= significand_mask;
  }
  std::fill(words_iter, words.end(), uint64_t{0});
  constexpr size_t last_word = words.size() - 1;
  uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
                          << ((S.sizeInBits - 1) % 64);
  words[last_word] |= shifted_sign;
  uint64_t shifted_exponent = (myexponent & exponent_mask)
                              << (trailing_significand_bits % 64);
  words[last_word] |= shifted_exponent;
  // A single-word result goes through the (width, value) APInt constructor.
  if constexpr (last_word == 0) {
    return APInt(S.sizeInBits, words[0]);
  }
  return APInt(S.sizeInBits, words);
}

// The functions below are per-format entry points: each asserts the part
// count it expects and forwards to convertIEEEFloatToAPInt instantiated for
// that format.

APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
  assert(partCount() == 2);
  return convertIEEEFloatToAPInt<semIEEEquad>();
}

APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEdouble>();
}

APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEsingle>();
}

APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semBFloat>();
}

APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEhalf>();
}

APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2>();
}

APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}

APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3>();
}

APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
}

APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
}

APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}

APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloatTF32>();
}

APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
}

APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
}

APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
}

// This function creates an APInt that is just a bit map of the floating
// point constant as it would appear in memory.  It is not a conversion,
// and treating the result as a normal integer is unlikely to be useful.
3669 3670 APInt IEEEFloat::bitcastToAPInt() const { 3671 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3672 return convertHalfAPFloatToAPInt(); 3673 3674 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3675 return convertBFloatAPFloatToAPInt(); 3676 3677 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3678 return convertFloatAPFloatToAPInt(); 3679 3680 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3681 return convertDoubleAPFloatToAPInt(); 3682 3683 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3684 return convertQuadrupleAPFloatToAPInt(); 3685 3686 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3687 return convertPPCDoubleDoubleAPFloatToAPInt(); 3688 3689 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3690 return convertFloat8E5M2APFloatToAPInt(); 3691 3692 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) 3693 return convertFloat8E5M2FNUZAPFloatToAPInt(); 3694 3695 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3) 3696 return convertFloat8E4M3APFloatToAPInt(); 3697 3698 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3699 return convertFloat8E4M3FNAPFloatToAPInt(); 3700 3701 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) 3702 return convertFloat8E4M3FNUZAPFloatToAPInt(); 3703 3704 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) 3705 return convertFloat8E4M3B11FNUZAPFloatToAPInt(); 3706 3707 if (semantics == (const llvm::fltSemantics *)&semFloatTF32) 3708 return convertFloatTF32APFloatToAPInt(); 3709 3710 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN) 3711 return convertFloat6E3M2FNAPFloatToAPInt(); 3712 3713 if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN) 3714 return convertFloat6E2M3FNAPFloatToAPInt(); 3715 3716 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN) 3717 return convertFloat4E2M1FNAPFloatToAPInt(); 3718 3719 assert(semantics == (const 
llvm::fltSemantics*)&semX87DoubleExtended && 3720 "unknown format!"); 3721 return convertF80LongDoubleAPFloatToAPInt(); 3722 } 3723 3724 float IEEEFloat::convertToFloat() const { 3725 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3726 "Float semantics are not IEEEsingle"); 3727 APInt api = bitcastToAPInt(); 3728 return api.bitsToFloat(); 3729 } 3730 3731 double IEEEFloat::convertToDouble() const { 3732 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3733 "Float semantics are not IEEEdouble"); 3734 APInt api = bitcastToAPInt(); 3735 return api.bitsToDouble(); 3736 } 3737 3738 #ifdef HAS_IEE754_FLOAT128 3739 float128 IEEEFloat::convertToQuad() const { 3740 assert(semantics == (const llvm::fltSemantics *)&semIEEEquad && 3741 "Float semantics are not IEEEquads"); 3742 APInt api = bitcastToAPInt(); 3743 return api.bitsToQuad(); 3744 } 3745 #endif 3746 3747 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3748 /// does not support these bit patterns: 3749 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3750 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3751 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3752 /// exponent = 0, integer bit 1 ("pseudodenormal") 3753 /// At the moment, the first three are treated as NaNs, the last one as Normal. 
void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
  // Word 0 is taken as the 64-bit significand (explicit integer bit at bit
  // 63); the low 15 bits of word 1 are the biased exponent and bit 15 is the
  // sign.
  uint64_t i1 = api.getRawData()[0];
  uint64_t i2 = api.getRawData()[1];
  uint64_t myexponent = (i2 & 0x7fff);
  uint64_t mysignificand = i1;
  uint8_t myintegerbit = mysignificand >> 63;

  initialize(&semX87DoubleExtended);
  assert(partCount()==2);

  sign = static_cast<unsigned int>(i2>>15);
  if (myexponent == 0 && mysignificand == 0) {
    makeZero(sign);
  } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
    // All-ones exponent with only the integer bit set: infinity.
    makeInf(sign);
  } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
             (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
    // Any other all-ones-exponent pattern, or a non-zero exponent with a
    // clear integer bit, is classified as NaN; the raw significand is kept.
    category = fcNaN;
    exponent = exponentNaN();
    significandParts()[0] = mysignificand;
    significandParts()[1] = 0;
  } else {
    category = fcNormal;
    exponent = myexponent - 16383;
    significandParts()[0] = mysignificand;
    significandParts()[1] = 0;
    if (myexponent==0) // denormal
      exponent = -16382;
  }
}

/// Initialize this value from a 128-bit pattern made of two IEEE doubles
/// (word 0 and word 1 of \p api). Each double is converted to
/// semPPCDoubleDoubleLegacy and, unless the first one is zero or non-finite,
/// the second is added to it.
void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
  uint64_t i1 = api.getRawData()[0];
  uint64_t i2 = api.getRawData()[1];
  opStatus fs;
  bool losesInfo;

  // Get the first double and convert to our format.
  initFromDoubleAPInt(APInt(64, i1));
  fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  // Silence the unused-variable warning when asserts are compiled out.
  (void)fs;

  // Unless we have a special case, add in second double.
  if (isFiniteNonZero()) {
    IEEEFloat v(semIEEEdouble, APInt(64, i2));
    fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    add(v, rmNearestTiesToEven);
  }
}

/// Generic decoder for formats laid out as sign | exponent | trailing
/// significand. All field widths and masks are derived at compile time from
/// the semantics \p S; \p api must be exactly S.sizeInBits wide.
///
/// Classification order is: infinity (only for IEEE754 non-finite
/// behavior), NaN (according to S.nanEncoding), zero, then normal/denormal.
template <const fltSemantics &S>
void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
  assert(api.getBitWidth() == S.sizeInBits);
  // Position of the (implicit) integer bit within the top significand part.
  constexpr integerPart integer_bit = integerPart{1}
                                      << ((S.precision - 1) % integerPartWidth);
  // Bits below the integer bit in the top part; zero when the integer bit
  // sits at bit 0 of that part.
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  constexpr unsigned int stored_significand_parts =
      partCountForBits(trailing_significand_bits);
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
  constexpr int bias = -(S.minExponent - 1);

  // Copy the bits of the significand. We need to clear out the exponent and
  // sign bit in the last word.
  std::array<integerPart, stored_significand_parts> mysignificand;
  std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
  if constexpr (significand_mask != 0) {
    mysignificand[mysignificand.size() - 1] &= significand_mask;
  }

  // We assume the last word holds the sign bit, the exponent, and potentially
  // some of the trailing significand field.
  uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
  uint64_t myexponent =
      (last_word >> (trailing_significand_bits % 64)) & exponent_mask;

  initialize(&S);
  assert(partCount() == mysignificand.size());

  sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));

  bool all_zero_significand =
      llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });

  bool is_zero = myexponent == 0 && all_zero_significand;

  // Infinity exists only for formats with IEEE754 non-finite behavior.
  if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
    if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
      makeInf(sign);
      return;
    }
  }

  bool is_nan = false;

  // Exactly one of these branches is compiled, selected by the format's NaN
  // encoding.
  if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
    is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
    // Every part below the top one must be all ones, and the top part must
    // equal the mask of its stored bits (skipped when that mask is empty).
    bool all_ones_significand =
        std::all_of(mysignificand.begin(), mysignificand.end() - 1,
                    [](integerPart bits) { return bits == ~integerPart{0}; }) &&
        (!significand_mask ||
         mysignificand[mysignificand.size() - 1] == significand_mask);
    is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
    // The negative-zero bit pattern is the NaN.
    is_nan = is_zero && sign;
  }

  if (is_nan) {
    category = fcNaN;
    exponent = ::exponentNaN(S);
    std::copy_n(mysignificand.begin(), mysignificand.size(),
                significandParts());
    return;
  }

  if (is_zero) {
    makeZero(sign);
    return;
  }

  category = fcNormal;
  exponent = myexponent - bias;
  std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
  if (myexponent == 0) // denormal
    exponent = S.minExponent;
  else
    significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
}

// The functions below are thin per-format entry points; each one forwards to
// initFromIEEEAPInt instantiated with the matching semantics.

/// Decode \p api as an IEEE quad value.
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEquad>(api);
}

/// Decode \p api as an IEEE double value.
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEdouble>(api);
}

/// Decode \p api as an IEEE single value.
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEsingle>(api);
}

/// Decode \p api as a BFloat value.
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semBFloat>(api);
}

/// Decode \p api as an IEEE half value.
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEhalf>(api);
}

/// Decode \p api as a Float8E5M2 value.
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2>(api);
}

/// Decode \p api as a Float8E5M2FNUZ value.
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}

/// Decode \p api as a Float8E4M3 value.
void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3>(api);
}

/// Decode \p api as a Float8E4M3FN value.
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FN>(api);
}

/// Decode \p api as a Float8E4M3FNUZ value.
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
}

/// Decode \p api as a Float8E4M3B11FNUZ value.
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
}

/// Decode \p api as a FloatTF32 value.
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
  initFromIEEEAPInt<semFloatTF32>(api);
}

/// Decode \p api as a Float6E3M2FN value.
void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E3M2FN>(api);
}

/// Decode \p api as a Float6E2M3FN value.
void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E2M3FN>(api);
}

/// Decode \p api as a Float4E2M1FN value.
void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat4E2M1FN>(api);
}

/// Treat api as containing the bits of a floating point number.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
  assert(api.getBitWidth() == Sem->sizeInBits);
  // Dispatch on the identity (address) of the semantics object.
  if (Sem == &semIEEEhalf)
    return initFromHalfAPInt(api);
  if (Sem == &semBFloat)
    return initFromBFloatAPInt(api);
  if (Sem == &semIEEEsingle)
    return initFromFloatAPInt(api);
  if (Sem == &semIEEEdouble)
    return initFromDoubleAPInt(api);
  if (Sem == &semX87DoubleExtended)
    return initFromF80LongDoubleAPInt(api);
  if (Sem == &semIEEEquad)
    return initFromQuadrupleAPInt(api);
  if (Sem == &semPPCDoubleDoubleLegacy)
    return initFromPPCDoubleDoubleAPInt(api);
  if (Sem == &semFloat8E5M2)
    return initFromFloat8E5M2APInt(api);
  if (Sem == &semFloat8E5M2FNUZ)
    return initFromFloat8E5M2FNUZAPInt(api);
  if (Sem == &semFloat8E4M3)
    return initFromFloat8E4M3APInt(api);
  if (Sem == &semFloat8E4M3FN)
    return initFromFloat8E4M3FNAPInt(api);
  if (Sem == &semFloat8E4M3FNUZ)
    return initFromFloat8E4M3FNUZAPInt(api);
  if (Sem == &semFloat8E4M3B11FNUZ)
    return initFromFloat8E4M3B11FNUZAPInt(api);
  if (Sem == &semFloatTF32)
    return initFromFloatTF32APInt(api);
  if (Sem == &semFloat6E3M2FN)
    return initFromFloat6E3M2FNAPInt(api);
  if (Sem == &semFloat6E2M3FN)
    return initFromFloat6E2M3FNAPInt(api);
  if (Sem == &semFloat4E2M1FN)
    return initFromFloat4E2M1FNAPInt(api);

  // No decoder is registered for any other semantics.
  llvm_unreachable(nullptr);
}

/// Make this number the largest magnitude normal number in the given
/// semantics.
void IEEEFloat::makeLargest(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 1..10
  //   significand = 1..1
  category = fcNormal;
  sign = Negative;
  exponent = semantics->maxExponent;

  // Use memset to set all but the highest integerPart to all ones.
  integerPart *significand = significandParts();
  unsigned PartCount = partCount();
  memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));

  // Set the high integerPart especially setting all unused top bits for
  // internal consistency.
  const unsigned NumUnusedHighBits =
    PartCount*integerPartWidth - semantics->precision;
  significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
                                   ? (~integerPart(0) >> NumUnusedHighBits)
                                   : 0;

  // With the all-ones NaN encoding, an all-ones significand at the top
  // exponent is the NaN pattern, so the largest finite value has its lowest
  // significand bit cleared.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    significand[0] &= ~integerPart(1);
}

/// Make this number the smallest magnitude denormal number in the given
/// semantics.
void IEEEFloat::makeSmallest(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 0..01
  category = fcNormal;
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSet(significandParts(), 1, partCount());
}

/// Make this number the smallest magnitude normalized number in the given
/// semantics: minimum exponent with only the integer bit set.
void IEEEFloat::makeSmallestNormalized(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 10..0

  category = fcNormal;
  zeroSignificand();
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSetBit(significandParts(), semantics->precision - 1);
}

/// Construct a value with semantics \p Sem from the raw bit pattern \p API.
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}

/// Construct an IEEE single value from the bits of the host float \p f.
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}

/// Construct an IEEE double value from the bits of the host double \p d.
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}

namespace {
  /// Append the characters of \p Str to \p Buffer.
  void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
    Buffer.append(Str.begin(), Str.end());
  }

  /// Removes data from the given significand until it is no more
  /// precise than is required for the desired precision.
  ///
  /// \p exp is a decimal exponent here: it is increased by the number of
  /// powers of ten divided out of \p significand.
  void AdjustToPrecision(APInt &significand,
                         int &exp, unsigned FormatPrecision) {
    unsigned bits = significand.getActiveBits();

    // 196/59 is a very slight overestimate of lg_2(10).
    unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;

    if (bits <= bitsRequired) return;

    unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
    if (!tensRemovable) return;

    exp += tensRemovable;

    // Compute divisor = 10^tensRemovable by binary exponentiation.
    APInt divisor(significand.getBitWidth(), 1);
    APInt powten(significand.getBitWidth(), 10);
    while (true) {
      if (tensRemovable & 1)
        divisor *= powten;
      tensRemovable >>= 1;
      if (!tensRemovable) break;
      powten *= powten;
    }

    significand = significand.udiv(divisor);

    // Truncate the significand down to its active bit count.
    significand = significand.trunc(significand.getActiveBits());
  }


  /// Rounds the decimal digit string in \p buffer (least significant digit
  /// first) to at most \p FormatPrecision digits, adding the number of
  /// dropped digits to \p exp.
  void AdjustToPrecision(SmallVectorImpl<char> &buffer,
                         int &exp, unsigned FormatPrecision) {
    unsigned N = buffer.size();
    if (N <= FormatPrecision) return;

    // The most significant figures are the last ones in the buffer.
    unsigned FirstSignificant = N - FormatPrecision;

    // Round.
    // FIXME: this probably shouldn't use 'round half up'.

    // Rounding down is just a truncation, except we also want to drop
    // trailing zeros from the new result.
    if (buffer[FirstSignificant - 1] < '5') {
      while (FirstSignificant < N && buffer[FirstSignificant] == '0')
        FirstSignificant++;

      exp += FirstSignificant;
      buffer.erase(&buffer[0], &buffer[FirstSignificant]);
      return;
    }

    // Rounding up requires a decimal add-with-carry.  If we continue
    // the carry, the newly-introduced zeros will just be truncated.
    for (unsigned I = FirstSignificant; I != N; ++I) {
      if (buffer[I] == '9') {
        FirstSignificant++;
      } else {
        buffer[I]++;
        break;
      }
    }

    // If we carried through, we have exactly one digit of precision.
    if (FirstSignificant == N) {
      exp += FirstSignificant;
      buffer.clear();
      buffer.push_back('1');
      return;
    }

    exp += FirstSignificant;
    buffer.erase(&buffer[0], &buffer[FirstSignificant]);
  }

  /// Appends the decimal rendering of (-1)^isNeg * significand * 2^exp to
  /// \p Str.
  ///
  /// \param FormatPrecision  maximum number of significant decimal digits;
  ///        when zero it is derived from the bit width of \p significand.
  /// \param FormatMaxPadding when zero, scientific notation is always used;
  ///        otherwise it bounds the zero padding allowed before falling back
  ///        to scientific notation.
  /// \param TruncateZero     selects 'E' (true) or 'e' (false) as the
  ///        exponent marker; when false, the fraction is zero-padded up to
  ///        FormatPrecision and the exponent has at least two digits.
  void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
                    APInt significand, unsigned FormatPrecision,
                    unsigned FormatMaxPadding, bool TruncateZero) {
    const int semanticsPrecision = significand.getBitWidth();

    if (isNeg)
      Str.push_back('-');

    // Set FormatPrecision if zero.  We want to do this before we
    // truncate trailing zeros, as those are part of the precision.
    if (!FormatPrecision) {
      // We use enough digits so the number can be round-tripped back to an
      // APFloat. The formula comes from "How to Print Floating-Point Numbers
      // Accurately" by Steele and White.
      // FIXME: Using a formula based purely on the precision is conservative;
      // we can print fewer digits depending on the actual value being printed.

      // FormatPrecision = 2 + floor(significandBits / lg_2(10))
      FormatPrecision = 2 + semanticsPrecision * 59 / 196;
    }

    // Ignore trailing binary zeros.
    int trailingZeros = significand.countr_zero();
    exp += trailingZeros;
    significand.lshrInPlace(trailingZeros);

    // Change the exponent from 2^e to 10^e.
    if (exp == 0) {
      // Nothing to do.
    } else if (exp > 0) {
      // Just shift left.
      significand = significand.zext(semanticsPrecision + exp);
      significand <<= exp;
      exp = 0;
    } else { /* exp < 0 */
      int texp = -exp;

      // We transform this using the identity:
      //   (N)(2^-e) == (N)(5^e)(10^-e)
      // This means we have to multiply N (the significand) by 5^e.
      // To avoid overflow, we have to operate on numbers large
      // enough to store N * 5^e:
      //   log2(N * 5^e) == log2(N) + e * log2(5)
      //                 <= semantics->precision + e * 137 / 59
      //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

      unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;

      // Multiply significand by 5^e.
      //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
      significand = significand.zext(precision);
      APInt five_to_the_i(precision, 5);
      while (true) {
        if (texp & 1)
          significand *= five_to_the_i;

        texp >>= 1;
        if (!texp)
          break;
        five_to_the_i *= five_to_the_i;
      }
    }

    AdjustToPrecision(significand, exp, FormatPrecision);

    // Decimal digits, least significant first.
    SmallVector<char, 256> buffer;

    // Fill the buffer.
    unsigned precision = significand.getBitWidth();
    if (precision < 4) {
      // We need enough precision to store the value 10.
      precision = 4;
      significand = significand.zext(precision);
    }
    APInt ten(precision, 10);
    APInt digit(precision, 0);

    bool inTrail = true;
    while (significand != 0) {
      // digit <- significand % 10
      // significand <- significand / 10
      APInt::udivrem(significand, ten, significand, digit);

      unsigned d = digit.getZExtValue();

      // Drop trailing zeros.
      if (inTrail && !d)
        exp++;
      else {
        buffer.push_back((char) ('0' + d));
        inTrail = false;
      }
    }

    assert(!buffer.empty() && "no characters in buffer!");

    // Drop down to FormatPrecision.
    // TODO: don't do more precise calculations above than are required.
    AdjustToPrecision(buffer, exp, FormatPrecision);

    unsigned NDigits = buffer.size();

    // Check whether we should use scientific notation.
    bool FormatScientific;
    if (!FormatMaxPadding)
      FormatScientific = true;
    else {
      if (exp >= 0) {
        // 765e3 --> 765000
        //              ^^^
        // But we shouldn't make the number look more precise than it is.
        FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                            NDigits + (unsigned) exp > FormatPrecision);
      } else {
        // Power of the most significant digit.
        int MSD = exp + (int) (NDigits - 1);
        if (MSD >= 0) {
          // 765e-2 == 7.65
          FormatScientific = false;
        } else {
          // 765e-5 == 0.00765
          //           ^ ^^
          FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
        }
      }
    }

    // Scientific formatting is pretty straightforward.
    if (FormatScientific) {
      exp += (NDigits - 1);

      Str.push_back(buffer[NDigits-1]);
      Str.push_back('.');
      if (NDigits == 1 && TruncateZero)
        Str.push_back('0');
      else
        for (unsigned I = 1; I != NDigits; ++I)
          Str.push_back(buffer[NDigits-1-I]);
      // Fill with zeros up to FormatPrecision.
      if (!TruncateZero && FormatPrecision > NDigits - 1)
        Str.append(FormatPrecision - NDigits + 1, '0');
      // For !TruncateZero we use lower 'e'.
      Str.push_back(TruncateZero ? 'E' : 'e');

      Str.push_back(exp >= 0 ? '+' : '-');
      if (exp < 0)
        exp = -exp;
      // Exponent digits, least significant first; emitted in reverse below.
      SmallVector<char, 6> expbuf;
      do {
        expbuf.push_back((char) ('0' + (exp % 10)));
        exp /= 10;
      } while (exp);
      // Exponent always at least two digits if we do not truncate zeros.
      if (!TruncateZero && expbuf.size() < 2)
        expbuf.push_back('0');
      for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
        Str.push_back(expbuf[E-1-I]);
      return;
    }

    // Non-scientific, positive exponents.
    if (exp >= 0) {
      for (unsigned I = 0; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
      for (unsigned I = 0; I != (unsigned) exp; ++I)
        Str.push_back('0');
      return;
    }

    // Non-scientific, negative exponents.

    // The number of digits to the left of the decimal point.
    int NWholeDigits = exp + (int) NDigits;

    unsigned I = 0;
    if (NWholeDigits > 0) {
      for (; I != (unsigned) NWholeDigits; ++I)
        Str.push_back(buffer[NDigits-I-1]);
      Str.push_back('.');
    } else {
      unsigned NZeros = 1 + (unsigned) -NWholeDigits;

      Str.push_back('0');
      Str.push_back('.');
      for (unsigned Z = 1; Z != NZeros; ++Z)
        Str.push_back('0');
    }

    for (; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);

  }
} // namespace

/// Append a textual form of this value to \p Str. Infinities are written as
/// "+Inf"/"-Inf" and NaNs as "NaN"; zeros are formatted directly here; finite
/// non-zero values are decomposed and handed to toStringImpl with the same
/// formatting parameters.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    // A zero FormatMaxPadding requests scientific notation.
    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
               FormatMaxPadding, TruncateZero);

}

/// Return true if this value is a power of two whose reciprocal is exactly
/// representable as a non-denormal value. When \p inv is non-null and the
/// result is true, the reciprocal is stored to *inv.
bool IEEEFloat::getExactInverse(APFloat *inv) const {
  // Special floats and denormals have no exact inverse.
  if (!isFiniteNonZero())
    return false;

  // Check that the number is a power of two by making sure that only the
  // integer bit is set in the significand.
  if (significandLSB() != semantics->precision - 1)
    return false;

  // Get the inverse.
  IEEEFloat reciprocal(*semantics, 1ULL);
  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
    return false;

  // Avoid multiplication with a denormal, it is not safe on all platforms and
  // may be slower than a normal division.
  if (reciprocal.isDenormal())
    return false;

  assert(reciprocal.isFiniteNonZero() &&
         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);

  if (inv)
    *inv = APFloat(reciprocal, *semantics);

  return true;
}

/// If the magnitude of this value is an exact power of two, return that
/// power; otherwise (non-finite, zero, or more than one significand bit set)
/// return INT_MIN.
int IEEEFloat::getExactLog2Abs() const {
  if (!isFinite() || isZero())
    return INT_MIN;

  const integerPart *Parts = significandParts();
  const int PartCount = partCountForBits(semantics->precision);

  // Bail out as soon as a second set bit is seen.
  int PopCount = 0;
  for (int i = 0; i < PartCount; ++i) {
    PopCount += llvm::popcount(Parts[i]);
    if (PopCount > 1)
      return INT_MIN;
  }

  if (exponent != semantics->minExponent)
    return exponent;

  // At the minimum exponent the single set bit may lie below the integer
  // bit, so the result is derived from that bit's position.
  int CountrParts = 0;
  for (int i = 0; i < PartCount;
       ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
    if (Parts[i] != 0) {
      return exponent - semantics->precision + CountrParts +
             llvm::countr_zero(Parts[i]) + 1;
    }
  }

  llvm_unreachable("didn't find the set bit");
}

/// Return true if this value is a signaling NaN. Always false for non-NaN
/// values and for formats whose non-finite behavior is NanOnly or
/// FiniteOnly.
bool IEEEFloat::isSignaling() const {
  if (!isNaN())
    return false;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
      semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    return false;

  // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
  // first bit of the trailing significand being 0.
  return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
}

/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Formats that encode NaN as negative zero only have a positive zero.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else if (semantics->nonFiniteBehavior ==
                 fltNonfiniteBehavior::FiniteOnly) {
        // nextUp(getLargest()) == getLargest()
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}

/// Exponent value used internally for NaNs in this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}

/// Exponent value used internally for infinities in this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}

/// Exponent value used internally for zeros in this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}

/// Make this value an infinity with the given sign. Formats with NanOnly
/// behavior produce a NaN instead; calling this for a FiniteOnly format is
/// a programming error.
void IEEEFloat::makeInf(bool Negative) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support Inf");

  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // There is no Inf, so make NaN instead.
    makeNaN(false, Negative);
    return;
  }
  category = fcInfinity;
  sign = Negative;
  exponent = exponentInf();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Make this value a zero with the given sign (forced positive for formats
/// that use the negative-zero pattern as NaN).
void IEEEFloat::makeZero(bool Negative) {
  category = fcZero;
  sign = Negative;
  if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Merge negative zero to positive because 0b10000...000 is used for NaN
    sign = false;
  }
  exponent = exponentZero();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Turn this NaN into a quiet NaN by setting the top bit of the trailing
/// significand. Nothing is changed for NanOnly formats.
void IEEEFloat::makeQuiet() {
  assert(isNaN());
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
    APInt::tcSetBit(significandParts(), semantics->precision - 2);
}

/// Return the exponent of \p Arg, or IEK_NaN / IEK_Zero / IEK_Inf for the
/// corresponding special values. For denormals the exponent of the
/// normalized value is returned.
int ilogb(const IEEEFloat &Arg) {
  if (Arg.isNaN())
    return IEEEFloat::IEK_NaN;
  if (Arg.isZero())
    return IEEEFloat::IEK_Zero;
  if (Arg.isInfinity())
    return IEEEFloat::IEK_Inf;
  if (!Arg.isDenormal())
    return Arg.exponent;

  // Raise the exponent of a copy by precision - 1 before normalizing, then
  // subtract the same offset from the normalized exponent.
  IEEEFloat Normalized(Arg);
  int SignificandBits = Arg.getSemantics().precision - 1;

  Normalized.exponent += SignificandBits;
  Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
  return Normalized.exponent - SignificandBits;
}

/// Return \p X with \p Exp added to its exponent, renormalized using
/// \p RoundingMode. A NaN result is made quiet.
IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
  auto MaxExp = X.getSemantics().maxExponent;
  auto MinExp = X.getSemantics().minExponent;

  // If Exp is wildly out-of-scale, simply adding it to X.exponent will
  // overflow; clamp it to a safe range before adding, but ensure that the range
  // is large enough that the clamp does not change the result. The range we
  // need to support is the difference between the largest possible exponent and
  // the normalized exponent of half the smallest denormal.

  int SignificandBits = X.getSemantics().precision - 1;
  int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;

  // Clamp to one past the range ends to let normalize handle overflow.
  X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
  X.normalize(RoundingMode, lfExactlyZero);
  if (X.isNaN())
    X.makeQuiet();
  return X;
}

/// Decompose \p Val into a fraction and a power of two. \p Exp receives
/// ilogb(Val) for NaN and infinity inputs, 0 for zero, and ilogb(Val) + 1
/// otherwise; the returned value is \p Val scaled by 2^-Exp (NaNs are
/// returned quieted, infinities unchanged).
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
  Exp = ilogb(Val);

  // Quiet signalling nans.
  if (Exp == IEEEFloat::IEK_NaN) {
    IEEEFloat Quiet(Val);
    Quiet.makeQuiet();
    return Quiet;
  }

  if (Exp == IEEEFloat::IEK_Inf)
    return Val;

  // 1 is added because frexp is defined to return a normalized fraction in
  // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
  Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
  return scalbn(Val, -Exp, RM);
}

/// Construct a double-double whose two components are default-constructed
/// IEEE doubles. \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Construct a double-double whose two components are built with the
/// `uninitialized` tag. \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
                            APFloat(semIEEEdouble, uninitialized)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Construct a double-double whose first component is built from \p I and
/// whose second component is default-constructed.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
                                           APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Construct a double-double from a bit pattern: word 0 of \p I becomes the
/// first IEEE double and word 1 the second.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Build a double-double from two already-constructed components, which are
/// moved into the new object. \p S must be semPPCDoubleDouble and both
/// components must be IEEE doubles.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
                             APFloat &&Second)
    : Semantics(&S),
      Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
  assert(Semantics == &semPPCDoubleDouble);
  assert(&Floats[0].getSemantics() == &semIEEEdouble);
  assert(&Floats[1].getSemantics() == &semIEEEdouble);
}

/// Copy constructor: duplicates both components of \p RHS, or leaves the
/// component storage null when \p RHS has none.
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
    : Semantics(RHS.Semantics),
      Floats(!RHS.Floats ? nullptr
                         : new APFloat[2]{APFloat(RHS.Floats[0]),
                                          APFloat(RHS.Floats[1])}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Move constructor: takes over the components of \p RHS and marks \p RHS
/// with the bogus semantics.
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
    : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
  assert(Semantics == &semPPCDoubleDouble);
  RHS.Semantics = &semBogus;
}

/// Copy assignment. When both sides share semantics and \p RHS owns
/// components, the components are assigned element-wise; otherwise this
/// object is destroyed and copy-constructed in place (unless self-assigning).
DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
  const bool CanAssignInPlace = Semantics == RHS.Semantics && RHS.Floats;
  if (CanAssignInPlace) {
    for (unsigned Idx = 0; Idx != 2; ++Idx)
      Floats[Idx] = RHS.Floats[Idx];
    return *this;
  }

  // Self-assignment that did not qualify above: nothing to do.
  if (this == &RHS)
    return *this;

  // Rebuild this object from scratch as a copy of RHS.
  this->~DoubleAPFloat();
  new (this) DoubleAPFloat(RHS);
  return *this;
}

// Implement addition, subtraction, multiplication and division based on:
// "Software for Doubled-Precision Floating-Point Computations",
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4747 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4748 const APFloat &c, const APFloat &cc, 4749 roundingMode RM) { 4750 int Status = opOK; 4751 APFloat z = a; 4752 Status |= z.add(c, RM); 4753 if (!z.isFinite()) { 4754 if (!z.isInfinity()) { 4755 Floats[0] = std::move(z); 4756 Floats[1].makeZero(/* Neg = */ false); 4757 return (opStatus)Status; 4758 } 4759 Status = opOK; 4760 auto AComparedToC = a.compareAbsoluteValue(c); 4761 z = cc; 4762 Status |= z.add(aa, RM); 4763 if (AComparedToC == APFloat::cmpGreaterThan) { 4764 // z = cc + aa + c + a; 4765 Status |= z.add(c, RM); 4766 Status |= z.add(a, RM); 4767 } else { 4768 // z = cc + aa + a + c; 4769 Status |= z.add(a, RM); 4770 Status |= z.add(c, RM); 4771 } 4772 if (!z.isFinite()) { 4773 Floats[0] = std::move(z); 4774 Floats[1].makeZero(/* Neg = */ false); 4775 return (opStatus)Status; 4776 } 4777 Floats[0] = z; 4778 APFloat zz = aa; 4779 Status |= zz.add(cc, RM); 4780 if (AComparedToC == APFloat::cmpGreaterThan) { 4781 // Floats[1] = a - z + c + zz; 4782 Floats[1] = a; 4783 Status |= Floats[1].subtract(z, RM); 4784 Status |= Floats[1].add(c, RM); 4785 Status |= Floats[1].add(zz, RM); 4786 } else { 4787 // Floats[1] = c - z + a + zz; 4788 Floats[1] = c; 4789 Status |= Floats[1].subtract(z, RM); 4790 Status |= Floats[1].add(a, RM); 4791 Status |= Floats[1].add(zz, RM); 4792 } 4793 } else { 4794 // q = a - z; 4795 APFloat q = a; 4796 Status |= q.subtract(z, RM); 4797 4798 // zz = q + c + (a - (q + z)) + aa + cc; 4799 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 
4800 auto zz = q; 4801 Status |= zz.add(c, RM); 4802 Status |= q.add(z, RM); 4803 Status |= q.subtract(a, RM); 4804 q.changeSign(); 4805 Status |= zz.add(q, RM); 4806 Status |= zz.add(aa, RM); 4807 Status |= zz.add(cc, RM); 4808 if (zz.isZero() && !zz.isNegative()) { 4809 Floats[0] = std::move(z); 4810 Floats[1].makeZero(/* Neg = */ false); 4811 return opOK; 4812 } 4813 Floats[0] = z; 4814 Status |= Floats[0].add(zz, RM); 4815 if (!Floats[0].isFinite()) { 4816 Floats[1].makeZero(/* Neg = */ false); 4817 return (opStatus)Status; 4818 } 4819 Floats[1] = std::move(z); 4820 Status |= Floats[1].subtract(Floats[0], RM); 4821 Status |= Floats[1].add(zz, RM); 4822 } 4823 return (opStatus)Status; 4824 } 4825 4826 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4827 const DoubleAPFloat &RHS, 4828 DoubleAPFloat &Out, 4829 roundingMode RM) { 4830 if (LHS.getCategory() == fcNaN) { 4831 Out = LHS; 4832 return opOK; 4833 } 4834 if (RHS.getCategory() == fcNaN) { 4835 Out = RHS; 4836 return opOK; 4837 } 4838 if (LHS.getCategory() == fcZero) { 4839 Out = RHS; 4840 return opOK; 4841 } 4842 if (RHS.getCategory() == fcZero) { 4843 Out = LHS; 4844 return opOK; 4845 } 4846 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4847 LHS.isNegative() != RHS.isNegative()) { 4848 Out.makeNaN(false, Out.isNegative(), nullptr); 4849 return opInvalidOp; 4850 } 4851 if (LHS.getCategory() == fcInfinity) { 4852 Out = LHS; 4853 return opOK; 4854 } 4855 if (RHS.getCategory() == fcInfinity) { 4856 Out = RHS; 4857 return opOK; 4858 } 4859 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4860 4861 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4862 CC(RHS.Floats[1]); 4863 assert(&A.getSemantics() == &semIEEEdouble); 4864 assert(&AA.getSemantics() == &semIEEEdouble); 4865 assert(&C.getSemantics() == &semIEEEdouble); 4866 assert(&CC.getSemantics() == &semIEEEdouble); 4867 assert(&Out.Floats[0].getSemantics() == 
&semIEEEdouble); 4868 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4869 return Out.addImpl(A, AA, C, CC, RM); 4870 } 4871 4872 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4873 roundingMode RM) { 4874 return addWithSpecial(*this, RHS, *this, RM); 4875 } 4876 4877 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4878 roundingMode RM) { 4879 changeSign(); 4880 auto Ret = add(RHS, RM); 4881 changeSign(); 4882 return Ret; 4883 } 4884 4885 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4886 APFloat::roundingMode RM) { 4887 const auto &LHS = *this; 4888 auto &Out = *this; 4889 /* Interesting observation: For special categories, finding the lowest 4890 common ancestor of the following layered graph gives the correct 4891 return category: 4892 4893 NaN 4894 / \ 4895 Zero Inf 4896 \ / 4897 Normal 4898 4899 e.g. NaN * NaN = NaN 4900 Zero * Inf = NaN 4901 Normal * Zero = Zero 4902 Normal * Inf = Inf 4903 */ 4904 if (LHS.getCategory() == fcNaN) { 4905 Out = LHS; 4906 return opOK; 4907 } 4908 if (RHS.getCategory() == fcNaN) { 4909 Out = RHS; 4910 return opOK; 4911 } 4912 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4913 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4914 Out.makeNaN(false, false, nullptr); 4915 return opOK; 4916 } 4917 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4918 Out = LHS; 4919 return opOK; 4920 } 4921 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4922 Out = RHS; 4923 return opOK; 4924 } 4925 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4926 "Special cases not handled exhaustively"); 4927 4928 int Status = opOK; 4929 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4930 // t = a * c 4931 APFloat T = A; 4932 Status |= T.multiply(C, RM); 4933 if (!T.isFiniteNonZero()) { 4934 Floats[0] = T; 4935 Floats[1].makeZero(/* Neg = */ false); 4936 
return (opStatus)Status; 4937 } 4938 4939 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4940 APFloat Tau = A; 4941 T.changeSign(); 4942 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4943 T.changeSign(); 4944 { 4945 // v = a * d 4946 APFloat V = A; 4947 Status |= V.multiply(D, RM); 4948 // w = b * c 4949 APFloat W = B; 4950 Status |= W.multiply(C, RM); 4951 Status |= V.add(W, RM); 4952 // tau += v + w 4953 Status |= Tau.add(V, RM); 4954 } 4955 // u = t + tau 4956 APFloat U = T; 4957 Status |= U.add(Tau, RM); 4958 4959 Floats[0] = U; 4960 if (!U.isFinite()) { 4961 Floats[1].makeZero(/* Neg = */ false); 4962 } else { 4963 // Floats[1] = (t - u) + tau 4964 Status |= T.subtract(U, RM); 4965 Status |= T.add(Tau, RM); 4966 Floats[1] = T; 4967 } 4968 return (opStatus)Status; 4969 } 4970 4971 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4972 APFloat::roundingMode RM) { 4973 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4974 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4975 auto Ret = 4976 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4977 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4978 return Ret; 4979 } 4980 4981 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4982 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4983 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4984 auto Ret = 4985 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4986 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4987 return Ret; 4988 } 4989 4990 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4991 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4992 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4993 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4994 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4995 return Ret; 
4996 } 4997 4998 APFloat::opStatus 4999 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 5000 const DoubleAPFloat &Addend, 5001 APFloat::roundingMode RM) { 5002 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5003 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5004 auto Ret = Tmp.fusedMultiplyAdd( 5005 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 5006 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 5007 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5008 return Ret; 5009 } 5010 5011 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 5012 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5013 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5014 auto Ret = Tmp.roundToIntegral(RM); 5015 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5016 return Ret; 5017 } 5018 5019 void DoubleAPFloat::changeSign() { 5020 Floats[0].changeSign(); 5021 Floats[1].changeSign(); 5022 } 5023 5024 APFloat::cmpResult 5025 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 5026 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 5027 if (Result != cmpEqual) 5028 return Result; 5029 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 5030 if (Result == cmpLessThan || Result == cmpGreaterThan) { 5031 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 5032 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 5033 if (Against && !RHSAgainst) 5034 return cmpLessThan; 5035 if (!Against && RHSAgainst) 5036 return cmpGreaterThan; 5037 if (!Against && !RHSAgainst) 5038 return Result; 5039 if (Against && RHSAgainst) 5040 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 5041 } 5042 return Result; 5043 } 5044 5045 APFloat::fltCategory DoubleAPFloat::getCategory() const { 5046 return Floats[0].getCategory(); 5047 } 5048 5049 bool DoubleAPFloat::isNegative() 
const { return Floats[0].isNegative(); } 5050 5051 void DoubleAPFloat::makeInf(bool Neg) { 5052 Floats[0].makeInf(Neg); 5053 Floats[1].makeZero(/* Neg = */ false); 5054 } 5055 5056 void DoubleAPFloat::makeZero(bool Neg) { 5057 Floats[0].makeZero(Neg); 5058 Floats[1].makeZero(/* Neg = */ false); 5059 } 5060 5061 void DoubleAPFloat::makeLargest(bool Neg) { 5062 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5063 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 5064 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 5065 if (Neg) 5066 changeSign(); 5067 } 5068 5069 void DoubleAPFloat::makeSmallest(bool Neg) { 5070 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5071 Floats[0].makeSmallest(Neg); 5072 Floats[1].makeZero(/* Neg = */ false); 5073 } 5074 5075 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 5076 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5077 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 5078 if (Neg) 5079 Floats[0].changeSign(); 5080 Floats[1].makeZero(/* Neg = */ false); 5081 } 5082 5083 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 5084 Floats[0].makeNaN(SNaN, Neg, fill); 5085 Floats[1].makeZero(/* Neg = */ false); 5086 } 5087 5088 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 5089 auto Result = Floats[0].compare(RHS.Floats[0]); 5090 // |Float[0]| > |Float[1]| 5091 if (Result == APFloat::cmpEqual) 5092 return Floats[1].compare(RHS.Floats[1]); 5093 return Result; 5094 } 5095 5096 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 5097 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 5098 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 5099 } 5100 5101 hash_code hash_value(const DoubleAPFloat &Arg) { 5102 if (Arg.Floats) 5103 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 5104 return hash_combine(Arg.Semantics); 5105 } 5106 5107 
APInt DoubleAPFloat::bitcastToAPInt() const { 5108 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5109 uint64_t Data[] = { 5110 Floats[0].bitcastToAPInt().getRawData()[0], 5111 Floats[1].bitcastToAPInt().getRawData()[0], 5112 }; 5113 return APInt(128, 2, Data); 5114 } 5115 5116 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 5117 roundingMode RM) { 5118 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5119 APFloat Tmp(semPPCDoubleDoubleLegacy); 5120 auto Ret = Tmp.convertFromString(S, RM); 5121 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5122 return Ret; 5123 } 5124 5125 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 5126 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5127 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5128 auto Ret = Tmp.next(nextDown); 5129 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5130 return Ret; 5131 } 5132 5133 APFloat::opStatus 5134 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 5135 unsigned int Width, bool IsSigned, 5136 roundingMode RM, bool *IsExact) const { 5137 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5138 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5139 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 5140 } 5141 5142 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 5143 bool IsSigned, 5144 roundingMode RM) { 5145 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5146 APFloat Tmp(semPPCDoubleDoubleLegacy); 5147 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 5148 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5149 return Ret; 5150 } 5151 5152 APFloat::opStatus 5153 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5154 unsigned int InputSize, 5155 bool IsSigned, roundingMode RM) { 5156 assert(Semantics == &semPPCDoubleDouble && 
"Unexpected Semantics"); 5157 APFloat Tmp(semPPCDoubleDoubleLegacy); 5158 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5159 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5160 return Ret; 5161 } 5162 5163 APFloat::opStatus 5164 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5165 unsigned int InputSize, 5166 bool IsSigned, roundingMode RM) { 5167 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5168 APFloat Tmp(semPPCDoubleDoubleLegacy); 5169 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5170 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5171 return Ret; 5172 } 5173 5174 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5175 unsigned int HexDigits, 5176 bool UpperCase, 5177 roundingMode RM) const { 5178 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5179 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5180 .convertToHexString(DST, HexDigits, UpperCase, RM); 5181 } 5182 5183 bool DoubleAPFloat::isDenormal() const { 5184 return getCategory() == fcNormal && 5185 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5186 // (double)(Hi + Lo) == Hi defines a normal number. 
5187 Floats[0] != Floats[0] + Floats[1]); 5188 } 5189 5190 bool DoubleAPFloat::isSmallest() const { 5191 if (getCategory() != fcNormal) 5192 return false; 5193 DoubleAPFloat Tmp(*this); 5194 Tmp.makeSmallest(this->isNegative()); 5195 return Tmp.compare(*this) == cmpEqual; 5196 } 5197 5198 bool DoubleAPFloat::isSmallestNormalized() const { 5199 if (getCategory() != fcNormal) 5200 return false; 5201 5202 DoubleAPFloat Tmp(*this); 5203 Tmp.makeSmallestNormalized(this->isNegative()); 5204 return Tmp.compare(*this) == cmpEqual; 5205 } 5206 5207 bool DoubleAPFloat::isLargest() const { 5208 if (getCategory() != fcNormal) 5209 return false; 5210 DoubleAPFloat Tmp(*this); 5211 Tmp.makeLargest(this->isNegative()); 5212 return Tmp.compare(*this) == cmpEqual; 5213 } 5214 5215 bool DoubleAPFloat::isInteger() const { 5216 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5217 return Floats[0].isInteger() && Floats[1].isInteger(); 5218 } 5219 5220 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5221 unsigned FormatPrecision, 5222 unsigned FormatMaxPadding, 5223 bool TruncateZero) const { 5224 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5225 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5226 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5227 } 5228 5229 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5230 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5231 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5232 if (!inv) 5233 return Tmp.getExactInverse(nullptr); 5234 APFloat Inv(semPPCDoubleDoubleLegacy); 5235 auto Ret = Tmp.getExactInverse(&Inv); 5236 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5237 return Ret; 5238 } 5239 5240 int DoubleAPFloat::getExactLog2() const { 5241 // TODO: Implement me 5242 return INT_MIN; 5243 } 5244 5245 int DoubleAPFloat::getExactLog2Abs() const { 5246 // TODO: Implement me 5247 return INT_MIN; 5248 } 5249 5250 DoubleAPFloat 
scalbn(const DoubleAPFloat &Arg, int Exp, 5251 APFloat::roundingMode RM) { 5252 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5253 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5254 scalbn(Arg.Floats[1], Exp, RM)); 5255 } 5256 5257 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5258 APFloat::roundingMode RM) { 5259 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5260 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5261 APFloat Second = Arg.Floats[1]; 5262 if (Arg.getCategory() == APFloat::fcNormal) 5263 Second = scalbn(Second, -Exp, RM); 5264 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5265 } 5266 5267 } // namespace detail 5268 5269 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5270 if (usesLayout<IEEEFloat>(Semantics)) { 5271 new (&IEEE) IEEEFloat(std::move(F)); 5272 return; 5273 } 5274 if (usesLayout<DoubleAPFloat>(Semantics)) { 5275 const fltSemantics& S = F.getSemantics(); 5276 new (&Double) 5277 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5278 APFloat(semIEEEdouble)); 5279 return; 5280 } 5281 llvm_unreachable("Unexpected semantics"); 5282 } 5283 5284 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5285 roundingMode RM) { 5286 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5287 } 5288 5289 hash_code hash_value(const APFloat &Arg) { 5290 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5291 return hash_value(Arg.U.IEEE); 5292 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5293 return hash_value(Arg.U.Double); 5294 llvm_unreachable("Unexpected semantics"); 5295 } 5296 5297 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5298 : APFloat(Semantics) { 5299 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5300 assert(StatusOrErr && "Invalid floating point representation"); 5301 consumeError(StatusOrErr.takeError()); 5302 } 
5303 5304 FPClassTest APFloat::classify() const { 5305 if (isZero()) 5306 return isNegative() ? fcNegZero : fcPosZero; 5307 if (isNormal()) 5308 return isNegative() ? fcNegNormal : fcPosNormal; 5309 if (isDenormal()) 5310 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5311 if (isInfinity()) 5312 return isNegative() ? fcNegInf : fcPosInf; 5313 assert(isNaN() && "Other class of FP constant"); 5314 return isSignaling() ? fcSNan : fcQNan; 5315 } 5316 5317 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5318 roundingMode RM, bool *losesInfo) { 5319 if (&getSemantics() == &ToSemantics) { 5320 *losesInfo = false; 5321 return opOK; 5322 } 5323 if (usesLayout<IEEEFloat>(getSemantics()) && 5324 usesLayout<IEEEFloat>(ToSemantics)) 5325 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5326 if (usesLayout<IEEEFloat>(getSemantics()) && 5327 usesLayout<DoubleAPFloat>(ToSemantics)) { 5328 assert(&ToSemantics == &semPPCDoubleDouble); 5329 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5330 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5331 return Ret; 5332 } 5333 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5334 usesLayout<IEEEFloat>(ToSemantics)) { 5335 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5336 *this = APFloat(std::move(getIEEE()), ToSemantics); 5337 return Ret; 5338 } 5339 llvm_unreachable("Unexpected semantics"); 5340 } 5341 5342 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5343 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5344 } 5345 5346 void APFloat::print(raw_ostream &OS) const { 5347 SmallVector<char, 16> Buffer; 5348 toString(Buffer); 5349 OS << Buffer << "\n"; 5350 } 5351 5352 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5353 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 5354 #endif 5355 5356 void APFloat::Profile(FoldingSetNodeID &NID) const { 5357 NID.Add(bitcastToAPInt()); 5358 } 5359 5360 /* Same as 
convertToInteger(integerPart*, ...), except the result is returned in 5361 an APSInt, whose initial bit-width and signed-ness are used to determine the 5362 precision of the conversion. 5363 */ 5364 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5365 roundingMode rounding_mode, 5366 bool *isExact) const { 5367 unsigned bitWidth = result.getBitWidth(); 5368 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5369 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5370 rounding_mode, isExact); 5371 // Keeps the original signed-ness. 5372 result = APInt(bitWidth, parts); 5373 return status; 5374 } 5375 5376 double APFloat::convertToDouble() const { 5377 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5378 return getIEEE().convertToDouble(); 5379 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5380 "Float semantics is not representable by IEEEdouble"); 5381 APFloat Temp = *this; 5382 bool LosesInfo; 5383 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5384 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5385 (void)St; 5386 return Temp.getIEEE().convertToDouble(); 5387 } 5388 5389 #ifdef HAS_IEE754_FLOAT128 5390 float128 APFloat::convertToQuad() const { 5391 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) 5392 return getIEEE().convertToQuad(); 5393 assert(getSemantics().isRepresentableBy(semIEEEquad) && 5394 "Float semantics is not representable by IEEEquad"); 5395 APFloat Temp = *this; 5396 bool LosesInfo; 5397 opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); 5398 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5399 (void)St; 5400 return Temp.getIEEE().convertToQuad(); 5401 } 5402 #endif 5403 5404 float APFloat::convertToFloat() const { 5405 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5406 return getIEEE().convertToFloat(); 5407 
assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5408 "Float semantics is not representable by IEEEsingle"); 5409 APFloat Temp = *this; 5410 bool LosesInfo; 5411 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5412 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5413 (void)St; 5414 return Temp.getIEEE().convertToFloat(); 5415 } 5416 5417 } // namespace llvm 5418 5419 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5420