Lines Matching +full:pex +full:- +full:bias

1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
23 #include "llvm/Config/llvm-config.h"
79 // while having IEEE non-finite behavior is liable to lead to unexpected
83 // exponent is all 1s and the significand is non-zero.
90 // as non-signalling, although the paper does not state whether the NaN
131 static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
132 static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
133 static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
134 static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
135 static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
136 static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
138 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
139 static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
141 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
143 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
145 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
146 static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
148 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
153 static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
156 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
157 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
159 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
160 to each other, and two 11-bit exponents.
165 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
168 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
172 It's not equivalent to IBM double-double. For example, a legit IBM
173 double-double, 1 + epsilon:
181 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
182 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
183 semPPCDoubleDoubleLegacy -> IEEE operations
191 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
314 requires two parts to hold the single-part result). So we add an
318 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
368 return semantics.minExponent - 1; in exponentZero()
393 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; in partCountForBits()
396 /* Returns 0U-9U. Return values >= 10U are not digits. */
400 return c - '0'; in decDigitValue()
404 [+-]ddddddd.
416 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { in readExponent()
420 isNegative = (*p == '-'); in readExponent()
421 if (*p == '-' || *p == '+') { in readExponent()
446 return -(int) absExponent; in readExponent()
463 negative = *p == '-'; in totalExponent()
464 if (*p == '-' || *p == '+') { in totalExponent()
486 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) in totalExponent()
492 exponent = -exponent; in totalExponent()
494 if (exponent > 32767 || exponent < -32768) in totalExponent()
499 exponent = negative ? -32768: 32767; in totalExponent()
515 if (end - begin == 1) in skipLeadingZeroesAndAnyDot()
527 dddd.dddd[eE][+-]ddd
533 non-zero digit.
535 If the value is zero, V->firstSigDigit points to a non-digit, and
554 D->firstSigDigit = p; in interpretDecimal()
555 D->exponent = 0; in interpretDecimal()
556 D->normalizedExponent = 0; in interpretDecimal()
575 if (dot != end && p - begin == 1) in interpretDecimal()
578 /* p points to the first non-digit in the string */ in interpretDecimal()
582 D->exponent = *ExpOrErr; in interpretDecimal()
590 if (p != D->firstSigDigit) { in interpretDecimal()
595 p--; in interpretDecimal()
601 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); in interpretDecimal()
602 D->normalizedExponent = (D->exponent + in interpretDecimal()
603 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) in interpretDecimal()
604 - (dot > D->firstSigDigit && dot < p))); in interpretDecimal()
607 D->lastSigDigit = p; in interpretDecimal()
626 // Otherwise we need to find the first non-zero digit. in trailingHexadecimalFraction()
635 /* If we ran off the end it is exactly zero or one-half, otherwise in trailingHexadecimalFraction()
660 APInt::tcExtractBit(parts, bits - 1)) in lostFractionThroughTruncation()
694 /* The error from the true value, in half-ulps, on multiplying two
696 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
707 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ in HUerrBound()
723 bits--; in ulpsFromBoundary()
727 …part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)… in ulpsFromBoundary()
730 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); in ulpsFromBoundary()
735 if (part - boundary <= boundary - part) in ulpsFromBoundary()
736 return part - boundary; in ulpsFromBoundary()
738 return boundary - part; in ulpsFromBoundary()
742 while (--count) in ulpsFromBoundary()
747 } else if (part == boundary - 1) { in ulpsFromBoundary()
748 while (--count) in ulpsFromBoundary()
752 return -parts[0]; in ulpsFromBoundary()
783 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount, in powerOf5()
786 if (pow5[partsCount - 1] == 0) in powerOf5()
787 partsCount--; in powerOf5()
795 if (p2[result - 1] == 0) in powerOf5()
796 result--; in powerOf5()
834 part >>= (APFloatBase::integerPartWidth - 4 * count); in partAsHex()
835 while (count--) { in partAsHex()
855 *dst++ = *--p; in writeUnsignedDecimal()
866 *dst++ = '-'; in writeSignedDecimal()
867 dst = writeUnsignedDecimal(dst, -(unsigned) value); in writeSignedDecimal()
912 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) in makeNaN()
923 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { in makeNaN()
924 // Finite-only types do not distinguish signalling and quiet NaN, so in makeNaN()
927 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { in makeNaN()
929 fill_storage = APInt::getZero(semantics->precision - 1); in makeNaN()
931 fill_storage = APInt::getAllOnes(semantics->precision - 1); in makeNaN()
937 if (!fill || fill->getNumWords() < numParts) in makeNaN()
940 APInt::tcAssign(significand, fill->getRawData(), in makeNaN()
941 std::min(fill->getNumWords(), numParts)); in makeNaN()
944 unsigned bitsToPreserve = semantics->precision - 1; in makeNaN()
947 significand[part] &= ((1ULL << bitsToPreserve) - 1); in makeNaN()
952 unsigned QNaNBit = semantics->precision - 2; in makeNaN()
962 APInt::tcSetBit(significand, QNaNBit - 1); in makeNaN()
963 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { in makeNaN()
972 // pseudo-NaN. Maybe we should expose the ability to make in makeNaN()
973 // pseudo-NaNs? in makeNaN()
1004 return isFiniteNonZero() && (exponent == semantics->minExponent) && in isDenormal()
1006 semantics->precision - 1) == 0); in isDenormal()
1013 return isFiniteNonZero() && exponent == semantics->minExponent && in isSmallest()
1018 return getCategory() == fcNormal && exponent == semantics->minExponent && in isSmallestNormalized()
1026 const unsigned PartCount = partCountForBits(semantics->precision); in isSignificandAllOnes()
1027 for (unsigned i = 0; i < PartCount - 1; i++) in isSignificandAllOnes()
1033 PartCount*integerPartWidth - semantics->precision + 1; in isSignificandAllOnes()
1037 ~integerPart(0) << (integerPartWidth - NumHighBits); in isSignificandAllOnes()
1038 if (~(Parts[PartCount - 1] | HighBitFill)) in isSignificandAllOnes()
1052 const unsigned PartCount = partCountForBits(semantics->precision); in isSignificandAllOnesExceptLSB()
1053 for (unsigned i = 0; i < PartCount - 1; i++) { in isSignificandAllOnesExceptLSB()
1060 PartCount * integerPartWidth - semantics->precision + 1; in isSignificandAllOnesExceptLSB()
1064 << (integerPartWidth - NumHighBits); in isSignificandAllOnesExceptLSB()
1065 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) in isSignificandAllOnesExceptLSB()
1075 const unsigned PartCount = partCountForBits(semantics->precision); in isSignificandAllZeros()
1077 for (unsigned i = 0; i < PartCount - 1; i++) in isSignificandAllZeros()
1083 PartCount*integerPartWidth - semantics->precision + 1; in isSignificandAllZeros()
1088 if (Parts[PartCount - 1] & HighBitMask) in isSignificandAllZeros()
1096 const unsigned PartCount = partCountForBits(semantics->precision); in isSignificandAllZerosExceptMSB()
1098 for (unsigned i = 0; i < PartCount - 1; i++) { in isSignificandAllZerosExceptMSB()
1104 PartCount * integerPartWidth - semantics->precision + 1; in isSignificandAllZerosExceptMSB()
1105 return Parts[PartCount - 1] == integerPart(1) in isSignificandAllZerosExceptMSB()
1106 << (integerPartWidth - NumHighBits); in isSignificandAllZerosExceptMSB()
1110 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && in isLargest()
1111 semantics->nanEncoding == fltNanEncoding::AllOnes) { in isLargest()
1115 return isFiniteNonZero() && exponent == semantics->maxExponent && in isLargest()
1120 return isFiniteNonZero() && exponent == semantics->maxExponent && in isLargest()
1155 exponent = ourSemantics.precision - 1; in IEEEFloat()
1182 return partCountForBits(semantics->precision + 1); in partCount()
1186 return const_cast<IEEEFloat *>(this)->significandParts(); in significandParts()
1238 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1239 on to the full-precision result of the multiplication. Returns the
1253 precision = semantics->precision; in multiplySignificand()
1274 // Assume the operands involved in the multiplication are single-precision in multiplySignificand()
1280 // Note that there are three significant bits at the left-hand side of the in multiplySignificand()
1298 if (omsb != extendedPrecision - 1) { in multiplySignificand()
1301 (extendedPrecision - 1) - omsb); in multiplySignificand()
1302 exponent -= (extendedPrecision - 1) - omsb; in multiplySignificand()
1328 "Lost precision while shifting addend for fused-multiply-add."); in multiplySignificand()
1341 // Convert the result having "2 * precision" significant-bits back to the one in multiplySignificand()
1342 // having "precision" significant-bits. First, move the radix point from in multiplySignificand()
1343 // poision "2*precision - 1" to "precision - 1". The exponent need to be in multiplySignificand()
1344 // adjusted by "2*precision - 1" - "precision - 1" = "precision". in multiplySignificand()
1345 exponent -= precision + 1; in multiplySignificand()
1347 // In case MSB resides at the left-hand side of radix point, shift the in multiplySignificand()
1349 // the radix point (i.e. "MSB . rest-significant-bits"). in multiplySignificand()
1358 bits = omsb - precision; in multiplySignificand()
1398 /* Copy the dividend and divisor as they will be modified in-place. */ in divideSignificand()
1405 exponent -= rhs.exponent; in divideSignificand()
1407 unsigned int precision = semantics->precision; in divideSignificand()
1410 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; in divideSignificand()
1417 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; in divideSignificand()
1419 exponent -= bit; in divideSignificand()
1427 exponent--; in divideSignificand()
1433 for (bit = precision; bit; bit -= 1) { in divideSignificand()
1436 APInt::tcSetBit(lhsSignificand, bit - 1); in divideSignificand()
1480 assert(bits < semantics->precision); in shiftSignificandLeft()
1486 exponent -= bits; in shiftSignificandLeft()
1500 compare = exponent - rhs.exponent; in compareAbsoluteValue()
1523 bits -= APInt::APINT_BITS_PER_WORD; in tcSetLeastSignificantBits()
1527 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits); in tcSetLeastSignificantBits()
1536 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) { in handleOverflow()
1542 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) in handleOverflow()
1552 exponent = semantics->maxExponent; in handleOverflow()
1554 semantics->precision); in handleOverflow()
1555 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && in handleOverflow()
1556 semantics->nanEncoding == fltNanEncoding::AllOnes) in handleOverflow()
1620 exponentChange = omsb - semantics->precision; in normalize()
1624 if (exponent + exponentChange > semantics->maxExponent) in normalize()
1629 if (exponent + exponentChange < semantics->minExponent) in normalize()
1630 exponentChange = semantics->minExponent - exponent; in normalize()
1636 shiftSignificandLeft(-exponentChange); in normalize()
1649 /* Keep OMSB up-to-date. */ in normalize()
1651 omsb -= exponentChange; in normalize()
1657 // The all-ones values is an overflow if NaN is all ones. If NaN is in normalize()
1659 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && in normalize()
1660 semantics->nanEncoding == fltNanEncoding::AllOnes && in normalize()
1661 exponent == semantics->maxExponent && isSignificandAllOnes()) in normalize()
1673 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) in normalize()
1683 exponent = semantics->minExponent; in normalize()
1689 if (omsb == (unsigned) semantics->precision + 1) { in normalize()
1693 if (exponent == semantics->maxExponent) in normalize()
1697 // account for 8-bit floating point types that have no inf, only NaN. in normalize()
1705 // The all-ones values is an overflow if NaN is all ones. If NaN is in normalize()
1707 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && in normalize()
1708 semantics->nanEncoding == fltNanEncoding::AllOnes && in normalize()
1709 exponent == semantics->maxExponent && isSignificandAllOnes()) in normalize()
1713 /* The normal case - we were and are not denormal, and any in normalize()
1715 if (omsb == semantics->precision) in normalize()
1718 /* We have a non-zero denormal. */ in normalize()
1719 assert(omsb < semantics->precision); in normalize()
1724 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) in normalize()
1799 /* Are we bigger exponent-wise than the RHS? */ in addOrSubtractSignificand()
1800 bits = exponent - rhs.exponent; in addOrSubtractSignificand()
1809 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); in addOrSubtractSignificand()
1812 lost_fraction = shiftSignificandRight(-bits - 1); in addOrSubtractSignificand()
1827 /* Invert the lost fraction - it was on the RHS and in addOrSubtractSignificand()
1845 lost_fraction = shiftSignificandRight(-bits); in addOrSubtractSignificand()
1934 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) in divideSpecials()
2028 // With NaN-as-negative-zero, neither NaN or negative zero can change in changeSign()
2030 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && in changeSign()
2058 adding two like-signed zeroes gives that zero. */ in addOrSubtract()
2062 // NaN-in-negative-zero means zeros need to be normalized to +0. in addOrSubtract()
2063 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) in addOrSubtract()
2090 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) in multiply()
2110 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) in divide()
2154 // remainder = numer - rquot * denom = x - r * p in remainder()
2164 // - if x < 0.5p then we round to the nearest number with is 0, and are done. in remainder()
2165 // - if x == 0.5p then we round to the nearest even number which is 0, and we in remainder()
2167 // - if 0.5p < x < p then we round to nearest number which is 1, and we have in remainder()
2169 // - if x >= p then we must subtract p at least once, as x must be a in remainder()
2175 // - if x < 0.5p then we round to the nearest number with is 0, and are done. in remainder()
2176 // - if x == 0.5p then we round to the nearest even number. As r is odd, we in remainder()
2178 // - if x > 0.5p (and inherently x < p) then we must round r up to the next in remainder()
2186 extendedSemantics.minExponent--; in remainder()
2192 IEEEFloat PEx = P; in remainder() local
2193 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); in remainder()
2201 if (VEx.compare(PEx) == cmpGreaterThan) { in remainder()
2206 // not want to `convert` again, so we just subtract PEx twice (which equals in remainder()
2208 fs = VEx.subtract(PEx, rmNearestTiesToEven); in remainder()
2210 fs = VEx.subtract(PEx, rmNearestTiesToEven); in remainder()
2213 cmpResult result = VEx.compare(PEx); in remainder()
2222 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) in remainder()
2223 // But some 8-bit floats only have positive 0. in remainder()
2240 int Exp = ilogb(*this) - ilogb(rhs); in mod()
2245 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven); in mod()
2253 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) in mod()
2259 /* Normalized fused-multiply-add. */
2265 /* Post-multiplication sign, before addition. */ in fusedMultiplyAdd()
2269 extended-precision calculation. */ in fusedMultiplyAdd()
2282 adding two like-signed zeroes gives that zero. */ in fusedMultiplyAdd()
2285 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) in fusedMultiplyAdd()
2292 to do in the latter case. The IEEE-754R standard says it is in fusedMultiplyAdd()
2293 implementation-defined in this case whether, if ADDEND is a in fusedMultiplyAdd()
2305 /* Rounding-mode correct round to integral value. */
2310 // [IEEE Std 754-2008 6.1]: in roundToIntegral()
2311 // The behavior of infinity in floating-point arithmetic is derived from the in roundToIntegral()
2321 // [IEEE Std 754-2008 6.2]: in roundToIntegral()
2323 // operation exception and for which a floating-point result is to be in roundToIntegral()
2326 // [IEEE Std 754-2008 6.2]: in roundToIntegral()
2329 // general-computational and signaling-computational operation except for in roundToIntegral()
2333 // [IEEE Std 754-2008 6.2]: in roundToIntegral()
2335 // operations, if a floating-point result is to be delivered the result in roundToIntegral()
2338 // Every general-computational and quiet-computational operation involving in roundToIntegral()
2347 // [IEEE Std 754-2008 6.3]: in roundToIntegral()
2356 // to +/-Inf. Bail out early instead. in roundToIntegral()
2360 // The algorithm here is quite simple: we add 2^(p-1), where p is the in roundToIntegral()
2367 IntegerConstant <<= semanticsPrecision(*semantics)-1; in roundToIntegral()
2463 /// IEEEFloat::convert - convert a value of one floating point type to another.
2483 shift = toSemantics.precision - fromSemantics.precision; in convert()
2497 // when truncating from PowerPC double-double to double format), the in convert()
2504 int exponentChange = omsb - fromSemantics.precision; in convert()
2506 exponentChange = toSemantics.minExponent - exponent; in convert()
2510 shift -= exponentChange; in convert()
2512 } else if (omsb <= -shift) { in convert()
2513 exponentChange = omsb + shift - 1; // leave at least one bit set in convert()
2514 shift -= exponentChange; in convert()
2521 (category == fcNaN && semantics->nonFiniteBehavior != in convert()
2523 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); in convert()
2536 // Switch to built-in storage for a single part. in convert()
2556 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { in convert()
2564 // NaN to -Inf. in convert()
2566 semantics->nanEncoding != fltNanEncoding::NegativeZero) in convert()
2574 APInt::tcSetBit(significandParts(), semantics->precision - 1); in convert()
2586 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { in convert()
2591 semantics->nanEncoding == fltNanEncoding::NegativeZero) { in convert()
2596 // NaN is negative zero means -0 -> +0, which can lose information in convert()
2615 round-to-zero to always be used. */
2646 /* For exponent -1 the integer bit represents .5, look at that. in convertToSignExtendedInteger()
2648 truncatedBits = semantics->precision -1U - exponent; in convertToSignExtendedInteger()
2658 if (bits < semantics->precision) { in convertToSignExtendedInteger()
2659 /* We truncate (semantics->precision - bits) bits. */ in convertToSignExtendedInteger()
2660 truncatedBits = semantics->precision - bits; in convertToSignExtendedInteger()
2664 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, in convertToSignExtendedInteger()
2667 bits - semantics->precision); in convertToSignExtendedInteger()
2749 bits = width - isSigned; in convertToInteger()
2753 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); in convertToInteger()
2772 precision = semantics->precision; in convertFromUnsignedParts()
2777 exponent = omsb - 1; in convertFromUnsignedParts()
2779 omsb - precision); in convertFromUnsignedParts()
2780 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); in convertFromUnsignedParts()
2782 exponent = precision - 1; in convertFromUnsignedParts()
2798 api = -api; in convertFromAPInt()
2806 integer is signed, in which case it must be sign-extended. */
2814 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { in convertFromSignExtendedInteger()
2841 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { in convertFromZeroExtendedInteger()
2843 api = -api; in convertFromZeroExtendedInteger()
2891 bitPos -= 4; in convertFromHexadecimalString()
2910 if (dot != end && p - begin == 1) in convertFromHexadecimalString()
2923 expAdjustment = static_cast<int>(dot - firstSignificantDigit); in convertFromHexadecimalString()
2926 expAdjustment = expAdjustment * 4 - 1; in convertFromHexadecimalString()
2930 expAdjustment += semantics->precision; in convertFromHexadecimalString()
2931 expAdjustment -= partsCount * integerPartWidth; in convertFromHexadecimalString()
2948 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; in roundSignificandWithExponent()
2955 parts = partCountForBits(semantics->precision + 11); in roundSignificandWithExponent()
2958 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); in roundSignificandWithExponent()
2964 calcSemantics.precision = parts * integerPartWidth - 1; in roundSignificandWithExponent()
2965 excessPrecision = calcSemantics.precision - semantics->precision; in roundSignificandWithExponent()
2984 /* multiplySignificand leaves the precision-th bit set to 1. */ in roundSignificandWithExponent()
2990 if (decSig.exponent < semantics->minExponent) { in roundSignificandWithExponent()
2991 excessPrecision += (semantics->minExponent - decSig.exponent); in roundSignificandWithExponent()
2996 /* Extra half-ulp lost in reciprocal of exponent. */ in roundSignificandWithExponent()
3003 (decSig.significandParts(), calcSemantics.precision - 1) == 1); in roundSignificandWithExponent()
3013 calcSemantics.precision - excessPrecision, in roundSignificandWithExponent()
3015 /* Take the exponent of decSig. If we tcExtract-ed less bits in roundSignificandWithExponent()
3018 exponent = (decSig.exponent + semantics->precision in roundSignificandWithExponent()
3019 - (calcSemantics.precision - excessPrecision)); in roundSignificandWithExponent()
3043 (exp - 1) * L >= maxExponent in convertFromDecimalString()
3047 (exp + 1) * L <= minExponent - precision in convertFromDecimalString()
3056 // and zero decimals with non-zero exponents. in convertFromDecimalString()
3059 // D->firstSigDigit equals str.end(), every digit must be a zero and there can in convertFromDecimalString()
3060 // be at most one dot. On the other hand, if we have a zero with a non-zero in convertFromDecimalString()
3061 // exponent, then we know that D.firstSigDigit will be non-numeric. in convertFromDecimalString()
3065 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) in convertFromDecimalString()
3069 max during the log-rebasing in the max-exponent check below. */ in convertFromDecimalString()
3070 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { in convertFromDecimalString()
3074 during the log-rebasing in the min-exponent check. Check that it in convertFromDecimalString()
3075 won't overflow min in either check, then perform the min-exponent in convertFromDecimalString()
3077 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || in convertFromDecimalString()
3079 8651 * (semantics->minExponent - (int) semantics->precision)) { in convertFromDecimalString()
3085 /* We can finally safely perform the max-exponent check. */ in convertFromDecimalString()
3086 } else if ((D.normalizedExponent - 1) * 42039 in convertFromDecimalString()
3087 >= 12655 * semantics->maxExponent) { in convertFromDecimalString()
3095 N-digit decimal integer is N * 196 / 59. Allocate enough space in convertFromDecimalString()
3098 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; in convertFromDecimalString()
3103 /* Convert to binary efficiently - we do almost all multiplication in convertFromDecimalString()
3129 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); in convertFromDecimalString()
3162 bool IsNegative = str.front() == '-'; in convertFromStringSpecials()
3197 str = str.slice(1, str.size() - 1); in convertFromStringSpecials()
3233 sign = *p == '-' ? 1 : 0; in convertFromString()
3234 if (*p == '-' || *p == '+') { in convertFromString()
3236 slen--; in convertFromString()
3244 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), in convertFromString()
3253 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3272 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3282 *dst++ = '-'; in convertToHexString()
3286 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); in convertToHexString()
3287 dst += sizeof infinityL - 1; in convertToHexString()
3291 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); in convertToHexString()
3292 dst += sizeof NaNU - 1; in convertToHexString()
3301 memset (dst, '0', hexDigits - 1); in convertToHexString()
3302 dst += hexDigits - 1; in convertToHexString()
3315 return static_cast<unsigned int>(dst - p); in convertToHexString()
3341 we have 3 virtual zero most-significant-bits. */ in convertNormalToHexString()
3342 valueBits = semantics->precision + 3; in convertNormalToHexString()
3343 shift = integerPartWidth - valueBits % integerPartWidth; in convertNormalToHexString()
3347 outputDigits = (valueBits - significandLSB () + 3) / 4; in convertNormalToHexString()
3354 /* We are dropping non-zero bits, so need to check how to round. in convertNormalToHexString()
3359 bits = valueBits - hexDigits * 4; in convertNormalToHexString()
3371 count = (valueBits + integerPartWidth - 1) / integerPartWidth; in convertNormalToHexString()
3377 if (--count == partsCount) in convertNormalToHexString()
3383 part |= significand[count - 1] >> (integerPartWidth - shift); in convertNormalToHexString()
3391 outputDigits -= curDigits; in convertNormalToHexString()
3399 q--; in convertNormalToHexString()
3412 p[-1] = p[0]; in convertNormalToHexString()
3413 if (dst -1 == p) in convertNormalToHexString()
3414 dst--; in convertNormalToHexString()
3429 Arg.semantics->precision); in hash_value()
3433 Arg.semantics->precision, Arg.exponent, in hash_value()
3445 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3455 myexponent = exponent+16383; //bias in convertF80LongDoubleAPFloatToAPInt()
3486 // Convert number to double. To avoid spurious underflows, we re- in convertPPCDoubleDoubleAPFloatToAPInt()
3506 // just set the second double to zero. Otherwise, re-convert back to in convertPPCDoubleDoubleAPFloatToAPInt()
3531 constexpr int bias = -(S.minExponent - 1); in convertIEEEFloatToAPInt() local
3532 constexpr unsigned int trailing_significand_bits = S.precision - 1; in convertIEEEFloatToAPInt()
3536 constexpr uint64_t significand_mask = integer_bit - 1; in convertIEEEFloatToAPInt()
3538 S.sizeInBits - 1 - trailing_significand_bits; in convertIEEEFloatToAPInt()
3540 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; in convertIEEEFloatToAPInt()
3547 myexponent = exponent + bias; in convertIEEEFloatToAPInt()
3554 myexponent = ::exponentZero(S) + bias; in convertIEEEFloatToAPInt()
3560 myexponent = ::exponentInf(S) + bias; in convertIEEEFloatToAPInt()
3566 myexponent = ::exponentNaN(S) + bias; in convertIEEEFloatToAPInt()
3575 words[mysignificand.size() - 1] &= significand_mask; in convertIEEEFloatToAPInt()
3578 constexpr size_t last_word = words.size() - 1; in convertIEEEFloatToAPInt()
3580 << ((S.sizeInBits - 1) % 64); in convertIEEEFloatToAPInt()
3777 exponent = myexponent - 16383; in initFromF80LongDoubleAPInt()
3781 exponent = -16382; in initFromF80LongDoubleAPInt()
3812 << ((S.precision - 1) % integerPartWidth); in initFromIEEEAPInt()
3813 constexpr uint64_t significand_mask = integer_bit - 1; in initFromIEEEAPInt()
3814 constexpr unsigned int trailing_significand_bits = S.precision - 1; in initFromIEEEAPInt()
3818 S.sizeInBits - 1 - trailing_significand_bits; in initFromIEEEAPInt()
3820 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; in initFromIEEEAPInt()
3821 constexpr int bias = -(S.minExponent - 1); in initFromIEEEAPInt() local
3828 mysignificand[mysignificand.size() - 1] &= significand_mask; in initFromIEEEAPInt()
3833 uint64_t last_word = api.getRawData()[api.getNumWords() - 1]; in initFromIEEEAPInt()
3840 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64)); in initFromIEEEAPInt()
3848 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) { in initFromIEEEAPInt()
3857 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand; in initFromIEEEAPInt()
3860 std::all_of(mysignificand.begin(), mysignificand.end() - 1, in initFromIEEEAPInt()
3863 mysignificand[mysignificand.size() - 1] == significand_mask); in initFromIEEEAPInt()
3864 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand; in initFromIEEEAPInt()
3883 exponent = myexponent - bias; in initFromIEEEAPInt()
3888 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit in initFromIEEEAPInt()
3953 assert(api.getBitWidth() == Sem->sizeInBits); in initFromAPInt()
4001 exponent = semantics->maxExponent; in makeLargest()
4006 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); in makeLargest()
4011 PartCount*integerPartWidth - semantics->precision; in makeLargest()
4012 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) in makeLargest()
4016 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && in makeLargest()
4017 semantics->nanEncoding == fltNanEncoding::AllOnes) in makeLargest()
4030 exponent = semantics->minExponent; in makeSmallest()
4043 exponent = semantics->minExponent; in makeSmallestNormalized()
4044 APInt::tcSetBit(significandParts(), semantics->precision - 1); in makeSmallestNormalized()
4075 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; in AdjustToPrecision()
4103 unsigned FirstSignificant = N - FormatPrecision; in AdjustToPrecision()
4110 if (buffer[FirstSignificant - 1] < '5') { in AdjustToPrecision()
4119 // Rounding up requires a decimal add-with-carry. If we continue in AdjustToPrecision()
4120 // the carry, the newly-introduced zeros will just be truncated. in AdjustToPrecision()
4148 Str.push_back('-'); in toStringImpl()
4153 // We use enough digits so the number can be round-tripped back to an in toStringImpl()
4154 // APFloat. The formula comes from "How to Print Floating-Point Numbers in toStringImpl()
4177 int texp = -exp; in toStringImpl()
4180 // (N)(2^-e) == (N)(5^e)(10^-e) in toStringImpl()
4185 // <= semantics->precision + e * 137 / 59 in toStringImpl()
4221 // digit <- significand % 10 in toStringImpl()
4222 // significand <- significand / 10 in toStringImpl()
4250 // 765e3 --> 765000 in toStringImpl()
4257 int MSD = exp + (int) (NDigits - 1); in toStringImpl()
4259 // 765e-2 == 7.65 in toStringImpl()
4262 // 765e-5 == 0.00765 in toStringImpl()
4264 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; in toStringImpl()
4271 exp += (NDigits - 1); in toStringImpl()
4273 Str.push_back(buffer[NDigits-1]); in toStringImpl()
4279 Str.push_back(buffer[NDigits-1-I]); in toStringImpl()
4281 if (!TruncateZero && FormatPrecision > NDigits - 1) in toStringImpl()
4282 Str.append(FormatPrecision - NDigits + 1, '0'); in toStringImpl()
4286 Str.push_back(exp >= 0 ? '+' : '-'); in toStringImpl()
4288 exp = -exp; in toStringImpl()
4298 Str.push_back(expbuf[E-1-I]); in toStringImpl()
4302 // Non-scientific, positive exponents. in toStringImpl()
4305 Str.push_back(buffer[NDigits-1-I]); in toStringImpl()
4311 // Non-scientific, negative exponents. in toStringImpl()
4319 Str.push_back(buffer[NDigits-I-1]); in toStringImpl()
4322 unsigned NZeros = 1 + (unsigned) -NWholeDigits; in toStringImpl()
4331 Str.push_back(buffer[NDigits-I-1]); in toStringImpl()
4341 return append(Str, "-Inf"); in toString()
4349 Str.push_back('-'); in toString()
4357 Str.append(FormatPrecision - 1, '0'); in toString()
4369 int exp = exponent - ((int) semantics->precision - 1); in toString()
4371 semantics->precision, in toString()
4372 ArrayRef(significandParts(), partCountForBits(semantics->precision))); in toString()
4386 if (significandLSB() != semantics->precision - 1) in getExactInverse()
4400 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); in getExactInverse()
4413 const int PartCount = partCountForBits(semantics->precision); in getExactLog2Abs()
4422 if (exponent != semantics->minExponent) in getExactLog2Abs()
4429 return exponent - semantics->precision + CountrParts + in getExactLog2Abs()
4440 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly || in isSignaling()
4441 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) in isSignaling()
4444 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the in isSignaling()
4446 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); in isSignaling()
4449 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4451 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4454 // If we are performing nextDown, swap sign so we have -x. in next()
4467 // nextUp(-inf) = -getLargest() in next()
4471 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. in next()
4472 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not in next()
4485 // nextUp(-getSmallest()) = -0 in next()
4490 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) in next()
4496 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { in next()
4500 } else if (semantics->nonFiniteBehavior == in next()
4508 exponent = semantics->maxExponent + 1; in next()
4519 // 1. exponent != semantics->minExponent. This implies we are not in the in next()
4523 exponent != semantics->minExponent && isSignificandAllZeros(); in next()
4528 // 1. If we are dealing with a non-binade decrement, by definition we in next()
4530 // 2. If we are dealing with a normal -> normal binade decrement, since in next()
4535 // 3. If we are dealing with a normal -> denormal binade decrement, in next()
4545 APInt::tcSetBit(Parts, semantics->precision - 1); in next()
4546 exponent--; in next()
4562 APInt::tcSetBit(Parts, semantics->precision - 1); in next()
4563 assert(exponent != semantics->maxExponent && in next()
4574 // If we are performing nextDown, swap sign so we have -nextUp(-x) in next()
4594 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) in makeInf()
4597 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { in makeInf()
4611 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { in makeZero()
4621 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly) in makeQuiet()
4622 APInt::tcSetBit(significandParts(), semantics->precision - 2); in makeQuiet()
4636 int SignificandBits = Arg.getSemantics().precision - 1; in ilogb()
4640 return Normalized.exponent - SignificandBits; in ilogb()
4647 // If Exp is wildly out-of-scale, simply adding it to X.exponent will in scalbn()
4653 int SignificandBits = X.getSemantics().precision - 1; in scalbn()
4654 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; in scalbn()
4657 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement); in scalbn()
4678 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). in frexp()
4680 return scalbn(Val, -Exp, RM); in frexp()
4738 this->~DoubleAPFloat(); in operator =()
4745 // "Software for Doubled-Precision Floating-Point Computations",
4746 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4781 // Floats[1] = a - z + c + zz; in addImpl()
4787 // Floats[1] = c - z + a + zz; in addImpl()
4794 // q = a - z; in addImpl()
4798 // zz = q + c + (a - (q + z)) + aa + cc; in addImpl()
4799 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. in addImpl()
4939 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). in multiply()
4963 // Floats[1] = (t - u) + tau in multiply()
5040 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); in compareAbsoluteValue()
5194 Tmp.makeSmallest(this->isNegative()); in isSmallest()
5203 Tmp.makeSmallestNormalized(this->isNegative()); in isSmallestNormalized()
5211 Tmp.makeLargest(this->isNegative()); in isLargest()
5263 Second = scalbn(Second, -Exp, RM); in frexp()
5361 an APSInt, whose initial bit-width and signed-ness are used to determine the
5371 // Keeps the original signed-ness. in convertToInteger()