xref: /freebsd/contrib/llvm-project/llvm/lib/Support/APFloat.cpp (revision b9fa1500cb2265b95927e19b9d2119ca26d65be3)
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cstring>
29 #include <limits.h>
30 
/// Forwards METHOD_CALL to the representation selected by getSemantics():
/// `U.IEEE` when the semantics use the IEEEFloat layout, `U.Double` when they
/// use the DoubleAPFloat layout. Every path through the expansion either
/// returns the forwarded call's result from the enclosing function or reaches
/// llvm_unreachable, so nothing placed after the macro is ever executed.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
39 
40 using namespace llvm;
41 
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is `_lhs * 4 + _rhs`, so distinct pairs map to distinct keys only
/// while every category value lies in [0, 4).
/// NOTE(review): presumably fcCategory has exactly four enumerators -- confirm
/// against its declaration before adding a category.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49 
/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings: every part must hold a whole number of 4-bit
   hexadecimal digits.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 
54 namespace llvm {
55 
// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise.
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by
  // the `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,
};
72 
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3 floating point type where NaN is
  // represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in the Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
97 
/* Represents floating point arithmetic semantics.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  /* How Inf and NaN are represented.  Defaults to IEEE 754 behavior when
     an initializer omits it.  */
  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  /* How NaN is encoded.  Defaults to the IEEE encoding when an initializer
     omits it.  */
  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;

  // Returns true if any number described by this semantics can be precisely
  // represented by the specified semantics. Only the exponent range and the
  // precision are compared; this does not take into account the value of
  // fltNonfiniteBehavior.
  bool isRepresentableBy(const fltSemantics &S) const {
    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
           precision <= S.precision;
  }
};
126 
// The semantics objects handed out by the APFloatBase accessors. Initializers
// follow the member order of fltSemantics:
//   {maxExponent, minExponent, precision, sizeInBits
//    [, nonFiniteBehavior, nanEncoding]}
// Entries that stop after sizeInBits take the in-class defaults
// (fltNonfiniteBehavior::IEEE754 and fltNanEncoding::IEEE).
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
// Placeholder semantics returned by APFloatBase::Bogus(); all fields are zero.
static constexpr fltSemantics semBogus = {0, 0, 0, 0};
144 
/* The IBM double-double semantics. Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   Note: the field values must differ from those of semBogus, as otherwise
   an unsafe optimization may collapse both objects to a single address,
   and we heavily rely on them having distinct addresses.  Apart from
   sizeInBits, the values here are therefore not meaningful exponent or
   precision limits.  */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
155 
/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.  */
static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
                                                          53 + 53, 128};
182 
183 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
184   switch (S) {
185   case S_IEEEhalf:
186     return IEEEhalf();
187   case S_BFloat:
188     return BFloat();
189   case S_IEEEsingle:
190     return IEEEsingle();
191   case S_IEEEdouble:
192     return IEEEdouble();
193   case S_IEEEquad:
194     return IEEEquad();
195   case S_PPCDoubleDouble:
196     return PPCDoubleDouble();
197   case S_Float8E5M2:
198     return Float8E5M2();
199   case S_Float8E5M2FNUZ:
200     return Float8E5M2FNUZ();
201   case S_Float8E4M3FN:
202     return Float8E4M3FN();
203   case S_Float8E4M3FNUZ:
204     return Float8E4M3FNUZ();
205   case S_Float8E4M3B11FNUZ:
206     return Float8E4M3B11FNUZ();
207   case S_FloatTF32:
208     return FloatTF32();
209   case S_x87DoubleExtended:
210     return x87DoubleExtended();
211   }
212   llvm_unreachable("Unrecognised floating semantics");
213 }
214 
215 APFloatBase::Semantics
216 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
217   if (&Sem == &llvm::APFloat::IEEEhalf())
218     return S_IEEEhalf;
219   else if (&Sem == &llvm::APFloat::BFloat())
220     return S_BFloat;
221   else if (&Sem == &llvm::APFloat::IEEEsingle())
222     return S_IEEEsingle;
223   else if (&Sem == &llvm::APFloat::IEEEdouble())
224     return S_IEEEdouble;
225   else if (&Sem == &llvm::APFloat::IEEEquad())
226     return S_IEEEquad;
227   else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
228     return S_PPCDoubleDouble;
229   else if (&Sem == &llvm::APFloat::Float8E5M2())
230     return S_Float8E5M2;
231   else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
232     return S_Float8E5M2FNUZ;
233   else if (&Sem == &llvm::APFloat::Float8E4M3FN())
234     return S_Float8E4M3FN;
235   else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
236     return S_Float8E4M3FNUZ;
237   else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
238     return S_Float8E4M3B11FNUZ;
239   else if (&Sem == &llvm::APFloat::FloatTF32())
240     return S_FloatTF32;
241   else if (&Sem == &llvm::APFloat::x87DoubleExtended())
242     return S_x87DoubleExtended;
243   else
244     llvm_unreachable("Unknown floating semantics");
245 }
246 
// Accessors for the file-local semantics objects. Each returns a reference to
// one fixed object; SemanticsToEnum identifies semantics by comparing these
// addresses, so an accessor must always hand back the same object.
const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
const fltSemantics &APFloatBase::PPCDoubleDouble() {
  return semPPCDoubleDouble;
}
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
  return semFloat8E4M3B11FNUZ;
}
const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
const fltSemantics &APFloatBase::x87DoubleExtended() {
  return semX87DoubleExtended;
}
// The all-zero placeholder semantics.
const fltSemantics &APFloatBase::Bogus() { return semBogus; }
267 
// Namespace-scope definitions of the constexpr rounding-mode members of
// APFloatBase (their initializers are not given here, so they must come from
// the in-class declarations).
// NOTE(review): presumably kept so the members can be ODR-used when built
// as pre-C++17; from C++17 on static constexpr data members are implicitly
// inline and these redeclarations are redundant -- confirm before removing.
constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
constexpr RoundingMode APFloatBase::rmTowardPositive;
constexpr RoundingMode APFloatBase::rmTowardNegative;
constexpr RoundingMode APFloatBase::rmTowardZero;
constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
273 
/* A tight upper bound on number of parts required to hold the value
   pow(5, power) is

     power * 815 / (351 * integerPartWidth) + 1

   However, whilst the result may require only this many parts,
   because we are multiplying two values to get it, the
   multiplication may require an extra part with the excess part
   being zero (consider the trivial case of 1 * 1, tcFullMultiply
   requires two parts to hold the single-part result).  So we add an
   extra one to guarantee enough space whilst multiplying; that is why
   maxPowerOfFiveParts below starts from 2 rather than 1.

   maxExponent and maxPrecision equal the maxExponent and precision of
   semIEEEquad, which are the largest among the semantics defined in this
   file.  */
const unsigned int maxExponent = 16383;
const unsigned int maxPrecision = 113;
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
const unsigned int maxPowerOfFiveParts =
    2 +
    ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
291 
// Field getters for fltSemantics; each returns the member of the same name
// unchanged.

// Number of significand bits, including the integer bit.
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
  return semantics.precision;
}
// Largest E such that 2^E is representable.
APFloatBase::ExponentType
APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
  return semantics.maxExponent;
}
// Smallest E such that 2^E is a normalized number.
APFloatBase::ExponentType
APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
  return semantics.minExponent;
}
// Number of bits actually used by the format.
unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
  return semantics.sizeInBits;
}
306 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
307                                                  bool isSigned) {
308   // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
309   // at least one more bit than the MaxExponent to hold the max FP value.
310   unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
311   // Extra sign bit needed.
312   if (isSigned)
313     ++MinBitWidth;
314   return MinBitWidth;
315 }
316 
317 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
318                                             const fltSemantics &Dst) {
319   // Exponent range must be larger.
320   if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
321     return false;
322 
323   // If the mantissa is long enough, the result value could still be denormal
324   // with a larger exponent range.
325   //
326   // FIXME: This condition is probably not accurate but also shouldn't be a
327   // practical concern with existing types.
328   return Dst.precision >= Src.precision;
329 }
330 
// Returns the sizeInBits field of Sem, the same value semanticsSizeInBits()
// reports.
unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
  return Sem.sizeInBits;
}
334 
// Returns the exponent one below the smallest normalized exponent.
// NOTE(review): presumably the exponent value stored for zero -- confirm
// against the callers.
static constexpr APFloatBase::ExponentType
exponentZero(const fltSemantics &semantics) {
  return semantics.minExponent - 1;
}
339 
// Returns the exponent one above the largest representable exponent.
// NOTE(review): presumably the exponent value stored for infinities --
// confirm against the callers.
static constexpr APFloatBase::ExponentType
exponentInf(const fltSemantics &semantics) {
  return semantics.maxExponent + 1;
}
344 
345 static constexpr APFloatBase::ExponentType
346 exponentNaN(const fltSemantics &semantics) {
347   if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
348     if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
349       return exponentZero(semantics);
350     return semantics.maxExponent;
351   }
352   return semantics.maxExponent + 1;
353 }
354 
355 /* A bunch of private, handy routines.  */
356 
/* Wrap the message ERR in a StringError carrying
   inconvertibleErrorCode().  */
static inline Error createError(const Twine &Err) {
  return make_error<StringError>(Err, inconvertibleErrorCode());
}
360 
361 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
362   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
363 }
364 
/* Map the character code C to its decimal digit value.  '0'..'9' give
   0U..9U; every other input wraps around to a value >= 10U, which callers
   use as the "not a digit" signal.  */
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zero = '0';
  return c - zero;
}
371 
372 /* Return the value of a decimal exponent of the form
373    [+-]ddddddd.
374 
375    If the exponent overflows, returns a large exponent with the
376    appropriate sign.  */
377 static Expected<int> readExponent(StringRef::iterator begin,
378                                   StringRef::iterator end) {
379   bool isNegative;
380   unsigned int absExponent;
381   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
382   StringRef::iterator p = begin;
383 
384   // Treat no exponent as 0 to match binutils
385   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
386     return 0;
387   }
388 
389   isNegative = (*p == '-');
390   if (*p == '-' || *p == '+') {
391     p++;
392     if (p == end)
393       return createError("Exponent has no digits");
394   }
395 
396   absExponent = decDigitValue(*p++);
397   if (absExponent >= 10U)
398     return createError("Invalid character in exponent");
399 
400   for (; p != end; ++p) {
401     unsigned int value;
402 
403     value = decDigitValue(*p);
404     if (value >= 10U)
405       return createError("Invalid character in exponent");
406 
407     absExponent = absExponent * 10U + value;
408     if (absExponent >= overlargeExponent) {
409       absExponent = overlargeExponent;
410       break;
411     }
412   }
413 
414   if (isNegative)
415     return -(int) absExponent;
416   else
417     return (int) absExponent;
418 }
419 
420 /* This is ugly and needs cleaning up, but I don't immediately see
421    how whilst remaining safe.  */
422 static Expected<int> totalExponent(StringRef::iterator p,
423                                    StringRef::iterator end,
424                                    int exponentAdjustment) {
425   int unsignedExponent;
426   bool negative, overflow;
427   int exponent = 0;
428 
429   if (p == end)
430     return createError("Exponent has no digits");
431 
432   negative = *p == '-';
433   if (*p == '-' || *p == '+') {
434     p++;
435     if (p == end)
436       return createError("Exponent has no digits");
437   }
438 
439   unsignedExponent = 0;
440   overflow = false;
441   for (; p != end; ++p) {
442     unsigned int value;
443 
444     value = decDigitValue(*p);
445     if (value >= 10U)
446       return createError("Invalid character in exponent");
447 
448     unsignedExponent = unsignedExponent * 10 + value;
449     if (unsignedExponent > 32767) {
450       overflow = true;
451       break;
452     }
453   }
454 
455   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
456     overflow = true;
457 
458   if (!overflow) {
459     exponent = unsignedExponent;
460     if (negative)
461       exponent = -exponent;
462     exponent += exponentAdjustment;
463     if (exponent > 32767 || exponent < -32768)
464       overflow = true;
465   }
466 
467   if (overflow)
468     exponent = negative ? -32768: 32767;
469 
470   return exponent;
471 }
472 
473 static Expected<StringRef::iterator>
474 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
475                            StringRef::iterator *dot) {
476   StringRef::iterator p = begin;
477   *dot = end;
478   while (p != end && *p == '0')
479     p++;
480 
481   if (p != end && *p == '.') {
482     *dot = p++;
483 
484     if (end - begin == 1)
485       return createError("Significand has no digits");
486 
487     while (p != end && *p == '0')
488       p++;
489   }
490 
491   return p;
492 }
493 
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  /* First significant digit, after leading zeroes and any leading dot.  */
  const char *firstSigDigit;
  /* Last significant digit, with insignificant trailing zeroes dropped.  */
  const char *lastSigDigit;
  /* Exponent when the significand digits are read as an integer.  */
  int exponent;
  /* Exponent when the point follows the first significant digit.  */
  int normalizedExponent;
};
513 
/* Parse the decimal number in [BEGIN, END) and fill out *D.  Returns
   an error if the string contains more than one decimal point, a
   character that is neither a digit, a point, nor the 'e'/'E' exponent
   marker, a significand with no digits, or a malformed exponent.  */
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  /* Skip leading zeroes; dot is updated if a point is passed on the way.  */
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  /* Advance p over the digits of the significand, recording the position
     of the (single) decimal point, until the first other character.  */
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  /* Anything left over must be an exponent introduced by 'e' or 'E'.  */
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    /* A lone "." before the exponent marker carries no digits either.  */
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  The inner loop steps back over
       zeroes; the outer loop repeats it once more if it stopped on the
       decimal point, so zeroes on both sides of the point are dropped.  */
    if (p != begin) {
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  The boolean terms
       subtract one for the point character itself when it lies inside the
       span being counted.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}
579 
580 /* Return the trailing fraction of a hexadecimal number.
581    DIGITVALUE is the first hex digit of the fraction, P points to
582    the next digit.  */
583 static Expected<lostFraction>
584 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
585                             unsigned int digitValue) {
586   unsigned int hexDigit;
587 
588   /* If the first trailing digit isn't 0 or 8 we can work out the
589      fraction immediately.  */
590   if (digitValue > 8)
591     return lfMoreThanHalf;
592   else if (digitValue < 8 && digitValue > 0)
593     return lfLessThanHalf;
594 
595   // Otherwise we need to find the first non-zero digit.
596   while (p != end && (*p == '0' || *p == '.'))
597     p++;
598 
599   if (p == end)
600     return createError("Invalid trailing hexadecimal fraction!");
601 
602   hexDigit = hexDigitValue(*p);
603 
604   /* If we ran off the end it is exactly zero or one-half, otherwise
605      a little more.  */
606   if (hexDigit == UINT_MAX)
607     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
608   else
609     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
610 }
611 
612 /* Return the fraction lost were a bignum truncated losing the least
613    significant BITS bits.  */
614 static lostFraction
615 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
616                               unsigned int partCount,
617                               unsigned int bits)
618 {
619   unsigned int lsb;
620 
621   lsb = APInt::tcLSB(parts, partCount);
622 
623   /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
624   if (bits <= lsb)
625     return lfExactlyZero;
626   if (bits == lsb + 1)
627     return lfExactlyHalf;
628   if (bits <= partCount * APFloatBase::integerPartWidth &&
629       APInt::tcExtractBit(parts, bits - 1))
630     return lfMoreThanHalf;
631 
632   return lfLessThanHalf;
633 }
634 
635 /* Shift DST right BITS bits noting lost fraction.  */
636 static lostFraction
637 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
638 {
639   lostFraction lost_fraction;
640 
641   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
642 
643   APInt::tcShiftRight(dst, parts, bits);
644 
645   return lost_fraction;
646 }
647 
648 /* Combine the effect of two lost fractions.  */
649 static lostFraction
650 combineLostFractions(lostFraction moreSignificant,
651                      lostFraction lessSignificant)
652 {
653   if (lessSignificant != lfExactlyZero) {
654     if (moreSignificant == lfExactlyZero)
655       moreSignificant = lfLessThanHalf;
656     else if (moreSignificant == lfExactlyHalf)
657       moreSignificant = lfMoreThanHalf;
658   }
659 
660   return moreSignificant;
661 }
662 
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers whose own errors are at most HUERR1 and HUERR2 half-ulps.
   INEXACTMULTIPLY says whether the multiplication itself was inexact.
   The true error is strictly less than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1,
                               unsigned int HUerr2) {
  const unsigned int operandError = HUerr1 + HUerr2;
  assert(HUerr1 < 2 || HUerr2 < 2 || (operandError < 8));

  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  /* With exact operands only the multiplication can contribute
     (<= inexactMultiply half-ulps).  */
  return operandError == 0 ? multiplyError * 2
                           : multiplyError + 2 * operandError;
}
680 
/* The number of ulps from the boundary (zero, or half if ISNEAREST)
   when the least significant BITS are truncated.  BITS cannot be
   zero.  Distances too large to fit in one part are reported as the
   all-ones part value ("a lot").  */
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  /* count is the index of the part holding the most significant truncated
     bit; partBits is how many truncated bits fall in that part.  */
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  /* The truncated bits that live in the top part.  */
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  /* Within the top part the half-way boundary is the most significant
     truncated bit on its own; the zero boundary is zero.  */
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  /* All truncated bits are in one part: return |part - boundary|, taken
     as the smaller of the two wrapped unsigned differences.  */
  if (count == 0) {
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  if (part == boundary) {
    /* At or just above the boundary: the distance fits in a part only if
       every part between the top one and parts[0] is zero.  */
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    /* Just below the boundary: every part between the top one and
       parts[0] must be all ones; the distance is then the negation of
       parts[0] in unsigned arithmetic.  */
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}
726 
/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.

   The low three bits of POWER select a starting value from a small table;
   the remaining bits are handled by binary exponentiation, multiplying in
   pow(5, pow(2, n+3)) for every set bit n of POWER >> 3.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  /* pow(5, 0) .. pow(5, 7).  */
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* The repeated squares pow(5, 8), pow(5, 16), ... stored back to back;
     the first entry, pow(5, 8), occupies a single part.  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  /* partsCount[n] is the number of parts of pow(5, pow(2, n+3)), or zero if
     that square has not been computed yet.  */
  unsigned int partsCount[16] = { 1 };
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  /* p1 holds the running product and p2 is the multiplication target; the
     two swap roles after every multiplication.  */
  p1 = dst;
  p2 = scratch;

  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    unsigned int pc;

    pc = partsCount[n];

    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  It is the square of
       the previous entry, which sits immediately before pow5.  */
    if (pc == 0) {
      pc = partsCount[n - 1];
      APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
      pc *= 2;
      /* Drop the top part if the product did not need it.  */
      if (pow5[pc - 1] == 0)
        pc--;
      partsCount[n] = pc;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, pc);
      result += pc;
      if (p2[result - 1] == 0)
        result--;

      /* After the swap below the result is in p1, RESULT parts long, and
         p2 is scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    /* Step to the slot for the next repeated square.  */
    pow5 += pc;
  }

  /* The product may have ended up in the scratch buffer.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}
787 
/* Hexadecimal digit tables.  Each has an extra '0' at index 16 to avoid
   modular arithmetic when adding one; used when rounding up during
   hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of infinity and NaN.  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
796 
797 /* Write out an integerPart in hexadecimal, starting with the most
798    significant nibble.  Write out exactly COUNT hexdigits, return
799    COUNT.  */
800 static unsigned int
801 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
802            const char *hexDigitChars)
803 {
804   unsigned int result = count;
805 
806   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
807 
808   part >>= (APFloatBase::integerPartWidth - 4 * count);
809   while (count--) {
810     dst[count] = hexDigitChars[part & 0xf];
811     part >>= 4;
812   }
813 
814   return result;
815 }
816 
/* Write N to DST as an unsigned decimal integer, without a terminator,
   and return a pointer just past the last character written.  */
static char *writeUnsignedDecimal(char *dst, unsigned int n) {
  char digits[40];
  unsigned int length = 0;

  /* Produce the digits least significant first; zero still yields "0".  */
  do {
    digits[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Copy them out in reading order.  */
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
834 
835 /* Write out a signed decimal integer.  */
836 static char *
837 writeSignedDecimal (char *dst, int value)
838 {
839   if (value < 0) {
840     *dst++ = '-';
841     dst = writeUnsignedDecimal(dst, -(unsigned) value);
842   } else
843     dst = writeUnsignedDecimal(dst, value);
844 
845   return dst;
846 }
847 
848 namespace detail {
849 /* Constructors.  */
850 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
851   unsigned int count;
852 
853   semantics = ourSemantics;
854   count = partCount();
855   if (count > 1)
856     significand.parts = new integerPart[count];
857 }
858 
859 void IEEEFloat::freeSignificand() {
860   if (needsCleanup())
861     delete [] significand.parts;
862 }
863 
864 void IEEEFloat::assign(const IEEEFloat &rhs) {
865   assert(semantics == rhs.semantics);
866 
867   sign = rhs.sign;
868   category = rhs.category;
869   exponent = rhs.exponent;
870   if (isFiniteNonZero() || category == fcNaN)
871     copySignificand(rhs);
872 }
873 
874 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
875   assert(isFiniteNonZero() || category == fcNaN);
876   assert(rhs.partCount() >= partCount());
877 
878   APInt::tcAssign(significandParts(), rhs.significandParts(),
879                   partCount());
880 }
881 
882 /* Make this number a NaN, with an arbitrary but deterministic value
883    for the significand.  If double or longer, this is a signalling NaN,
884    which may not be ideal.  If float, this is QNaN(0).  */
885 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
886   category = fcNaN;
887   sign = Negative;
888   exponent = exponentNaN();
889 
890   integerPart *significand = significandParts();
891   unsigned numParts = partCount();
892 
893   APInt fill_storage;
894   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
895     // Finite-only types do not distinguish signalling and quiet NaN, so
896     // make them all signalling.
897     SNaN = false;
898     if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
899       sign = true;
900       fill_storage = APInt::getZero(semantics->precision - 1);
901     } else {
902       fill_storage = APInt::getAllOnes(semantics->precision - 1);
903     }
904     fill = &fill_storage;
905   }
906 
907   // Set the significand bits to the fill.
908   if (!fill || fill->getNumWords() < numParts)
909     APInt::tcSet(significand, 0, numParts);
910   if (fill) {
911     APInt::tcAssign(significand, fill->getRawData(),
912                     std::min(fill->getNumWords(), numParts));
913 
914     // Zero out the excess bits of the significand.
915     unsigned bitsToPreserve = semantics->precision - 1;
916     unsigned part = bitsToPreserve / 64;
917     bitsToPreserve %= 64;
918     significand[part] &= ((1ULL << bitsToPreserve) - 1);
919     for (part++; part != numParts; ++part)
920       significand[part] = 0;
921   }
922 
923   unsigned QNaNBit = semantics->precision - 2;
924 
925   if (SNaN) {
926     // We always have to clear the QNaN bit to make it an SNaN.
927     APInt::tcClearBit(significand, QNaNBit);
928 
929     // If there are no bits set in the payload, we have to set
930     // *something* to make it a NaN instead of an infinity;
931     // conventionally, this is the next bit down from the QNaN bit.
932     if (APInt::tcIsZero(significand, numParts))
933       APInt::tcSetBit(significand, QNaNBit - 1);
934   } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
935     // The only NaN is a quiet NaN, and it has no bits sets in the significand.
936     // Do nothing.
937   } else {
938     // We always have to set the QNaN bit to make it a QNaN.
939     APInt::tcSetBit(significand, QNaNBit);
940   }
941 
942   // For x87 extended precision, we want to make a NaN, not a
943   // pseudo-NaN.  Maybe we should expose the ability to make
944   // pseudo-NaNs?
945   if (semantics == &semX87DoubleExtended)
946     APInt::tcSetBit(significand, QNaNBit + 1);
947 }
948 
949 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
950   if (this != &rhs) {
951     if (semantics != rhs.semantics) {
952       freeSignificand();
953       initialize(rhs.semantics);
954     }
955     assign(rhs);
956   }
957 
958   return *this;
959 }
960 
961 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
962   freeSignificand();
963 
964   semantics = rhs.semantics;
965   significand = rhs.significand;
966   exponent = rhs.exponent;
967   category = rhs.category;
968   sign = rhs.sign;
969 
970   rhs.semantics = &semBogus;
971   return *this;
972 }
973 
974 bool IEEEFloat::isDenormal() const {
975   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
976          (APInt::tcExtractBit(significandParts(),
977                               semantics->precision - 1) == 0);
978 }
979 
980 bool IEEEFloat::isSmallest() const {
981   // The smallest number by magnitude in our format will be the smallest
982   // denormal, i.e. the floating point number with exponent being minimum
983   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
984   return isFiniteNonZero() && exponent == semantics->minExponent &&
985     significandMSB() == 0;
986 }
987 
988 bool IEEEFloat::isSmallestNormalized() const {
989   return getCategory() == fcNormal && exponent == semantics->minExponent &&
990          isSignificandAllZerosExceptMSB();
991 }
992 
993 bool IEEEFloat::isSignificandAllOnes() const {
994   // Test if the significand excluding the integral bit is all ones. This allows
995   // us to test for binade boundaries.
996   const integerPart *Parts = significandParts();
997   const unsigned PartCount = partCountForBits(semantics->precision);
998   for (unsigned i = 0; i < PartCount - 1; i++)
999     if (~Parts[i])
1000       return false;
1001 
1002   // Set the unused high bits to all ones when we compare.
1003   const unsigned NumHighBits =
1004     PartCount*integerPartWidth - semantics->precision + 1;
1005   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1006          "Can not have more high bits to fill than integerPartWidth");
1007   const integerPart HighBitFill =
1008     ~integerPart(0) << (integerPartWidth - NumHighBits);
1009   if (~(Parts[PartCount - 1] | HighBitFill))
1010     return false;
1011 
1012   return true;
1013 }
1014 
1015 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1016   // Test if the significand excluding the integral bit is all ones except for
1017   // the least significant bit.
1018   const integerPart *Parts = significandParts();
1019 
1020   if (Parts[0] & 1)
1021     return false;
1022 
1023   const unsigned PartCount = partCountForBits(semantics->precision);
1024   for (unsigned i = 0; i < PartCount - 1; i++) {
1025     if (~Parts[i] & ~unsigned{!i})
1026       return false;
1027   }
1028 
1029   // Set the unused high bits to all ones when we compare.
1030   const unsigned NumHighBits =
1031       PartCount * integerPartWidth - semantics->precision + 1;
1032   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1033          "Can not have more high bits to fill than integerPartWidth");
1034   const integerPart HighBitFill = ~integerPart(0)
1035                                   << (integerPartWidth - NumHighBits);
1036   if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1037     return false;
1038 
1039   return true;
1040 }
1041 
1042 bool IEEEFloat::isSignificandAllZeros() const {
1043   // Test if the significand excluding the integral bit is all zeros. This
1044   // allows us to test for binade boundaries.
1045   const integerPart *Parts = significandParts();
1046   const unsigned PartCount = partCountForBits(semantics->precision);
1047 
1048   for (unsigned i = 0; i < PartCount - 1; i++)
1049     if (Parts[i])
1050       return false;
1051 
1052   // Compute how many bits are used in the final word.
1053   const unsigned NumHighBits =
1054     PartCount*integerPartWidth - semantics->precision + 1;
1055   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1056          "clear than integerPartWidth");
1057   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1058 
1059   if (Parts[PartCount - 1] & HighBitMask)
1060     return false;
1061 
1062   return true;
1063 }
1064 
1065 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1066   const integerPart *Parts = significandParts();
1067   const unsigned PartCount = partCountForBits(semantics->precision);
1068 
1069   for (unsigned i = 0; i < PartCount - 1; i++) {
1070     if (Parts[i])
1071       return false;
1072   }
1073 
1074   const unsigned NumHighBits =
1075       PartCount * integerPartWidth - semantics->precision + 1;
1076   return Parts[PartCount - 1] == integerPart(1)
1077                                      << (integerPartWidth - NumHighBits);
1078 }
1079 
1080 bool IEEEFloat::isLargest() const {
1081   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1082       semantics->nanEncoding == fltNanEncoding::AllOnes) {
1083     // The largest number by magnitude in our format will be the floating point
1084     // number with maximum exponent and with significand that is all ones except
1085     // the LSB.
1086     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1087            isSignificandAllOnesExceptLSB();
1088   } else {
1089     // The largest number by magnitude in our format will be the floating point
1090     // number with maximum exponent and with significand that is all ones.
1091     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1092            isSignificandAllOnes();
1093   }
1094 }
1095 
1096 bool IEEEFloat::isInteger() const {
1097   // This could be made more efficient; I'm going for obviously correct.
1098   if (!isFinite()) return false;
1099   IEEEFloat truncated = *this;
1100   truncated.roundToIntegral(rmTowardZero);
1101   return compare(truncated) == cmpEqual;
1102 }
1103 
1104 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1105   if (this == &rhs)
1106     return true;
1107   if (semantics != rhs.semantics ||
1108       category != rhs.category ||
1109       sign != rhs.sign)
1110     return false;
1111   if (category==fcZero || category==fcInfinity)
1112     return true;
1113 
1114   if (isFiniteNonZero() && exponent != rhs.exponent)
1115     return false;
1116 
1117   return std::equal(significandParts(), significandParts() + partCount(),
1118                     rhs.significandParts());
1119 }
1120 
1121 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1122   initialize(&ourSemantics);
1123   sign = 0;
1124   category = fcNormal;
1125   zeroSignificand();
1126   exponent = ourSemantics.precision - 1;
1127   significandParts()[0] = value;
1128   normalize(rmNearestTiesToEven, lfExactlyZero);
1129 }
1130 
1131 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1132   initialize(&ourSemantics);
1133   makeZero(false);
1134 }
1135 
1136 // Delegate to the previous constructor, because later copy constructor may
1137 // actually inspects category, which can't be garbage.
1138 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1139     : IEEEFloat(ourSemantics) {}
1140 
1141 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1142   initialize(rhs.semantics);
1143   assign(rhs);
1144 }
1145 
1146 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1147   *this = std::move(rhs);
1148 }
1149 
1150 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1151 
1152 unsigned int IEEEFloat::partCount() const {
1153   return partCountForBits(semantics->precision + 1);
1154 }
1155 
1156 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1157   return const_cast<IEEEFloat *>(this)->significandParts();
1158 }
1159 
1160 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1161   if (partCount() > 1)
1162     return significand.parts;
1163   else
1164     return &significand.part;
1165 }
1166 
1167 void IEEEFloat::zeroSignificand() {
1168   APInt::tcSet(significandParts(), 0, partCount());
1169 }
1170 
1171 /* Increment an fcNormal floating point number's significand.  */
1172 void IEEEFloat::incrementSignificand() {
1173   integerPart carry;
1174 
1175   carry = APInt::tcIncrement(significandParts(), partCount());
1176 
1177   /* Our callers should never cause us to overflow.  */
1178   assert(carry == 0);
1179   (void)carry;
1180 }
1181 
1182 /* Add the significand of the RHS.  Returns the carry flag.  */
1183 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1184   integerPart *parts;
1185 
1186   parts = significandParts();
1187 
1188   assert(semantics == rhs.semantics);
1189   assert(exponent == rhs.exponent);
1190 
1191   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1192 }
1193 
1194 /* Subtract the significand of the RHS with a borrow flag.  Returns
1195    the borrow flag.  */
1196 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1197                                                       integerPart borrow) {
1198   integerPart *parts;
1199 
1200   parts = significandParts();
1201 
1202   assert(semantics == rhs.semantics);
1203   assert(exponent == rhs.exponent);
1204 
1205   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1206                            partCount());
1207 }
1208 
/* Multiply our significand by that of RHS, leaving the product in our
   significand and adjusting our exponent.  If ADDEND is non-zero, it is
   added to the full-precision product before the result is narrowed
   back to the original precision.  Returns the lost fraction.  Note
   that the result is not necessarily normalized (see below).  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Use the on-stack scratch buffer when it is large enough; otherwise the
  // product lives on the heap and is released at the end of the function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    // While the addition runs, this object temporarily masquerades as a
    // wider float: its significand and semantics are swapped for the full
    // product and a local extended semantics, and restored afterwards.
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Point our significand at the full product (stored inline when it fits
    // in a single part).
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  In the single-part case the sum was computed in
       the inline part, so copy it back into the product buffer first.  */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1, not precision;
  // presumably the extra one pairs with the "+= 2" above -- confirm.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  // Copy the low parts of the (possibly shifted) product back into our
  // own significand storage.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}
1343 
1344 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1345   return multiplySignificand(rhs, IEEEFloat(*semantics));
1346 }
1347 
1348 /* Multiply the significands of LHS and RHS to DST.  */
1349 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1350   unsigned int bit, i, partsCount;
1351   const integerPart *rhsSignificand;
1352   integerPart *lhsSignificand, *dividend, *divisor;
1353   integerPart scratch[4];
1354   lostFraction lost_fraction;
1355 
1356   assert(semantics == rhs.semantics);
1357 
1358   lhsSignificand = significandParts();
1359   rhsSignificand = rhs.significandParts();
1360   partsCount = partCount();
1361 
1362   if (partsCount > 2)
1363     dividend = new integerPart[partsCount * 2];
1364   else
1365     dividend = scratch;
1366 
1367   divisor = dividend + partsCount;
1368 
1369   /* Copy the dividend and divisor as they will be modified in-place.  */
1370   for (i = 0; i < partsCount; i++) {
1371     dividend[i] = lhsSignificand[i];
1372     divisor[i] = rhsSignificand[i];
1373     lhsSignificand[i] = 0;
1374   }
1375 
1376   exponent -= rhs.exponent;
1377 
1378   unsigned int precision = semantics->precision;
1379 
1380   /* Normalize the divisor.  */
1381   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1382   if (bit) {
1383     exponent += bit;
1384     APInt::tcShiftLeft(divisor, partsCount, bit);
1385   }
1386 
1387   /* Normalize the dividend.  */
1388   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1389   if (bit) {
1390     exponent -= bit;
1391     APInt::tcShiftLeft(dividend, partsCount, bit);
1392   }
1393 
1394   /* Ensure the dividend >= divisor initially for the loop below.
1395      Incidentally, this means that the division loop below is
1396      guaranteed to set the integer bit to one.  */
1397   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1398     exponent--;
1399     APInt::tcShiftLeft(dividend, partsCount, 1);
1400     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1401   }
1402 
1403   /* Long division.  */
1404   for (bit = precision; bit; bit -= 1) {
1405     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1406       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1407       APInt::tcSetBit(lhsSignificand, bit - 1);
1408     }
1409 
1410     APInt::tcShiftLeft(dividend, partsCount, 1);
1411   }
1412 
1413   /* Figure out the lost fraction.  */
1414   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1415 
1416   if (cmp > 0)
1417     lost_fraction = lfMoreThanHalf;
1418   else if (cmp == 0)
1419     lost_fraction = lfExactlyHalf;
1420   else if (APInt::tcIsZero(dividend, partsCount))
1421     lost_fraction = lfExactlyZero;
1422   else
1423     lost_fraction = lfLessThanHalf;
1424 
1425   if (partsCount > 2)
1426     delete [] dividend;
1427 
1428   return lost_fraction;
1429 }
1430 
1431 unsigned int IEEEFloat::significandMSB() const {
1432   return APInt::tcMSB(significandParts(), partCount());
1433 }
1434 
1435 unsigned int IEEEFloat::significandLSB() const {
1436   return APInt::tcLSB(significandParts(), partCount());
1437 }
1438 
1439 /* Note that a zero result is NOT normalized to fcZero.  */
1440 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1441   /* Our exponent should not overflow.  */
1442   assert((ExponentType) (exponent + bits) >= exponent);
1443 
1444   exponent += bits;
1445 
1446   return shiftRight(significandParts(), partCount(), bits);
1447 }
1448 
1449 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1450 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1451   assert(bits < semantics->precision);
1452 
1453   if (bits) {
1454     unsigned int partsCount = partCount();
1455 
1456     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1457     exponent -= bits;
1458 
1459     assert(!APInt::tcIsZero(significandParts(), partsCount));
1460   }
1461 }
1462 
1463 IEEEFloat::cmpResult
1464 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1465   int compare;
1466 
1467   assert(semantics == rhs.semantics);
1468   assert(isFiniteNonZero());
1469   assert(rhs.isFiniteNonZero());
1470 
1471   compare = exponent - rhs.exponent;
1472 
1473   /* If exponents are equal, do an unsigned bignum comparison of the
1474      significands.  */
1475   if (compare == 0)
1476     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1477                                partCount());
1478 
1479   if (compare > 0)
1480     return cmpGreaterThan;
1481   else if (compare < 0)
1482     return cmpLessThan;
1483   else
1484     return cmpEqual;
1485 }
1486 
1487 /* Set the least significant BITS bits of a bignum, clear the
1488    rest.  */
1489 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1490                                       unsigned bits) {
1491   unsigned i = 0;
1492   while (bits > APInt::APINT_BITS_PER_WORD) {
1493     dst[i++] = ~(APInt::WordType)0;
1494     bits -= APInt::APINT_BITS_PER_WORD;
1495   }
1496 
1497   if (bits)
1498     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1499 
1500   while (i < parts)
1501     dst[i++] = 0;
1502 }
1503 
1504 /* Handle overflow.  Sign is preserved.  We either become infinity or
1505    the largest finite number.  */
1506 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1507   /* Infinity?  */
1508   if (rounding_mode == rmNearestTiesToEven ||
1509       rounding_mode == rmNearestTiesToAway ||
1510       (rounding_mode == rmTowardPositive && !sign) ||
1511       (rounding_mode == rmTowardNegative && sign)) {
1512     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1513       makeNaN(false, sign);
1514     else
1515       category = fcInfinity;
1516     return (opStatus) (opOverflow | opInexact);
1517   }
1518 
1519   /* Otherwise we become the largest finite number.  */
1520   category = fcNormal;
1521   exponent = semantics->maxExponent;
1522   tcSetLeastSignificantBits(significandParts(), partCount(),
1523                             semantics->precision);
1524   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1525       semantics->nanEncoding == fltNanEncoding::AllOnes)
1526     APInt::tcClearBit(significandParts(), 0);
1527 
1528   return opInexact;
1529 }
1530 
1531 /* Returns TRUE if, when truncating the current number, with BIT the
1532    new LSB, with the given lost fraction and rounding mode, the result
1533    would need to be rounded away from zero (i.e., by increasing the
1534    signficand).  This routine must work for fcZero of both signs, and
1535    fcNormal numbers.  */
1536 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1537                                   lostFraction lost_fraction,
1538                                   unsigned int bit) const {
1539   /* NaNs and infinities should not have lost fractions.  */
1540   assert(isFiniteNonZero() || category == fcZero);
1541 
1542   /* Current callers never pass this so we don't handle it.  */
1543   assert(lost_fraction != lfExactlyZero);
1544 
1545   switch (rounding_mode) {
1546   case rmNearestTiesToAway:
1547     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1548 
1549   case rmNearestTiesToEven:
1550     if (lost_fraction == lfMoreThanHalf)
1551       return true;
1552 
1553     /* Our zeroes don't have a significand to test.  */
1554     if (lost_fraction == lfExactlyHalf && category != fcZero)
1555       return APInt::tcExtractBit(significandParts(), bit);
1556 
1557     return false;
1558 
1559   case rmTowardZero:
1560     return false;
1561 
1562   case rmTowardPositive:
1563     return !sign;
1564 
1565   case rmTowardNegative:
1566     return sign;
1567 
1568   default:
1569     break;
1570   }
1571   llvm_unreachable("Invalid rounding mode found");
1572 }
1573 
/* Normalize a finite non-zero number and round it according to
   ROUNDING_MODE, where LOST_FRACTION describes the bits already dropped
   below the significand.  The significand is shifted so that its MSB
   sits at the integer bit where the exponent range allows; overflow is
   delegated to handleOverflow() and zero results are canonicalized to
   fcZero.  Returns the resulting status flags.  Numbers that are not
   finite non-zero are left untouched and yield opOK.  */
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones values is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  */
    if (omsb == 0) {
      category = fcZero;
      /* With the NegativeZero NaN encoding a zero is never left with
         its sign set.  */
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    /* The significand is currently zero; the increment below makes it
       non-zero, so give it the minimum exponent.  */
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones values is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}
1700 
1701 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1702                                                      bool subtract) {
1703   switch (PackCategoriesIntoKey(category, rhs.category)) {
1704   default:
1705     llvm_unreachable(nullptr);
1706 
1707   case PackCategoriesIntoKey(fcZero, fcNaN):
1708   case PackCategoriesIntoKey(fcNormal, fcNaN):
1709   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1710     assign(rhs);
1711     [[fallthrough]];
1712   case PackCategoriesIntoKey(fcNaN, fcZero):
1713   case PackCategoriesIntoKey(fcNaN, fcNormal):
1714   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1715   case PackCategoriesIntoKey(fcNaN, fcNaN):
1716     if (isSignaling()) {
1717       makeQuiet();
1718       return opInvalidOp;
1719     }
1720     return rhs.isSignaling() ? opInvalidOp : opOK;
1721 
1722   case PackCategoriesIntoKey(fcNormal, fcZero):
1723   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1724   case PackCategoriesIntoKey(fcInfinity, fcZero):
1725     return opOK;
1726 
1727   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1728   case PackCategoriesIntoKey(fcZero, fcInfinity):
1729     category = fcInfinity;
1730     sign = rhs.sign ^ subtract;
1731     return opOK;
1732 
1733   case PackCategoriesIntoKey(fcZero, fcNormal):
1734     assign(rhs);
1735     sign = rhs.sign ^ subtract;
1736     return opOK;
1737 
1738   case PackCategoriesIntoKey(fcZero, fcZero):
1739     /* Sign depends on rounding mode; handled by caller.  */
1740     return opOK;
1741 
1742   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1743     /* Differently signed infinities can only be validly
1744        subtracted.  */
1745     if (((sign ^ rhs.sign)!=0) != subtract) {
1746       makeNaN();
1747       return opInvalidOp;
1748     }
1749 
1750     return opOK;
1751 
1752   case PackCategoriesIntoKey(fcNormal, fcNormal):
1753     return opDivByZero;
1754   }
1755 }
1756 
1757 /* Add or subtract two normal numbers.  */
1758 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1759                                                  bool subtract) {
1760   integerPart carry;
1761   lostFraction lost_fraction;
1762   int bits;
1763 
1764   /* Determine if the operation on the absolute values is effectively
1765      an addition or subtraction.  */
1766   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1767 
1768   /* Are we bigger exponent-wise than the RHS?  */
1769   bits = exponent - rhs.exponent;
1770 
1771   /* Subtraction is more subtle than one might naively expect.  */
1772   if (subtract) {
1773     IEEEFloat temp_rhs(rhs);
1774 
1775     if (bits == 0)
1776       lost_fraction = lfExactlyZero;
1777     else if (bits > 0) {
1778       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1779       shiftSignificandLeft(1);
1780     } else {
1781       lost_fraction = shiftSignificandRight(-bits - 1);
1782       temp_rhs.shiftSignificandLeft(1);
1783     }
1784 
1785     // Should we reverse the subtraction.
1786     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1787       carry = temp_rhs.subtractSignificand
1788         (*this, lost_fraction != lfExactlyZero);
1789       copySignificand(temp_rhs);
1790       sign = !sign;
1791     } else {
1792       carry = subtractSignificand
1793         (temp_rhs, lost_fraction != lfExactlyZero);
1794     }
1795 
1796     /* Invert the lost fraction - it was on the RHS and
1797        subtracted.  */
1798     if (lost_fraction == lfLessThanHalf)
1799       lost_fraction = lfMoreThanHalf;
1800     else if (lost_fraction == lfMoreThanHalf)
1801       lost_fraction = lfLessThanHalf;
1802 
1803     /* The code above is intended to ensure that no borrow is
1804        necessary.  */
1805     assert(!carry);
1806     (void)carry;
1807   } else {
1808     if (bits > 0) {
1809       IEEEFloat temp_rhs(rhs);
1810 
1811       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1812       carry = addSignificand(temp_rhs);
1813     } else {
1814       lost_fraction = shiftSignificandRight(-bits);
1815       carry = addSignificand(rhs);
1816     }
1817 
1818     /* We have a guard bit; generating a carry cannot happen.  */
1819     assert(!carry);
1820     (void)carry;
1821   }
1822 
1823   return lost_fraction;
1824 }
1825 
1826 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1827   switch (PackCategoriesIntoKey(category, rhs.category)) {
1828   default:
1829     llvm_unreachable(nullptr);
1830 
1831   case PackCategoriesIntoKey(fcZero, fcNaN):
1832   case PackCategoriesIntoKey(fcNormal, fcNaN):
1833   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1834     assign(rhs);
1835     sign = false;
1836     [[fallthrough]];
1837   case PackCategoriesIntoKey(fcNaN, fcZero):
1838   case PackCategoriesIntoKey(fcNaN, fcNormal):
1839   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1840   case PackCategoriesIntoKey(fcNaN, fcNaN):
1841     sign ^= rhs.sign; // restore the original sign
1842     if (isSignaling()) {
1843       makeQuiet();
1844       return opInvalidOp;
1845     }
1846     return rhs.isSignaling() ? opInvalidOp : opOK;
1847 
1848   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1849   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1850   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1851     category = fcInfinity;
1852     return opOK;
1853 
1854   case PackCategoriesIntoKey(fcZero, fcNormal):
1855   case PackCategoriesIntoKey(fcNormal, fcZero):
1856   case PackCategoriesIntoKey(fcZero, fcZero):
1857     category = fcZero;
1858     return opOK;
1859 
1860   case PackCategoriesIntoKey(fcZero, fcInfinity):
1861   case PackCategoriesIntoKey(fcInfinity, fcZero):
1862     makeNaN();
1863     return opInvalidOp;
1864 
1865   case PackCategoriesIntoKey(fcNormal, fcNormal):
1866     return opOK;
1867   }
1868 }
1869 
1870 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1871   switch (PackCategoriesIntoKey(category, rhs.category)) {
1872   default:
1873     llvm_unreachable(nullptr);
1874 
1875   case PackCategoriesIntoKey(fcZero, fcNaN):
1876   case PackCategoriesIntoKey(fcNormal, fcNaN):
1877   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1878     assign(rhs);
1879     sign = false;
1880     [[fallthrough]];
1881   case PackCategoriesIntoKey(fcNaN, fcZero):
1882   case PackCategoriesIntoKey(fcNaN, fcNormal):
1883   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1884   case PackCategoriesIntoKey(fcNaN, fcNaN):
1885     sign ^= rhs.sign; // restore the original sign
1886     if (isSignaling()) {
1887       makeQuiet();
1888       return opInvalidOp;
1889     }
1890     return rhs.isSignaling() ? opInvalidOp : opOK;
1891 
1892   case PackCategoriesIntoKey(fcInfinity, fcZero):
1893   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1894   case PackCategoriesIntoKey(fcZero, fcInfinity):
1895   case PackCategoriesIntoKey(fcZero, fcNormal):
1896     return opOK;
1897 
1898   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1899     category = fcZero;
1900     return opOK;
1901 
1902   case PackCategoriesIntoKey(fcNormal, fcZero):
1903     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1904       makeNaN(false, sign);
1905     else
1906       category = fcInfinity;
1907     return opDivByZero;
1908 
1909   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1910   case PackCategoriesIntoKey(fcZero, fcZero):
1911     makeNaN();
1912     return opInvalidOp;
1913 
1914   case PackCategoriesIntoKey(fcNormal, fcNormal):
1915     return opOK;
1916   }
1917 }
1918 
1919 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1920   switch (PackCategoriesIntoKey(category, rhs.category)) {
1921   default:
1922     llvm_unreachable(nullptr);
1923 
1924   case PackCategoriesIntoKey(fcZero, fcNaN):
1925   case PackCategoriesIntoKey(fcNormal, fcNaN):
1926   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1927     assign(rhs);
1928     [[fallthrough]];
1929   case PackCategoriesIntoKey(fcNaN, fcZero):
1930   case PackCategoriesIntoKey(fcNaN, fcNormal):
1931   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1932   case PackCategoriesIntoKey(fcNaN, fcNaN):
1933     if (isSignaling()) {
1934       makeQuiet();
1935       return opInvalidOp;
1936     }
1937     return rhs.isSignaling() ? opInvalidOp : opOK;
1938 
1939   case PackCategoriesIntoKey(fcZero, fcInfinity):
1940   case PackCategoriesIntoKey(fcZero, fcNormal):
1941   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1942     return opOK;
1943 
1944   case PackCategoriesIntoKey(fcNormal, fcZero):
1945   case PackCategoriesIntoKey(fcInfinity, fcZero):
1946   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1947   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1948   case PackCategoriesIntoKey(fcZero, fcZero):
1949     makeNaN();
1950     return opInvalidOp;
1951 
1952   case PackCategoriesIntoKey(fcNormal, fcNormal):
1953     return opOK;
1954   }
1955 }
1956 
1957 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1958   switch (PackCategoriesIntoKey(category, rhs.category)) {
1959   default:
1960     llvm_unreachable(nullptr);
1961 
1962   case PackCategoriesIntoKey(fcZero, fcNaN):
1963   case PackCategoriesIntoKey(fcNormal, fcNaN):
1964   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1965     assign(rhs);
1966     [[fallthrough]];
1967   case PackCategoriesIntoKey(fcNaN, fcZero):
1968   case PackCategoriesIntoKey(fcNaN, fcNormal):
1969   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1970   case PackCategoriesIntoKey(fcNaN, fcNaN):
1971     if (isSignaling()) {
1972       makeQuiet();
1973       return opInvalidOp;
1974     }
1975     return rhs.isSignaling() ? opInvalidOp : opOK;
1976 
1977   case PackCategoriesIntoKey(fcZero, fcInfinity):
1978   case PackCategoriesIntoKey(fcZero, fcNormal):
1979   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1980     return opOK;
1981 
1982   case PackCategoriesIntoKey(fcNormal, fcZero):
1983   case PackCategoriesIntoKey(fcInfinity, fcZero):
1984   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1985   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1986   case PackCategoriesIntoKey(fcZero, fcZero):
1987     makeNaN();
1988     return opInvalidOp;
1989 
1990   case PackCategoriesIntoKey(fcNormal, fcNormal):
1991     return opDivByZero; // fake status, indicating this is not a special case
1992   }
1993 }
1994 
1995 /* Change sign.  */
1996 void IEEEFloat::changeSign() {
1997   // With NaN-as-negative-zero, neither NaN or negative zero can change
1998   // their signs.
1999   if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2000       (isZero() || isNaN()))
2001     return;
2002   /* Look mummy, this one's easy.  */
2003   sign = !sign;
2004 }
2005 
2006 /* Normalized addition or subtraction.  */
2007 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2008                                              roundingMode rounding_mode,
2009                                              bool subtract) {
2010   opStatus fs;
2011 
2012   fs = addOrSubtractSpecials(rhs, subtract);
2013 
2014   /* This return code means it was not a simple case.  */
2015   if (fs == opDivByZero) {
2016     lostFraction lost_fraction;
2017 
2018     lost_fraction = addOrSubtractSignificand(rhs, subtract);
2019     fs = normalize(rounding_mode, lost_fraction);
2020 
2021     /* Can only be zero if we lost no fraction.  */
2022     assert(category != fcZero || lost_fraction == lfExactlyZero);
2023   }
2024 
2025   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2026      positive zero unless rounding to minus infinity, except that
2027      adding two like-signed zeroes gives that zero.  */
2028   if (category == fcZero) {
2029     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2030       sign = (rounding_mode == rmTowardNegative);
2031     // NaN-in-negative-zero means zeros need to be normalized to +0.
2032     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2033       sign = false;
2034   }
2035 
2036   return fs;
2037 }
2038 
2039 /* Normalized addition.  */
2040 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2041                                    roundingMode rounding_mode) {
2042   return addOrSubtract(rhs, rounding_mode, false);
2043 }
2044 
2045 /* Normalized subtraction.  */
2046 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2047                                         roundingMode rounding_mode) {
2048   return addOrSubtract(rhs, rounding_mode, true);
2049 }
2050 
2051 /* Normalized multiply.  */
2052 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2053                                         roundingMode rounding_mode) {
2054   opStatus fs;
2055 
2056   sign ^= rhs.sign;
2057   fs = multiplySpecials(rhs);
2058 
2059   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2060     sign = false;
2061   if (isFiniteNonZero()) {
2062     lostFraction lost_fraction = multiplySignificand(rhs);
2063     fs = normalize(rounding_mode, lost_fraction);
2064     if (lost_fraction != lfExactlyZero)
2065       fs = (opStatus) (fs | opInexact);
2066   }
2067 
2068   return fs;
2069 }
2070 
2071 /* Normalized divide.  */
2072 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2073                                       roundingMode rounding_mode) {
2074   opStatus fs;
2075 
2076   sign ^= rhs.sign;
2077   fs = divideSpecials(rhs);
2078 
2079   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2080     sign = false;
2081   if (isFiniteNonZero()) {
2082     lostFraction lost_fraction = divideSignificand(rhs);
2083     fs = normalize(rounding_mode, lost_fraction);
2084     if (lost_fraction != lfExactlyZero)
2085       fs = (opStatus) (fs | opInexact);
2086   }
2087 
2088   return fs;
2089 }
2090 
2091 /* Normalized remainder.  */
2092 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2093   opStatus fs;
2094   unsigned int origSign = sign;
2095 
2096   // First handle the special cases.
2097   fs = remainderSpecials(rhs);
2098   if (fs != opDivByZero)
2099     return fs;
2100 
2101   fs = opOK;
2102 
2103   // Make sure the current value is less than twice the denom. If the addition
2104   // did not succeed (an overflow has happened), which means that the finite
2105   // value we currently posses must be less than twice the denom (as we are
2106   // using the same semantics).
2107   IEEEFloat P2 = rhs;
2108   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2109     fs = mod(P2);
2110     assert(fs == opOK);
2111   }
2112 
2113   // Lets work with absolute numbers.
2114   IEEEFloat P = rhs;
2115   P.sign = false;
2116   sign = false;
2117 
2118   //
2119   // To calculate the remainder we use the following scheme.
2120   //
2121   // The remainder is defained as follows:
2122   //
2123   // remainder = numer - rquot * denom = x - r * p
2124   //
2125   // Where r is the result of: x/p, rounded toward the nearest integral value
2126   // (with halfway cases rounded toward the even number).
2127   //
2128   // Currently, (after x mod 2p):
2129   // r is the number of 2p's present inside x, which is inherently, an even
2130   // number of p's.
2131   //
2132   // We may split the remaining calculation into 4 options:
2133   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2134   // - if x == 0.5p then we round to the nearest even number which is 0, and we
2135   //   are done as well.
2136   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2137   //   to subtract 1p at least once.
2138   // - if x >= p then we must subtract p at least once, as x must be a
2139   //   remainder.
2140   //
2141   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2142   //
2143   // We can now split the remaining calculation to the following 3 options:
2144   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2145   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2146   //   must round up to the next even number. so we must subtract p once more.
2147   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2148   //   integral, and subtract p once more.
2149   //
2150 
2151   // Extend the semantics to prevent an overflow/underflow or inexact result.
2152   bool losesInfo;
2153   fltSemantics extendedSemantics = *semantics;
2154   extendedSemantics.maxExponent++;
2155   extendedSemantics.minExponent--;
2156   extendedSemantics.precision += 2;
2157 
2158   IEEEFloat VEx = *this;
2159   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2160   assert(fs == opOK && !losesInfo);
2161   IEEEFloat PEx = P;
2162   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2163   assert(fs == opOK && !losesInfo);
2164 
2165   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2166   // any fraction.
2167   fs = VEx.add(VEx, rmNearestTiesToEven);
2168   assert(fs == opOK);
2169 
2170   if (VEx.compare(PEx) == cmpGreaterThan) {
2171     fs = subtract(P, rmNearestTiesToEven);
2172     assert(fs == opOK);
2173 
2174     // Make VEx = this.add(this), but because we have different semantics, we do
2175     // not want to `convert` again, so we just subtract PEx twice (which equals
2176     // to the desired value).
2177     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2178     assert(fs == opOK);
2179     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2180     assert(fs == opOK);
2181 
2182     cmpResult result = VEx.compare(PEx);
2183     if (result == cmpGreaterThan || result == cmpEqual) {
2184       fs = subtract(P, rmNearestTiesToEven);
2185       assert(fs == opOK);
2186     }
2187   }
2188 
2189   if (isZero()) {
2190     sign = origSign;    // IEEE754 requires this
2191     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2192       // But some 8-bit floats only have positive 0.
2193       sign = false;
2194   }
2195 
2196   else
2197     sign ^= origSign;
2198   return fs;
2199 }
2200 
2201 /* Normalized llvm frem (C fmod). */
2202 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2203   opStatus fs;
2204   fs = modSpecials(rhs);
2205   unsigned int origSign = sign;
2206 
2207   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2208          compareAbsoluteValue(rhs) != cmpLessThan) {
2209     int Exp = ilogb(*this) - ilogb(rhs);
2210     IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2211     // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2212     // check for it.
2213     if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2214       V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2215     V.sign = sign;
2216 
2217     fs = subtract(V, rmNearestTiesToEven);
2218     assert(fs==opOK);
2219   }
2220   if (isZero()) {
2221     sign = origSign; // fmod requires this
2222     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2223       sign = false;
2224   }
2225   return fs;
2226 }
2227 
2228 /* Normalized fused-multiply-add.  */
2229 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2230                                                 const IEEEFloat &addend,
2231                                                 roundingMode rounding_mode) {
2232   opStatus fs;
2233 
2234   /* Post-multiplication sign, before addition.  */
2235   sign ^= multiplicand.sign;
2236 
2237   /* If and only if all arguments are normal do we need to do an
2238      extended-precision calculation.  */
2239   if (isFiniteNonZero() &&
2240       multiplicand.isFiniteNonZero() &&
2241       addend.isFinite()) {
2242     lostFraction lost_fraction;
2243 
2244     lost_fraction = multiplySignificand(multiplicand, addend);
2245     fs = normalize(rounding_mode, lost_fraction);
2246     if (lost_fraction != lfExactlyZero)
2247       fs = (opStatus) (fs | opInexact);
2248 
2249     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2250        positive zero unless rounding to minus infinity, except that
2251        adding two like-signed zeroes gives that zero.  */
2252     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2253       sign = (rounding_mode == rmTowardNegative);
2254       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2255         sign = false;
2256     }
2257   } else {
2258     fs = multiplySpecials(multiplicand);
2259 
2260     /* FS can only be opOK or opInvalidOp.  There is no more work
2261        to do in the latter case.  The IEEE-754R standard says it is
2262        implementation-defined in this case whether, if ADDEND is a
2263        quiet NaN, we raise invalid op; this implementation does so.
2264 
2265        If we need to do the addition we can do so with normal
2266        precision.  */
2267     if (fs == opOK)
2268       fs = addOrSubtract(addend, rounding_mode, false);
2269   }
2270 
2271   return fs;
2272 }
2273 
2274 /* Rounding-mode correct round to integral value.  */
2275 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2276   opStatus fs;
2277 
2278   if (isInfinity())
2279     // [IEEE Std 754-2008 6.1]:
2280     // The behavior of infinity in floating-point arithmetic is derived from the
2281     // limiting cases of real arithmetic with operands of arbitrarily
2282     // large magnitude, when such a limit exists.
2283     // ...
2284     // Operations on infinite operands are usually exact and therefore signal no
2285     // exceptions ...
2286     return opOK;
2287 
2288   if (isNaN()) {
2289     if (isSignaling()) {
2290       // [IEEE Std 754-2008 6.2]:
2291       // Under default exception handling, any operation signaling an invalid
2292       // operation exception and for which a floating-point result is to be
2293       // delivered shall deliver a quiet NaN.
2294       makeQuiet();
2295       // [IEEE Std 754-2008 6.2]:
2296       // Signaling NaNs shall be reserved operands that, under default exception
2297       // handling, signal the invalid operation exception(see 7.2) for every
2298       // general-computational and signaling-computational operation except for
2299       // the conversions described in 5.12.
2300       return opInvalidOp;
2301     } else {
2302       // [IEEE Std 754-2008 6.2]:
2303       // For an operation with quiet NaN inputs, other than maximum and minimum
2304       // operations, if a floating-point result is to be delivered the result
2305       // shall be a quiet NaN which should be one of the input NaNs.
2306       // ...
2307       // Every general-computational and quiet-computational operation involving
2308       // one or more input NaNs, none of them signaling, shall signal no
2309       // exception, except fusedMultiplyAdd might signal the invalid operation
2310       // exception(see 7.2).
2311       return opOK;
2312     }
2313   }
2314 
2315   if (isZero()) {
2316     // [IEEE Std 754-2008 6.3]:
2317     // ... the sign of the result of conversions, the quantize operation, the
2318     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2319     // the sign of the first or only operand.
2320     return opOK;
2321   }
2322 
2323   // If the exponent is large enough, we know that this value is already
2324   // integral, and the arithmetic below would potentially cause it to saturate
2325   // to +/-Inf.  Bail out early instead.
2326   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2327     return opOK;
2328 
2329   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2330   // precision of our format, and then subtract it back off again.  The choice
2331   // of rounding modes for the addition/subtraction determines the rounding mode
2332   // for our integral rounding as well.
2333   // NOTE: When the input value is negative, we do subtraction followed by
2334   // addition instead.
2335   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2336   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2337   IEEEFloat MagicConstant(*semantics);
2338   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2339                                       rmNearestTiesToEven);
2340   assert(fs == opOK);
2341   MagicConstant.sign = sign;
2342 
2343   // Preserve the input sign so that we can handle the case of zero result
2344   // correctly.
2345   bool inputSign = isNegative();
2346 
2347   fs = add(MagicConstant, rounding_mode);
2348 
2349   // Current value and 'MagicConstant' are both integers, so the result of the
2350   // subtraction is always exact according to Sterbenz' lemma.
2351   subtract(MagicConstant, rounding_mode);
2352 
2353   // Restore the input sign.
2354   if (inputSign != isNegative())
2355     changeSign();
2356 
2357   return fs;
2358 }
2359 
2360 
2361 /* Comparison requires normalized numbers.  */
2362 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2363   cmpResult result;
2364 
2365   assert(semantics == rhs.semantics);
2366 
2367   switch (PackCategoriesIntoKey(category, rhs.category)) {
2368   default:
2369     llvm_unreachable(nullptr);
2370 
2371   case PackCategoriesIntoKey(fcNaN, fcZero):
2372   case PackCategoriesIntoKey(fcNaN, fcNormal):
2373   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2374   case PackCategoriesIntoKey(fcNaN, fcNaN):
2375   case PackCategoriesIntoKey(fcZero, fcNaN):
2376   case PackCategoriesIntoKey(fcNormal, fcNaN):
2377   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2378     return cmpUnordered;
2379 
2380   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2381   case PackCategoriesIntoKey(fcInfinity, fcZero):
2382   case PackCategoriesIntoKey(fcNormal, fcZero):
2383     if (sign)
2384       return cmpLessThan;
2385     else
2386       return cmpGreaterThan;
2387 
2388   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2389   case PackCategoriesIntoKey(fcZero, fcInfinity):
2390   case PackCategoriesIntoKey(fcZero, fcNormal):
2391     if (rhs.sign)
2392       return cmpGreaterThan;
2393     else
2394       return cmpLessThan;
2395 
2396   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2397     if (sign == rhs.sign)
2398       return cmpEqual;
2399     else if (sign)
2400       return cmpLessThan;
2401     else
2402       return cmpGreaterThan;
2403 
2404   case PackCategoriesIntoKey(fcZero, fcZero):
2405     return cmpEqual;
2406 
2407   case PackCategoriesIntoKey(fcNormal, fcNormal):
2408     break;
2409   }
2410 
2411   /* Two normal numbers.  Do they have the same sign?  */
2412   if (sign != rhs.sign) {
2413     if (sign)
2414       result = cmpLessThan;
2415     else
2416       result = cmpGreaterThan;
2417   } else {
2418     /* Compare absolute values; invert result if negative.  */
2419     result = compareAbsoluteValue(rhs);
2420 
2421     if (sign) {
2422       if (result == cmpLessThan)
2423         result = cmpGreaterThan;
2424       else if (result == cmpGreaterThan)
2425         result = cmpLessThan;
2426     }
2427   }
2428 
2429   return result;
2430 }
2431 
2432 /// IEEEFloat::convert - convert a value of one floating point type to another.
2433 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2434 /// records whether the transformation lost information, i.e. whether
2435 /// converting the result back to the original type will produce the
2436 /// original value (this is almost the same as return value==fsOK, but there
2437 /// are edge cases where this is not so).
2438 
2439 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2440                                        roundingMode rounding_mode,
2441                                        bool *losesInfo) {
2442   lostFraction lostFraction;
2443   unsigned int newPartCount, oldPartCount;
2444   opStatus fs;
2445   int shift;
2446   const fltSemantics &fromSemantics = *semantics;
2447   bool is_signaling = isSignaling();
2448 
2449   lostFraction = lfExactlyZero;
2450   newPartCount = partCountForBits(toSemantics.precision + 1);
2451   oldPartCount = partCount();
2452   shift = toSemantics.precision - fromSemantics.precision;
2453 
2454   bool X86SpecialNan = false;
2455   if (&fromSemantics == &semX87DoubleExtended &&
2456       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2457       (!(*significandParts() & 0x8000000000000000ULL) ||
2458        !(*significandParts() & 0x4000000000000000ULL))) {
2459     // x86 has some unusual NaNs which cannot be represented in any other
2460     // format; note them here.
2461     X86SpecialNan = true;
2462   }
2463 
2464   // If this is a truncation of a denormal number, and the target semantics
2465   // has larger exponent range than the source semantics (this can happen
2466   // when truncating from PowerPC double-double to double format), the
2467   // right shift could lose result mantissa bits.  Adjust exponent instead
2468   // of performing excessive shift.
2469   // Also do a similar trick in case shifting denormal would produce zero
2470   // significand as this case isn't handled correctly by normalize.
2471   if (shift < 0 && isFiniteNonZero()) {
2472     int omsb = significandMSB() + 1;
2473     int exponentChange = omsb - fromSemantics.precision;
2474     if (exponent + exponentChange < toSemantics.minExponent)
2475       exponentChange = toSemantics.minExponent - exponent;
2476     if (exponentChange < shift)
2477       exponentChange = shift;
2478     if (exponentChange < 0) {
2479       shift -= exponentChange;
2480       exponent += exponentChange;
2481     } else if (omsb <= -shift) {
2482       exponentChange = omsb + shift - 1; // leave at least one bit set
2483       shift -= exponentChange;
2484       exponent += exponentChange;
2485     }
2486   }
2487 
2488   // If this is a truncation, perform the shift before we narrow the storage.
2489   if (shift < 0 && (isFiniteNonZero() ||
2490                     (category == fcNaN && semantics->nonFiniteBehavior !=
2491                                               fltNonfiniteBehavior::NanOnly)))
2492     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2493 
2494   // Fix the storage so it can hold to new value.
2495   if (newPartCount > oldPartCount) {
2496     // The new type requires more storage; make it available.
2497     integerPart *newParts;
2498     newParts = new integerPart[newPartCount];
2499     APInt::tcSet(newParts, 0, newPartCount);
2500     if (isFiniteNonZero() || category==fcNaN)
2501       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2502     freeSignificand();
2503     significand.parts = newParts;
2504   } else if (newPartCount == 1 && oldPartCount != 1) {
2505     // Switch to built-in storage for a single part.
2506     integerPart newPart = 0;
2507     if (isFiniteNonZero() || category==fcNaN)
2508       newPart = significandParts()[0];
2509     freeSignificand();
2510     significand.part = newPart;
2511   }
2512 
2513   // Now that we have the right storage, switch the semantics.
2514   semantics = &toSemantics;
2515 
2516   // If this is an extension, perform the shift now that the storage is
2517   // available.
2518   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2519     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2520 
2521   if (isFiniteNonZero()) {
2522     fs = normalize(rounding_mode, lostFraction);
2523     *losesInfo = (fs != opOK);
2524   } else if (category == fcNaN) {
2525     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2526       *losesInfo =
2527           fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2528       makeNaN(false, sign);
2529       return is_signaling ? opInvalidOp : opOK;
2530     }
2531 
2532     // If NaN is negative zero, we need to create a new NaN to avoid converting
2533     // NaN to -Inf.
2534     if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2535         semantics->nanEncoding != fltNanEncoding::NegativeZero)
2536       makeNaN(false, false);
2537 
2538     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2539 
2540     // For x87 extended precision, we want to make a NaN, not a special NaN if
2541     // the input wasn't special either.
2542     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2543       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2544 
2545     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2546     // This also guarantees that a sNaN does not become Inf on a truncation
2547     // that loses all payload bits.
2548     if (is_signaling) {
2549       makeQuiet();
2550       fs = opInvalidOp;
2551     } else {
2552       fs = opOK;
2553     }
2554   } else if (category == fcInfinity &&
2555              semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2556     makeNaN(false, sign);
2557     *losesInfo = true;
2558     fs = opInexact;
2559   } else if (category == fcZero &&
2560              semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2561     // Negative zero loses info, but positive zero doesn't.
2562     *losesInfo =
2563         fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2564     fs = *losesInfo ? opInexact : opOK;
2565     // NaN is negative zero means -0 -> +0, which can lose information
2566     sign = false;
2567   } else {
2568     *losesInfo = false;
2569     fs = opOK;
2570   }
2571 
2572   return fs;
2573 }
2574 
2575 /* Convert a floating point number to an integer according to the
2576    rounding mode.  If the rounded integer value is out of range this
2577    returns an invalid operation exception and the contents of the
2578    destination parts are unspecified.  If the rounded value is in
2579    range but the floating point number is not the exact integer, the C
2580    standard doesn't require an inexact exception to be raised.  IEEE
2581    854 does require it so we do that.
2582 
2583    Note that for conversions to integer type the C standard requires
2584    round-to-zero to always be used.  */
2585 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2586     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2587     roundingMode rounding_mode, bool *isExact) const {
2588   lostFraction lost_fraction;
2589   const integerPart *src;
2590   unsigned int dstPartsCount, truncatedBits;
2591 
2592   *isExact = false;
2593 
2594   /* Handle the three special cases first.  */
2595   if (category == fcInfinity || category == fcNaN)
2596     return opInvalidOp;
2597 
2598   dstPartsCount = partCountForBits(width);
2599   assert(dstPartsCount <= parts.size() && "Integer too big");
2600 
2601   if (category == fcZero) {
2602     APInt::tcSet(parts.data(), 0, dstPartsCount);
2603     // Negative zero can't be represented as an int.
2604     *isExact = !sign;
2605     return opOK;
2606   }
2607 
2608   src = significandParts();
2609 
2610   /* Step 1: place our absolute value, with any fraction truncated, in
2611      the destination.  */
2612   if (exponent < 0) {
2613     /* Our absolute value is less than one; truncate everything.  */
2614     APInt::tcSet(parts.data(), 0, dstPartsCount);
2615     /* For exponent -1 the integer bit represents .5, look at that.
2616        For smaller exponents leftmost truncated bit is 0. */
2617     truncatedBits = semantics->precision -1U - exponent;
2618   } else {
2619     /* We want the most significant (exponent + 1) bits; the rest are
2620        truncated.  */
2621     unsigned int bits = exponent + 1U;
2622 
2623     /* Hopelessly large in magnitude?  */
2624     if (bits > width)
2625       return opInvalidOp;
2626 
2627     if (bits < semantics->precision) {
2628       /* We truncate (semantics->precision - bits) bits.  */
2629       truncatedBits = semantics->precision - bits;
2630       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2631     } else {
2632       /* We want at least as many bits as are available.  */
2633       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2634                        0);
2635       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2636                          bits - semantics->precision);
2637       truncatedBits = 0;
2638     }
2639   }
2640 
2641   /* Step 2: work out any lost fraction, and increment the absolute
2642      value if we would round away from zero.  */
2643   if (truncatedBits) {
2644     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2645                                                   truncatedBits);
2646     if (lost_fraction != lfExactlyZero &&
2647         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2648       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2649         return opInvalidOp;     /* Overflow.  */
2650     }
2651   } else {
2652     lost_fraction = lfExactlyZero;
2653   }
2654 
2655   /* Step 3: check if we fit in the destination.  */
2656   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2657 
2658   if (sign) {
2659     if (!isSigned) {
2660       /* Negative numbers cannot be represented as unsigned.  */
2661       if (omsb != 0)
2662         return opInvalidOp;
2663     } else {
2664       /* It takes omsb bits to represent the unsigned integer value.
2665          We lose a bit for the sign, but care is needed as the
2666          maximally negative integer is a special case.  */
2667       if (omsb == width &&
2668           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2669         return opInvalidOp;
2670 
2671       /* This case can happen because of rounding.  */
2672       if (omsb > width)
2673         return opInvalidOp;
2674     }
2675 
2676     APInt::tcNegate (parts.data(), dstPartsCount);
2677   } else {
2678     if (omsb >= width + !isSigned)
2679       return opInvalidOp;
2680   }
2681 
2682   if (lost_fraction == lfExactlyZero) {
2683     *isExact = true;
2684     return opOK;
2685   } else
2686     return opInexact;
2687 }
2688 
2689 /* Same as convertToSignExtendedInteger, except we provide
2690    deterministic values in case of an invalid operation exception,
2691    namely zero for NaNs and the minimal or maximal value respectively
2692    for underflow or overflow.
2693    The *isExact output tells whether the result is exact, in the sense
2694    that converting it back to the original floating point type produces
2695    the original value.  This is almost equivalent to result==opOK,
2696    except for negative zeroes.
2697 */
2698 IEEEFloat::opStatus
2699 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2700                             unsigned int width, bool isSigned,
2701                             roundingMode rounding_mode, bool *isExact) const {
2702   opStatus fs;
2703 
2704   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2705                                     isExact);
2706 
2707   if (fs == opInvalidOp) {
2708     unsigned int bits, dstPartsCount;
2709 
2710     dstPartsCount = partCountForBits(width);
2711     assert(dstPartsCount <= parts.size() && "Integer too big");
2712 
2713     if (category == fcNaN)
2714       bits = 0;
2715     else if (sign)
2716       bits = isSigned;
2717     else
2718       bits = width - isSigned;
2719 
2720     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2721     if (sign && isSigned)
2722       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2723   }
2724 
2725   return fs;
2726 }
2727 
2728 /* Convert an unsigned integer SRC to a floating point number,
2729    rounding according to ROUNDING_MODE.  The sign of the floating
2730    point number is not modified.  */
2731 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2732     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2733   unsigned int omsb, precision, dstCount;
2734   integerPart *dst;
2735   lostFraction lost_fraction;
2736 
2737   category = fcNormal;
2738   omsb = APInt::tcMSB(src, srcCount) + 1;
2739   dst = significandParts();
2740   dstCount = partCount();
2741   precision = semantics->precision;
2742 
2743   /* We want the most significant PRECISION bits of SRC.  There may not
2744      be that many; extract what we can.  */
2745   if (precision <= omsb) {
2746     exponent = omsb - 1;
2747     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2748                                                   omsb - precision);
2749     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2750   } else {
2751     exponent = precision - 1;
2752     lost_fraction = lfExactlyZero;
2753     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2754   }
2755 
2756   return normalize(rounding_mode, lost_fraction);
2757 }
2758 
2759 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2760                                                 roundingMode rounding_mode) {
2761   unsigned int partCount = Val.getNumWords();
2762   APInt api = Val;
2763 
2764   sign = false;
2765   if (isSigned && api.isNegative()) {
2766     sign = true;
2767     api = -api;
2768   }
2769 
2770   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2771 }
2772 
2773 /* Convert a two's complement integer SRC to a floating point number,
2774    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2775    integer is signed, in which case it must be sign-extended.  */
2776 IEEEFloat::opStatus
2777 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2778                                           unsigned int srcCount, bool isSigned,
2779                                           roundingMode rounding_mode) {
2780   opStatus status;
2781 
2782   if (isSigned &&
2783       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2784     integerPart *copy;
2785 
2786     /* If we're signed and negative negate a copy.  */
2787     sign = true;
2788     copy = new integerPart[srcCount];
2789     APInt::tcAssign(copy, src, srcCount);
2790     APInt::tcNegate(copy, srcCount);
2791     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2792     delete [] copy;
2793   } else {
2794     sign = false;
2795     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2796   }
2797 
2798   return status;
2799 }
2800 
2801 /* FIXME: should this just take a const APInt reference?  */
2802 IEEEFloat::opStatus
2803 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2804                                           unsigned int width, bool isSigned,
2805                                           roundingMode rounding_mode) {
2806   unsigned int partCount = partCountForBits(width);
2807   APInt api = APInt(width, ArrayRef(parts, partCount));
2808 
2809   sign = false;
2810   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2811     sign = true;
2812     api = -api;
2813   }
2814 
2815   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2816 }
2817 
2818 Expected<IEEEFloat::opStatus>
2819 IEEEFloat::convertFromHexadecimalString(StringRef s,
2820                                         roundingMode rounding_mode) {
2821   lostFraction lost_fraction = lfExactlyZero;
2822 
2823   category = fcNormal;
2824   zeroSignificand();
2825   exponent = 0;
2826 
2827   integerPart *significand = significandParts();
2828   unsigned partsCount = partCount();
2829   unsigned bitPos = partsCount * integerPartWidth;
2830   bool computedTrailingFraction = false;
2831 
2832   // Skip leading zeroes and any (hexa)decimal point.
2833   StringRef::iterator begin = s.begin();
2834   StringRef::iterator end = s.end();
2835   StringRef::iterator dot;
2836   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2837   if (!PtrOrErr)
2838     return PtrOrErr.takeError();
2839   StringRef::iterator p = *PtrOrErr;
2840   StringRef::iterator firstSignificantDigit = p;
2841 
2842   while (p != end) {
2843     integerPart hex_value;
2844 
2845     if (*p == '.') {
2846       if (dot != end)
2847         return createError("String contains multiple dots");
2848       dot = p++;
2849       continue;
2850     }
2851 
2852     hex_value = hexDigitValue(*p);
2853     if (hex_value == UINT_MAX)
2854       break;
2855 
2856     p++;
2857 
2858     // Store the number while we have space.
2859     if (bitPos) {
2860       bitPos -= 4;
2861       hex_value <<= bitPos % integerPartWidth;
2862       significand[bitPos / integerPartWidth] |= hex_value;
2863     } else if (!computedTrailingFraction) {
2864       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2865       if (!FractOrErr)
2866         return FractOrErr.takeError();
2867       lost_fraction = *FractOrErr;
2868       computedTrailingFraction = true;
2869     }
2870   }
2871 
2872   /* Hex floats require an exponent but not a hexadecimal point.  */
2873   if (p == end)
2874     return createError("Hex strings require an exponent");
2875   if (*p != 'p' && *p != 'P')
2876     return createError("Invalid character in significand");
2877   if (p == begin)
2878     return createError("Significand has no digits");
2879   if (dot != end && p - begin == 1)
2880     return createError("Significand has no digits");
2881 
2882   /* Ignore the exponent if we are zero.  */
2883   if (p != firstSignificantDigit) {
2884     int expAdjustment;
2885 
2886     /* Implicit hexadecimal point?  */
2887     if (dot == end)
2888       dot = p;
2889 
2890     /* Calculate the exponent adjustment implicit in the number of
2891        significant digits.  */
2892     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2893     if (expAdjustment < 0)
2894       expAdjustment++;
2895     expAdjustment = expAdjustment * 4 - 1;
2896 
2897     /* Adjust for writing the significand starting at the most
2898        significant nibble.  */
2899     expAdjustment += semantics->precision;
2900     expAdjustment -= partsCount * integerPartWidth;
2901 
2902     /* Adjust for the given exponent.  */
2903     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2904     if (!ExpOrErr)
2905       return ExpOrErr.takeError();
2906     exponent = *ExpOrErr;
2907   }
2908 
2909   return normalize(rounding_mode, lost_fraction);
2910 }
2911 
/* Set this value to the unsigned decimal significand DECSIGPARTS
   (SIGPARTCOUNT parts) scaled by 10^EXP, rounded according to
   ROUNDING_MODE, and return the status of the final normalize().

   The scaling is done as a multiplication or division by 5^|EXP| in a
   wider working precision, with EXP added to the binary exponent.  On
   each pass an upper bound on the accumulated error (in half-ulps) is
   compared with the distance of the intermediate result from a rounding
   boundary; if truncating might round incorrectly the working precision
   is doubled and the computation is repeated.  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  /* Scratch semantics for the intermediate values; only its precision
     field is changed below, once per loop iteration.  */
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  /* Initial working size, in parts; doubled on every retry.  */
  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    /* Working precision for this pass, and how many of its low bits lie
       beyond the precision of the destination format.  */
    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    /* Load both operands into the working format; a status other than
       opOK feeds the error bound computed below.  */
    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        /* truncatedBits keeps the unclamped count; excessPrecision is
           clamped to the working precision.  */
        truncatedBits = excessPrecision;
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    /* Error bound and distance from a rounding boundary, both in
       half-ulps.  */
    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed fewer bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      /* Classify the bits dropped by the extraction so normalize() can
         apply the caller's rounding mode.  */
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}
2996 
2997 Expected<IEEEFloat::opStatus>
2998 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2999   decimalInfo D;
3000   opStatus fs;
3001 
3002   /* Scan the text.  */
3003   StringRef::iterator p = str.begin();
3004   if (Error Err = interpretDecimal(p, str.end(), &D))
3005     return std::move(Err);
3006 
3007   /* Handle the quick cases.  First the case of no significant digits,
3008      i.e. zero, and then exponents that are obviously too large or too
3009      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
3010      definitely overflows if
3011 
3012            (exp - 1) * L >= maxExponent
3013 
3014      and definitely underflows to zero where
3015 
3016            (exp + 1) * L <= minExponent - precision
3017 
3018      With integer arithmetic the tightest bounds for L are
3019 
3020            93/28 < L < 196/59            [ numerator <= 256 ]
3021            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
3022   */
3023 
3024   // Test if we have a zero number allowing for strings with no null terminators
3025   // and zero decimals with non-zero exponents.
3026   //
3027   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3028   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3029   // be at most one dot. On the other hand, if we have a zero with a non-zero
3030   // exponent, then we know that D.firstSigDigit will be non-numeric.
3031   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3032     category = fcZero;
3033     fs = opOK;
3034     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3035       sign = false;
3036 
3037     /* Check whether the normalized exponent is high enough to overflow
3038        max during the log-rebasing in the max-exponent check below. */
3039   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3040     fs = handleOverflow(rounding_mode);
3041 
3042   /* If it wasn't, then it also wasn't high enough to overflow max
3043      during the log-rebasing in the min-exponent check.  Check that it
3044      won't overflow min in either check, then perform the min-exponent
3045      check. */
3046   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3047              (D.normalizedExponent + 1) * 28738 <=
3048                8651 * (semantics->minExponent - (int) semantics->precision)) {
3049     /* Underflow to zero and round.  */
3050     category = fcNormal;
3051     zeroSignificand();
3052     fs = normalize(rounding_mode, lfLessThanHalf);
3053 
3054   /* We can finally safely perform the max-exponent check. */
3055   } else if ((D.normalizedExponent - 1) * 42039
3056              >= 12655 * semantics->maxExponent) {
3057     /* Overflow and round.  */
3058     fs = handleOverflow(rounding_mode);
3059   } else {
3060     integerPart *decSignificand;
3061     unsigned int partCount;
3062 
3063     /* A tight upper bound on number of bits required to hold an
3064        N-digit decimal integer is N * 196 / 59.  Allocate enough space
3065        to hold the full significand, and an extra part required by
3066        tcMultiplyPart.  */
3067     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3068     partCount = partCountForBits(1 + 196 * partCount / 59);
3069     decSignificand = new integerPart[partCount + 1];
3070     partCount = 0;
3071 
3072     /* Convert to binary efficiently - we do almost all multiplication
3073        in an integerPart.  When this would overflow do we do a single
3074        bignum multiplication, and then revert again to multiplication
3075        in an integerPart.  */
3076     do {
3077       integerPart decValue, val, multiplier;
3078 
3079       val = 0;
3080       multiplier = 1;
3081 
3082       do {
3083         if (*p == '.') {
3084           p++;
3085           if (p == str.end()) {
3086             break;
3087           }
3088         }
3089         decValue = decDigitValue(*p++);
3090         if (decValue >= 10U) {
3091           delete[] decSignificand;
3092           return createError("Invalid character in significand");
3093         }
3094         multiplier *= 10;
3095         val = val * 10 + decValue;
3096         /* The maximum number that can be multiplied by ten with any
3097            digit added without overflowing an integerPart.  */
3098       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3099 
3100       /* Multiply out the current part.  */
3101       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3102                             partCount, partCount + 1, false);
3103 
3104       /* If we used another part (likely but not guaranteed), increase
3105          the count.  */
3106       if (decSignificand[partCount])
3107         partCount++;
3108     } while (p <= D.lastSigDigit);
3109 
3110     category = fcNormal;
3111     fs = roundSignificandWithExponent(decSignificand, partCount,
3112                                       D.exponent, rounding_mode);
3113 
3114     delete [] decSignificand;
3115   }
3116 
3117   return fs;
3118 }
3119 
3120 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3121   const size_t MIN_NAME_SIZE = 3;
3122 
3123   if (str.size() < MIN_NAME_SIZE)
3124     return false;
3125 
3126   if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
3127     makeInf(false);
3128     return true;
3129   }
3130 
3131   bool IsNegative = str.front() == '-';
3132   if (IsNegative) {
3133     str = str.drop_front();
3134     if (str.size() < MIN_NAME_SIZE)
3135       return false;
3136 
3137     if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
3138       makeInf(true);
3139       return true;
3140     }
3141   }
3142 
3143   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3144   bool IsSignaling = str.front() == 's' || str.front() == 'S';
3145   if (IsSignaling) {
3146     str = str.drop_front();
3147     if (str.size() < MIN_NAME_SIZE)
3148       return false;
3149   }
3150 
3151   if (str.starts_with("nan") || str.starts_with("NaN")) {
3152     str = str.drop_front(3);
3153 
3154     // A NaN without payload.
3155     if (str.empty()) {
3156       makeNaN(IsSignaling, IsNegative);
3157       return true;
3158     }
3159 
3160     // Allow the payload to be inside parentheses.
3161     if (str.front() == '(') {
3162       // Parentheses should be balanced (and not empty).
3163       if (str.size() <= 2 || str.back() != ')')
3164         return false;
3165 
3166       str = str.slice(1, str.size() - 1);
3167     }
3168 
3169     // Determine the payload number's radix.
3170     unsigned Radix = 10;
3171     if (str[0] == '0') {
3172       if (str.size() > 1 && tolower(str[1]) == 'x') {
3173         str = str.drop_front(2);
3174         Radix = 16;
3175       } else
3176         Radix = 8;
3177     }
3178 
3179     // Parse the payload and make the NaN.
3180     APInt Payload;
3181     if (!str.getAsInteger(Radix, Payload)) {
3182       makeNaN(IsSignaling, IsNegative, &Payload);
3183       return true;
3184     }
3185   }
3186 
3187   return false;
3188 }
3189 
3190 Expected<IEEEFloat::opStatus>
3191 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3192   if (str.empty())
3193     return createError("Invalid string length");
3194 
3195   // Handle special cases.
3196   if (convertFromStringSpecials(str))
3197     return opOK;
3198 
3199   /* Handle a leading minus sign.  */
3200   StringRef::iterator p = str.begin();
3201   size_t slen = str.size();
3202   sign = *p == '-' ? 1 : 0;
3203   if (*p == '-' || *p == '+') {
3204     p++;
3205     slen--;
3206     if (!slen)
3207       return createError("String has no digits");
3208   }
3209 
3210   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3211     if (slen == 2)
3212       return createError("Invalid string");
3213     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3214                                         rounding_mode);
3215   }
3216 
3217   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3218 }
3219 
3220 /* Write out a hexadecimal representation of the floating point value
3221    to DST, which must be of sufficient size, in the C99 form
3222    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
3223    excluding the terminating NUL.
3224 
3225    If UPPERCASE, the output is in upper case, otherwise in lower case.
3226 
3227    HEXDIGITS digits appear altogether, rounding the value if
3228    necessary.  If HEXDIGITS is 0, the minimal precision to display the
3229    number precisely is used instead.  If nothing would appear after
3230    the decimal point it is suppressed.
3231 
3232    The decimal exponent is always printed and has at least one digit.
3233    Zero values display an exponent of zero.  Infinities and NaNs
3234    appear as "infinity" or "nan" respectively.
3235 
3236    The above rules are as specified by C99.  There is ambiguity about
3237    what the leading hexadecimal digit should be.  This implementation
3238    uses whatever is necessary so that the exponent is displayed as
3239    stored.  This implies the exponent will fall within the IEEE format
3240    range, and the leading hexadecimal digit will be 0 (for denormals),
3241    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3242    any other digits zero).
3243 */
3244 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3245                                            bool upperCase,
3246                                            roundingMode rounding_mode) const {
3247   char *p;
3248 
3249   p = dst;
3250   if (sign)
3251     *dst++ = '-';
3252 
3253   switch (category) {
3254   case fcInfinity:
3255     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3256     dst += sizeof infinityL - 1;
3257     break;
3258 
3259   case fcNaN:
3260     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3261     dst += sizeof NaNU - 1;
3262     break;
3263 
3264   case fcZero:
3265     *dst++ = '0';
3266     *dst++ = upperCase ? 'X': 'x';
3267     *dst++ = '0';
3268     if (hexDigits > 1) {
3269       *dst++ = '.';
3270       memset (dst, '0', hexDigits - 1);
3271       dst += hexDigits - 1;
3272     }
3273     *dst++ = upperCase ? 'P': 'p';
3274     *dst++ = '0';
3275     break;
3276 
3277   case fcNormal:
3278     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3279     break;
3280   }
3281 
3282   *dst = 0;
3283 
3284   return static_cast<unsigned int>(dst - p);
3285 }
3286 
/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.

   Writes "0x" (or "0X"), the digits with an optional hexadecimal
   point, 'p' (or 'P') and the exponent starting at DST, and returns
   whatever writeSignedDecimal returns after writing the exponent.
   No sign is written here.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  /* Left shift applied to each part when it is read out in the loop
     below.  */
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  /* Number of integerPart-sized chunks needed to cover valueBits.  */
  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    /* Bring in the top bits of the next lower part.  */
    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Increment the written digits from the last one backwards,
       carrying for as long as a digit wraps around to '0'.
       Note that hexDigitChars has a trailing '0'.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;  /* Only one digit was written: no point, reclaim its slot.  */
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}
3392 
3393 hash_code hash_value(const IEEEFloat &Arg) {
3394   if (!Arg.isFiniteNonZero())
3395     return hash_combine((uint8_t)Arg.category,
3396                         // NaN has no sign, fix it at zero.
3397                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3398                         Arg.semantics->precision);
3399 
3400   // Normal floats need their exponent and significand hashed.
3401   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3402                       Arg.semantics->precision, Arg.exponent,
3403                       hash_combine_range(
3404                         Arg.significandParts(),
3405                         Arg.significandParts() + Arg.partCount()));
3406 }
3407 
3408 // Conversion from APFloat to/from host float/double.  It may eventually be
3409 // possible to eliminate these and have everybody deal with APFloats, but that
3410 // will take a while.  This approach will not easily extend to long double.
3411 // Current implementation requires integerPartWidth==64, which is correct at
3412 // the moment but could be made more general.
3413 
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3416 
3417 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3418   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3419   assert(partCount()==2);
3420 
3421   uint64_t myexponent, mysignificand;
3422 
3423   if (isFiniteNonZero()) {
3424     myexponent = exponent+16383; //bias
3425     mysignificand = significandParts()[0];
3426     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3427       myexponent = 0;   // denormal
3428   } else if (category==fcZero) {
3429     myexponent = 0;
3430     mysignificand = 0;
3431   } else if (category==fcInfinity) {
3432     myexponent = 0x7fff;
3433     mysignificand = 0x8000000000000000ULL;
3434   } else {
3435     assert(category == fcNaN && "Unknown category");
3436     myexponent = 0x7fff;
3437     mysignificand = significandParts()[0];
3438   }
3439 
3440   uint64_t words[2];
3441   words[0] = mysignificand;
3442   words[1] =  ((uint64_t)(sign & 1) << 15) |
3443               (myexponent & 0x7fffLL);
3444   return APInt(80, words);
3445 }
3446 
3447 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3448   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3449   assert(partCount()==2);
3450 
3451   uint64_t words[2];
3452   opStatus fs;
3453   bool losesInfo;
3454 
3455   // Convert number to double.  To avoid spurious underflows, we re-
3456   // normalize against the "double" minExponent first, and only *then*
3457   // truncate the mantissa.  The result of that second conversion
3458   // may be inexact, but should never underflow.
3459   // Declare fltSemantics before APFloat that uses it (and
3460   // saves pointer to it) to ensure correct destruction order.
3461   fltSemantics extendedSemantics = *semantics;
3462   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3463   IEEEFloat extended(*this);
3464   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3465   assert(fs == opOK && !losesInfo);
3466   (void)fs;
3467 
3468   IEEEFloat u(extended);
3469   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3470   assert(fs == opOK || fs == opInexact);
3471   (void)fs;
3472   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3473 
3474   // If conversion was exact or resulted in a special case, we're done;
3475   // just set the second double to zero.  Otherwise, re-convert back to
3476   // the extended format and compute the difference.  This now should
3477   // convert exactly to double.
3478   if (u.isFiniteNonZero() && losesInfo) {
3479     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3480     assert(fs == opOK && !losesInfo);
3481     (void)fs;
3482 
3483     IEEEFloat v(extended);
3484     v.subtract(u, rmNearestTiesToEven);
3485     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3486     assert(fs == opOK && !losesInfo);
3487     (void)fs;
3488     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3489   } else {
3490     words[1] = 0;
3491   }
3492 
3493   return APInt(128, words);
3494 }
3495 
3496 template <const fltSemantics &S>
3497 APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3498   assert(semantics == &S);
3499 
3500   constexpr int bias = -(S.minExponent - 1);
3501   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3502   constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3503   constexpr integerPart integer_bit =
3504       integerPart{1} << (trailing_significand_bits % integerPartWidth);
3505   constexpr uint64_t significand_mask = integer_bit - 1;
3506   constexpr unsigned int exponent_bits =
3507       S.sizeInBits - 1 - trailing_significand_bits;
3508   static_assert(exponent_bits < 64);
3509   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3510 
3511   uint64_t myexponent;
3512   std::array<integerPart, partCountForBits(trailing_significand_bits)>
3513       mysignificand;
3514 
3515   if (isFiniteNonZero()) {
3516     myexponent = exponent + bias;
3517     std::copy_n(significandParts(), mysignificand.size(),
3518                 mysignificand.begin());
3519     if (myexponent == 1 &&
3520         !(significandParts()[integer_bit_part] & integer_bit))
3521       myexponent = 0; // denormal
3522   } else if (category == fcZero) {
3523     myexponent = ::exponentZero(S) + bias;
3524     mysignificand.fill(0);
3525   } else if (category == fcInfinity) {
3526     if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
3527       llvm_unreachable("semantics don't support inf!");
3528     }
3529     myexponent = ::exponentInf(S) + bias;
3530     mysignificand.fill(0);
3531   } else {
3532     assert(category == fcNaN && "Unknown category!");
3533     myexponent = ::exponentNaN(S) + bias;
3534     std::copy_n(significandParts(), mysignificand.size(),
3535                 mysignificand.begin());
3536   }
3537   std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3538   auto words_iter =
3539       std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3540   if constexpr (significand_mask != 0) {
3541     // Clear the integer bit.
3542     words[mysignificand.size() - 1] &= significand_mask;
3543   }
3544   std::fill(words_iter, words.end(), uint64_t{0});
3545   constexpr size_t last_word = words.size() - 1;
3546   uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3547                           << ((S.sizeInBits - 1) % 64);
3548   words[last_word] |= shifted_sign;
3549   uint64_t shifted_exponent = (myexponent & exponent_mask)
3550                               << (trailing_significand_bits % 64);
3551   words[last_word] |= shifted_exponent;
3552   if constexpr (last_word == 0) {
3553     return APInt(S.sizeInBits, words[0]);
3554   }
3555   return APInt(S.sizeInBits, words);
3556 }
3557 
3558 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3559   assert(partCount() == 2);
3560   return convertIEEEFloatToAPInt<semIEEEquad>();
3561 }
3562 
3563 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3564   assert(partCount()==1);
3565   return convertIEEEFloatToAPInt<semIEEEdouble>();
3566 }
3567 
3568 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3569   assert(partCount()==1);
3570   return convertIEEEFloatToAPInt<semIEEEsingle>();
3571 }
3572 
3573 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3574   assert(partCount() == 1);
3575   return convertIEEEFloatToAPInt<semBFloat>();
3576 }
3577 
3578 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3579   assert(partCount()==1);
3580   return convertIEEEFloatToAPInt<semIEEEhalf>();
3581 }
3582 
3583 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3584   assert(partCount() == 1);
3585   return convertIEEEFloatToAPInt<semFloat8E5M2>();
3586 }
3587 
3588 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3589   assert(partCount() == 1);
3590   return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3591 }
3592 
3593 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3594   assert(partCount() == 1);
3595   return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3596 }
3597 
3598 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3599   assert(partCount() == 1);
3600   return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3601 }
3602 
3603 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3604   assert(partCount() == 1);
3605   return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3606 }
3607 
3608 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3609   assert(partCount() == 1);
3610   return convertIEEEFloatToAPInt<semFloatTF32>();
3611 }
3612 
3613 // This function creates an APInt that is just a bit map of the floating
3614 // point constant as it would appear in memory.  It is not a conversion,
3615 // and treating the result as a normal integer is unlikely to be useful.
3616 
3617 APInt IEEEFloat::bitcastToAPInt() const {
3618   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3619     return convertHalfAPFloatToAPInt();
3620 
3621   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3622     return convertBFloatAPFloatToAPInt();
3623 
3624   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3625     return convertFloatAPFloatToAPInt();
3626 
3627   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3628     return convertDoubleAPFloatToAPInt();
3629 
3630   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3631     return convertQuadrupleAPFloatToAPInt();
3632 
3633   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3634     return convertPPCDoubleDoubleAPFloatToAPInt();
3635 
3636   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3637     return convertFloat8E5M2APFloatToAPInt();
3638 
3639   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3640     return convertFloat8E5M2FNUZAPFloatToAPInt();
3641 
3642   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3643     return convertFloat8E4M3FNAPFloatToAPInt();
3644 
3645   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3646     return convertFloat8E4M3FNUZAPFloatToAPInt();
3647 
3648   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3649     return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3650 
3651   if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3652     return convertFloatTF32APFloatToAPInt();
3653 
3654   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3655          "unknown format!");
3656   return convertF80LongDoubleAPFloatToAPInt();
3657 }
3658 
3659 float IEEEFloat::convertToFloat() const {
3660   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3661          "Float semantics are not IEEEsingle");
3662   APInt api = bitcastToAPInt();
3663   return api.bitsToFloat();
3664 }
3665 
3666 double IEEEFloat::convertToDouble() const {
3667   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3668          "Float semantics are not IEEEdouble");
3669   APInt api = bitcastToAPInt();
3670   return api.bitsToDouble();
3671 }
3672 
3673 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3674 /// does not support these bit patterns:
3675 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3676 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3677 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3678 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3679 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3680 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3681   uint64_t i1 = api.getRawData()[0];
3682   uint64_t i2 = api.getRawData()[1];
3683   uint64_t myexponent = (i2 & 0x7fff);
3684   uint64_t mysignificand = i1;
3685   uint8_t myintegerbit = mysignificand >> 63;
3686 
3687   initialize(&semX87DoubleExtended);
3688   assert(partCount()==2);
3689 
3690   sign = static_cast<unsigned int>(i2>>15);
3691   if (myexponent == 0 && mysignificand == 0) {
3692     makeZero(sign);
3693   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3694     makeInf(sign);
3695   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3696              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3697     category = fcNaN;
3698     exponent = exponentNaN();
3699     significandParts()[0] = mysignificand;
3700     significandParts()[1] = 0;
3701   } else {
3702     category = fcNormal;
3703     exponent = myexponent - 16383;
3704     significandParts()[0] = mysignificand;
3705     significandParts()[1] = 0;
3706     if (myexponent==0)          // denormal
3707       exponent = -16382;
3708   }
3709 }
3710 
3711 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3712   uint64_t i1 = api.getRawData()[0];
3713   uint64_t i2 = api.getRawData()[1];
3714   opStatus fs;
3715   bool losesInfo;
3716 
3717   // Get the first double and convert to our format.
3718   initFromDoubleAPInt(APInt(64, i1));
3719   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3720   assert(fs == opOK && !losesInfo);
3721   (void)fs;
3722 
3723   // Unless we have a special case, add in second double.
3724   if (isFiniteNonZero()) {
3725     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3726     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3727     assert(fs == opOK && !losesInfo);
3728     (void)fs;
3729 
3730     add(v, rmNearestTiesToEven);
3731   }
3732 }
3733 
3734 template <const fltSemantics &S>
3735 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3736   assert(api.getBitWidth() == S.sizeInBits);
3737   constexpr integerPart integer_bit = integerPart{1}
3738                                       << ((S.precision - 1) % integerPartWidth);
3739   constexpr uint64_t significand_mask = integer_bit - 1;
3740   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3741   constexpr unsigned int stored_significand_parts =
3742       partCountForBits(trailing_significand_bits);
3743   constexpr unsigned int exponent_bits =
3744       S.sizeInBits - 1 - trailing_significand_bits;
3745   static_assert(exponent_bits < 64);
3746   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3747   constexpr int bias = -(S.minExponent - 1);
3748 
3749   // Copy the bits of the significand. We need to clear out the exponent and
3750   // sign bit in the last word.
3751   std::array<integerPart, stored_significand_parts> mysignificand;
3752   std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3753   if constexpr (significand_mask != 0) {
3754     mysignificand[mysignificand.size() - 1] &= significand_mask;
3755   }
3756 
3757   // We assume the last word holds the sign bit, the exponent, and potentially
3758   // some of the trailing significand field.
3759   uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3760   uint64_t myexponent =
3761       (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3762 
3763   initialize(&S);
3764   assert(partCount() == mysignificand.size());
3765 
3766   sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3767 
3768   bool all_zero_significand =
3769       llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3770 
3771   bool is_zero = myexponent == 0 && all_zero_significand;
3772 
3773   if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3774     if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3775       makeInf(sign);
3776       return;
3777     }
3778   }
3779 
3780   bool is_nan = false;
3781 
3782   if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3783     is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3784   } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3785     bool all_ones_significand =
3786         std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3787                     [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3788         (!significand_mask ||
3789          mysignificand[mysignificand.size() - 1] == significand_mask);
3790     is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3791   } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3792     is_nan = is_zero && sign;
3793   }
3794 
3795   if (is_nan) {
3796     category = fcNaN;
3797     exponent = ::exponentNaN(S);
3798     std::copy_n(mysignificand.begin(), mysignificand.size(),
3799                 significandParts());
3800     return;
3801   }
3802 
3803   if (is_zero) {
3804     makeZero(sign);
3805     return;
3806   }
3807 
3808   category = fcNormal;
3809   exponent = myexponent - bias;
3810   std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3811   if (myexponent == 0) // denormal
3812     exponent = S.minExponent;
3813   else
3814     significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3815 }
3816 
3817 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3818   initFromIEEEAPInt<semIEEEquad>(api);
3819 }
3820 
3821 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3822   initFromIEEEAPInt<semIEEEdouble>(api);
3823 }
3824 
3825 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3826   initFromIEEEAPInt<semIEEEsingle>(api);
3827 }
3828 
3829 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3830   initFromIEEEAPInt<semBFloat>(api);
3831 }
3832 
3833 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3834   initFromIEEEAPInt<semIEEEhalf>(api);
3835 }
3836 
3837 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3838   initFromIEEEAPInt<semFloat8E5M2>(api);
3839 }
3840 
3841 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3842   initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
3843 }
3844 
3845 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3846   initFromIEEEAPInt<semFloat8E4M3FN>(api);
3847 }
3848 
3849 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3850   initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
3851 }
3852 
3853 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3854   initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
3855 }
3856 
3857 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3858   initFromIEEEAPInt<semFloatTF32>(api);
3859 }
3860 
3861 /// Treat api as containing the bits of a floating point number.
3862 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3863   assert(api.getBitWidth() == Sem->sizeInBits);
3864   if (Sem == &semIEEEhalf)
3865     return initFromHalfAPInt(api);
3866   if (Sem == &semBFloat)
3867     return initFromBFloatAPInt(api);
3868   if (Sem == &semIEEEsingle)
3869     return initFromFloatAPInt(api);
3870   if (Sem == &semIEEEdouble)
3871     return initFromDoubleAPInt(api);
3872   if (Sem == &semX87DoubleExtended)
3873     return initFromF80LongDoubleAPInt(api);
3874   if (Sem == &semIEEEquad)
3875     return initFromQuadrupleAPInt(api);
3876   if (Sem == &semPPCDoubleDoubleLegacy)
3877     return initFromPPCDoubleDoubleAPInt(api);
3878   if (Sem == &semFloat8E5M2)
3879     return initFromFloat8E5M2APInt(api);
3880   if (Sem == &semFloat8E5M2FNUZ)
3881     return initFromFloat8E5M2FNUZAPInt(api);
3882   if (Sem == &semFloat8E4M3FN)
3883     return initFromFloat8E4M3FNAPInt(api);
3884   if (Sem == &semFloat8E4M3FNUZ)
3885     return initFromFloat8E4M3FNUZAPInt(api);
3886   if (Sem == &semFloat8E4M3B11FNUZ)
3887     return initFromFloat8E4M3B11FNUZAPInt(api);
3888   if (Sem == &semFloatTF32)
3889     return initFromFloatTF32APInt(api);
3890 
3891   llvm_unreachable(nullptr);
3892 }
3893 
3894 /// Make this number the largest magnitude normal number in the given
3895 /// semantics.
3896 void IEEEFloat::makeLargest(bool Negative) {
3897   // We want (in interchange format):
3898   //   sign = {Negative}
3899   //   exponent = 1..10
3900   //   significand = 1..1
3901   category = fcNormal;
3902   sign = Negative;
3903   exponent = semantics->maxExponent;
3904 
3905   // Use memset to set all but the highest integerPart to all ones.
3906   integerPart *significand = significandParts();
3907   unsigned PartCount = partCount();
3908   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3909 
3910   // Set the high integerPart especially setting all unused top bits for
3911   // internal consistency.
3912   const unsigned NumUnusedHighBits =
3913     PartCount*integerPartWidth - semantics->precision;
3914   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3915                                    ? (~integerPart(0) >> NumUnusedHighBits)
3916                                    : 0;
3917 
3918   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3919       semantics->nanEncoding == fltNanEncoding::AllOnes)
3920     significand[0] &= ~integerPart(1);
3921 }
3922 
3923 /// Make this number the smallest magnitude denormal number in the given
3924 /// semantics.
3925 void IEEEFloat::makeSmallest(bool Negative) {
3926   // We want (in interchange format):
3927   //   sign = {Negative}
3928   //   exponent = 0..0
3929   //   significand = 0..01
3930   category = fcNormal;
3931   sign = Negative;
3932   exponent = semantics->minExponent;
3933   APInt::tcSet(significandParts(), 1, partCount());
3934 }
3935 
3936 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3937   // We want (in interchange format):
3938   //   sign = {Negative}
3939   //   exponent = 0..0
3940   //   significand = 10..0
3941 
3942   category = fcNormal;
3943   zeroSignificand();
3944   sign = Negative;
3945   exponent = semantics->minExponent;
3946   APInt::tcSetBit(significandParts(), semantics->precision - 1);
3947 }
3948 
3949 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3950   initFromAPInt(&Sem, API);
3951 }
3952 
3953 IEEEFloat::IEEEFloat(float f) {
3954   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3955 }
3956 
3957 IEEEFloat::IEEEFloat(double d) {
3958   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3959 }
3960 
3961 namespace {
3962   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3963     Buffer.append(Str.begin(), Str.end());
3964   }
3965 
3966   /// Removes data from the given significand until it is no more
3967   /// precise than is required for the desired precision.
3968   void AdjustToPrecision(APInt &significand,
3969                          int &exp, unsigned FormatPrecision) {
3970     unsigned bits = significand.getActiveBits();
3971 
3972     // 196/59 is a very slight overestimate of lg_2(10).
3973     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3974 
3975     if (bits <= bitsRequired) return;
3976 
3977     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3978     if (!tensRemovable) return;
3979 
3980     exp += tensRemovable;
3981 
3982     APInt divisor(significand.getBitWidth(), 1);
3983     APInt powten(significand.getBitWidth(), 10);
3984     while (true) {
3985       if (tensRemovable & 1)
3986         divisor *= powten;
3987       tensRemovable >>= 1;
3988       if (!tensRemovable) break;
3989       powten *= powten;
3990     }
3991 
3992     significand = significand.udiv(divisor);
3993 
3994     // Truncate the significand down to its active bit count.
3995     significand = significand.trunc(significand.getActiveBits());
3996   }
3997 
3998 
3999   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4000                          int &exp, unsigned FormatPrecision) {
4001     unsigned N = buffer.size();
4002     if (N <= FormatPrecision) return;
4003 
4004     // The most significant figures are the last ones in the buffer.
4005     unsigned FirstSignificant = N - FormatPrecision;
4006 
4007     // Round.
4008     // FIXME: this probably shouldn't use 'round half up'.
4009 
4010     // Rounding down is just a truncation, except we also want to drop
4011     // trailing zeros from the new result.
4012     if (buffer[FirstSignificant - 1] < '5') {
4013       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4014         FirstSignificant++;
4015 
4016       exp += FirstSignificant;
4017       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4018       return;
4019     }
4020 
4021     // Rounding up requires a decimal add-with-carry.  If we continue
4022     // the carry, the newly-introduced zeros will just be truncated.
4023     for (unsigned I = FirstSignificant; I != N; ++I) {
4024       if (buffer[I] == '9') {
4025         FirstSignificant++;
4026       } else {
4027         buffer[I]++;
4028         break;
4029       }
4030     }
4031 
4032     // If we carried through, we have exactly one digit of precision.
4033     if (FirstSignificant == N) {
4034       exp += FirstSignificant;
4035       buffer.clear();
4036       buffer.push_back('1');
4037       return;
4038     }
4039 
4040     exp += FirstSignificant;
4041     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4042   }
4043 } // namespace
4044 
/// Append a decimal rendering of this value to \p Str.
///
/// Infinities are written as "+Inf"/"-Inf" and NaNs as "NaN".  For all other
/// values a leading '-' is written when the value is negative.
///
/// \param Str               Buffer the characters are appended to.
/// \param FormatPrecision   Maximum number of significant decimal digits.
///                          Zero selects 2 + precision * 59 / 196 digits.
/// \param FormatMaxPadding  Zero forces scientific notation.  Otherwise it
///                          bounds how many padding zeros positional
///                          notation may introduce before scientific
///                          notation is used instead.
/// \param TruncateZero      When true, scientific output uses 'E' and is not
///                          zero-padded.  When false, it uses 'e', pads the
///                          fraction with zeros up to FormatPrecision and
///                          prints at least two exponent digits.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    // Zero is printed in scientific form only when padding is disabled.
    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  if (isNegative())
    Str.push_back('-');

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  // Set FormatPrecision if zero.  We want to do this before we
  // truncate trailing zeros, as those are part of the precision.
  if (!FormatPrecision) {
    // We use enough digits so the number can be round-tripped back to an
    // APFloat. The formula comes from "How to Print Floating-Point Numbers
    // Accurately" by Steele and White.
    // FIXME: Using a formula based purely on the precision is conservative;
    // we can print fewer digits depending on the actual value being printed.

    // FormatPrecision = 2 + floor(significandBits / lg_2(10))
    FormatPrecision = 2 + semantics->precision * 59 / 196;
  }

  // Ignore trailing binary zeros.
  int trailingZeros = significand.countr_zero();
  exp += trailingZeros;
  significand.lshrInPlace(trailingZeros);

  // Change the exponent from 2^e to 10^e.
  if (exp == 0) {
    // Nothing to do.
  } else if (exp > 0) {
    // Just shift left.
    significand = significand.zext(semantics->precision + exp);
    significand <<= exp;
    exp = 0;
  } else { /* exp < 0 */
    int texp = -exp;

    // We transform this using the identity:
    //   (N)(2^-e) == (N)(5^e)(10^-e)
    // This means we have to multiply N (the significand) by 5^e.
    // To avoid overflow, we have to operate on numbers large
    // enough to store N * 5^e:
    //   log2(N * 5^e) == log2(N) + e * log2(5)
    //                 <= semantics->precision + e * 137 / 59
    //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

    unsigned precision = semantics->precision + (137 * texp + 136) / 59;

    // Multiply significand by 5^e.
    //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
    significand = significand.zext(precision);
    APInt five_to_the_i(precision, 5);
    while (true) {
      if (texp & 1) significand *= five_to_the_i;

      texp >>= 1;
      if (!texp) break;
      five_to_the_i *= five_to_the_i;
    }
  }

  // Coarse reduction on the integer before any digits are extracted.
  AdjustToPrecision(significand, exp, FormatPrecision);

  // Decimal digits, least significant first.
  SmallVector<char, 256> buffer;

  // Fill the buffer.
  unsigned precision = significand.getBitWidth();
  if (precision < 4) {
    // We need enough precision to store the value 10.
    precision = 4;
    significand = significand.zext(precision);
  }
  APInt ten(precision, 10);
  APInt digit(precision, 0);

  bool inTrail = true;
  while (significand != 0) {
    // digit <- significand % 10
    // significand <- significand / 10
    APInt::udivrem(significand, ten, significand, digit);

    unsigned d = digit.getZExtValue();

    // Drop trailing zeros.
    if (inTrail && !d) exp++;
    else {
      buffer.push_back((char) ('0' + d));
      inTrail = false;
    }
  }

  assert(!buffer.empty() && "no characters in buffer!");

  // Drop down to FormatPrecision.
  // TODO: don't do more precise calculations above than are required.
  AdjustToPrecision(buffer, exp, FormatPrecision);

  unsigned NDigits = buffer.size();

  // Check whether we should use scientific notation.
  bool FormatScientific;
  if (!FormatMaxPadding)
    FormatScientific = true;
  else {
    if (exp >= 0) {
      // 765e3 --> 765000
      //              ^^^
      // But we shouldn't make the number look more precise than it is.
      FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                          NDigits + (unsigned) exp > FormatPrecision);
    } else {
      // Power of the most significant digit.
      int MSD = exp + (int) (NDigits - 1);
      if (MSD >= 0) {
        // 765e-2 == 7.65
        FormatScientific = false;
      } else {
        // 765e-5 == 0.00765
        //           ^ ^^
        FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
      }
    }
  }

  // Scientific formatting is pretty straightforward.
  if (FormatScientific) {
    // exp now refers to the most significant digit.
    exp += (NDigits - 1);

    // The buffer is least-significant-first, so emit it from the back.
    Str.push_back(buffer[NDigits-1]);
    Str.push_back('.');
    if (NDigits == 1 && TruncateZero)
      Str.push_back('0');
    else
      for (unsigned I = 1; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
    // Fill with zeros up to FormatPrecision.
    if (!TruncateZero && FormatPrecision > NDigits - 1)
      Str.append(FormatPrecision - NDigits + 1, '0');
    // For !TruncateZero we use lower 'e'.
    Str.push_back(TruncateZero ? 'E' : 'e');

    Str.push_back(exp >= 0 ? '+' : '-');
    if (exp < 0) exp = -exp;
    // Exponent digits are produced least-significant-first, then reversed.
    SmallVector<char, 6> expbuf;
    do {
      expbuf.push_back((char) ('0' + (exp % 10)));
      exp /= 10;
    } while (exp);
    // Exponent always at least two digits if we do not truncate zeros.
    if (!TruncateZero && expbuf.size() < 2)
      expbuf.push_back('0');
    for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
      Str.push_back(expbuf[E-1-I]);
    return;
  }

  // Non-scientific, positive exponents.
  if (exp >= 0) {
    for (unsigned I = 0; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-1-I]);
    for (unsigned I = 0; I != (unsigned) exp; ++I)
      Str.push_back('0');
    return;
  }

  // Non-scientific, negative exponents.

  // The number of digits to the left of the decimal point.
  int NWholeDigits = exp + (int) NDigits;

  unsigned I = 0;
  if (NWholeDigits > 0) {
    for (; I != (unsigned) NWholeDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);
    Str.push_back('.');
  } else {
    // No whole digits: write "0." followed by -NWholeDigits zeros.
    unsigned NZeros = 1 + (unsigned) -NWholeDigits;

    Str.push_back('0');
    Str.push_back('.');
    for (unsigned Z = 1; Z != NZeros; ++Z)
      Str.push_back('0');
  }

  // Remaining digits form the fractional part.
  for (; I != NDigits; ++I)
    Str.push_back(buffer[NDigits-I-1]);
}
4265 
4266 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4267   // Special floats and denormals have no exact inverse.
4268   if (!isFiniteNonZero())
4269     return false;
4270 
4271   // Check that the number is a power of two by making sure that only the
4272   // integer bit is set in the significand.
4273   if (significandLSB() != semantics->precision - 1)
4274     return false;
4275 
4276   // Get the inverse.
4277   IEEEFloat reciprocal(*semantics, 1ULL);
4278   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4279     return false;
4280 
4281   // Avoid multiplication with a denormal, it is not safe on all platforms and
4282   // may be slower than a normal division.
4283   if (reciprocal.isDenormal())
4284     return false;
4285 
4286   assert(reciprocal.isFiniteNonZero() &&
4287          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4288 
4289   if (inv)
4290     *inv = APFloat(reciprocal, *semantics);
4291 
4292   return true;
4293 }
4294 
4295 int IEEEFloat::getExactLog2Abs() const {
4296   if (!isFinite() || isZero())
4297     return INT_MIN;
4298 
4299   const integerPart *Parts = significandParts();
4300   const int PartCount = partCountForBits(semantics->precision);
4301 
4302   int PopCount = 0;
4303   for (int i = 0; i < PartCount; ++i) {
4304     PopCount += llvm::popcount(Parts[i]);
4305     if (PopCount > 1)
4306       return INT_MIN;
4307   }
4308 
4309   if (exponent != semantics->minExponent)
4310     return exponent;
4311 
4312   int CountrParts = 0;
4313   for (int i = 0; i < PartCount;
4314        ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4315     if (Parts[i] != 0) {
4316       return exponent - semantics->precision + CountrParts +
4317              llvm::countr_zero(Parts[i]) + 1;
4318     }
4319   }
4320 
4321   llvm_unreachable("didn't find the set bit");
4322 }
4323 
4324 bool IEEEFloat::isSignaling() const {
4325   if (!isNaN())
4326     return false;
4327   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4328     return false;
4329 
4330   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4331   // first bit of the trailing significand being 0.
4332   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4333 }
4334 
/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
///
/// Replaces this value in place with its neighbour.
///
/// \param nextDown when true compute nextDown, otherwise nextUp.
/// \returns opInvalidOp when the input is a signaling NaN, opOK otherwise.
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Formats whose NaN encoding is NegativeZero get a positive zero here
      // instead of -0.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}
4462 
4463 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4464   return ::exponentNaN(*semantics);
4465 }
4466 
4467 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4468   return ::exponentInf(*semantics);
4469 }
4470 
4471 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4472   return ::exponentZero(*semantics);
4473 }
4474 
4475 void IEEEFloat::makeInf(bool Negative) {
4476   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4477     // There is no Inf, so make NaN instead.
4478     makeNaN(false, Negative);
4479     return;
4480   }
4481   category = fcInfinity;
4482   sign = Negative;
4483   exponent = exponentInf();
4484   APInt::tcSet(significandParts(), 0, partCount());
4485 }
4486 
4487 void IEEEFloat::makeZero(bool Negative) {
4488   category = fcZero;
4489   sign = Negative;
4490   if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4491     // Merge negative zero to positive because 0b10000...000 is used for NaN
4492     sign = false;
4493   }
4494   exponent = exponentZero();
4495   APInt::tcSet(significandParts(), 0, partCount());
4496 }
4497 
4498 void IEEEFloat::makeQuiet() {
4499   assert(isNaN());
4500   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4501     APInt::tcSetBit(significandParts(), semantics->precision - 2);
4502 }
4503 
4504 int ilogb(const IEEEFloat &Arg) {
4505   if (Arg.isNaN())
4506     return IEEEFloat::IEK_NaN;
4507   if (Arg.isZero())
4508     return IEEEFloat::IEK_Zero;
4509   if (Arg.isInfinity())
4510     return IEEEFloat::IEK_Inf;
4511   if (!Arg.isDenormal())
4512     return Arg.exponent;
4513 
4514   IEEEFloat Normalized(Arg);
4515   int SignificandBits = Arg.getSemantics().precision - 1;
4516 
4517   Normalized.exponent += SignificandBits;
4518   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4519   return Normalized.exponent - SignificandBits;
4520 }
4521 
4522 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4523   auto MaxExp = X.getSemantics().maxExponent;
4524   auto MinExp = X.getSemantics().minExponent;
4525 
4526   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4527   // overflow; clamp it to a safe range before adding, but ensure that the range
4528   // is large enough that the clamp does not change the result. The range we
4529   // need to support is the difference between the largest possible exponent and
4530   // the normalized exponent of half the smallest denormal.
4531 
4532   int SignificandBits = X.getSemantics().precision - 1;
4533   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4534 
4535   // Clamp to one past the range ends to let normalize handle overlflow.
4536   X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4537   X.normalize(RoundingMode, lfExactlyZero);
4538   if (X.isNaN())
4539     X.makeQuiet();
4540   return X;
4541 }
4542 
4543 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4544   Exp = ilogb(Val);
4545 
4546   // Quiet signalling nans.
4547   if (Exp == IEEEFloat::IEK_NaN) {
4548     IEEEFloat Quiet(Val);
4549     Quiet.makeQuiet();
4550     return Quiet;
4551   }
4552 
4553   if (Exp == IEEEFloat::IEK_Inf)
4554     return Val;
4555 
4556   // 1 is added because frexp is defined to return a normalized fraction in
4557   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4558   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4559   return scalbn(Val, -Exp, RM);
4560 }
4561 
4562 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4563     : Semantics(&S),
4564       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4565   assert(Semantics == &semPPCDoubleDouble);
4566 }
4567 
4568 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4569     : Semantics(&S),
4570       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4571                             APFloat(semIEEEdouble, uninitialized)}) {
4572   assert(Semantics == &semPPCDoubleDouble);
4573 }
4574 
4575 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4576     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4577                                            APFloat(semIEEEdouble)}) {
4578   assert(Semantics == &semPPCDoubleDouble);
4579 }
4580 
4581 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4582     : Semantics(&S),
4583       Floats(new APFloat[2]{
4584           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4585           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4586   assert(Semantics == &semPPCDoubleDouble);
4587 }
4588 
4589 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4590                              APFloat &&Second)
4591     : Semantics(&S),
4592       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4593   assert(Semantics == &semPPCDoubleDouble);
4594   assert(&Floats[0].getSemantics() == &semIEEEdouble);
4595   assert(&Floats[1].getSemantics() == &semIEEEdouble);
4596 }
4597 
4598 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4599     : Semantics(RHS.Semantics),
4600       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4601                                          APFloat(RHS.Floats[1])}
4602                         : nullptr) {
4603   assert(Semantics == &semPPCDoubleDouble);
4604 }
4605 
4606 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4607     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4608   RHS.Semantics = &semBogus;
4609   assert(Semantics == &semPPCDoubleDouble);
4610 }
4611 
4612 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4613   if (Semantics == RHS.Semantics && RHS.Floats) {
4614     Floats[0] = RHS.Floats[0];
4615     Floats[1] = RHS.Floats[1];
4616   } else if (this != &RHS) {
4617     this->~DoubleAPFloat();
4618     new (this) DoubleAPFloat(RHS);
4619   }
4620   return *this;
4621 }
4622 
// Implement addition, subtraction, multiplication and division based on:
// "Software for Doubled-Precision Floating-Point Computations",
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl computes (a, aa) + (c, cc) and stores the sum into Floats[0] and
// Floats[1] of this object. The status bits of the individual IEEE double
// operations are OR-ed together and returned.
APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
                                         const APFloat &c, const APFloat &cc,
                                         roundingMode RM) {
  int Status = opOK;
  // z = a + c
  APFloat z = a;
  Status |= z.add(c, RM);
  if (!z.isFinite()) {
    // Not finite and not infinity: store z as the result with a zero second
    // component.
    if (!z.isInfinity()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // a + c came out infinite. Discard the status and redo the sum starting
    // from the second components.
    Status = opOK;
    auto AComparedToC = a.compareAbsoluteValue(c);
    z = cc;
    Status |= z.add(aa, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // z = cc + aa + c + a;
      Status |= z.add(c, RM);
      Status |= z.add(a, RM);
    } else {
      // z = cc + aa + a + c;
      Status |= z.add(a, RM);
      Status |= z.add(c, RM);
    }
    if (!z.isFinite()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    Floats[0] = z;
    APFloat zz = aa;
    Status |= zz.add(cc, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // Floats[1] = a - z + c + zz;
      Floats[1] = a;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(c, RM);
      Status |= Floats[1].add(zz, RM);
    } else {
      // Floats[1] = c - z + a + zz;
      Floats[1] = c;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(a, RM);
      Status |= Floats[1].add(zz, RM);
    }
  } else {
    // q = a - z;
    APFloat q = a;
    Status |= q.subtract(z, RM);

    // zz = q + c + (a - (q + z)) + aa + cc;
    // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
    auto zz = q;
    Status |= zz.add(c, RM);
    Status |= q.add(z, RM);
    Status |= q.subtract(a, RM);
    q.changeSign();
    Status |= zz.add(q, RM);
    Status |= zz.add(aa, RM);
    Status |= zz.add(cc, RM);
    // A +0 correction term leaves z as the whole result.
    // NOTE(review): this path returns opOK and drops the status bits
    // accumulated above -- confirm that is intended.
    if (zz.isZero() && !zz.isNegative()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return opOK;
    }
    Floats[0] = z;
    Status |= Floats[0].add(zz, RM);
    if (!Floats[0].isFinite()) {
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // Floats[1] = z - Floats[0] + zz;
    Floats[1] = std::move(z);
    Status |= Floats[1].subtract(Floats[0], RM);
    Status |= Floats[1].add(zz, RM);
  }
  return (opStatus)Status;
}
4704 
4705 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4706                                                 const DoubleAPFloat &RHS,
4707                                                 DoubleAPFloat &Out,
4708                                                 roundingMode RM) {
4709   if (LHS.getCategory() == fcNaN) {
4710     Out = LHS;
4711     return opOK;
4712   }
4713   if (RHS.getCategory() == fcNaN) {
4714     Out = RHS;
4715     return opOK;
4716   }
4717   if (LHS.getCategory() == fcZero) {
4718     Out = RHS;
4719     return opOK;
4720   }
4721   if (RHS.getCategory() == fcZero) {
4722     Out = LHS;
4723     return opOK;
4724   }
4725   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4726       LHS.isNegative() != RHS.isNegative()) {
4727     Out.makeNaN(false, Out.isNegative(), nullptr);
4728     return opInvalidOp;
4729   }
4730   if (LHS.getCategory() == fcInfinity) {
4731     Out = LHS;
4732     return opOK;
4733   }
4734   if (RHS.getCategory() == fcInfinity) {
4735     Out = RHS;
4736     return opOK;
4737   }
4738   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4739 
4740   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4741       CC(RHS.Floats[1]);
4742   assert(&A.getSemantics() == &semIEEEdouble);
4743   assert(&AA.getSemantics() == &semIEEEdouble);
4744   assert(&C.getSemantics() == &semIEEEdouble);
4745   assert(&CC.getSemantics() == &semIEEEdouble);
4746   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4747   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4748   return Out.addImpl(A, AA, C, CC, RM);
4749 }
4750 
4751 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4752                                      roundingMode RM) {
4753   return addWithSpecial(*this, RHS, *this, RM);
4754 }
4755 
4756 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4757                                           roundingMode RM) {
4758   changeSign();
4759   auto Ret = add(RHS, RM);
4760   changeSign();
4761   return Ret;
4762 }
4763 
4764 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4765                                           APFloat::roundingMode RM) {
4766   const auto &LHS = *this;
4767   auto &Out = *this;
4768   /* Interesting observation: For special categories, finding the lowest
4769      common ancestor of the following layered graph gives the correct
4770      return category:
4771 
4772         NaN
4773        /   \
4774      Zero  Inf
4775        \   /
4776        Normal
4777 
4778      e.g. NaN * NaN = NaN
4779           Zero * Inf = NaN
4780           Normal * Zero = Zero
4781           Normal * Inf = Inf
4782   */
4783   if (LHS.getCategory() == fcNaN) {
4784     Out = LHS;
4785     return opOK;
4786   }
4787   if (RHS.getCategory() == fcNaN) {
4788     Out = RHS;
4789     return opOK;
4790   }
4791   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4792       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4793     Out.makeNaN(false, false, nullptr);
4794     return opOK;
4795   }
4796   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4797     Out = LHS;
4798     return opOK;
4799   }
4800   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4801     Out = RHS;
4802     return opOK;
4803   }
4804   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4805          "Special cases not handled exhaustively");
4806 
4807   int Status = opOK;
4808   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4809   // t = a * c
4810   APFloat T = A;
4811   Status |= T.multiply(C, RM);
4812   if (!T.isFiniteNonZero()) {
4813     Floats[0] = T;
4814     Floats[1].makeZero(/* Neg = */ false);
4815     return (opStatus)Status;
4816   }
4817 
4818   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4819   APFloat Tau = A;
4820   T.changeSign();
4821   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4822   T.changeSign();
4823   {
4824     // v = a * d
4825     APFloat V = A;
4826     Status |= V.multiply(D, RM);
4827     // w = b * c
4828     APFloat W = B;
4829     Status |= W.multiply(C, RM);
4830     Status |= V.add(W, RM);
4831     // tau += v + w
4832     Status |= Tau.add(V, RM);
4833   }
4834   // u = t + tau
4835   APFloat U = T;
4836   Status |= U.add(Tau, RM);
4837 
4838   Floats[0] = U;
4839   if (!U.isFinite()) {
4840     Floats[1].makeZero(/* Neg = */ false);
4841   } else {
4842     // Floats[1] = (t - u) + tau
4843     Status |= T.subtract(U, RM);
4844     Status |= T.add(Tau, RM);
4845     Floats[1] = T;
4846   }
4847   return (opStatus)Status;
4848 }
4849 
4850 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4851                                         APFloat::roundingMode RM) {
4852   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4853   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4854   auto Ret =
4855       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4856   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4857   return Ret;
4858 }
4859 
4860 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4861   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4862   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4863   auto Ret =
4864       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4865   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4866   return Ret;
4867 }
4868 
4869 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4870   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4871   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4872   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4873   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4874   return Ret;
4875 }
4876 
4877 APFloat::opStatus
4878 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4879                                 const DoubleAPFloat &Addend,
4880                                 APFloat::roundingMode RM) {
4881   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4882   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4883   auto Ret = Tmp.fusedMultiplyAdd(
4884       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4885       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4886   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4887   return Ret;
4888 }
4889 
4890 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4891   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4892   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4893   auto Ret = Tmp.roundToIntegral(RM);
4894   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4895   return Ret;
4896 }
4897 
4898 void DoubleAPFloat::changeSign() {
4899   Floats[0].changeSign();
4900   Floats[1].changeSign();
4901 }
4902 
4903 APFloat::cmpResult
4904 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4905   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4906   if (Result != cmpEqual)
4907     return Result;
4908   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4909   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4910     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4911     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4912     if (Against && !RHSAgainst)
4913       return cmpLessThan;
4914     if (!Against && RHSAgainst)
4915       return cmpGreaterThan;
4916     if (!Against && !RHSAgainst)
4917       return Result;
4918     if (Against && RHSAgainst)
4919       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4920   }
4921   return Result;
4922 }
4923 
4924 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4925   return Floats[0].getCategory();
4926 }
4927 
4928 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4929 
4930 void DoubleAPFloat::makeInf(bool Neg) {
4931   Floats[0].makeInf(Neg);
4932   Floats[1].makeZero(/* Neg = */ false);
4933 }
4934 
4935 void DoubleAPFloat::makeZero(bool Neg) {
4936   Floats[0].makeZero(Neg);
4937   Floats[1].makeZero(/* Neg = */ false);
4938 }
4939 
4940 void DoubleAPFloat::makeLargest(bool Neg) {
4941   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4942   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4943   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4944   if (Neg)
4945     changeSign();
4946 }
4947 
4948 void DoubleAPFloat::makeSmallest(bool Neg) {
4949   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4950   Floats[0].makeSmallest(Neg);
4951   Floats[1].makeZero(/* Neg = */ false);
4952 }
4953 
4954 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4955   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4956   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4957   if (Neg)
4958     Floats[0].changeSign();
4959   Floats[1].makeZero(/* Neg = */ false);
4960 }
4961 
4962 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4963   Floats[0].makeNaN(SNaN, Neg, fill);
4964   Floats[1].makeZero(/* Neg = */ false);
4965 }
4966 
4967 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4968   auto Result = Floats[0].compare(RHS.Floats[0]);
4969   // |Float[0]| > |Float[1]|
4970   if (Result == APFloat::cmpEqual)
4971     return Floats[1].compare(RHS.Floats[1]);
4972   return Result;
4973 }
4974 
4975 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4976   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4977          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4978 }
4979 
4980 hash_code hash_value(const DoubleAPFloat &Arg) {
4981   if (Arg.Floats)
4982     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4983   return hash_combine(Arg.Semantics);
4984 }
4985 
4986 APInt DoubleAPFloat::bitcastToAPInt() const {
4987   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4988   uint64_t Data[] = {
4989       Floats[0].bitcastToAPInt().getRawData()[0],
4990       Floats[1].bitcastToAPInt().getRawData()[0],
4991   };
4992   return APInt(128, 2, Data);
4993 }
4994 
4995 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4996                                                              roundingMode RM) {
4997   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4998   APFloat Tmp(semPPCDoubleDoubleLegacy);
4999   auto Ret = Tmp.convertFromString(S, RM);
5000   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5001   return Ret;
5002 }
5003 
5004 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5005   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5006   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5007   auto Ret = Tmp.next(nextDown);
5008   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5009   return Ret;
5010 }
5011 
5012 APFloat::opStatus
5013 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5014                                 unsigned int Width, bool IsSigned,
5015                                 roundingMode RM, bool *IsExact) const {
5016   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5017   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5018       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5019 }
5020 
5021 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5022                                                   bool IsSigned,
5023                                                   roundingMode RM) {
5024   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5025   APFloat Tmp(semPPCDoubleDoubleLegacy);
5026   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5027   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5028   return Ret;
5029 }
5030 
5031 APFloat::opStatus
5032 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5033                                               unsigned int InputSize,
5034                                               bool IsSigned, roundingMode RM) {
5035   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5036   APFloat Tmp(semPPCDoubleDoubleLegacy);
5037   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5038   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5039   return Ret;
5040 }
5041 
5042 APFloat::opStatus
5043 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5044                                               unsigned int InputSize,
5045                                               bool IsSigned, roundingMode RM) {
5046   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5047   APFloat Tmp(semPPCDoubleDoubleLegacy);
5048   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5049   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5050   return Ret;
5051 }
5052 
5053 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5054                                                unsigned int HexDigits,
5055                                                bool UpperCase,
5056                                                roundingMode RM) const {
5057   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5058   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5059       .convertToHexString(DST, HexDigits, UpperCase, RM);
5060 }
5061 
5062 bool DoubleAPFloat::isDenormal() const {
5063   return getCategory() == fcNormal &&
5064          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5065           // (double)(Hi + Lo) == Hi defines a normal number.
5066           Floats[0] != Floats[0] + Floats[1]);
5067 }
5068 
5069 bool DoubleAPFloat::isSmallest() const {
5070   if (getCategory() != fcNormal)
5071     return false;
5072   DoubleAPFloat Tmp(*this);
5073   Tmp.makeSmallest(this->isNegative());
5074   return Tmp.compare(*this) == cmpEqual;
5075 }
5076 
5077 bool DoubleAPFloat::isSmallestNormalized() const {
5078   if (getCategory() != fcNormal)
5079     return false;
5080 
5081   DoubleAPFloat Tmp(*this);
5082   Tmp.makeSmallestNormalized(this->isNegative());
5083   return Tmp.compare(*this) == cmpEqual;
5084 }
5085 
5086 bool DoubleAPFloat::isLargest() const {
5087   if (getCategory() != fcNormal)
5088     return false;
5089   DoubleAPFloat Tmp(*this);
5090   Tmp.makeLargest(this->isNegative());
5091   return Tmp.compare(*this) == cmpEqual;
5092 }
5093 
5094 bool DoubleAPFloat::isInteger() const {
5095   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5096   return Floats[0].isInteger() && Floats[1].isInteger();
5097 }
5098 
5099 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5100                              unsigned FormatPrecision,
5101                              unsigned FormatMaxPadding,
5102                              bool TruncateZero) const {
5103   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5104   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5105       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5106 }
5107 
5108 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5109   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5110   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5111   if (!inv)
5112     return Tmp.getExactInverse(nullptr);
5113   APFloat Inv(semPPCDoubleDoubleLegacy);
5114   auto Ret = Tmp.getExactInverse(&Inv);
5115   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5116   return Ret;
5117 }
5118 
5119 int DoubleAPFloat::getExactLog2() const {
5120   // TODO: Implement me
5121   return INT_MIN;
5122 }
5123 
5124 int DoubleAPFloat::getExactLog2Abs() const {
5125   // TODO: Implement me
5126   return INT_MIN;
5127 }
5128 
5129 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5130                      APFloat::roundingMode RM) {
5131   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5132   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5133                        scalbn(Arg.Floats[1], Exp, RM));
5134 }
5135 
5136 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5137                     APFloat::roundingMode RM) {
5138   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5139   APFloat First = frexp(Arg.Floats[0], Exp, RM);
5140   APFloat Second = Arg.Floats[1];
5141   if (Arg.getCategory() == APFloat::fcNormal)
5142     Second = scalbn(Second, -Exp, RM);
5143   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5144 }
5145 
5146 } // namespace detail
5147 
5148 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5149   if (usesLayout<IEEEFloat>(Semantics)) {
5150     new (&IEEE) IEEEFloat(std::move(F));
5151     return;
5152   }
5153   if (usesLayout<DoubleAPFloat>(Semantics)) {
5154     const fltSemantics& S = F.getSemantics();
5155     new (&Double)
5156         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5157                       APFloat(semIEEEdouble));
5158     return;
5159   }
5160   llvm_unreachable("Unexpected semantics");
5161 }
5162 
5163 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5164                                                        roundingMode RM) {
5165   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5166 }
5167 
5168 hash_code hash_value(const APFloat &Arg) {
5169   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5170     return hash_value(Arg.U.IEEE);
5171   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5172     return hash_value(Arg.U.Double);
5173   llvm_unreachable("Unexpected semantics");
5174 }
5175 
5176 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5177     : APFloat(Semantics) {
5178   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5179   assert(StatusOrErr && "Invalid floating point representation");
5180   consumeError(StatusOrErr.takeError());
5181 }
5182 
5183 FPClassTest APFloat::classify() const {
5184   if (isZero())
5185     return isNegative() ? fcNegZero : fcPosZero;
5186   if (isNormal())
5187     return isNegative() ? fcNegNormal : fcPosNormal;
5188   if (isDenormal())
5189     return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5190   if (isInfinity())
5191     return isNegative() ? fcNegInf : fcPosInf;
5192   assert(isNaN() && "Other class of FP constant");
5193   return isSignaling() ? fcSNan : fcQNan;
5194 }
5195 
5196 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5197                                    roundingMode RM, bool *losesInfo) {
5198   if (&getSemantics() == &ToSemantics) {
5199     *losesInfo = false;
5200     return opOK;
5201   }
5202   if (usesLayout<IEEEFloat>(getSemantics()) &&
5203       usesLayout<IEEEFloat>(ToSemantics))
5204     return U.IEEE.convert(ToSemantics, RM, losesInfo);
5205   if (usesLayout<IEEEFloat>(getSemantics()) &&
5206       usesLayout<DoubleAPFloat>(ToSemantics)) {
5207     assert(&ToSemantics == &semPPCDoubleDouble);
5208     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5209     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5210     return Ret;
5211   }
5212   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5213       usesLayout<IEEEFloat>(ToSemantics)) {
5214     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5215     *this = APFloat(std::move(getIEEE()), ToSemantics);
5216     return Ret;
5217   }
5218   llvm_unreachable("Unexpected semantics");
5219 }
5220 
5221 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5222   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5223 }
5224 
5225 void APFloat::print(raw_ostream &OS) const {
5226   SmallVector<char, 16> Buffer;
5227   toString(Buffer);
5228   OS << Buffer << "\n";
5229 }
5230 
5231 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5232 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5233 #endif
5234 
5235 void APFloat::Profile(FoldingSetNodeID &NID) const {
5236   NID.Add(bitcastToAPInt());
5237 }
5238 
5239 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5240    an APSInt, whose initial bit-width and signed-ness are used to determine the
5241    precision of the conversion.
5242  */
5243 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5244                                             roundingMode rounding_mode,
5245                                             bool *isExact) const {
5246   unsigned bitWidth = result.getBitWidth();
5247   SmallVector<uint64_t, 4> parts(result.getNumWords());
5248   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5249                                      rounding_mode, isExact);
5250   // Keeps the original signed-ness.
5251   result = APInt(bitWidth, parts);
5252   return status;
5253 }
5254 
5255 double APFloat::convertToDouble() const {
5256   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5257     return getIEEE().convertToDouble();
5258   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5259          "Float semantics is not representable by IEEEdouble");
5260   APFloat Temp = *this;
5261   bool LosesInfo;
5262   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5263   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5264   (void)St;
5265   return Temp.getIEEE().convertToDouble();
5266 }
5267 
5268 float APFloat::convertToFloat() const {
5269   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5270     return getIEEE().convertToFloat();
5271   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5272          "Float semantics is not representable by IEEEsingle");
5273   APFloat Temp = *this;
5274   bool LosesInfo;
5275   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5276   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5277   (void)St;
5278   return Temp.getIEEE().convertToFloat();
5279 }
5280 
5281 } // namespace llvm
5282 
5283 #undef APFLOAT_DISPATCH_ON_SEMANTICS
5284