xref: /freebsd/contrib/llvm-project/llvm/lib/Support/APFloat.cpp (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
28 
// Forwards METHOD_CALL to U.IEEE or U.Double, whichever layout matches
// getSemantics(), and returns the result from the enclosing function.
// Every path through the expansion either returns or reaches
// llvm_unreachable, so nothing placed after the macro in a function runs.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
37 
38 using namespace llvm;
39 
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is computed as lhs * 4 + rhs, so distinct pairs only map to
/// distinct keys while every category value is in the range [0, 4).
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
47 
48 /* Assumed in hexadecimal significand parsing, and conversion to
49    hexadecimal strings.  */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
51 
52 namespace llvm {
53   /* Represents floating point arithmetic semantics.  */
54   struct fltSemantics {
55     /* The largest E such that 2^E is representable; this matches the
56        definition of IEEE 754.  */
57     APFloatBase::ExponentType maxExponent;
58 
59     /* The smallest E such that 2^E is a normalized number; this
60        matches the definition of IEEE 754.  */
61     APFloatBase::ExponentType minExponent;
62 
63     /* Number of bits in the significand.  This includes the integer
64        bit.  */
65     unsigned int precision;
66 
67     /* Number of bits actually used in the semantics. */
68     unsigned int sizeInBits;
69 
70     // Returns true if any number described by this semantics can be precisely
71     // represented by the specified semantics.
72     bool isRepresentableBy(const fltSemantics &S) const {
73       return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
74              precision <= S.precision;
75     }
76   };
77 
  // Each initializer lists the fltSemantics fields in declaration order:
  // {maxExponent, minExponent, precision, sizeInBits}.
  static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
  static const fltSemantics semBFloat = {127, -126, 8, 16};
  static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
  static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
  static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
  static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
  static const fltSemantics semBogus = {0, 0, 0, 0};

  /* The IBM double-double semantics. Such a number consists of a pair of IEEE
     64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
     (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
     Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
     to each other, and two 11-bit exponents.

     Note: we need to make the value different from semBogus as otherwise
     an unsafe optimization may collapse both values to a single address,
     and we heavily rely on them having distinct addresses.             */
  static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

  /* These are legacy semantics for the fallback, inaccurate implementation of
     IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
     operation. It's equivalent to having an IEEE number with consecutive 106
     bits of mantissa and 11 bits of exponent.

     It's not equivalent to IBM double-double. For example, a legit IBM
     double-double, 1 + epsilon:

       1 + epsilon = 1 + (1 >> 1076)

     is not representable by a consecutive 106 bits of mantissa.

     Currently, these semantics are used in the following way:

       semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
       (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
       semPPCDoubleDoubleLegacy -> IEEE operations

     We use bitcastToAPInt() to get the bit representation (in APInt) of the
     underlying IEEEdouble, then use the APInt constructor to construct the
     legacy IEEE float.

     TODO: Implement all operations in semPPCDoubleDouble, and delete these
     semantics.  */
  static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
                                                        53 + 53, 128};
123 
124   const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
125     switch (S) {
126     case S_IEEEhalf:
127       return IEEEhalf();
128     case S_BFloat:
129       return BFloat();
130     case S_IEEEsingle:
131       return IEEEsingle();
132     case S_IEEEdouble:
133       return IEEEdouble();
134     case S_x87DoubleExtended:
135       return x87DoubleExtended();
136     case S_IEEEquad:
137       return IEEEquad();
138     case S_PPCDoubleDouble:
139       return PPCDoubleDouble();
140     }
141     llvm_unreachable("Unrecognised floating semantics");
142   }
143 
144   APFloatBase::Semantics
145   APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
146     if (&Sem == &llvm::APFloat::IEEEhalf())
147       return S_IEEEhalf;
148     else if (&Sem == &llvm::APFloat::BFloat())
149       return S_BFloat;
150     else if (&Sem == &llvm::APFloat::IEEEsingle())
151       return S_IEEEsingle;
152     else if (&Sem == &llvm::APFloat::IEEEdouble())
153       return S_IEEEdouble;
154     else if (&Sem == &llvm::APFloat::x87DoubleExtended())
155       return S_x87DoubleExtended;
156     else if (&Sem == &llvm::APFloat::IEEEquad())
157       return S_IEEEquad;
158     else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
159       return S_PPCDoubleDouble;
160     else
161       llvm_unreachable("Unknown floating semantics");
162   }
163 
164   const fltSemantics &APFloatBase::IEEEhalf() {
165     return semIEEEhalf;
166   }
167   const fltSemantics &APFloatBase::BFloat() {
168     return semBFloat;
169   }
170   const fltSemantics &APFloatBase::IEEEsingle() {
171     return semIEEEsingle;
172   }
173   const fltSemantics &APFloatBase::IEEEdouble() {
174     return semIEEEdouble;
175   }
176   const fltSemantics &APFloatBase::IEEEquad() {
177     return semIEEEquad;
178   }
179   const fltSemantics &APFloatBase::x87DoubleExtended() {
180     return semX87DoubleExtended;
181   }
182   const fltSemantics &APFloatBase::Bogus() {
183     return semBogus;
184   }
185   const fltSemantics &APFloatBase::PPCDoubleDouble() {
186     return semPPCDoubleDouble;
187   }
188 
  // Out-of-line definitions of the rounding-mode constants.  No initializer
  // is given here, so the values come from the in-class declarations in
  // APFloatBase (not shown in this file).
  // NOTE(review): presumably kept so that ODR-uses link when building as
  // pre-C++17 -- confirm before removing.
  constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
  constexpr RoundingMode APFloatBase::rmTowardPositive;
  constexpr RoundingMode APFloatBase::rmTowardNegative;
  constexpr RoundingMode APFloatBase::rmTowardZero;
  constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
194 
195   /* A tight upper bound on number of parts required to hold the value
196      pow(5, power) is
197 
198        power * 815 / (351 * integerPartWidth) + 1
199 
200      However, whilst the result may require only this many parts,
201      because we are multiplying two values to get it, the
202      multiplication may require an extra part with the excess part
203      being zero (consider the trivial case of 1 * 1, tcFullMultiply
204      requires two parts to hold the single-part result).  So we add an
205      extra one to guarantee enough space whilst multiplying.  */
206   const unsigned int maxExponent = 16383;
207   const unsigned int maxPrecision = 113;
208   const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
209   const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
210 
211   unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
212     return semantics.precision;
213   }
214   APFloatBase::ExponentType
215   APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
216     return semantics.maxExponent;
217   }
218   APFloatBase::ExponentType
219   APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
220     return semantics.minExponent;
221   }
222   unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
223     return semantics.sizeInBits;
224   }
225 
226   unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
227     return Sem.sizeInBits;
228 }
229 
230 /* A bunch of private, handy routines.  */
231 
232 static inline Error createError(const Twine &Err) {
233   return make_error<StringError>(Err, inconvertibleErrorCode());
234 }
235 
236 static inline unsigned int
237 partCountForBits(unsigned int bits)
238 {
239   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
240 }
241 
/* Map the character C to its decimal digit value.  Digits yield 0U-9U;
   because the subtraction is unsigned, any other character yields a
   value >= 10U.  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';
  return c - zero;
}
248 
249 /* Return the value of a decimal exponent of the form
250    [+-]ddddddd.
251 
252    If the exponent overflows, returns a large exponent with the
253    appropriate sign.  */
254 static Expected<int> readExponent(StringRef::iterator begin,
255                                   StringRef::iterator end) {
256   bool isNegative;
257   unsigned int absExponent;
258   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
259   StringRef::iterator p = begin;
260 
261   // Treat no exponent as 0 to match binutils
262   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
263     return 0;
264   }
265 
266   isNegative = (*p == '-');
267   if (*p == '-' || *p == '+') {
268     p++;
269     if (p == end)
270       return createError("Exponent has no digits");
271   }
272 
273   absExponent = decDigitValue(*p++);
274   if (absExponent >= 10U)
275     return createError("Invalid character in exponent");
276 
277   for (; p != end; ++p) {
278     unsigned int value;
279 
280     value = decDigitValue(*p);
281     if (value >= 10U)
282       return createError("Invalid character in exponent");
283 
284     absExponent = absExponent * 10U + value;
285     if (absExponent >= overlargeExponent) {
286       absExponent = overlargeExponent;
287       break;
288     }
289   }
290 
291   if (isNegative)
292     return -(int) absExponent;
293   else
294     return (int) absExponent;
295 }
296 
297 /* This is ugly and needs cleaning up, but I don't immediately see
298    how whilst remaining safe.  */
299 static Expected<int> totalExponent(StringRef::iterator p,
300                                    StringRef::iterator end,
301                                    int exponentAdjustment) {
302   int unsignedExponent;
303   bool negative, overflow;
304   int exponent = 0;
305 
306   if (p == end)
307     return createError("Exponent has no digits");
308 
309   negative = *p == '-';
310   if (*p == '-' || *p == '+') {
311     p++;
312     if (p == end)
313       return createError("Exponent has no digits");
314   }
315 
316   unsignedExponent = 0;
317   overflow = false;
318   for (; p != end; ++p) {
319     unsigned int value;
320 
321     value = decDigitValue(*p);
322     if (value >= 10U)
323       return createError("Invalid character in exponent");
324 
325     unsignedExponent = unsignedExponent * 10 + value;
326     if (unsignedExponent > 32767) {
327       overflow = true;
328       break;
329     }
330   }
331 
332   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
333     overflow = true;
334 
335   if (!overflow) {
336     exponent = unsignedExponent;
337     if (negative)
338       exponent = -exponent;
339     exponent += exponentAdjustment;
340     if (exponent > 32767 || exponent < -32768)
341       overflow = true;
342   }
343 
344   if (overflow)
345     exponent = negative ? -32768: 32767;
346 
347   return exponent;
348 }
349 
350 static Expected<StringRef::iterator>
351 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
352                            StringRef::iterator *dot) {
353   StringRef::iterator p = begin;
354   *dot = end;
355   while (p != end && *p == '0')
356     p++;
357 
358   if (p != end && *p == '.') {
359     *dot = p++;
360 
361     if (end - begin == 1)
362       return createError("Significand has no digits");
363 
364     while (p != end && *p == '0')
365       p++;
366   }
367 
368   return p;
369 }
370 
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  /* Position reached after skipping leading zeroes and any decimal point.  */
  const char *firstSigDigit;
  /* Position of the last significant digit once trailing zeroes have been
     dropped.  */
  const char *lastSigDigit;
  /* Decimal exponent with the significand read as an integer.  */
  int exponent;
  /* Decimal exponent with the point after the first significant digit.  */
  int normalizedExponent;
};
390 
/* Scan the decimal number in [BEGIN, END) and fill in *D.  Returns an error
   for a second decimal point, a character that is neither a digit, a point
   nor an exponent marker, a significand without digits, or a malformed
   exponent.  */
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  /* END doubles as "no decimal point seen yet".  */
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  /* Advance over digits and at most one decimal point; stop at the first
     other character or at END.  */
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  /* Anything left over must be an exponent introduced by 'e' or 'E'.  */
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      /* The inner loop steps back over zeroes; the outer loop repeats it if
         the character it stopped on is the decimal point, so zeroes on both
         sides of the point are removed.  */
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}
456 
457 /* Return the trailing fraction of a hexadecimal number.
458    DIGITVALUE is the first hex digit of the fraction, P points to
459    the next digit.  */
460 static Expected<lostFraction>
461 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
462                             unsigned int digitValue) {
463   unsigned int hexDigit;
464 
465   /* If the first trailing digit isn't 0 or 8 we can work out the
466      fraction immediately.  */
467   if (digitValue > 8)
468     return lfMoreThanHalf;
469   else if (digitValue < 8 && digitValue > 0)
470     return lfLessThanHalf;
471 
472   // Otherwise we need to find the first non-zero digit.
473   while (p != end && (*p == '0' || *p == '.'))
474     p++;
475 
476   if (p == end)
477     return createError("Invalid trailing hexadecimal fraction!");
478 
479   hexDigit = hexDigitValue(*p);
480 
481   /* If we ran off the end it is exactly zero or one-half, otherwise
482      a little more.  */
483   if (hexDigit == -1U)
484     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
485   else
486     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
487 }
488 
489 /* Return the fraction lost were a bignum truncated losing the least
490    significant BITS bits.  */
491 static lostFraction
492 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
493                               unsigned int partCount,
494                               unsigned int bits)
495 {
496   unsigned int lsb;
497 
498   lsb = APInt::tcLSB(parts, partCount);
499 
500   /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
501   if (bits <= lsb)
502     return lfExactlyZero;
503   if (bits == lsb + 1)
504     return lfExactlyHalf;
505   if (bits <= partCount * APFloatBase::integerPartWidth &&
506       APInt::tcExtractBit(parts, bits - 1))
507     return lfMoreThanHalf;
508 
509   return lfLessThanHalf;
510 }
511 
512 /* Shift DST right BITS bits noting lost fraction.  */
513 static lostFraction
514 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
515 {
516   lostFraction lost_fraction;
517 
518   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
519 
520   APInt::tcShiftRight(dst, parts, bits);
521 
522   return lost_fraction;
523 }
524 
525 /* Combine the effect of two lost fractions.  */
526 static lostFraction
527 combineLostFractions(lostFraction moreSignificant,
528                      lostFraction lessSignificant)
529 {
530   if (lessSignificant != lfExactlyZero) {
531     if (moreSignificant == lfExactlyZero)
532       moreSignificant = lfLessThanHalf;
533     else if (moreSignificant == lfExactlyHalf)
534       moreSignificant = lfMoreThanHalf;
535   }
536 
537   return moreSignificant;
538 }
539 
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers that are themselves off from the values they approximate by at
   most HUERR1 and HUERR2 half-ulps.  The true error is strictly less
   than the returned value.  INEXACTMULTIPLY says whether the
   multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  /* With exact inputs only the multiplication can contribute:
     <= inexactMultiply half-ulps.  */
  if (inputError == 0)
    return 2 * multiplyError;

  return multiplyError + 2 * inputError;
}
557 
/* The number of ulps from the boundary (zero, or half if ISNEAREST)
   when the least significant BITS are truncated.  BITS cannot be
   zero.  Distances too large to matter are reported as the all-ones
   value ("a lot").  */
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  /* Locate the part holding the highest truncated bit (index COUNT) and
     how many of that part's low bits are truncated (PARTBITS, 1 up to
     integerPartWidth).  */
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  /* Keep only the truncated bits of that top part.  */
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  /* For round-to-nearest the boundary is the half-way point, i.e. just the
     highest truncated bit; otherwise it is zero.  */
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  /* Single-part case: return whichever of the two unsigned (wrapping)
     differences is smaller.  */
  if (count == 0) {
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  /* Multi-part case: the distance is only small when the top part equals
     the boundary and the middle parts are all zero (just above it), or
     the top part is one below the boundary and the middle parts are all
     ones (just below it).  The lowest part then gives the distance.  */
  if (part == boundary) {
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}
603 
/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.

   The low three bits of POWER select a small power from a table; the
   remaining bits are consumed one at a time, multiplying in
   pow(5, pow(2, n+3)) for each set bit n.  Those repeated squares are
   built on demand and stored back to back in a local array.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  /* pow(5, 0) .. pow(5, 7).  */
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* Storage for the repeated squares; entry 0 is pow(5, 8).  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  /* partsCount[n] is the size in parts of pow(5, pow(2, n+3)); zero means
     that square has not been computed yet.  Only entry 0 starts out
     known.  */
  unsigned int partsCount[16] = { 1 };
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  /* p1 always holds the running product and p2 is the other buffer; the
     two swap roles after every multiplication.  */
  p1 = dst;
  p2 = scratch;

  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    unsigned int pc;

    pc = partsCount[n];

    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (pc == 0) {
      /* The previous square sits directly before pow5 and is pc parts
         long; square it into the space starting at pow5.  */
      pc = partsCount[n - 1];
      APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
      pc *= 2;
      if (pow5[pc - 1] == 0)
        pc--;
      partsCount[n] = pc;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, pc);
      result += pc;
      if (p2[result - 1] == 0)
        result--;

      /* Now result is in p1 with partsCount parts and p2 is scratch
         space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    /* Step past this square so the next one is stored after it.  */
    pow5 += pc;
  }

  /* The product may have ended up in the scratch buffer.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}
664 
/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of infinity and NaN.
   NOTE(review): presumably chosen alongside the matching hex digit table
   by the string output code; the users are outside this part of the
   file.  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
673 
674 /* Write out an integerPart in hexadecimal, starting with the most
675    significant nibble.  Write out exactly COUNT hexdigits, return
676    COUNT.  */
677 static unsigned int
678 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
679            const char *hexDigitChars)
680 {
681   unsigned int result = count;
682 
683   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
684 
685   part >>= (APFloatBase::integerPartWidth - 4 * count);
686   while (count--) {
687     dst[count] = hexDigitChars[part & 0xf];
688     part >>= 4;
689   }
690 
691   return result;
692 }
693 
/* Write N to DST as an unsigned decimal integer, without a terminating
   NUL, and return a pointer just past the last character written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int len = 0;

  /* Collect the digits least significant first; zero still produces one
     digit.  */
  do {
    digits[len++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Emit them in reading order.  */
  while (len != 0)
    *dst++ = digits[--len];

  return dst;
}
711 
712 /* Write out a signed decimal integer.  */
713 static char *
714 writeSignedDecimal (char *dst, int value)
715 {
716   if (value < 0) {
717     *dst++ = '-';
718     dst = writeUnsignedDecimal(dst, -(unsigned) value);
719   } else
720     dst = writeUnsignedDecimal(dst, value);
721 
722   return dst;
723 }
724 
725 namespace detail {
726 /* Constructors.  */
727 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
728   unsigned int count;
729 
730   semantics = ourSemantics;
731   count = partCount();
732   if (count > 1)
733     significand.parts = new integerPart[count];
734 }
735 
736 void IEEEFloat::freeSignificand() {
737   if (needsCleanup())
738     delete [] significand.parts;
739 }
740 
741 void IEEEFloat::assign(const IEEEFloat &rhs) {
742   assert(semantics == rhs.semantics);
743 
744   sign = rhs.sign;
745   category = rhs.category;
746   exponent = rhs.exponent;
747   if (isFiniteNonZero() || category == fcNaN)
748     copySignificand(rhs);
749 }
750 
751 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
752   assert(isFiniteNonZero() || category == fcNaN);
753   assert(rhs.partCount() >= partCount());
754 
755   APInt::tcAssign(significandParts(), rhs.significandParts(),
756                   partCount());
757 }
758 
759 /* Make this number a NaN, with an arbitrary but deterministic value
760    for the significand.  If double or longer, this is a signalling NaN,
761    which may not be ideal.  If float, this is QNaN(0).  */
762 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
763   category = fcNaN;
764   sign = Negative;
765   exponent = exponentNaN();
766 
767   integerPart *significand = significandParts();
768   unsigned numParts = partCount();
769 
770   // Set the significand bits to the fill.
771   if (!fill || fill->getNumWords() < numParts)
772     APInt::tcSet(significand, 0, numParts);
773   if (fill) {
774     APInt::tcAssign(significand, fill->getRawData(),
775                     std::min(fill->getNumWords(), numParts));
776 
777     // Zero out the excess bits of the significand.
778     unsigned bitsToPreserve = semantics->precision - 1;
779     unsigned part = bitsToPreserve / 64;
780     bitsToPreserve %= 64;
781     significand[part] &= ((1ULL << bitsToPreserve) - 1);
782     for (part++; part != numParts; ++part)
783       significand[part] = 0;
784   }
785 
786   unsigned QNaNBit = semantics->precision - 2;
787 
788   if (SNaN) {
789     // We always have to clear the QNaN bit to make it an SNaN.
790     APInt::tcClearBit(significand, QNaNBit);
791 
792     // If there are no bits set in the payload, we have to set
793     // *something* to make it a NaN instead of an infinity;
794     // conventionally, this is the next bit down from the QNaN bit.
795     if (APInt::tcIsZero(significand, numParts))
796       APInt::tcSetBit(significand, QNaNBit - 1);
797   } else {
798     // We always have to set the QNaN bit to make it a QNaN.
799     APInt::tcSetBit(significand, QNaNBit);
800   }
801 
802   // For x87 extended precision, we want to make a NaN, not a
803   // pseudo-NaN.  Maybe we should expose the ability to make
804   // pseudo-NaNs?
805   if (semantics == &semX87DoubleExtended)
806     APInt::tcSetBit(significand, QNaNBit + 1);
807 }
808 
809 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
810   if (this != &rhs) {
811     if (semantics != rhs.semantics) {
812       freeSignificand();
813       initialize(rhs.semantics);
814     }
815     assign(rhs);
816   }
817 
818   return *this;
819 }
820 
821 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
822   freeSignificand();
823 
824   semantics = rhs.semantics;
825   significand = rhs.significand;
826   exponent = rhs.exponent;
827   category = rhs.category;
828   sign = rhs.sign;
829 
830   rhs.semantics = &semBogus;
831   return *this;
832 }
833 
834 bool IEEEFloat::isDenormal() const {
835   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
836          (APInt::tcExtractBit(significandParts(),
837                               semantics->precision - 1) == 0);
838 }
839 
840 bool IEEEFloat::isSmallest() const {
841   // The smallest number by magnitude in our format will be the smallest
842   // denormal, i.e. the floating point number with exponent being minimum
843   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
844   return isFiniteNonZero() && exponent == semantics->minExponent &&
845     significandMSB() == 0;
846 }
847 
848 bool IEEEFloat::isSignificandAllOnes() const {
849   // Test if the significand excluding the integral bit is all ones. This allows
850   // us to test for binade boundaries.
851   const integerPart *Parts = significandParts();
852   const unsigned PartCount = partCountForBits(semantics->precision);
853   for (unsigned i = 0; i < PartCount - 1; i++)
854     if (~Parts[i])
855       return false;
856 
857   // Set the unused high bits to all ones when we compare.
858   const unsigned NumHighBits =
859     PartCount*integerPartWidth - semantics->precision + 1;
860   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
861          "Can not have more high bits to fill than integerPartWidth");
862   const integerPart HighBitFill =
863     ~integerPart(0) << (integerPartWidth - NumHighBits);
864   if (~(Parts[PartCount - 1] | HighBitFill))
865     return false;
866 
867   return true;
868 }
869 
870 bool IEEEFloat::isSignificandAllZeros() const {
871   // Test if the significand excluding the integral bit is all zeros. This
872   // allows us to test for binade boundaries.
873   const integerPart *Parts = significandParts();
874   const unsigned PartCount = partCountForBits(semantics->precision);
875 
876   for (unsigned i = 0; i < PartCount - 1; i++)
877     if (Parts[i])
878       return false;
879 
880   // Compute how many bits are used in the final word.
881   const unsigned NumHighBits =
882     PartCount*integerPartWidth - semantics->precision + 1;
883   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
884          "clear than integerPartWidth");
885   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
886 
887   if (Parts[PartCount - 1] & HighBitMask)
888     return false;
889 
890   return true;
891 }
892 
893 bool IEEEFloat::isLargest() const {
894   // The largest number by magnitude in our format will be the floating point
895   // number with maximum exponent and with significand that is all ones.
896   return isFiniteNonZero() && exponent == semantics->maxExponent
897     && isSignificandAllOnes();
898 }
899 
900 bool IEEEFloat::isInteger() const {
901   // This could be made more efficient; I'm going for obviously correct.
902   if (!isFinite()) return false;
903   IEEEFloat truncated = *this;
904   truncated.roundToIntegral(rmTowardZero);
905   return compare(truncated) == cmpEqual;
906 }
907 
908 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
909   if (this == &rhs)
910     return true;
911   if (semantics != rhs.semantics ||
912       category != rhs.category ||
913       sign != rhs.sign)
914     return false;
915   if (category==fcZero || category==fcInfinity)
916     return true;
917 
918   if (isFiniteNonZero() && exponent != rhs.exponent)
919     return false;
920 
921   return std::equal(significandParts(), significandParts() + partCount(),
922                     rhs.significandParts());
923 }
924 
925 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
926   initialize(&ourSemantics);
927   sign = 0;
928   category = fcNormal;
929   zeroSignificand();
930   exponent = ourSemantics.precision - 1;
931   significandParts()[0] = value;
932   normalize(rmNearestTiesToEven, lfExactlyZero);
933 }
934 
935 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
936   initialize(&ourSemantics);
937   makeZero(false);
938 }
939 
// Despite the "uninitialized" tag, delegate to the zero-initializing
// constructor above: the copy constructor may later inspect `category`, which
// therefore must never hold garbage.  The tag argument itself is unused.
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
    : IEEEFloat(ourSemantics) {}
944 
945 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
946   initialize(rhs.semantics);
947   assign(rhs);
948 }
949 
950 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
951   *this = std::move(rhs);
952 }
953 
954 IEEEFloat::~IEEEFloat() { freeSignificand(); }
955 
956 unsigned int IEEEFloat::partCount() const {
957   return partCountForBits(semantics->precision + 1);
958 }
959 
960 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
961   return const_cast<IEEEFloat *>(this)->significandParts();
962 }
963 
964 IEEEFloat::integerPart *IEEEFloat::significandParts() {
965   if (partCount() > 1)
966     return significand.parts;
967   else
968     return &significand.part;
969 }
970 
971 void IEEEFloat::zeroSignificand() {
972   APInt::tcSet(significandParts(), 0, partCount());
973 }
974 
975 /* Increment an fcNormal floating point number's significand.  */
976 void IEEEFloat::incrementSignificand() {
977   integerPart carry;
978 
979   carry = APInt::tcIncrement(significandParts(), partCount());
980 
981   /* Our callers should never cause us to overflow.  */
982   assert(carry == 0);
983   (void)carry;
984 }
985 
986 /* Add the significand of the RHS.  Returns the carry flag.  */
987 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
988   integerPart *parts;
989 
990   parts = significandParts();
991 
992   assert(semantics == rhs.semantics);
993   assert(exponent == rhs.exponent);
994 
995   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
996 }
997 
998 /* Subtract the significand of the RHS with a borrow flag.  Returns
999    the borrow flag.  */
1000 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1001                                                       integerPart borrow) {
1002   integerPart *parts;
1003 
1004   parts = significandParts();
1005 
1006   assert(semantics == rhs.semantics);
1007   assert(exponent == rhs.exponent);
1008 
1009   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1010                            partCount());
1011 }
1012 
/* Multiply our significand by the significand of RHS, in place.  If ADDEND
   is non-zero, add it on to the full-precision result of the
   multiplication before it is narrowed back to our precision.  The
   exponent is updated to match, but the result is not necessarily
   normalized.  Returns the lost fraction.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Use the local scratch array when the product fits in four parts;
  // otherwise allocate a buffer, which is released just before returning.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Temporarily make *this describe the wide product (significand and
    // semantics) so the regular significand addition below operates on it.
    // The saved values are put back under "Restore our state".
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  */
    // In the single-part case the sum lives in significand.part, so copy it
    // back into the working buffer before the saved significand is restored.
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1; combined with
  // the "exponent += 2" above, the net adjustment is precision - 1.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  // Copy the low partsCount parts of the working buffer back into our own
  // significand.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}
1147 
/* Multiply our significand by RHS's with no addend.  Forwards to the
   two-argument overload, passing a freshly constructed IEEEFloat of our own
   semantics; that overload only adds the addend when it is non-zero.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  return multiplySignificand(rhs, IEEEFloat(*semantics));
}
1151 
/* Divide our significand by the significand of RHS, in place, using bitwise
   long division.  The exponent is adjusted to match the quotient.  Returns
   the lost fraction, derived from the final remainder.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  // One buffer holds both working copies: the dividend in the first
  // partsCount parts and the divisor in the next partsCount.  The local
  // scratch array is large enough for up to two parts each.
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.  */
  // Our own significand is cleared at the same time; the quotient bits are
  // set into it by the long-division loop below.
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor.  */
  // Shift its MSB up to bit precision - 1 and compensate in the exponent.
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division.  */
  // Produce one quotient bit per iteration, from bit precision - 1 down to
  // bit 0, doubling the running remainder after each step.
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction.  */
  // `dividend` now holds twice the final remainder, so comparing it with the
  // divisor tells on which side of one half the discarded fraction lies.
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}
1234 
1235 unsigned int IEEEFloat::significandMSB() const {
1236   return APInt::tcMSB(significandParts(), partCount());
1237 }
1238 
1239 unsigned int IEEEFloat::significandLSB() const {
1240   return APInt::tcLSB(significandParts(), partCount());
1241 }
1242 
1243 /* Note that a zero result is NOT normalized to fcZero.  */
1244 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1245   /* Our exponent should not overflow.  */
1246   assert((ExponentType) (exponent + bits) >= exponent);
1247 
1248   exponent += bits;
1249 
1250   return shiftRight(significandParts(), partCount(), bits);
1251 }
1252 
1253 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1254 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1255   assert(bits < semantics->precision);
1256 
1257   if (bits) {
1258     unsigned int partsCount = partCount();
1259 
1260     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1261     exponent -= bits;
1262 
1263     assert(!APInt::tcIsZero(significandParts(), partsCount));
1264   }
1265 }
1266 
1267 IEEEFloat::cmpResult
1268 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1269   int compare;
1270 
1271   assert(semantics == rhs.semantics);
1272   assert(isFiniteNonZero());
1273   assert(rhs.isFiniteNonZero());
1274 
1275   compare = exponent - rhs.exponent;
1276 
1277   /* If exponents are equal, do an unsigned bignum comparison of the
1278      significands.  */
1279   if (compare == 0)
1280     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1281                                partCount());
1282 
1283   if (compare > 0)
1284     return cmpGreaterThan;
1285   else if (compare < 0)
1286     return cmpLessThan;
1287   else
1288     return cmpEqual;
1289 }
1290 
1291 /* Set the least significant BITS bits of a bignum, clear the
1292    rest.  */
1293 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1294                                       unsigned bits) {
1295   unsigned i = 0;
1296   while (bits > APInt::APINT_BITS_PER_WORD) {
1297     dst[i++] = ~(APInt::WordType)0;
1298     bits -= APInt::APINT_BITS_PER_WORD;
1299   }
1300 
1301   if (bits)
1302     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1303 
1304   while (i < parts)
1305     dst[i++] = 0;
1306 }
1307 
1308 /* Handle overflow.  Sign is preserved.  We either become infinity or
1309    the largest finite number.  */
1310 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1311   /* Infinity?  */
1312   if (rounding_mode == rmNearestTiesToEven ||
1313       rounding_mode == rmNearestTiesToAway ||
1314       (rounding_mode == rmTowardPositive && !sign) ||
1315       (rounding_mode == rmTowardNegative && sign)) {
1316     category = fcInfinity;
1317     return (opStatus) (opOverflow | opInexact);
1318   }
1319 
1320   /* Otherwise we become the largest finite number.  */
1321   category = fcNormal;
1322   exponent = semantics->maxExponent;
1323   tcSetLeastSignificantBits(significandParts(), partCount(),
1324                             semantics->precision);
1325 
1326   return opInexact;
1327 }
1328 
1329 /* Returns TRUE if, when truncating the current number, with BIT the
1330    new LSB, with the given lost fraction and rounding mode, the result
1331    would need to be rounded away from zero (i.e., by increasing the
1332    signficand).  This routine must work for fcZero of both signs, and
1333    fcNormal numbers.  */
1334 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1335                                   lostFraction lost_fraction,
1336                                   unsigned int bit) const {
1337   /* NaNs and infinities should not have lost fractions.  */
1338   assert(isFiniteNonZero() || category == fcZero);
1339 
1340   /* Current callers never pass this so we don't handle it.  */
1341   assert(lost_fraction != lfExactlyZero);
1342 
1343   switch (rounding_mode) {
1344   case rmNearestTiesToAway:
1345     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1346 
1347   case rmNearestTiesToEven:
1348     if (lost_fraction == lfMoreThanHalf)
1349       return true;
1350 
1351     /* Our zeroes don't have a significand to test.  */
1352     if (lost_fraction == lfExactlyHalf && category != fcZero)
1353       return APInt::tcExtractBit(significandParts(), bit);
1354 
1355     return false;
1356 
1357   case rmTowardZero:
1358     return false;
1359 
1360   case rmTowardPositive:
1361     return !sign;
1362 
1363   case rmTowardNegative:
1364     return sign;
1365 
1366   default:
1367     break;
1368   }
1369   llvm_unreachable("Invalid rounding mode found");
1370 }
1371 
/* Normalizes a finite non-zero value and rounds it according to
   ROUNDING_MODE, where LOST_FRACTION describes what the caller has already
   discarded below the significand.  Values of any other category are left
   untouched and yield opOK.  The result may change category to fcZero or
   fcInfinity.  Returns the resulting status flags (opOK, opInexact,
   opOverflow | opInexact, or opUnderflow | opInexact).  */
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  // omsb == 0 means the significand is all zero; skip straight to rounding.
  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  */
    if (omsb == 0)
      category = fcZero;

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    // An all-zero significand is about to become non-zero, so give it the
    // minimum exponent first.
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent) {
        category = fcInfinity;

        return (opStatus) (opOverflow | opInexact);
      }

      shiftSignificandRight(1);

      return opInexact;
    }
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0)
    category = fcZero;

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}
1477 
1478 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1479                                                      bool subtract) {
1480   switch (PackCategoriesIntoKey(category, rhs.category)) {
1481   default:
1482     llvm_unreachable(nullptr);
1483 
1484   case PackCategoriesIntoKey(fcZero, fcNaN):
1485   case PackCategoriesIntoKey(fcNormal, fcNaN):
1486   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1487     assign(rhs);
1488     LLVM_FALLTHROUGH;
1489   case PackCategoriesIntoKey(fcNaN, fcZero):
1490   case PackCategoriesIntoKey(fcNaN, fcNormal):
1491   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1492   case PackCategoriesIntoKey(fcNaN, fcNaN):
1493     if (isSignaling()) {
1494       makeQuiet();
1495       return opInvalidOp;
1496     }
1497     return rhs.isSignaling() ? opInvalidOp : opOK;
1498 
1499   case PackCategoriesIntoKey(fcNormal, fcZero):
1500   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1501   case PackCategoriesIntoKey(fcInfinity, fcZero):
1502     return opOK;
1503 
1504   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1505   case PackCategoriesIntoKey(fcZero, fcInfinity):
1506     category = fcInfinity;
1507     sign = rhs.sign ^ subtract;
1508     return opOK;
1509 
1510   case PackCategoriesIntoKey(fcZero, fcNormal):
1511     assign(rhs);
1512     sign = rhs.sign ^ subtract;
1513     return opOK;
1514 
1515   case PackCategoriesIntoKey(fcZero, fcZero):
1516     /* Sign depends on rounding mode; handled by caller.  */
1517     return opOK;
1518 
1519   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1520     /* Differently signed infinities can only be validly
1521        subtracted.  */
1522     if (((sign ^ rhs.sign)!=0) != subtract) {
1523       makeNaN();
1524       return opInvalidOp;
1525     }
1526 
1527     return opOK;
1528 
1529   case PackCategoriesIntoKey(fcNormal, fcNormal):
1530     return opDivByZero;
1531   }
1532 }
1533 
/* Add or subtract two normal numbers.  Aligns the two significands by
   exponent, then adds RHS's significand to ours or subtracts it, depending
   on SUBTRACT and on the two signs.  Our sign is flipped when the
   subtraction had to be reversed.  Returns the lost fraction; the callers
   visible in this file follow up with normalize() or their own
   renormalization.  */
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
                                                 bool subtract) {
  integerPart carry;
  lostFraction lost_fraction;
  int bits;

  /* Determine if the operation on the absolute values is effectively
     an addition or subtraction.  */
  subtract ^= static_cast<bool>(sign ^ rhs.sign);

  /* Are we bigger exponent-wise than the RHS?  */
  bits = exponent - rhs.exponent;

  /* Subtraction is more subtle than one might naively expect.  */
  if (subtract) {
    IEEEFloat temp_rhs(rhs);

    // Align the operands: the one with the smaller exponent is shifted right
    // by one bit less than the exponent difference and the other is shifted
    // left by one bit, which brings both to the same exponent.
    if (bits == 0)
      lost_fraction = lfExactlyZero;
    else if (bits > 0) {
      lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
      shiftSignificandLeft(1);
    } else {
      lost_fraction = shiftSignificandRight(-bits - 1);
      temp_rhs.shiftSignificandLeft(1);
    }

    // Should we reverse the subtraction.
    // A non-zero lost fraction is passed in as the incoming borrow.
    if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
      carry = temp_rhs.subtractSignificand
        (*this, lost_fraction != lfExactlyZero);
      copySignificand(temp_rhs);
      sign = !sign;
    } else {
      carry = subtractSignificand
        (temp_rhs, lost_fraction != lfExactlyZero);
    }

    /* Invert the lost fraction - it was on the RHS and
       subtracted.  */
    if (lost_fraction == lfLessThanHalf)
      lost_fraction = lfMoreThanHalf;
    else if (lost_fraction == lfMoreThanHalf)
      lost_fraction = lfLessThanHalf;

    /* The code above is intended to ensure that no borrow is
       necessary.  */
    assert(!carry);
    (void)carry;
  } else {
    // Addition: shift whichever operand has the smaller exponent right until
    // the exponents agree, then add.
    if (bits > 0) {
      IEEEFloat temp_rhs(rhs);

      lost_fraction = temp_rhs.shiftSignificandRight(bits);
      carry = addSignificand(temp_rhs);
    } else {
      lost_fraction = shiftSignificandRight(-bits);
      carry = addSignificand(rhs);
    }

    /* We have a guard bit; generating a carry cannot happen.  */
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}
1602 
1603 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1604   switch (PackCategoriesIntoKey(category, rhs.category)) {
1605   default:
1606     llvm_unreachable(nullptr);
1607 
1608   case PackCategoriesIntoKey(fcZero, fcNaN):
1609   case PackCategoriesIntoKey(fcNormal, fcNaN):
1610   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1611     assign(rhs);
1612     sign = false;
1613     LLVM_FALLTHROUGH;
1614   case PackCategoriesIntoKey(fcNaN, fcZero):
1615   case PackCategoriesIntoKey(fcNaN, fcNormal):
1616   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1617   case PackCategoriesIntoKey(fcNaN, fcNaN):
1618     sign ^= rhs.sign; // restore the original sign
1619     if (isSignaling()) {
1620       makeQuiet();
1621       return opInvalidOp;
1622     }
1623     return rhs.isSignaling() ? opInvalidOp : opOK;
1624 
1625   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1626   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1627   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1628     category = fcInfinity;
1629     return opOK;
1630 
1631   case PackCategoriesIntoKey(fcZero, fcNormal):
1632   case PackCategoriesIntoKey(fcNormal, fcZero):
1633   case PackCategoriesIntoKey(fcZero, fcZero):
1634     category = fcZero;
1635     return opOK;
1636 
1637   case PackCategoriesIntoKey(fcZero, fcInfinity):
1638   case PackCategoriesIntoKey(fcInfinity, fcZero):
1639     makeNaN();
1640     return opInvalidOp;
1641 
1642   case PackCategoriesIntoKey(fcNormal, fcNormal):
1643     return opOK;
1644   }
1645 }
1646 
1647 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1648   switch (PackCategoriesIntoKey(category, rhs.category)) {
1649   default:
1650     llvm_unreachable(nullptr);
1651 
1652   case PackCategoriesIntoKey(fcZero, fcNaN):
1653   case PackCategoriesIntoKey(fcNormal, fcNaN):
1654   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1655     assign(rhs);
1656     sign = false;
1657     LLVM_FALLTHROUGH;
1658   case PackCategoriesIntoKey(fcNaN, fcZero):
1659   case PackCategoriesIntoKey(fcNaN, fcNormal):
1660   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1661   case PackCategoriesIntoKey(fcNaN, fcNaN):
1662     sign ^= rhs.sign; // restore the original sign
1663     if (isSignaling()) {
1664       makeQuiet();
1665       return opInvalidOp;
1666     }
1667     return rhs.isSignaling() ? opInvalidOp : opOK;
1668 
1669   case PackCategoriesIntoKey(fcInfinity, fcZero):
1670   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1671   case PackCategoriesIntoKey(fcZero, fcInfinity):
1672   case PackCategoriesIntoKey(fcZero, fcNormal):
1673     return opOK;
1674 
1675   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1676     category = fcZero;
1677     return opOK;
1678 
1679   case PackCategoriesIntoKey(fcNormal, fcZero):
1680     category = fcInfinity;
1681     return opDivByZero;
1682 
1683   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1684   case PackCategoriesIntoKey(fcZero, fcZero):
1685     makeNaN();
1686     return opInvalidOp;
1687 
1688   case PackCategoriesIntoKey(fcNormal, fcNormal):
1689     return opOK;
1690   }
1691 }
1692 
1693 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1694   switch (PackCategoriesIntoKey(category, rhs.category)) {
1695   default:
1696     llvm_unreachable(nullptr);
1697 
1698   case PackCategoriesIntoKey(fcZero, fcNaN):
1699   case PackCategoriesIntoKey(fcNormal, fcNaN):
1700   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1701     assign(rhs);
1702     LLVM_FALLTHROUGH;
1703   case PackCategoriesIntoKey(fcNaN, fcZero):
1704   case PackCategoriesIntoKey(fcNaN, fcNormal):
1705   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1706   case PackCategoriesIntoKey(fcNaN, fcNaN):
1707     if (isSignaling()) {
1708       makeQuiet();
1709       return opInvalidOp;
1710     }
1711     return rhs.isSignaling() ? opInvalidOp : opOK;
1712 
1713   case PackCategoriesIntoKey(fcZero, fcInfinity):
1714   case PackCategoriesIntoKey(fcZero, fcNormal):
1715   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1716     return opOK;
1717 
1718   case PackCategoriesIntoKey(fcNormal, fcZero):
1719   case PackCategoriesIntoKey(fcInfinity, fcZero):
1720   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1721   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1722   case PackCategoriesIntoKey(fcZero, fcZero):
1723     makeNaN();
1724     return opInvalidOp;
1725 
1726   case PackCategoriesIntoKey(fcNormal, fcNormal):
1727     return opOK;
1728   }
1729 }
1730 
1731 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1732   switch (PackCategoriesIntoKey(category, rhs.category)) {
1733   default:
1734     llvm_unreachable(nullptr);
1735 
1736   case PackCategoriesIntoKey(fcZero, fcNaN):
1737   case PackCategoriesIntoKey(fcNormal, fcNaN):
1738   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1739     assign(rhs);
1740     LLVM_FALLTHROUGH;
1741   case PackCategoriesIntoKey(fcNaN, fcZero):
1742   case PackCategoriesIntoKey(fcNaN, fcNormal):
1743   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1744   case PackCategoriesIntoKey(fcNaN, fcNaN):
1745     if (isSignaling()) {
1746       makeQuiet();
1747       return opInvalidOp;
1748     }
1749     return rhs.isSignaling() ? opInvalidOp : opOK;
1750 
1751   case PackCategoriesIntoKey(fcZero, fcInfinity):
1752   case PackCategoriesIntoKey(fcZero, fcNormal):
1753   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1754     return opOK;
1755 
1756   case PackCategoriesIntoKey(fcNormal, fcZero):
1757   case PackCategoriesIntoKey(fcInfinity, fcZero):
1758   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1759   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1760   case PackCategoriesIntoKey(fcZero, fcZero):
1761     makeNaN();
1762     return opInvalidOp;
1763 
1764   case PackCategoriesIntoKey(fcNormal, fcNormal):
1765     return opDivByZero; // fake status, indicating this is not a special case
1766   }
1767 }
1768 
1769 /* Change sign.  */
1770 void IEEEFloat::changeSign() {
1771   /* Look mummy, this one's easy.  */
1772   sign = !sign;
1773 }
1774 
1775 /* Normalized addition or subtraction.  */
1776 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1777                                              roundingMode rounding_mode,
1778                                              bool subtract) {
1779   opStatus fs;
1780 
1781   fs = addOrSubtractSpecials(rhs, subtract);
1782 
1783   /* This return code means it was not a simple case.  */
1784   if (fs == opDivByZero) {
1785     lostFraction lost_fraction;
1786 
1787     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1788     fs = normalize(rounding_mode, lost_fraction);
1789 
1790     /* Can only be zero if we lost no fraction.  */
1791     assert(category != fcZero || lost_fraction == lfExactlyZero);
1792   }
1793 
1794   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1795      positive zero unless rounding to minus infinity, except that
1796      adding two like-signed zeroes gives that zero.  */
1797   if (category == fcZero) {
1798     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1799       sign = (rounding_mode == rmTowardNegative);
1800   }
1801 
1802   return fs;
1803 }
1804 
1805 /* Normalized addition.  */
1806 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1807                                    roundingMode rounding_mode) {
1808   return addOrSubtract(rhs, rounding_mode, false);
1809 }
1810 
1811 /* Normalized subtraction.  */
1812 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1813                                         roundingMode rounding_mode) {
1814   return addOrSubtract(rhs, rounding_mode, true);
1815 }
1816 
1817 /* Normalized multiply.  */
1818 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1819                                         roundingMode rounding_mode) {
1820   opStatus fs;
1821 
1822   sign ^= rhs.sign;
1823   fs = multiplySpecials(rhs);
1824 
1825   if (isFiniteNonZero()) {
1826     lostFraction lost_fraction = multiplySignificand(rhs);
1827     fs = normalize(rounding_mode, lost_fraction);
1828     if (lost_fraction != lfExactlyZero)
1829       fs = (opStatus) (fs | opInexact);
1830   }
1831 
1832   return fs;
1833 }
1834 
1835 /* Normalized divide.  */
1836 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1837                                       roundingMode rounding_mode) {
1838   opStatus fs;
1839 
1840   sign ^= rhs.sign;
1841   fs = divideSpecials(rhs);
1842 
1843   if (isFiniteNonZero()) {
1844     lostFraction lost_fraction = divideSignificand(rhs);
1845     fs = normalize(rounding_mode, lost_fraction);
1846     if (lost_fraction != lfExactlyZero)
1847       fs = (opStatus) (fs | opInexact);
1848   }
1849 
1850   return fs;
1851 }
1852 
1853 /* Normalized remainder.  */
1854 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1855   opStatus fs;
1856   unsigned int origSign = sign;
1857 
1858   // First handle the special cases.
1859   fs = remainderSpecials(rhs);
1860   if (fs != opDivByZero)
1861     return fs;
1862 
1863   fs = opOK;
1864 
1865   // Make sure the current value is less than twice the denom. If the addition
1866   // did not succeed (an overflow has happened), which means that the finite
1867   // value we currently posses must be less than twice the denom (as we are
1868   // using the same semantics).
1869   IEEEFloat P2 = rhs;
1870   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
1871     fs = mod(P2);
1872     assert(fs == opOK);
1873   }
1874 
1875   // Lets work with absolute numbers.
1876   IEEEFloat P = rhs;
1877   P.sign = false;
1878   sign = false;
1879 
1880   //
1881   // To calculate the remainder we use the following scheme.
1882   //
1883   // The remainder is defained as follows:
1884   //
1885   // remainder = numer - rquot * denom = x - r * p
1886   //
1887   // Where r is the result of: x/p, rounded toward the nearest integral value
1888   // (with halfway cases rounded toward the even number).
1889   //
1890   // Currently, (after x mod 2p):
1891   // r is the number of 2p's present inside x, which is inherently, an even
1892   // number of p's.
1893   //
1894   // We may split the remaining calculation into 4 options:
1895   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1896   // - if x == 0.5p then we round to the nearest even number which is 0, and we
1897   //   are done as well.
1898   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
1899   //   to subtract 1p at least once.
1900   // - if x >= p then we must subtract p at least once, as x must be a
1901   //   remainder.
1902   //
1903   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
1904   //
1905   // We can now split the remaining calculation to the following 3 options:
1906   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1907   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
1908   //   must round up to the next even number. so we must subtract p once more.
1909   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
1910   //   integral, and subtract p once more.
1911   //
1912 
1913   // Extend the semantics to prevent an overflow/underflow or inexact result.
1914   bool losesInfo;
1915   fltSemantics extendedSemantics = *semantics;
1916   extendedSemantics.maxExponent++;
1917   extendedSemantics.minExponent--;
1918   extendedSemantics.precision += 2;
1919 
1920   IEEEFloat VEx = *this;
1921   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
1922   assert(fs == opOK && !losesInfo);
1923   IEEEFloat PEx = P;
1924   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
1925   assert(fs == opOK && !losesInfo);
1926 
1927   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
1928   // any fraction.
1929   fs = VEx.add(VEx, rmNearestTiesToEven);
1930   assert(fs == opOK);
1931 
1932   if (VEx.compare(PEx) == cmpGreaterThan) {
1933     fs = subtract(P, rmNearestTiesToEven);
1934     assert(fs == opOK);
1935 
1936     // Make VEx = this.add(this), but because we have different semantics, we do
1937     // not want to `convert` again, so we just subtract PEx twice (which equals
1938     // to the desired value).
1939     fs = VEx.subtract(PEx, rmNearestTiesToEven);
1940     assert(fs == opOK);
1941     fs = VEx.subtract(PEx, rmNearestTiesToEven);
1942     assert(fs == opOK);
1943 
1944     cmpResult result = VEx.compare(PEx);
1945     if (result == cmpGreaterThan || result == cmpEqual) {
1946       fs = subtract(P, rmNearestTiesToEven);
1947       assert(fs == opOK);
1948     }
1949   }
1950 
1951   if (isZero())
1952     sign = origSign;    // IEEE754 requires this
1953   else
1954     sign ^= origSign;
1955   return fs;
1956 }
1957 
1958 /* Normalized llvm frem (C fmod). */
1959 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1960   opStatus fs;
1961   fs = modSpecials(rhs);
1962   unsigned int origSign = sign;
1963 
1964   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1965          compareAbsoluteValue(rhs) != cmpLessThan) {
1966     IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1967     if (compareAbsoluteValue(V) == cmpLessThan)
1968       V = scalbn(V, -1, rmNearestTiesToEven);
1969     V.sign = sign;
1970 
1971     fs = subtract(V, rmNearestTiesToEven);
1972     assert(fs==opOK);
1973   }
1974   if (isZero())
1975     sign = origSign; // fmod requires this
1976   return fs;
1977 }
1978 
1979 /* Normalized fused-multiply-add.  */
1980 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1981                                                 const IEEEFloat &addend,
1982                                                 roundingMode rounding_mode) {
1983   opStatus fs;
1984 
1985   /* Post-multiplication sign, before addition.  */
1986   sign ^= multiplicand.sign;
1987 
1988   /* If and only if all arguments are normal do we need to do an
1989      extended-precision calculation.  */
1990   if (isFiniteNonZero() &&
1991       multiplicand.isFiniteNonZero() &&
1992       addend.isFinite()) {
1993     lostFraction lost_fraction;
1994 
1995     lost_fraction = multiplySignificand(multiplicand, addend);
1996     fs = normalize(rounding_mode, lost_fraction);
1997     if (lost_fraction != lfExactlyZero)
1998       fs = (opStatus) (fs | opInexact);
1999 
2000     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2001        positive zero unless rounding to minus infinity, except that
2002        adding two like-signed zeroes gives that zero.  */
2003     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
2004       sign = (rounding_mode == rmTowardNegative);
2005   } else {
2006     fs = multiplySpecials(multiplicand);
2007 
2008     /* FS can only be opOK or opInvalidOp.  There is no more work
2009        to do in the latter case.  The IEEE-754R standard says it is
2010        implementation-defined in this case whether, if ADDEND is a
2011        quiet NaN, we raise invalid op; this implementation does so.
2012 
2013        If we need to do the addition we can do so with normal
2014        precision.  */
2015     if (fs == opOK)
2016       fs = addOrSubtract(addend, rounding_mode, false);
2017   }
2018 
2019   return fs;
2020 }
2021 
2022 /* Rounding-mode correct round to integral value.  */
2023 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2024   opStatus fs;
2025 
2026   if (isInfinity())
2027     // [IEEE Std 754-2008 6.1]:
2028     // The behavior of infinity in floating-point arithmetic is derived from the
2029     // limiting cases of real arithmetic with operands of arbitrarily
2030     // large magnitude, when such a limit exists.
2031     // ...
2032     // Operations on infinite operands are usually exact and therefore signal no
2033     // exceptions ...
2034     return opOK;
2035 
2036   if (isNaN()) {
2037     if (isSignaling()) {
2038       // [IEEE Std 754-2008 6.2]:
2039       // Under default exception handling, any operation signaling an invalid
2040       // operation exception and for which a floating-point result is to be
2041       // delivered shall deliver a quiet NaN.
2042       makeQuiet();
2043       // [IEEE Std 754-2008 6.2]:
2044       // Signaling NaNs shall be reserved operands that, under default exception
2045       // handling, signal the invalid operation exception(see 7.2) for every
2046       // general-computational and signaling-computational operation except for
2047       // the conversions described in 5.12.
2048       return opInvalidOp;
2049     } else {
2050       // [IEEE Std 754-2008 6.2]:
2051       // For an operation with quiet NaN inputs, other than maximum and minimum
2052       // operations, if a floating-point result is to be delivered the result
2053       // shall be a quiet NaN which should be one of the input NaNs.
2054       // ...
2055       // Every general-computational and quiet-computational operation involving
2056       // one or more input NaNs, none of them signaling, shall signal no
2057       // exception, except fusedMultiplyAdd might signal the invalid operation
2058       // exception(see 7.2).
2059       return opOK;
2060     }
2061   }
2062 
2063   if (isZero()) {
2064     // [IEEE Std 754-2008 6.3]:
2065     // ... the sign of the result of conversions, the quantize operation, the
2066     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2067     // the sign of the first or only operand.
2068     return opOK;
2069   }
2070 
2071   // If the exponent is large enough, we know that this value is already
2072   // integral, and the arithmetic below would potentially cause it to saturate
2073   // to +/-Inf.  Bail out early instead.
2074   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2075     return opOK;
2076 
2077   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2078   // precision of our format, and then subtract it back off again.  The choice
2079   // of rounding modes for the addition/subtraction determines the rounding mode
2080   // for our integral rounding as well.
2081   // NOTE: When the input value is negative, we do subtraction followed by
2082   // addition instead.
2083   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2084   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2085   IEEEFloat MagicConstant(*semantics);
2086   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2087                                       rmNearestTiesToEven);
2088   assert(fs == opOK);
2089   MagicConstant.sign = sign;
2090 
2091   // Preserve the input sign so that we can handle the case of zero result
2092   // correctly.
2093   bool inputSign = isNegative();
2094 
2095   fs = add(MagicConstant, rounding_mode);
2096 
2097   // Current value and 'MagicConstant' are both integers, so the result of the
2098   // subtraction is always exact according to Sterbenz' lemma.
2099   subtract(MagicConstant, rounding_mode);
2100 
2101   // Restore the input sign.
2102   if (inputSign != isNegative())
2103     changeSign();
2104 
2105   return fs;
2106 }
2107 
2108 
2109 /* Comparison requires normalized numbers.  */
2110 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2111   cmpResult result;
2112 
2113   assert(semantics == rhs.semantics);
2114 
2115   switch (PackCategoriesIntoKey(category, rhs.category)) {
2116   default:
2117     llvm_unreachable(nullptr);
2118 
2119   case PackCategoriesIntoKey(fcNaN, fcZero):
2120   case PackCategoriesIntoKey(fcNaN, fcNormal):
2121   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2122   case PackCategoriesIntoKey(fcNaN, fcNaN):
2123   case PackCategoriesIntoKey(fcZero, fcNaN):
2124   case PackCategoriesIntoKey(fcNormal, fcNaN):
2125   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2126     return cmpUnordered;
2127 
2128   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2129   case PackCategoriesIntoKey(fcInfinity, fcZero):
2130   case PackCategoriesIntoKey(fcNormal, fcZero):
2131     if (sign)
2132       return cmpLessThan;
2133     else
2134       return cmpGreaterThan;
2135 
2136   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2137   case PackCategoriesIntoKey(fcZero, fcInfinity):
2138   case PackCategoriesIntoKey(fcZero, fcNormal):
2139     if (rhs.sign)
2140       return cmpGreaterThan;
2141     else
2142       return cmpLessThan;
2143 
2144   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2145     if (sign == rhs.sign)
2146       return cmpEqual;
2147     else if (sign)
2148       return cmpLessThan;
2149     else
2150       return cmpGreaterThan;
2151 
2152   case PackCategoriesIntoKey(fcZero, fcZero):
2153     return cmpEqual;
2154 
2155   case PackCategoriesIntoKey(fcNormal, fcNormal):
2156     break;
2157   }
2158 
2159   /* Two normal numbers.  Do they have the same sign?  */
2160   if (sign != rhs.sign) {
2161     if (sign)
2162       result = cmpLessThan;
2163     else
2164       result = cmpGreaterThan;
2165   } else {
2166     /* Compare absolute values; invert result if negative.  */
2167     result = compareAbsoluteValue(rhs);
2168 
2169     if (sign) {
2170       if (result == cmpLessThan)
2171         result = cmpGreaterThan;
2172       else if (result == cmpGreaterThan)
2173         result = cmpLessThan;
2174     }
2175   }
2176 
2177   return result;
2178 }
2179 
2180 /// IEEEFloat::convert - convert a value of one floating point type to another.
2181 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2182 /// records whether the transformation lost information, i.e. whether
2183 /// converting the result back to the original type will produce the
2184 /// original value (this is almost the same as return value==fsOK, but there
2185 /// are edge cases where this is not so).
2186 
2187 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2188                                        roundingMode rounding_mode,
2189                                        bool *losesInfo) {
2190   lostFraction lostFraction;
2191   unsigned int newPartCount, oldPartCount;
2192   opStatus fs;
2193   int shift;
2194   const fltSemantics &fromSemantics = *semantics;
2195 
2196   lostFraction = lfExactlyZero;
2197   newPartCount = partCountForBits(toSemantics.precision + 1);
2198   oldPartCount = partCount();
2199   shift = toSemantics.precision - fromSemantics.precision;
2200 
2201   bool X86SpecialNan = false;
2202   if (&fromSemantics == &semX87DoubleExtended &&
2203       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2204       (!(*significandParts() & 0x8000000000000000ULL) ||
2205        !(*significandParts() & 0x4000000000000000ULL))) {
2206     // x86 has some unusual NaNs which cannot be represented in any other
2207     // format; note them here.
2208     X86SpecialNan = true;
2209   }
2210 
2211   // If this is a truncation of a denormal number, and the target semantics
2212   // has larger exponent range than the source semantics (this can happen
2213   // when truncating from PowerPC double-double to double format), the
2214   // right shift could lose result mantissa bits.  Adjust exponent instead
2215   // of performing excessive shift.
2216   if (shift < 0 && isFiniteNonZero()) {
2217     int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2218     if (exponent + exponentChange < toSemantics.minExponent)
2219       exponentChange = toSemantics.minExponent - exponent;
2220     if (exponentChange < shift)
2221       exponentChange = shift;
2222     if (exponentChange < 0) {
2223       shift -= exponentChange;
2224       exponent += exponentChange;
2225     }
2226   }
2227 
2228   // If this is a truncation, perform the shift before we narrow the storage.
2229   if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2230     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2231 
2232   // Fix the storage so it can hold to new value.
2233   if (newPartCount > oldPartCount) {
2234     // The new type requires more storage; make it available.
2235     integerPart *newParts;
2236     newParts = new integerPart[newPartCount];
2237     APInt::tcSet(newParts, 0, newPartCount);
2238     if (isFiniteNonZero() || category==fcNaN)
2239       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2240     freeSignificand();
2241     significand.parts = newParts;
2242   } else if (newPartCount == 1 && oldPartCount != 1) {
2243     // Switch to built-in storage for a single part.
2244     integerPart newPart = 0;
2245     if (isFiniteNonZero() || category==fcNaN)
2246       newPart = significandParts()[0];
2247     freeSignificand();
2248     significand.part = newPart;
2249   }
2250 
2251   // Now that we have the right storage, switch the semantics.
2252   semantics = &toSemantics;
2253 
2254   // If this is an extension, perform the shift now that the storage is
2255   // available.
2256   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2257     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2258 
2259   if (isFiniteNonZero()) {
2260     fs = normalize(rounding_mode, lostFraction);
2261     *losesInfo = (fs != opOK);
2262   } else if (category == fcNaN) {
2263     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2264 
2265     // For x87 extended precision, we want to make a NaN, not a special NaN if
2266     // the input wasn't special either.
2267     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2268       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2269 
2270     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2271     // This also guarantees that a sNaN does not become Inf on a truncation
2272     // that loses all payload bits.
2273     if (isSignaling()) {
2274       makeQuiet();
2275       fs = opInvalidOp;
2276     } else {
2277       fs = opOK;
2278     }
2279   } else {
2280     *losesInfo = false;
2281     fs = opOK;
2282   }
2283 
2284   return fs;
2285 }
2286 
2287 /* Convert a floating point number to an integer according to the
2288    rounding mode.  If the rounded integer value is out of range this
2289    returns an invalid operation exception and the contents of the
2290    destination parts are unspecified.  If the rounded value is in
2291    range but the floating point number is not the exact integer, the C
2292    standard doesn't require an inexact exception to be raised.  IEEE
2293    854 does require it so we do that.
2294 
2295    Note that for conversions to integer type the C standard requires
2296    round-to-zero to always be used.  */
2297 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2298     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2299     roundingMode rounding_mode, bool *isExact) const {
2300   lostFraction lost_fraction;
2301   const integerPart *src;
2302   unsigned int dstPartsCount, truncatedBits;
2303 
2304   *isExact = false;
2305 
2306   /* Handle the three special cases first.  */
2307   if (category == fcInfinity || category == fcNaN)
2308     return opInvalidOp;
2309 
2310   dstPartsCount = partCountForBits(width);
2311   assert(dstPartsCount <= parts.size() && "Integer too big");
2312 
2313   if (category == fcZero) {
2314     APInt::tcSet(parts.data(), 0, dstPartsCount);
2315     // Negative zero can't be represented as an int.
2316     *isExact = !sign;
2317     return opOK;
2318   }
2319 
2320   src = significandParts();
2321 
2322   /* Step 1: place our absolute value, with any fraction truncated, in
2323      the destination.  */
2324   if (exponent < 0) {
2325     /* Our absolute value is less than one; truncate everything.  */
2326     APInt::tcSet(parts.data(), 0, dstPartsCount);
2327     /* For exponent -1 the integer bit represents .5, look at that.
2328        For smaller exponents leftmost truncated bit is 0. */
2329     truncatedBits = semantics->precision -1U - exponent;
2330   } else {
2331     /* We want the most significant (exponent + 1) bits; the rest are
2332        truncated.  */
2333     unsigned int bits = exponent + 1U;
2334 
2335     /* Hopelessly large in magnitude?  */
2336     if (bits > width)
2337       return opInvalidOp;
2338 
2339     if (bits < semantics->precision) {
2340       /* We truncate (semantics->precision - bits) bits.  */
2341       truncatedBits = semantics->precision - bits;
2342       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2343     } else {
2344       /* We want at least as many bits as are available.  */
2345       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2346                        0);
2347       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2348                          bits - semantics->precision);
2349       truncatedBits = 0;
2350     }
2351   }
2352 
2353   /* Step 2: work out any lost fraction, and increment the absolute
2354      value if we would round away from zero.  */
2355   if (truncatedBits) {
2356     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2357                                                   truncatedBits);
2358     if (lost_fraction != lfExactlyZero &&
2359         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2360       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2361         return opInvalidOp;     /* Overflow.  */
2362     }
2363   } else {
2364     lost_fraction = lfExactlyZero;
2365   }
2366 
2367   /* Step 3: check if we fit in the destination.  */
2368   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2369 
2370   if (sign) {
2371     if (!isSigned) {
2372       /* Negative numbers cannot be represented as unsigned.  */
2373       if (omsb != 0)
2374         return opInvalidOp;
2375     } else {
2376       /* It takes omsb bits to represent the unsigned integer value.
2377          We lose a bit for the sign, but care is needed as the
2378          maximally negative integer is a special case.  */
2379       if (omsb == width &&
2380           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2381         return opInvalidOp;
2382 
2383       /* This case can happen because of rounding.  */
2384       if (omsb > width)
2385         return opInvalidOp;
2386     }
2387 
2388     APInt::tcNegate (parts.data(), dstPartsCount);
2389   } else {
2390     if (omsb >= width + !isSigned)
2391       return opInvalidOp;
2392   }
2393 
2394   if (lost_fraction == lfExactlyZero) {
2395     *isExact = true;
2396     return opOK;
2397   } else
2398     return opInexact;
2399 }
2400 
2401 /* Same as convertToSignExtendedInteger, except we provide
2402    deterministic values in case of an invalid operation exception,
2403    namely zero for NaNs and the minimal or maximal value respectively
2404    for underflow or overflow.
2405    The *isExact output tells whether the result is exact, in the sense
2406    that converting it back to the original floating point type produces
2407    the original value.  This is almost equivalent to result==opOK,
2408    except for negative zeroes.
2409 */
2410 IEEEFloat::opStatus
2411 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2412                             unsigned int width, bool isSigned,
2413                             roundingMode rounding_mode, bool *isExact) const {
2414   opStatus fs;
2415 
2416   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2417                                     isExact);
2418 
2419   if (fs == opInvalidOp) {
2420     unsigned int bits, dstPartsCount;
2421 
2422     dstPartsCount = partCountForBits(width);
2423     assert(dstPartsCount <= parts.size() && "Integer too big");
2424 
2425     if (category == fcNaN)
2426       bits = 0;
2427     else if (sign)
2428       bits = isSigned;
2429     else
2430       bits = width - isSigned;
2431 
2432     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2433     if (sign && isSigned)
2434       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2435   }
2436 
2437   return fs;
2438 }
2439 
2440 /* Convert an unsigned integer SRC to a floating point number,
2441    rounding according to ROUNDING_MODE.  The sign of the floating
2442    point number is not modified.  */
2443 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2444     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2445   unsigned int omsb, precision, dstCount;
2446   integerPart *dst;
2447   lostFraction lost_fraction;
2448 
2449   category = fcNormal;
2450   omsb = APInt::tcMSB(src, srcCount) + 1;
2451   dst = significandParts();
2452   dstCount = partCount();
2453   precision = semantics->precision;
2454 
2455   /* We want the most significant PRECISION bits of SRC.  There may not
2456      be that many; extract what we can.  */
2457   if (precision <= omsb) {
2458     exponent = omsb - 1;
2459     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2460                                                   omsb - precision);
2461     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2462   } else {
2463     exponent = precision - 1;
2464     lost_fraction = lfExactlyZero;
2465     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2466   }
2467 
2468   return normalize(rounding_mode, lost_fraction);
2469 }
2470 
2471 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2472                                                 roundingMode rounding_mode) {
2473   unsigned int partCount = Val.getNumWords();
2474   APInt api = Val;
2475 
2476   sign = false;
2477   if (isSigned && api.isNegative()) {
2478     sign = true;
2479     api = -api;
2480   }
2481 
2482   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2483 }
2484 
2485 /* Convert a two's complement integer SRC to a floating point number,
2486    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2487    integer is signed, in which case it must be sign-extended.  */
2488 IEEEFloat::opStatus
2489 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2490                                           unsigned int srcCount, bool isSigned,
2491                                           roundingMode rounding_mode) {
2492   opStatus status;
2493 
2494   if (isSigned &&
2495       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2496     integerPart *copy;
2497 
2498     /* If we're signed and negative negate a copy.  */
2499     sign = true;
2500     copy = new integerPart[srcCount];
2501     APInt::tcAssign(copy, src, srcCount);
2502     APInt::tcNegate(copy, srcCount);
2503     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2504     delete [] copy;
2505   } else {
2506     sign = false;
2507     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2508   }
2509 
2510   return status;
2511 }
2512 
2513 /* FIXME: should this just take a const APInt reference?  */
2514 IEEEFloat::opStatus
2515 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2516                                           unsigned int width, bool isSigned,
2517                                           roundingMode rounding_mode) {
2518   unsigned int partCount = partCountForBits(width);
2519   APInt api = APInt(width, makeArrayRef(parts, partCount));
2520 
2521   sign = false;
2522   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2523     sign = true;
2524     api = -api;
2525   }
2526 
2527   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2528 }
2529 
2530 Expected<IEEEFloat::opStatus>
2531 IEEEFloat::convertFromHexadecimalString(StringRef s,
2532                                         roundingMode rounding_mode) {
2533   lostFraction lost_fraction = lfExactlyZero;
2534 
2535   category = fcNormal;
2536   zeroSignificand();
2537   exponent = 0;
2538 
2539   integerPart *significand = significandParts();
2540   unsigned partsCount = partCount();
2541   unsigned bitPos = partsCount * integerPartWidth;
2542   bool computedTrailingFraction = false;
2543 
2544   // Skip leading zeroes and any (hexa)decimal point.
2545   StringRef::iterator begin = s.begin();
2546   StringRef::iterator end = s.end();
2547   StringRef::iterator dot;
2548   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2549   if (!PtrOrErr)
2550     return PtrOrErr.takeError();
2551   StringRef::iterator p = *PtrOrErr;
2552   StringRef::iterator firstSignificantDigit = p;
2553 
2554   while (p != end) {
2555     integerPart hex_value;
2556 
2557     if (*p == '.') {
2558       if (dot != end)
2559         return createError("String contains multiple dots");
2560       dot = p++;
2561       continue;
2562     }
2563 
2564     hex_value = hexDigitValue(*p);
2565     if (hex_value == -1U)
2566       break;
2567 
2568     p++;
2569 
2570     // Store the number while we have space.
2571     if (bitPos) {
2572       bitPos -= 4;
2573       hex_value <<= bitPos % integerPartWidth;
2574       significand[bitPos / integerPartWidth] |= hex_value;
2575     } else if (!computedTrailingFraction) {
2576       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2577       if (!FractOrErr)
2578         return FractOrErr.takeError();
2579       lost_fraction = *FractOrErr;
2580       computedTrailingFraction = true;
2581     }
2582   }
2583 
2584   /* Hex floats require an exponent but not a hexadecimal point.  */
2585   if (p == end)
2586     return createError("Hex strings require an exponent");
2587   if (*p != 'p' && *p != 'P')
2588     return createError("Invalid character in significand");
2589   if (p == begin)
2590     return createError("Significand has no digits");
2591   if (dot != end && p - begin == 1)
2592     return createError("Significand has no digits");
2593 
2594   /* Ignore the exponent if we are zero.  */
2595   if (p != firstSignificantDigit) {
2596     int expAdjustment;
2597 
2598     /* Implicit hexadecimal point?  */
2599     if (dot == end)
2600       dot = p;
2601 
2602     /* Calculate the exponent adjustment implicit in the number of
2603        significant digits.  */
2604     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2605     if (expAdjustment < 0)
2606       expAdjustment++;
2607     expAdjustment = expAdjustment * 4 - 1;
2608 
2609     /* Adjust for writing the significand starting at the most
2610        significant nibble.  */
2611     expAdjustment += semantics->precision;
2612     expAdjustment -= partsCount * integerPartWidth;
2613 
2614     /* Adjust for the given exponent.  */
2615     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2616     if (!ExpOrErr)
2617       return ExpOrErr.takeError();
2618     exponent = *ExpOrErr;
2619   }
2620 
2621   return normalize(rounding_mode, lost_fraction);
2622 }
2623 
2624 IEEEFloat::opStatus
2625 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2626                                         unsigned sigPartCount, int exp,
2627                                         roundingMode rounding_mode) {
2628   unsigned int parts, pow5PartCount;
2629   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2630   integerPart pow5Parts[maxPowerOfFiveParts];
2631   bool isNearest;
2632 
2633   isNearest = (rounding_mode == rmNearestTiesToEven ||
2634                rounding_mode == rmNearestTiesToAway);
2635 
2636   parts = partCountForBits(semantics->precision + 11);
2637 
2638   /* Calculate pow(5, abs(exp)).  */
2639   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2640 
2641   for (;; parts *= 2) {
2642     opStatus sigStatus, powStatus;
2643     unsigned int excessPrecision, truncatedBits;
2644 
2645     calcSemantics.precision = parts * integerPartWidth - 1;
2646     excessPrecision = calcSemantics.precision - semantics->precision;
2647     truncatedBits = excessPrecision;
2648 
2649     IEEEFloat decSig(calcSemantics, uninitialized);
2650     decSig.makeZero(sign);
2651     IEEEFloat pow5(calcSemantics);
2652 
2653     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2654                                                 rmNearestTiesToEven);
2655     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2656                                               rmNearestTiesToEven);
2657     /* Add exp, as 10^n = 5^n * 2^n.  */
2658     decSig.exponent += exp;
2659 
2660     lostFraction calcLostFraction;
2661     integerPart HUerr, HUdistance;
2662     unsigned int powHUerr;
2663 
2664     if (exp >= 0) {
2665       /* multiplySignificand leaves the precision-th bit set to 1.  */
2666       calcLostFraction = decSig.multiplySignificand(pow5);
2667       powHUerr = powStatus != opOK;
2668     } else {
2669       calcLostFraction = decSig.divideSignificand(pow5);
2670       /* Denormal numbers have less precision.  */
2671       if (decSig.exponent < semantics->minExponent) {
2672         excessPrecision += (semantics->minExponent - decSig.exponent);
2673         truncatedBits = excessPrecision;
2674         if (excessPrecision > calcSemantics.precision)
2675           excessPrecision = calcSemantics.precision;
2676       }
2677       /* Extra half-ulp lost in reciprocal of exponent.  */
2678       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2679     }
2680 
2681     /* Both multiplySignificand and divideSignificand return the
2682        result with the integer bit set.  */
2683     assert(APInt::tcExtractBit
2684            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2685 
2686     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2687                        powHUerr);
2688     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2689                                       excessPrecision, isNearest);
2690 
2691     /* Are we guaranteed to round correctly if we truncate?  */
2692     if (HUdistance >= HUerr) {
2693       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2694                        calcSemantics.precision - excessPrecision,
2695                        excessPrecision);
2696       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2697          above we must adjust our exponent to compensate for the
2698          implicit right shift.  */
2699       exponent = (decSig.exponent + semantics->precision
2700                   - (calcSemantics.precision - excessPrecision));
2701       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2702                                                        decSig.partCount(),
2703                                                        truncatedBits);
2704       return normalize(rounding_mode, calcLostFraction);
2705     }
2706   }
2707 }
2708 
2709 Expected<IEEEFloat::opStatus>
2710 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2711   decimalInfo D;
2712   opStatus fs;
2713 
2714   /* Scan the text.  */
2715   StringRef::iterator p = str.begin();
2716   if (Error Err = interpretDecimal(p, str.end(), &D))
2717     return std::move(Err);
2718 
2719   /* Handle the quick cases.  First the case of no significant digits,
2720      i.e. zero, and then exponents that are obviously too large or too
2721      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
2722      definitely overflows if
2723 
2724            (exp - 1) * L >= maxExponent
2725 
2726      and definitely underflows to zero where
2727 
2728            (exp + 1) * L <= minExponent - precision
2729 
2730      With integer arithmetic the tightest bounds for L are
2731 
2732            93/28 < L < 196/59            [ numerator <= 256 ]
2733            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
2734   */
2735 
2736   // Test if we have a zero number allowing for strings with no null terminators
2737   // and zero decimals with non-zero exponents.
2738   //
2739   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2740   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2741   // be at most one dot. On the other hand, if we have a zero with a non-zero
2742   // exponent, then we know that D.firstSigDigit will be non-numeric.
2743   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2744     category = fcZero;
2745     fs = opOK;
2746 
2747   /* Check whether the normalized exponent is high enough to overflow
2748      max during the log-rebasing in the max-exponent check below. */
2749   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2750     fs = handleOverflow(rounding_mode);
2751 
2752   /* If it wasn't, then it also wasn't high enough to overflow max
2753      during the log-rebasing in the min-exponent check.  Check that it
2754      won't overflow min in either check, then perform the min-exponent
2755      check. */
2756   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2757              (D.normalizedExponent + 1) * 28738 <=
2758                8651 * (semantics->minExponent - (int) semantics->precision)) {
2759     /* Underflow to zero and round.  */
2760     category = fcNormal;
2761     zeroSignificand();
2762     fs = normalize(rounding_mode, lfLessThanHalf);
2763 
2764   /* We can finally safely perform the max-exponent check. */
2765   } else if ((D.normalizedExponent - 1) * 42039
2766              >= 12655 * semantics->maxExponent) {
2767     /* Overflow and round.  */
2768     fs = handleOverflow(rounding_mode);
2769   } else {
2770     integerPart *decSignificand;
2771     unsigned int partCount;
2772 
2773     /* A tight upper bound on number of bits required to hold an
2774        N-digit decimal integer is N * 196 / 59.  Allocate enough space
2775        to hold the full significand, and an extra part required by
2776        tcMultiplyPart.  */
2777     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2778     partCount = partCountForBits(1 + 196 * partCount / 59);
2779     decSignificand = new integerPart[partCount + 1];
2780     partCount = 0;
2781 
2782     /* Convert to binary efficiently - we do almost all multiplication
2783        in an integerPart.  When this would overflow do we do a single
2784        bignum multiplication, and then revert again to multiplication
2785        in an integerPart.  */
2786     do {
2787       integerPart decValue, val, multiplier;
2788 
2789       val = 0;
2790       multiplier = 1;
2791 
2792       do {
2793         if (*p == '.') {
2794           p++;
2795           if (p == str.end()) {
2796             break;
2797           }
2798         }
2799         decValue = decDigitValue(*p++);
2800         if (decValue >= 10U) {
2801           delete[] decSignificand;
2802           return createError("Invalid character in significand");
2803         }
2804         multiplier *= 10;
2805         val = val * 10 + decValue;
2806         /* The maximum number that can be multiplied by ten with any
2807            digit added without overflowing an integerPart.  */
2808       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2809 
2810       /* Multiply out the current part.  */
2811       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2812                             partCount, partCount + 1, false);
2813 
2814       /* If we used another part (likely but not guaranteed), increase
2815          the count.  */
2816       if (decSignificand[partCount])
2817         partCount++;
2818     } while (p <= D.lastSigDigit);
2819 
2820     category = fcNormal;
2821     fs = roundSignificandWithExponent(decSignificand, partCount,
2822                                       D.exponent, rounding_mode);
2823 
2824     delete [] decSignificand;
2825   }
2826 
2827   return fs;
2828 }
2829 
2830 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2831   const size_t MIN_NAME_SIZE = 3;
2832 
2833   if (str.size() < MIN_NAME_SIZE)
2834     return false;
2835 
2836   if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2837     makeInf(false);
2838     return true;
2839   }
2840 
2841   bool IsNegative = str.front() == '-';
2842   if (IsNegative) {
2843     str = str.drop_front();
2844     if (str.size() < MIN_NAME_SIZE)
2845       return false;
2846 
2847     if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
2848       makeInf(true);
2849       return true;
2850     }
2851   }
2852 
2853   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
2854   bool IsSignaling = str.front() == 's' || str.front() == 'S';
2855   if (IsSignaling) {
2856     str = str.drop_front();
2857     if (str.size() < MIN_NAME_SIZE)
2858       return false;
2859   }
2860 
2861   if (str.startswith("nan") || str.startswith("NaN")) {
2862     str = str.drop_front(3);
2863 
2864     // A NaN without payload.
2865     if (str.empty()) {
2866       makeNaN(IsSignaling, IsNegative);
2867       return true;
2868     }
2869 
2870     // Allow the payload to be inside parentheses.
2871     if (str.front() == '(') {
2872       // Parentheses should be balanced (and not empty).
2873       if (str.size() <= 2 || str.back() != ')')
2874         return false;
2875 
2876       str = str.slice(1, str.size() - 1);
2877     }
2878 
2879     // Determine the payload number's radix.
2880     unsigned Radix = 10;
2881     if (str[0] == '0') {
2882       if (str.size() > 1 && tolower(str[1]) == 'x') {
2883         str = str.drop_front(2);
2884         Radix = 16;
2885       } else
2886         Radix = 8;
2887     }
2888 
2889     // Parse the payload and make the NaN.
2890     APInt Payload;
2891     if (!str.getAsInteger(Radix, Payload)) {
2892       makeNaN(IsSignaling, IsNegative, &Payload);
2893       return true;
2894     }
2895   }
2896 
2897   return false;
2898 }
2899 
2900 Expected<IEEEFloat::opStatus>
2901 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
2902   if (str.empty())
2903     return createError("Invalid string length");
2904 
2905   // Handle special cases.
2906   if (convertFromStringSpecials(str))
2907     return opOK;
2908 
2909   /* Handle a leading minus sign.  */
2910   StringRef::iterator p = str.begin();
2911   size_t slen = str.size();
2912   sign = *p == '-' ? 1 : 0;
2913   if (*p == '-' || *p == '+') {
2914     p++;
2915     slen--;
2916     if (!slen)
2917       return createError("String has no digits");
2918   }
2919 
2920   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2921     if (slen == 2)
2922       return createError("Invalid string");
2923     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2924                                         rounding_mode);
2925   }
2926 
2927   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2928 }
2929 
2930 /* Write out a hexadecimal representation of the floating point value
2931    to DST, which must be of sufficient size, in the C99 form
2932    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
2933    excluding the terminating NUL.
2934 
2935    If UPPERCASE, the output is in upper case, otherwise in lower case.
2936 
2937    HEXDIGITS digits appear altogether, rounding the value if
2938    necessary.  If HEXDIGITS is 0, the minimal precision to display the
2939    number precisely is used instead.  If nothing would appear after
2940    the decimal point it is suppressed.
2941 
2942    The decimal exponent is always printed and has at least one digit.
2943    Zero values display an exponent of zero.  Infinities and NaNs
2944    appear as "infinity" or "nan" respectively.
2945 
2946    The above rules are as specified by C99.  There is ambiguity about
2947    what the leading hexadecimal digit should be.  This implementation
2948    uses whatever is necessary so that the exponent is displayed as
2949    stored.  This implies the exponent will fall within the IEEE format
2950    range, and the leading hexadecimal digit will be 0 (for denormals),
2951    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2952    any other digits zero).
2953 */
2954 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2955                                            bool upperCase,
2956                                            roundingMode rounding_mode) const {
2957   char *p;
2958 
2959   p = dst;
2960   if (sign)
2961     *dst++ = '-';
2962 
2963   switch (category) {
2964   case fcInfinity:
2965     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2966     dst += sizeof infinityL - 1;
2967     break;
2968 
2969   case fcNaN:
2970     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2971     dst += sizeof NaNU - 1;
2972     break;
2973 
2974   case fcZero:
2975     *dst++ = '0';
2976     *dst++ = upperCase ? 'X': 'x';
2977     *dst++ = '0';
2978     if (hexDigits > 1) {
2979       *dst++ = '.';
2980       memset (dst, '0', hexDigits - 1);
2981       dst += hexDigits - 1;
2982     }
2983     *dst++ = upperCase ? 'P': 'p';
2984     *dst++ = '0';
2985     break;
2986 
2987   case fcNormal:
2988     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2989     break;
2990   }
2991 
2992   *dst = 0;
2993 
2994   return static_cast<unsigned int>(dst - p);
2995 }
2996 
2997 /* Does the hard work of outputting the correctly rounded hexadecimal
2998    form of a normal floating point number with the specified number of
2999    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
3000    digits necessary to print the value precisely is output.  */
3001 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3002                                           bool upperCase,
3003                                           roundingMode rounding_mode) const {
3004   unsigned int count, valueBits, shift, partsCount, outputDigits;
3005   const char *hexDigitChars;
3006   const integerPart *significand;
3007   char *p;
3008   bool roundUp;
3009 
3010   *dst++ = '0';
3011   *dst++ = upperCase ? 'X': 'x';
3012 
3013   roundUp = false;
3014   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3015 
3016   significand = significandParts();
3017   partsCount = partCount();
3018 
3019   /* +3 because the first digit only uses the single integer bit, so
3020      we have 3 virtual zero most-significant-bits.  */
3021   valueBits = semantics->precision + 3;
3022   shift = integerPartWidth - valueBits % integerPartWidth;
3023 
3024   /* The natural number of digits required ignoring trailing
3025      insignificant zeroes.  */
3026   outputDigits = (valueBits - significandLSB () + 3) / 4;
3027 
3028   /* hexDigits of zero means use the required number for the
3029      precision.  Otherwise, see if we are truncating.  If we are,
3030      find out if we need to round away from zero.  */
3031   if (hexDigits) {
3032     if (hexDigits < outputDigits) {
3033       /* We are dropping non-zero bits, so need to check how to round.
3034          "bits" is the number of dropped bits.  */
3035       unsigned int bits;
3036       lostFraction fraction;
3037 
3038       bits = valueBits - hexDigits * 4;
3039       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3040       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3041     }
3042     outputDigits = hexDigits;
3043   }
3044 
3045   /* Write the digits consecutively, and start writing in the location
3046      of the hexadecimal point.  We move the most significant digit
3047      left and add the hexadecimal point later.  */
3048   p = ++dst;
3049 
3050   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3051 
3052   while (outputDigits && count) {
3053     integerPart part;
3054 
3055     /* Put the most significant integerPartWidth bits in "part".  */
3056     if (--count == partsCount)
3057       part = 0;  /* An imaginary higher zero part.  */
3058     else
3059       part = significand[count] << shift;
3060 
3061     if (count && shift)
3062       part |= significand[count - 1] >> (integerPartWidth - shift);
3063 
3064     /* Convert as much of "part" to hexdigits as we can.  */
3065     unsigned int curDigits = integerPartWidth / 4;
3066 
3067     if (curDigits > outputDigits)
3068       curDigits = outputDigits;
3069     dst += partAsHex (dst, part, curDigits, hexDigitChars);
3070     outputDigits -= curDigits;
3071   }
3072 
3073   if (roundUp) {
3074     char *q = dst;
3075 
3076     /* Note that hexDigitChars has a trailing '0'.  */
3077     do {
3078       q--;
3079       *q = hexDigitChars[hexDigitValue (*q) + 1];
3080     } while (*q == '0');
3081     assert(q >= p);
3082   } else {
3083     /* Add trailing zeroes.  */
3084     memset (dst, '0', outputDigits);
3085     dst += outputDigits;
3086   }
3087 
3088   /* Move the most significant digit to before the point, and if there
3089      is something after the decimal point add it.  This must come
3090      after rounding above.  */
3091   p[-1] = p[0];
3092   if (dst -1 == p)
3093     dst--;
3094   else
3095     p[0] = '.';
3096 
3097   /* Finally output the exponent.  */
3098   *dst++ = upperCase ? 'P': 'p';
3099 
3100   return writeSignedDecimal (dst, exponent);
3101 }
3102 
3103 hash_code hash_value(const IEEEFloat &Arg) {
3104   if (!Arg.isFiniteNonZero())
3105     return hash_combine((uint8_t)Arg.category,
3106                         // NaN has no sign, fix it at zero.
3107                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3108                         Arg.semantics->precision);
3109 
3110   // Normal floats need their exponent and significand hashed.
3111   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3112                       Arg.semantics->precision, Arg.exponent,
3113                       hash_combine_range(
3114                         Arg.significandParts(),
3115                         Arg.significandParts() + Arg.partCount()));
3116 }
3117 
3118 // Conversion from APFloat to/from host float/double.  It may eventually be
3119 // possible to eliminate these and have everybody deal with APFloats, but that
3120 // will take a while.  This approach will not easily extend to long double.
3121 // Current implementation requires integerPartWidth==64, which is correct at
3122 // the moment but could be made more general.
3123 
3124 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3126 
3127 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3128   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3129   assert(partCount()==2);
3130 
3131   uint64_t myexponent, mysignificand;
3132 
3133   if (isFiniteNonZero()) {
3134     myexponent = exponent+16383; //bias
3135     mysignificand = significandParts()[0];
3136     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3137       myexponent = 0;   // denormal
3138   } else if (category==fcZero) {
3139     myexponent = 0;
3140     mysignificand = 0;
3141   } else if (category==fcInfinity) {
3142     myexponent = 0x7fff;
3143     mysignificand = 0x8000000000000000ULL;
3144   } else {
3145     assert(category == fcNaN && "Unknown category");
3146     myexponent = 0x7fff;
3147     mysignificand = significandParts()[0];
3148   }
3149 
3150   uint64_t words[2];
3151   words[0] = mysignificand;
3152   words[1] =  ((uint64_t)(sign & 1) << 15) |
3153               (myexponent & 0x7fffLL);
3154   return APInt(80, words);
3155 }
3156 
3157 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3158   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3159   assert(partCount()==2);
3160 
3161   uint64_t words[2];
3162   opStatus fs;
3163   bool losesInfo;
3164 
3165   // Convert number to double.  To avoid spurious underflows, we re-
3166   // normalize against the "double" minExponent first, and only *then*
3167   // truncate the mantissa.  The result of that second conversion
3168   // may be inexact, but should never underflow.
3169   // Declare fltSemantics before APFloat that uses it (and
3170   // saves pointer to it) to ensure correct destruction order.
3171   fltSemantics extendedSemantics = *semantics;
3172   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3173   IEEEFloat extended(*this);
3174   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3175   assert(fs == opOK && !losesInfo);
3176   (void)fs;
3177 
3178   IEEEFloat u(extended);
3179   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3180   assert(fs == opOK || fs == opInexact);
3181   (void)fs;
3182   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3183 
3184   // If conversion was exact or resulted in a special case, we're done;
3185   // just set the second double to zero.  Otherwise, re-convert back to
3186   // the extended format and compute the difference.  This now should
3187   // convert exactly to double.
3188   if (u.isFiniteNonZero() && losesInfo) {
3189     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3190     assert(fs == opOK && !losesInfo);
3191     (void)fs;
3192 
3193     IEEEFloat v(extended);
3194     v.subtract(u, rmNearestTiesToEven);
3195     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3196     assert(fs == opOK && !losesInfo);
3197     (void)fs;
3198     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3199   } else {
3200     words[1] = 0;
3201   }
3202 
3203   return APInt(128, words);
3204 }
3205 
3206 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3207   assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
3208   assert(partCount()==2);
3209 
3210   uint64_t myexponent, mysignificand, mysignificand2;
3211 
3212   if (isFiniteNonZero()) {
3213     myexponent = exponent+16383; //bias
3214     mysignificand = significandParts()[0];
3215     mysignificand2 = significandParts()[1];
3216     if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
3217       myexponent = 0;   // denormal
3218   } else if (category==fcZero) {
3219     myexponent = 0;
3220     mysignificand = mysignificand2 = 0;
3221   } else if (category==fcInfinity) {
3222     myexponent = 0x7fff;
3223     mysignificand = mysignificand2 = 0;
3224   } else {
3225     assert(category == fcNaN && "Unknown category!");
3226     myexponent = 0x7fff;
3227     mysignificand = significandParts()[0];
3228     mysignificand2 = significandParts()[1];
3229   }
3230 
3231   uint64_t words[2];
3232   words[0] = mysignificand;
3233   words[1] = ((uint64_t)(sign & 1) << 63) |
3234              ((myexponent & 0x7fff) << 48) |
3235              (mysignificand2 & 0xffffffffffffLL);
3236 
3237   return APInt(128, words);
3238 }
3239 
3240 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3241   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
3242   assert(partCount()==1);
3243 
3244   uint64_t myexponent, mysignificand;
3245 
3246   if (isFiniteNonZero()) {
3247     myexponent = exponent+1023; //bias
3248     mysignificand = *significandParts();
3249     if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3250       myexponent = 0;   // denormal
3251   } else if (category==fcZero) {
3252     myexponent = 0;
3253     mysignificand = 0;
3254   } else if (category==fcInfinity) {
3255     myexponent = 0x7ff;
3256     mysignificand = 0;
3257   } else {
3258     assert(category == fcNaN && "Unknown category!");
3259     myexponent = 0x7ff;
3260     mysignificand = *significandParts();
3261   }
3262 
3263   return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3264                      ((myexponent & 0x7ff) <<  52) |
3265                      (mysignificand & 0xfffffffffffffLL))));
3266 }
3267 
3268 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3269   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3270   assert(partCount()==1);
3271 
3272   uint32_t myexponent, mysignificand;
3273 
3274   if (isFiniteNonZero()) {
3275     myexponent = exponent+127; //bias
3276     mysignificand = (uint32_t)*significandParts();
3277     if (myexponent == 1 && !(mysignificand & 0x800000))
3278       myexponent = 0;   // denormal
3279   } else if (category==fcZero) {
3280     myexponent = 0;
3281     mysignificand = 0;
3282   } else if (category==fcInfinity) {
3283     myexponent = 0xff;
3284     mysignificand = 0;
3285   } else {
3286     assert(category == fcNaN && "Unknown category!");
3287     myexponent = 0xff;
3288     mysignificand = (uint32_t)*significandParts();
3289   }
3290 
3291   return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3292                     (mysignificand & 0x7fffff)));
3293 }
3294 
3295 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3296   assert(semantics == (const llvm::fltSemantics *)&semBFloat);
3297   assert(partCount() == 1);
3298 
3299   uint32_t myexponent, mysignificand;
3300 
3301   if (isFiniteNonZero()) {
3302     myexponent = exponent + 127; // bias
3303     mysignificand = (uint32_t)*significandParts();
3304     if (myexponent == 1 && !(mysignificand & 0x80))
3305       myexponent = 0; // denormal
3306   } else if (category == fcZero) {
3307     myexponent = 0;
3308     mysignificand = 0;
3309   } else if (category == fcInfinity) {
3310     myexponent = 0xff;
3311     mysignificand = 0;
3312   } else {
3313     assert(category == fcNaN && "Unknown category!");
3314     myexponent = 0xff;
3315     mysignificand = (uint32_t)*significandParts();
3316   }
3317 
3318   return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |
3319                     (mysignificand & 0x7f)));
3320 }
3321 
3322 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3323   assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3324   assert(partCount()==1);
3325 
3326   uint32_t myexponent, mysignificand;
3327 
3328   if (isFiniteNonZero()) {
3329     myexponent = exponent+15; //bias
3330     mysignificand = (uint32_t)*significandParts();
3331     if (myexponent == 1 && !(mysignificand & 0x400))
3332       myexponent = 0;   // denormal
3333   } else if (category==fcZero) {
3334     myexponent = 0;
3335     mysignificand = 0;
3336   } else if (category==fcInfinity) {
3337     myexponent = 0x1f;
3338     mysignificand = 0;
3339   } else {
3340     assert(category == fcNaN && "Unknown category!");
3341     myexponent = 0x1f;
3342     mysignificand = (uint32_t)*significandParts();
3343   }
3344 
3345   return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3346                     (mysignificand & 0x3ff)));
3347 }
3348 
3349 // This function creates an APInt that is just a bit map of the floating
3350 // point constant as it would appear in memory.  It is not a conversion,
3351 // and treating the result as a normal integer is unlikely to be useful.
3352 
3353 APInt IEEEFloat::bitcastToAPInt() const {
3354   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3355     return convertHalfAPFloatToAPInt();
3356 
3357   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3358     return convertBFloatAPFloatToAPInt();
3359 
3360   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3361     return convertFloatAPFloatToAPInt();
3362 
3363   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3364     return convertDoubleAPFloatToAPInt();
3365 
3366   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3367     return convertQuadrupleAPFloatToAPInt();
3368 
3369   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3370     return convertPPCDoubleDoubleAPFloatToAPInt();
3371 
3372   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3373          "unknown format!");
3374   return convertF80LongDoubleAPFloatToAPInt();
3375 }
3376 
3377 float IEEEFloat::convertToFloat() const {
3378   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3379          "Float semantics are not IEEEsingle");
3380   APInt api = bitcastToAPInt();
3381   return api.bitsToFloat();
3382 }
3383 
3384 double IEEEFloat::convertToDouble() const {
3385   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3386          "Float semantics are not IEEEdouble");
3387   APInt api = bitcastToAPInt();
3388   return api.bitsToDouble();
3389 }
3390 
3391 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3392 /// does not support these bit patterns:
3393 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3394 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3395 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3396 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3397 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3398 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3399   uint64_t i1 = api.getRawData()[0];
3400   uint64_t i2 = api.getRawData()[1];
3401   uint64_t myexponent = (i2 & 0x7fff);
3402   uint64_t mysignificand = i1;
3403   uint8_t myintegerbit = mysignificand >> 63;
3404 
3405   initialize(&semX87DoubleExtended);
3406   assert(partCount()==2);
3407 
3408   sign = static_cast<unsigned int>(i2>>15);
3409   if (myexponent == 0 && mysignificand == 0) {
3410     makeZero(sign);
3411   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3412     makeInf(sign);
3413   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3414              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3415     category = fcNaN;
3416     exponent = exponentNaN();
3417     significandParts()[0] = mysignificand;
3418     significandParts()[1] = 0;
3419   } else {
3420     category = fcNormal;
3421     exponent = myexponent - 16383;
3422     significandParts()[0] = mysignificand;
3423     significandParts()[1] = 0;
3424     if (myexponent==0)          // denormal
3425       exponent = -16382;
3426   }
3427 }
3428 
3429 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3430   uint64_t i1 = api.getRawData()[0];
3431   uint64_t i2 = api.getRawData()[1];
3432   opStatus fs;
3433   bool losesInfo;
3434 
3435   // Get the first double and convert to our format.
3436   initFromDoubleAPInt(APInt(64, i1));
3437   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3438   assert(fs == opOK && !losesInfo);
3439   (void)fs;
3440 
3441   // Unless we have a special case, add in second double.
3442   if (isFiniteNonZero()) {
3443     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3444     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3445     assert(fs == opOK && !losesInfo);
3446     (void)fs;
3447 
3448     add(v, rmNearestTiesToEven);
3449   }
3450 }
3451 
3452 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3453   uint64_t i1 = api.getRawData()[0];
3454   uint64_t i2 = api.getRawData()[1];
3455   uint64_t myexponent = (i2 >> 48) & 0x7fff;
3456   uint64_t mysignificand  = i1;
3457   uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3458 
3459   initialize(&semIEEEquad);
3460   assert(partCount()==2);
3461 
3462   sign = static_cast<unsigned int>(i2>>63);
3463   if (myexponent==0 &&
3464       (mysignificand==0 && mysignificand2==0)) {
3465     makeZero(sign);
3466   } else if (myexponent==0x7fff &&
3467              (mysignificand==0 && mysignificand2==0)) {
3468     makeInf(sign);
3469   } else if (myexponent==0x7fff &&
3470              (mysignificand!=0 || mysignificand2 !=0)) {
3471     category = fcNaN;
3472     exponent = exponentNaN();
3473     significandParts()[0] = mysignificand;
3474     significandParts()[1] = mysignificand2;
3475   } else {
3476     category = fcNormal;
3477     exponent = myexponent - 16383;
3478     significandParts()[0] = mysignificand;
3479     significandParts()[1] = mysignificand2;
3480     if (myexponent==0)          // denormal
3481       exponent = -16382;
3482     else
3483       significandParts()[1] |= 0x1000000000000LL;  // integer bit
3484   }
3485 }
3486 
3487 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3488   uint64_t i = *api.getRawData();
3489   uint64_t myexponent = (i >> 52) & 0x7ff;
3490   uint64_t mysignificand = i & 0xfffffffffffffLL;
3491 
3492   initialize(&semIEEEdouble);
3493   assert(partCount()==1);
3494 
3495   sign = static_cast<unsigned int>(i>>63);
3496   if (myexponent==0 && mysignificand==0) {
3497     makeZero(sign);
3498   } else if (myexponent==0x7ff && mysignificand==0) {
3499     makeInf(sign);
3500   } else if (myexponent==0x7ff && mysignificand!=0) {
3501     category = fcNaN;
3502     exponent = exponentNaN();
3503     *significandParts() = mysignificand;
3504   } else {
3505     category = fcNormal;
3506     exponent = myexponent - 1023;
3507     *significandParts() = mysignificand;
3508     if (myexponent==0)          // denormal
3509       exponent = -1022;
3510     else
3511       *significandParts() |= 0x10000000000000LL;  // integer bit
3512   }
3513 }
3514 
3515 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3516   uint32_t i = (uint32_t)*api.getRawData();
3517   uint32_t myexponent = (i >> 23) & 0xff;
3518   uint32_t mysignificand = i & 0x7fffff;
3519 
3520   initialize(&semIEEEsingle);
3521   assert(partCount()==1);
3522 
3523   sign = i >> 31;
3524   if (myexponent==0 && mysignificand==0) {
3525     makeZero(sign);
3526   } else if (myexponent==0xff && mysignificand==0) {
3527     makeInf(sign);
3528   } else if (myexponent==0xff && mysignificand!=0) {
3529     category = fcNaN;
3530     exponent = exponentNaN();
3531     *significandParts() = mysignificand;
3532   } else {
3533     category = fcNormal;
3534     exponent = myexponent - 127;  //bias
3535     *significandParts() = mysignificand;
3536     if (myexponent==0)    // denormal
3537       exponent = -126;
3538     else
3539       *significandParts() |= 0x800000; // integer bit
3540   }
3541 }
3542 
3543 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3544   uint32_t i = (uint32_t)*api.getRawData();
3545   uint32_t myexponent = (i >> 7) & 0xff;
3546   uint32_t mysignificand = i & 0x7f;
3547 
3548   initialize(&semBFloat);
3549   assert(partCount() == 1);
3550 
3551   sign = i >> 15;
3552   if (myexponent == 0 && mysignificand == 0) {
3553     makeZero(sign);
3554   } else if (myexponent == 0xff && mysignificand == 0) {
3555     makeInf(sign);
3556   } else if (myexponent == 0xff && mysignificand != 0) {
3557     category = fcNaN;
3558     exponent = exponentNaN();
3559     *significandParts() = mysignificand;
3560   } else {
3561     category = fcNormal;
3562     exponent = myexponent - 127; // bias
3563     *significandParts() = mysignificand;
3564     if (myexponent == 0) // denormal
3565       exponent = -126;
3566     else
3567       *significandParts() |= 0x80; // integer bit
3568   }
3569 }
3570 
3571 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3572   uint32_t i = (uint32_t)*api.getRawData();
3573   uint32_t myexponent = (i >> 10) & 0x1f;
3574   uint32_t mysignificand = i & 0x3ff;
3575 
3576   initialize(&semIEEEhalf);
3577   assert(partCount()==1);
3578 
3579   sign = i >> 15;
3580   if (myexponent==0 && mysignificand==0) {
3581     makeZero(sign);
3582   } else if (myexponent==0x1f && mysignificand==0) {
3583     makeInf(sign);
3584   } else if (myexponent==0x1f && mysignificand!=0) {
3585     category = fcNaN;
3586     exponent = exponentNaN();
3587     *significandParts() = mysignificand;
3588   } else {
3589     category = fcNormal;
3590     exponent = myexponent - 15;  //bias
3591     *significandParts() = mysignificand;
3592     if (myexponent==0)    // denormal
3593       exponent = -14;
3594     else
3595       *significandParts() |= 0x400; // integer bit
3596   }
3597 }
3598 
3599 /// Treat api as containing the bits of a floating point number.  Currently
3600 /// we infer the floating point type from the size of the APInt.  The
3601 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3602 /// when the size is anything else).
3603 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3604   assert(api.getBitWidth() == Sem->sizeInBits);
3605   if (Sem == &semIEEEhalf)
3606     return initFromHalfAPInt(api);
3607   if (Sem == &semBFloat)
3608     return initFromBFloatAPInt(api);
3609   if (Sem == &semIEEEsingle)
3610     return initFromFloatAPInt(api);
3611   if (Sem == &semIEEEdouble)
3612     return initFromDoubleAPInt(api);
3613   if (Sem == &semX87DoubleExtended)
3614     return initFromF80LongDoubleAPInt(api);
3615   if (Sem == &semIEEEquad)
3616     return initFromQuadrupleAPInt(api);
3617   if (Sem == &semPPCDoubleDoubleLegacy)
3618     return initFromPPCDoubleDoubleAPInt(api);
3619 
3620   llvm_unreachable(nullptr);
3621 }
3622 
3623 /// Make this number the largest magnitude normal number in the given
3624 /// semantics.
3625 void IEEEFloat::makeLargest(bool Negative) {
3626   // We want (in interchange format):
3627   //   sign = {Negative}
3628   //   exponent = 1..10
3629   //   significand = 1..1
3630   category = fcNormal;
3631   sign = Negative;
3632   exponent = semantics->maxExponent;
3633 
3634   // Use memset to set all but the highest integerPart to all ones.
3635   integerPart *significand = significandParts();
3636   unsigned PartCount = partCount();
3637   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3638 
3639   // Set the high integerPart especially setting all unused top bits for
3640   // internal consistency.
3641   const unsigned NumUnusedHighBits =
3642     PartCount*integerPartWidth - semantics->precision;
3643   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3644                                    ? (~integerPart(0) >> NumUnusedHighBits)
3645                                    : 0;
3646 }
3647 
3648 /// Make this number the smallest magnitude denormal number in the given
3649 /// semantics.
3650 void IEEEFloat::makeSmallest(bool Negative) {
3651   // We want (in interchange format):
3652   //   sign = {Negative}
3653   //   exponent = 0..0
3654   //   significand = 0..01
3655   category = fcNormal;
3656   sign = Negative;
3657   exponent = semantics->minExponent;
3658   APInt::tcSet(significandParts(), 1, partCount());
3659 }
3660 
3661 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3662   // We want (in interchange format):
3663   //   sign = {Negative}
3664   //   exponent = 0..0
3665   //   significand = 10..0
3666 
3667   category = fcNormal;
3668   zeroSignificand();
3669   sign = Negative;
3670   exponent = semantics->minExponent;
3671   significandParts()[partCountForBits(semantics->precision) - 1] |=
3672       (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3673 }
3674 
/// Construct a value with semantics \p Sem from the raw bit pattern \p API.
/// All decoding is delegated to initFromAPInt.
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}
3678 
/// Construct an IEEE single-precision value from the host float \p f by
/// decoding the bit pattern returned by APInt::floatToBits.
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}
3682 
/// Construct an IEEE double-precision value from the host double \p d by
/// decoding the bit pattern returned by APInt::doubleToBits.
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}
3686 
3687 namespace {
3688   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3689     Buffer.append(Str.begin(), Str.end());
3690   }
3691 
3692   /// Removes data from the given significand until it is no more
3693   /// precise than is required for the desired precision.
3694   void AdjustToPrecision(APInt &significand,
3695                          int &exp, unsigned FormatPrecision) {
3696     unsigned bits = significand.getActiveBits();
3697 
3698     // 196/59 is a very slight overestimate of lg_2(10).
3699     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3700 
3701     if (bits <= bitsRequired) return;
3702 
3703     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3704     if (!tensRemovable) return;
3705 
3706     exp += tensRemovable;
3707 
3708     APInt divisor(significand.getBitWidth(), 1);
3709     APInt powten(significand.getBitWidth(), 10);
3710     while (true) {
3711       if (tensRemovable & 1)
3712         divisor *= powten;
3713       tensRemovable >>= 1;
3714       if (!tensRemovable) break;
3715       powten *= powten;
3716     }
3717 
3718     significand = significand.udiv(divisor);
3719 
3720     // Truncate the significand down to its active bit count.
3721     significand = significand.trunc(significand.getActiveBits());
3722   }
3723 
3724 
3725   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3726                          int &exp, unsigned FormatPrecision) {
3727     unsigned N = buffer.size();
3728     if (N <= FormatPrecision) return;
3729 
3730     // The most significant figures are the last ones in the buffer.
3731     unsigned FirstSignificant = N - FormatPrecision;
3732 
3733     // Round.
3734     // FIXME: this probably shouldn't use 'round half up'.
3735 
3736     // Rounding down is just a truncation, except we also want to drop
3737     // trailing zeros from the new result.
3738     if (buffer[FirstSignificant - 1] < '5') {
3739       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3740         FirstSignificant++;
3741 
3742       exp += FirstSignificant;
3743       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3744       return;
3745     }
3746 
3747     // Rounding up requires a decimal add-with-carry.  If we continue
3748     // the carry, the newly-introduced zeros will just be truncated.
3749     for (unsigned I = FirstSignificant; I != N; ++I) {
3750       if (buffer[I] == '9') {
3751         FirstSignificant++;
3752       } else {
3753         buffer[I]++;
3754         break;
3755       }
3756     }
3757 
3758     // If we carried through, we have exactly one digit of precision.
3759     if (FirstSignificant == N) {
3760       exp += FirstSignificant;
3761       buffer.clear();
3762       buffer.push_back('1');
3763       return;
3764     }
3765 
3766     exp += FirstSignificant;
3767     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3768   }
3769 } // namespace
3770 
/// Append a decimal rendering of this value to \p Str (existing contents of
/// \p Str are kept).
///
/// \param FormatPrecision  Maximum number of significant decimal digits. When
///        zero, 2 + precision * 59 / 196 digits are used for finite non-zero
///        values.
/// \param FormatMaxPadding When zero, scientific notation is always used.
///        Otherwise it bounds how many padding zeros positional notation may
///        introduce before scientific notation is chosen instead.
/// \param TruncateZero     When true the exponent marker is 'E' and no zero
///        padding is added. When false the marker is 'e', the mantissa is
///        padded with zeros up to FormatPrecision and the exponent has at
///        least two digits.
///
/// Infinities are written as "+Inf" / "-Inf" and NaNs as "NaN".
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  // Special values are emitted directly and return early.
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    if (!FormatMaxPadding) {
      // Scientific form of zero.
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  if (isNegative())
    Str.push_back('-');

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(semantics->precision,
                    makeArrayRef(significandParts(),
                                 partCountForBits(semantics->precision)));

  // Set FormatPrecision if zero.  We want to do this before we
  // truncate trailing zeros, as those are part of the precision.
  if (!FormatPrecision) {
    // We use enough digits so the number can be round-tripped back to an
    // APFloat. The formula comes from "How to Print Floating-Point Numbers
    // Accurately" by Steele and White.
    // FIXME: Using a formula based purely on the precision is conservative;
    // we can print fewer digits depending on the actual value being printed.

    // FormatPrecision = 2 + floor(significandBits / lg_2(10))
    FormatPrecision = 2 + semantics->precision * 59 / 196;
  }

  // Ignore trailing binary zeros.
  int trailingZeros = significand.countTrailingZeros();
  exp += trailingZeros;
  significand.lshrInPlace(trailingZeros);

  // Change the exponent from 2^e to 10^e.
  if (exp == 0) {
    // Nothing to do.
  } else if (exp > 0) {
    // Just shift left.
    significand = significand.zext(semantics->precision + exp);
    significand <<= exp;
    exp = 0;
  } else { /* exp < 0 */
    int texp = -exp;

    // We transform this using the identity:
    //   (N)(2^-e) == (N)(5^e)(10^-e)
    // This means we have to multiply N (the significand) by 5^e.
    // To avoid overflow, we have to operate on numbers large
    // enough to store N * 5^e:
    //   log2(N * 5^e) == log2(N) + e * log2(5)
    //                 <= semantics->precision + e * 137 / 59
    //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

    unsigned precision = semantics->precision + (137 * texp + 136) / 59;

    // Multiply significand by 5^e.
    //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
    significand = significand.zext(precision);
    APInt five_to_the_i(precision, 5);
    while (true) {
      if (texp & 1) significand *= five_to_the_i;

      texp >>= 1;
      if (!texp) break;
      five_to_the_i *= five_to_the_i;
    }
  }

  // Cheaply discard whole decimal digits that exceed the requested precision
  // before generating characters.
  AdjustToPrecision(significand, exp, FormatPrecision);

  // Decimal digits, least significant first.
  SmallVector<char, 256> buffer;

  // Fill the buffer.
  unsigned precision = significand.getBitWidth();
  APInt ten(precision, 10);
  APInt digit(precision, 0);

  bool inTrail = true;
  while (significand != 0) {
    // digit <- significand % 10
    // significand <- significand / 10
    APInt::udivrem(significand, ten, significand, digit);

    unsigned d = digit.getZExtValue();

    // Drop trailing zeros.
    if (inTrail && !d) exp++;
    else {
      buffer.push_back((char) ('0' + d));
      inTrail = false;
    }
  }

  assert(!buffer.empty() && "no characters in buffer!");

  // Drop down to FormatPrecision.
  // TODO: don't do more precise calculations above than are required.
  AdjustToPrecision(buffer, exp, FormatPrecision);

  unsigned NDigits = buffer.size();

  // Check whether we should use scientific notation.
  bool FormatScientific;
  if (!FormatMaxPadding)
    FormatScientific = true;
  else {
    if (exp >= 0) {
      // 765e3 --> 765000
      //              ^^^
      // But we shouldn't make the number look more precise than it is.
      FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                          NDigits + (unsigned) exp > FormatPrecision);
    } else {
      // Power of the most significant digit.
      int MSD = exp + (int) (NDigits - 1);
      if (MSD >= 0) {
        // 765e-2 == 7.65
        FormatScientific = false;
      } else {
        // 765e-5 == 0.00765
        //           ^ ^^
        FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
      }
    }
  }

  // Scientific formatting is pretty straightforward.
  if (FormatScientific) {
    // Re-express exp as the power of ten of the leading digit.
    exp += (NDigits - 1);

    // Leading digit, decimal point, then the remaining digits from most to
    // least significant.
    Str.push_back(buffer[NDigits-1]);
    Str.push_back('.');
    if (NDigits == 1 && TruncateZero)
      Str.push_back('0');
    else
      for (unsigned I = 1; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
    // Fill with zeros up to FormatPrecision.
    if (!TruncateZero && FormatPrecision > NDigits - 1)
      Str.append(FormatPrecision - NDigits + 1, '0');
    // For !TruncateZero we use lower 'e'.
    Str.push_back(TruncateZero ? 'E' : 'e');

    // Exponent sign, then the exponent magnitude generated least significant
    // digit first and emitted in reverse.
    Str.push_back(exp >= 0 ? '+' : '-');
    if (exp < 0) exp = -exp;
    SmallVector<char, 6> expbuf;
    do {
      expbuf.push_back((char) ('0' + (exp % 10)));
      exp /= 10;
    } while (exp);
    // Exponent always at least two digits if we do not truncate zeros.
    if (!TruncateZero && expbuf.size() < 2)
      expbuf.push_back('0');
    for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
      Str.push_back(expbuf[E-1-I]);
    return;
  }

  // Non-scientific, positive exponents.
  if (exp >= 0) {
    // All digits, followed by exp padding zeros.
    for (unsigned I = 0; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-1-I]);
    for (unsigned I = 0; I != (unsigned) exp; ++I)
      Str.push_back('0');
    return;
  }

  // Non-scientific, negative exponents.

  // The number of digits to the left of the decimal point.
  int NWholeDigits = exp + (int) NDigits;

  unsigned I = 0;
  if (NWholeDigits > 0) {
    // Emit the integer part, then the decimal point.
    for (; I != (unsigned) NWholeDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);
    Str.push_back('.');
  } else {
    // No integer part: emit "0." followed by the leading fractional zeros.
    unsigned NZeros = 1 + (unsigned) -NWholeDigits;

    Str.push_back('0');
    Str.push_back('.');
    for (unsigned Z = 1; Z != NZeros; ++Z)
      Str.push_back('0');
  }

  // Remaining (fractional) digits.
  for (; I != NDigits; ++I)
    Str.push_back(buffer[NDigits-I-1]);
}
3986 
3987 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3988   // Special floats and denormals have no exact inverse.
3989   if (!isFiniteNonZero())
3990     return false;
3991 
3992   // Check that the number is a power of two by making sure that only the
3993   // integer bit is set in the significand.
3994   if (significandLSB() != semantics->precision - 1)
3995     return false;
3996 
3997   // Get the inverse.
3998   IEEEFloat reciprocal(*semantics, 1ULL);
3999   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4000     return false;
4001 
4002   // Avoid multiplication with a denormal, it is not safe on all platforms and
4003   // may be slower than a normal division.
4004   if (reciprocal.isDenormal())
4005     return false;
4006 
4007   assert(reciprocal.isFiniteNonZero() &&
4008          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4009 
4010   if (inv)
4011     *inv = APFloat(reciprocal, *semantics);
4012 
4013   return true;
4014 }
4015 
4016 bool IEEEFloat::isSignaling() const {
4017   if (!isNaN())
4018     return false;
4019 
4020   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4021   // first bit of the trailing significand being 0.
4022   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4023 }
4024 
/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// Replaces this value in place with the adjacent representable value: the
/// next one up when \p nextDown is false, the next one down when it is true.
///
/// \returns opInvalidOp when the input is a signaling NaN (which is replaced
/// by a quiet NaN), opOK otherwise.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      // The sign is left untouched, so the result is negative zero.
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      break;
    }

    // nextUp(getLargest()) == INFINITY
    if (isLargest() && !isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcInfinity;
      exponent = semantics->maxExponent + 1;
      break;
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        // The largest finite value was already handled above, so the exponent
        // still has room to grow here.
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}
4144 
/// Returns the internal exponent value stored for NaNs: one above the
/// semantics' maxExponent.
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return semantics->maxExponent + 1;
}
4148 
/// Returns the internal exponent value stored for infinities: one above the
/// semantics' maxExponent.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return semantics->maxExponent + 1;
}
4152 
/// Returns the internal exponent value stored for zeros: one below the
/// semantics' minExponent.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return semantics->minExponent - 1;
}
4156 
4157 void IEEEFloat::makeInf(bool Negative) {
4158   category = fcInfinity;
4159   sign = Negative;
4160   exponent = exponentInf();
4161   APInt::tcSet(significandParts(), 0, partCount());
4162 }
4163 
4164 void IEEEFloat::makeZero(bool Negative) {
4165   category = fcZero;
4166   sign = Negative;
4167   exponent = exponentZero();
4168   APInt::tcSet(significandParts(), 0, partCount());
4169 }
4170 
/// Turn a NaN into a quiet NaN by setting significand bit (precision - 2),
/// the same bit isSignaling() tests. Must only be called on a NaN (asserted).
void IEEEFloat::makeQuiet() {
  assert(isNaN());
  APInt::tcSetBit(significandParts(), semantics->precision - 2);
}
4175 
4176 int ilogb(const IEEEFloat &Arg) {
4177   if (Arg.isNaN())
4178     return IEEEFloat::IEK_NaN;
4179   if (Arg.isZero())
4180     return IEEEFloat::IEK_Zero;
4181   if (Arg.isInfinity())
4182     return IEEEFloat::IEK_Inf;
4183   if (!Arg.isDenormal())
4184     return Arg.exponent;
4185 
4186   IEEEFloat Normalized(Arg);
4187   int SignificandBits = Arg.getSemantics().precision - 1;
4188 
4189   Normalized.exponent += SignificandBits;
4190   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4191   return Normalized.exponent - SignificandBits;
4192 }
4193 
4194 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4195   auto MaxExp = X.getSemantics().maxExponent;
4196   auto MinExp = X.getSemantics().minExponent;
4197 
4198   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4199   // overflow; clamp it to a safe range before adding, but ensure that the range
4200   // is large enough that the clamp does not change the result. The range we
4201   // need to support is the difference between the largest possible exponent and
4202   // the normalized exponent of half the smallest denormal.
4203 
4204   int SignificandBits = X.getSemantics().precision - 1;
4205   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4206 
4207   // Clamp to one past the range ends to let normalize handle overlflow.
4208   X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
4209   X.normalize(RoundingMode, lfExactlyZero);
4210   if (X.isNaN())
4211     X.makeQuiet();
4212   return X;
4213 }
4214 
4215 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4216   Exp = ilogb(Val);
4217 
4218   // Quiet signalling nans.
4219   if (Exp == IEEEFloat::IEK_NaN) {
4220     IEEEFloat Quiet(Val);
4221     Quiet.makeQuiet();
4222     return Quiet;
4223   }
4224 
4225   if (Exp == IEEEFloat::IEK_Inf)
4226     return Val;
4227 
4228   // 1 is added because frexp is defined to return a normalized fraction in
4229   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4230   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4231   return scalbn(Val, -Exp, RM);
4232 }
4233 
/// Construct a double-double value with semantics \p S, which must be
/// semPPCDoubleDouble (asserted). Both components are constructed as
/// APFloat(semIEEEdouble).
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4239 
/// Construct a double-double value with semantics \p S, which must be
/// semPPCDoubleDouble (asserted). Both components are constructed from
/// semIEEEdouble with the `uninitialized` tag.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
                            APFloat(semIEEEdouble, uninitialized)}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4246 
/// Construct a double-double value with semantics \p S, which must be
/// semPPCDoubleDouble (asserted). The first component is built from the
/// integer \p I; the second is constructed as APFloat(semIEEEdouble).
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
                                           APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4252 
/// Construct a double-double value with semantics \p S, which must be
/// semPPCDoubleDouble (asserted), from a raw bit pattern. Raw word 0 of \p I
/// is decoded as the first IEEE double and raw word 1 as the second.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4260 
/// Construct a double-double value with semantics \p S, which must be
/// semPPCDoubleDouble (asserted), taking ownership of the two components by
/// move. Both \p First and \p Second must have semIEEEdouble semantics
/// (asserted).
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
                             APFloat &&Second)
    : Semantics(&S),
      Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
  assert(Semantics == &semPPCDoubleDouble);
  assert(&Floats[0].getSemantics() == &semIEEEdouble);
  assert(&Floats[1].getSemantics() == &semIEEEdouble);
}
4269 
/// Copy constructor. Allocates a fresh pair of components copied from \p RHS,
/// or leaves Floats null when \p RHS has no components. The copied semantics
/// must be semPPCDoubleDouble (asserted).
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
    : Semantics(RHS.Semantics),
      Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
                                         APFloat(RHS.Floats[1])}
                        : nullptr) {
  assert(Semantics == &semPPCDoubleDouble);
}
4277 
/// Move constructor. Takes over the components of \p RHS and marks \p RHS as
/// moved-from by pointing its semantics at semBogus. The acquired semantics
/// must be semPPCDoubleDouble (asserted).
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
    : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
  RHS.Semantics = &semBogus;
  assert(Semantics == &semPPCDoubleDouble);
}
4283 
4284 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4285   if (Semantics == RHS.Semantics && RHS.Floats) {
4286     Floats[0] = RHS.Floats[0];
4287     Floats[1] = RHS.Floats[1];
4288   } else if (this != &RHS) {
4289     this->~DoubleAPFloat();
4290     new (this) DoubleAPFloat(RHS);
4291   }
4292   return *this;
4293 }
4294 
// Implement addition, subtraction, multiplication and division based on:
// "Software for Doubled-Precision Floating-Point Computations",
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.

/// Adds the pair (\p a, \p aa) to the pair (\p c, \p cc) using rounding mode
/// \p RM and stores the result in Floats[0] and Floats[1].
/// NOTE(review): each pair presumably holds the leading and trailing
/// components of one double-double operand; confirm against the callers.
///
/// \returns the bitwise OR of the statuses of the individual operations,
/// except that opOK is returned when the correction term is exactly +0.
APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
                                         const APFloat &c, const APFloat &cc,
                                         roundingMode RM) {
  int Status = opOK;
  // z = a + c is the first approximation of the sum.
  APFloat z = a;
  Status |= z.add(c, RM);
  if (!z.isFinite()) {
    // A NaN result is stored as-is, with a +0 second component.
    if (!z.isInfinity()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // a + c gave infinity: discard the status collected so far and retry the
    // sum starting from the other components, adding the operand with the
    // larger magnitude last.
    Status = opOK;
    auto AComparedToC = a.compareAbsoluteValue(c);
    z = cc;
    Status |= z.add(aa, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // z = cc + aa + c + a;
      Status |= z.add(c, RM);
      Status |= z.add(a, RM);
    } else {
      // z = cc + aa + a + c;
      Status |= z.add(a, RM);
      Status |= z.add(c, RM);
    }
    // Still not finite: store it with a +0 second component.
    if (!z.isFinite()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    Floats[0] = z;
    APFloat zz = aa;
    Status |= zz.add(cc, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // Floats[1] = a - z + c + zz;
      Floats[1] = a;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(c, RM);
      Status |= Floats[1].add(zz, RM);
    } else {
      // Floats[1] = c - z + a + zz;
      Floats[1] = c;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(a, RM);
      Status |= Floats[1].add(zz, RM);
    }
  } else {
    // q = a - z;
    APFloat q = a;
    Status |= q.subtract(z, RM);

    // zz = q + c + (a - (q + z)) + aa + cc;
    // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
    auto zz = q;
    Status |= zz.add(c, RM);
    Status |= q.add(z, RM);
    Status |= q.subtract(a, RM);
    q.changeSign();
    Status |= zz.add(q, RM);
    Status |= zz.add(aa, RM);
    Status |= zz.add(cc, RM);
    // A correction term of exactly +0 means z alone is the result.
    if (zz.isZero() && !zz.isNegative()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return opOK;
    }
    // Floats[0] = z + zz;
    Floats[0] = z;
    Status |= Floats[0].add(zz, RM);
    if (!Floats[0].isFinite()) {
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // Floats[1] = z - Floats[0] + zz;
    Floats[1] = std::move(z);
    Status |= Floats[1].subtract(Floats[0], RM);
    Status |= Floats[1].add(zz, RM);
  }
  return (opStatus)Status;
}
4376 
4377 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4378                                                 const DoubleAPFloat &RHS,
4379                                                 DoubleAPFloat &Out,
4380                                                 roundingMode RM) {
4381   if (LHS.getCategory() == fcNaN) {
4382     Out = LHS;
4383     return opOK;
4384   }
4385   if (RHS.getCategory() == fcNaN) {
4386     Out = RHS;
4387     return opOK;
4388   }
4389   if (LHS.getCategory() == fcZero) {
4390     Out = RHS;
4391     return opOK;
4392   }
4393   if (RHS.getCategory() == fcZero) {
4394     Out = LHS;
4395     return opOK;
4396   }
4397   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4398       LHS.isNegative() != RHS.isNegative()) {
4399     Out.makeNaN(false, Out.isNegative(), nullptr);
4400     return opInvalidOp;
4401   }
4402   if (LHS.getCategory() == fcInfinity) {
4403     Out = LHS;
4404     return opOK;
4405   }
4406   if (RHS.getCategory() == fcInfinity) {
4407     Out = RHS;
4408     return opOK;
4409   }
4410   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4411 
4412   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4413       CC(RHS.Floats[1]);
4414   assert(&A.getSemantics() == &semIEEEdouble);
4415   assert(&AA.getSemantics() == &semIEEEdouble);
4416   assert(&C.getSemantics() == &semIEEEdouble);
4417   assert(&CC.getSemantics() == &semIEEEdouble);
4418   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4419   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4420   return Out.addImpl(A, AA, C, CC, RM);
4421 }
4422 
4423 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4424                                      roundingMode RM) {
4425   return addWithSpecial(*this, RHS, *this, RM);
4426 }
4427 
4428 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4429                                           roundingMode RM) {
4430   changeSign();
4431   auto Ret = add(RHS, RM);
4432   changeSign();
4433   return Ret;
4434 }
4435 
4436 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4437                                           APFloat::roundingMode RM) {
4438   const auto &LHS = *this;
4439   auto &Out = *this;
4440   /* Interesting observation: For special categories, finding the lowest
4441      common ancestor of the following layered graph gives the correct
4442      return category:
4443 
4444         NaN
4445        /   \
4446      Zero  Inf
4447        \   /
4448        Normal
4449 
4450      e.g. NaN * NaN = NaN
4451           Zero * Inf = NaN
4452           Normal * Zero = Zero
4453           Normal * Inf = Inf
4454   */
4455   if (LHS.getCategory() == fcNaN) {
4456     Out = LHS;
4457     return opOK;
4458   }
4459   if (RHS.getCategory() == fcNaN) {
4460     Out = RHS;
4461     return opOK;
4462   }
4463   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4464       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4465     Out.makeNaN(false, false, nullptr);
4466     return opOK;
4467   }
4468   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4469     Out = LHS;
4470     return opOK;
4471   }
4472   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4473     Out = RHS;
4474     return opOK;
4475   }
4476   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4477          "Special cases not handled exhaustively");
4478 
4479   int Status = opOK;
4480   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4481   // t = a * c
4482   APFloat T = A;
4483   Status |= T.multiply(C, RM);
4484   if (!T.isFiniteNonZero()) {
4485     Floats[0] = T;
4486     Floats[1].makeZero(/* Neg = */ false);
4487     return (opStatus)Status;
4488   }
4489 
4490   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4491   APFloat Tau = A;
4492   T.changeSign();
4493   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4494   T.changeSign();
4495   {
4496     // v = a * d
4497     APFloat V = A;
4498     Status |= V.multiply(D, RM);
4499     // w = b * c
4500     APFloat W = B;
4501     Status |= W.multiply(C, RM);
4502     Status |= V.add(W, RM);
4503     // tau += v + w
4504     Status |= Tau.add(V, RM);
4505   }
4506   // u = t + tau
4507   APFloat U = T;
4508   Status |= U.add(Tau, RM);
4509 
4510   Floats[0] = U;
4511   if (!U.isFinite()) {
4512     Floats[1].makeZero(/* Neg = */ false);
4513   } else {
4514     // Floats[1] = (t - u) + tau
4515     Status |= T.subtract(U, RM);
4516     Status |= T.add(Tau, RM);
4517     Floats[1] = T;
4518   }
4519   return (opStatus)Status;
4520 }
4521 
4522 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4523                                         APFloat::roundingMode RM) {
4524   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4525   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4526   auto Ret =
4527       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4528   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4529   return Ret;
4530 }
4531 
4532 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4533   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4534   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4535   auto Ret =
4536       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4537   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4538   return Ret;
4539 }
4540 
4541 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4542   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4543   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4544   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4545   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4546   return Ret;
4547 }
4548 
4549 APFloat::opStatus
4550 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4551                                 const DoubleAPFloat &Addend,
4552                                 APFloat::roundingMode RM) {
4553   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4554   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4555   auto Ret = Tmp.fusedMultiplyAdd(
4556       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4557       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4558   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4559   return Ret;
4560 }
4561 
4562 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4563   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4564   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4565   auto Ret = Tmp.roundToIntegral(RM);
4566   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4567   return Ret;
4568 }
4569 
4570 void DoubleAPFloat::changeSign() {
4571   Floats[0].changeSign();
4572   Floats[1].changeSign();
4573 }
4574 
4575 APFloat::cmpResult
4576 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4577   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4578   if (Result != cmpEqual)
4579     return Result;
4580   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4581   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4582     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4583     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4584     if (Against && !RHSAgainst)
4585       return cmpLessThan;
4586     if (!Against && RHSAgainst)
4587       return cmpGreaterThan;
4588     if (!Against && !RHSAgainst)
4589       return Result;
4590     if (Against && RHSAgainst)
4591       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4592   }
4593   return Result;
4594 }
4595 
4596 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4597   return Floats[0].getCategory();
4598 }
4599 
4600 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4601 
4602 void DoubleAPFloat::makeInf(bool Neg) {
4603   Floats[0].makeInf(Neg);
4604   Floats[1].makeZero(/* Neg = */ false);
4605 }
4606 
4607 void DoubleAPFloat::makeZero(bool Neg) {
4608   Floats[0].makeZero(Neg);
4609   Floats[1].makeZero(/* Neg = */ false);
4610 }
4611 
4612 void DoubleAPFloat::makeLargest(bool Neg) {
4613   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4614   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4615   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4616   if (Neg)
4617     changeSign();
4618 }
4619 
4620 void DoubleAPFloat::makeSmallest(bool Neg) {
4621   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4622   Floats[0].makeSmallest(Neg);
4623   Floats[1].makeZero(/* Neg = */ false);
4624 }
4625 
4626 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4627   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4628   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4629   if (Neg)
4630     Floats[0].changeSign();
4631   Floats[1].makeZero(/* Neg = */ false);
4632 }
4633 
4634 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4635   Floats[0].makeNaN(SNaN, Neg, fill);
4636   Floats[1].makeZero(/* Neg = */ false);
4637 }
4638 
4639 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4640   auto Result = Floats[0].compare(RHS.Floats[0]);
4641   // |Float[0]| > |Float[1]|
4642   if (Result == APFloat::cmpEqual)
4643     return Floats[1].compare(RHS.Floats[1]);
4644   return Result;
4645 }
4646 
4647 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4648   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4649          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4650 }
4651 
4652 hash_code hash_value(const DoubleAPFloat &Arg) {
4653   if (Arg.Floats)
4654     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4655   return hash_combine(Arg.Semantics);
4656 }
4657 
4658 APInt DoubleAPFloat::bitcastToAPInt() const {
4659   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4660   uint64_t Data[] = {
4661       Floats[0].bitcastToAPInt().getRawData()[0],
4662       Floats[1].bitcastToAPInt().getRawData()[0],
4663   };
4664   return APInt(128, 2, Data);
4665 }
4666 
4667 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4668                                                              roundingMode RM) {
4669   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4670   APFloat Tmp(semPPCDoubleDoubleLegacy);
4671   auto Ret = Tmp.convertFromString(S, RM);
4672   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4673   return Ret;
4674 }
4675 
4676 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4677   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4678   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4679   auto Ret = Tmp.next(nextDown);
4680   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4681   return Ret;
4682 }
4683 
4684 APFloat::opStatus
4685 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4686                                 unsigned int Width, bool IsSigned,
4687                                 roundingMode RM, bool *IsExact) const {
4688   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4689   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4690       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4691 }
4692 
4693 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4694                                                   bool IsSigned,
4695                                                   roundingMode RM) {
4696   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4697   APFloat Tmp(semPPCDoubleDoubleLegacy);
4698   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4699   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4700   return Ret;
4701 }
4702 
4703 APFloat::opStatus
4704 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4705                                               unsigned int InputSize,
4706                                               bool IsSigned, roundingMode RM) {
4707   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4708   APFloat Tmp(semPPCDoubleDoubleLegacy);
4709   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4710   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4711   return Ret;
4712 }
4713 
4714 APFloat::opStatus
4715 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4716                                               unsigned int InputSize,
4717                                               bool IsSigned, roundingMode RM) {
4718   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4719   APFloat Tmp(semPPCDoubleDoubleLegacy);
4720   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4721   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4722   return Ret;
4723 }
4724 
4725 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4726                                                unsigned int HexDigits,
4727                                                bool UpperCase,
4728                                                roundingMode RM) const {
4729   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4730   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4731       .convertToHexString(DST, HexDigits, UpperCase, RM);
4732 }
4733 
4734 bool DoubleAPFloat::isDenormal() const {
4735   return getCategory() == fcNormal &&
4736          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4737           // (double)(Hi + Lo) == Hi defines a normal number.
4738           Floats[0] != Floats[0] + Floats[1]);
4739 }
4740 
4741 bool DoubleAPFloat::isSmallest() const {
4742   if (getCategory() != fcNormal)
4743     return false;
4744   DoubleAPFloat Tmp(*this);
4745   Tmp.makeSmallest(this->isNegative());
4746   return Tmp.compare(*this) == cmpEqual;
4747 }
4748 
4749 bool DoubleAPFloat::isLargest() const {
4750   if (getCategory() != fcNormal)
4751     return false;
4752   DoubleAPFloat Tmp(*this);
4753   Tmp.makeLargest(this->isNegative());
4754   return Tmp.compare(*this) == cmpEqual;
4755 }
4756 
4757 bool DoubleAPFloat::isInteger() const {
4758   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4759   return Floats[0].isInteger() && Floats[1].isInteger();
4760 }
4761 
4762 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4763                              unsigned FormatPrecision,
4764                              unsigned FormatMaxPadding,
4765                              bool TruncateZero) const {
4766   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4767   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4768       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4769 }
4770 
4771 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4772   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4773   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4774   if (!inv)
4775     return Tmp.getExactInverse(nullptr);
4776   APFloat Inv(semPPCDoubleDoubleLegacy);
4777   auto Ret = Tmp.getExactInverse(&Inv);
4778   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4779   return Ret;
4780 }
4781 
4782 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
4783                      APFloat::roundingMode RM) {
4784   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4785   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4786                        scalbn(Arg.Floats[1], Exp, RM));
4787 }
4788 
4789 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4790                     APFloat::roundingMode RM) {
4791   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4792   APFloat First = frexp(Arg.Floats[0], Exp, RM);
4793   APFloat Second = Arg.Floats[1];
4794   if (Arg.getCategory() == APFloat::fcNormal)
4795     Second = scalbn(Second, -Exp, RM);
4796   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4797 }
4798 
4799 } // namespace detail
4800 
4801 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4802   if (usesLayout<IEEEFloat>(Semantics)) {
4803     new (&IEEE) IEEEFloat(std::move(F));
4804     return;
4805   }
4806   if (usesLayout<DoubleAPFloat>(Semantics)) {
4807     const fltSemantics& S = F.getSemantics();
4808     new (&Double)
4809         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
4810                       APFloat(semIEEEdouble));
4811     return;
4812   }
4813   llvm_unreachable("Unexpected semantics");
4814 }
4815 
4816 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
4817                                                        roundingMode RM) {
4818   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4819 }
4820 
4821 hash_code hash_value(const APFloat &Arg) {
4822   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4823     return hash_value(Arg.U.IEEE);
4824   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4825     return hash_value(Arg.U.Double);
4826   llvm_unreachable("Unexpected semantics");
4827 }
4828 
4829 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4830     : APFloat(Semantics) {
4831   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
4832   assert(StatusOrErr && "Invalid floating point representation");
4833   consumeError(StatusOrErr.takeError());
4834 }
4835 
4836 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4837                                    roundingMode RM, bool *losesInfo) {
4838   if (&getSemantics() == &ToSemantics) {
4839     *losesInfo = false;
4840     return opOK;
4841   }
4842   if (usesLayout<IEEEFloat>(getSemantics()) &&
4843       usesLayout<IEEEFloat>(ToSemantics))
4844     return U.IEEE.convert(ToSemantics, RM, losesInfo);
4845   if (usesLayout<IEEEFloat>(getSemantics()) &&
4846       usesLayout<DoubleAPFloat>(ToSemantics)) {
4847     assert(&ToSemantics == &semPPCDoubleDouble);
4848     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4849     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4850     return Ret;
4851   }
4852   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4853       usesLayout<IEEEFloat>(ToSemantics)) {
4854     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4855     *this = APFloat(std::move(getIEEE()), ToSemantics);
4856     return Ret;
4857   }
4858   llvm_unreachable("Unexpected semantics");
4859 }
4860 
4861 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
4862   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
4863 }
4864 
4865 void APFloat::print(raw_ostream &OS) const {
4866   SmallVector<char, 16> Buffer;
4867   toString(Buffer);
4868   OS << Buffer << "\n";
4869 }
4870 
4871 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4872 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4873 #endif
4874 
4875 void APFloat::Profile(FoldingSetNodeID &NID) const {
4876   NID.Add(bitcastToAPInt());
4877 }
4878 
4879 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4880    an APSInt, whose initial bit-width and signed-ness are used to determine the
4881    precision of the conversion.
4882  */
4883 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4884                                             roundingMode rounding_mode,
4885                                             bool *isExact) const {
4886   unsigned bitWidth = result.getBitWidth();
4887   SmallVector<uint64_t, 4> parts(result.getNumWords());
4888   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4889                                      rounding_mode, isExact);
4890   // Keeps the original signed-ness.
4891   result = APInt(bitWidth, parts);
4892   return status;
4893 }
4894 
4895 double APFloat::convertToDouble() const {
4896   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
4897     return getIEEE().convertToDouble();
4898   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
4899          "Float semantics is not representable by IEEEdouble");
4900   APFloat Temp = *this;
4901   bool LosesInfo;
4902   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
4903   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
4904   (void)St;
4905   return Temp.getIEEE().convertToDouble();
4906 }
4907 
4908 float APFloat::convertToFloat() const {
4909   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
4910     return getIEEE().convertToFloat();
4911   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
4912          "Float semantics is not representable by IEEEsingle");
4913   APFloat Temp = *this;
4914   bool LosesInfo;
4915   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
4916   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
4917   (void)St;
4918   return Temp.getIEEE().convertToFloat();
4919 }
4920 
4921 } // namespace llvm
4922 
4923 #undef APFLOAT_DISPATCH_ON_SEMANTICS
4924