xref: /freebsd/contrib/llvm-project/llvm/lib/Support/APFloat.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cstring>
29 #include <limits.h>
30 
/// Forward METHOD_CALL to the union member that matches the current semantics
/// and return its result from the enclosing function: U.IEEE when the
/// semantics use the IEEEFloat layout, U.Double when they use the
/// DoubleAPFloat layout. Any other layout is treated as unreachable.
///
/// The expansion contains `return` statements, so it may only be used inside
/// a function whose return type is compatible with METHOD_CALL's result.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
39 
40 using namespace llvm;
41 
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is computed as _lhs * 4 + _rhs, so distinct pairs only map to
/// distinct keys while every _rhs value lies in [0, 4).
/// NOTE(review): this presumably relies on fcCategory having at most four
/// enumerators -- confirm against the enum's declaration.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49 
50 /* Assumed in hexadecimal significand parsing, and conversion to
51    hexadecimal strings.  */
52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 
54 namespace llvm {
55 
// Describes which nonfinite values (Inf and NaN) a format can represent and,
// for the IEEE case, how they are told apart.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise.
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by
  // the `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
  // Float4E2M1FN types, which do not support Inf or NaN values.
  FiniteOnly,
};
76 
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
101 
102 /* Represents floating point arithmetic semantics.  */
103 struct fltSemantics {
104   /* The largest E such that 2^E is representable; this matches the
105      definition of IEEE 754.  */
106   APFloatBase::ExponentType maxExponent;
107 
108   /* The smallest E such that 2^E is a normalized number; this
109      matches the definition of IEEE 754.  */
110   APFloatBase::ExponentType minExponent;
111 
112   /* Number of bits in the significand.  This includes the integer
113      bit.  */
114   unsigned int precision;
115 
116   /* Number of bits actually used in the semantics. */
117   unsigned int sizeInBits;
118 
119   fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
120 
121   fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
122   // Returns true if any number described by this semantics can be precisely
123   // represented by the specified semantics. Does not take into account
124   // the value of fltNonfiniteBehavior.
isRepresentableByllvm::fltSemantics125   bool isRepresentableBy(const fltSemantics &S) const {
126     return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
127            precision <= S.precision;
128   }
129 };
130 
// The built-in semantics objects. Each aggregate initializer lists the
// fltSemantics members in declaration order: maxExponent, minExponent,
// precision, sizeInBits, and then optionally nonFiniteBehavior and
// nanEncoding (which otherwise keep their IEEE defaults).
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semFloat6E3M2FN = {
    4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat6E2M3FN = {
    2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat4E2M1FN = {
    2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
// Placeholder semantics with every numeric field zero.
static constexpr fltSemantics semBogus = {0, 0, 0, 0};

/* The IBM double-double semantics. Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.             */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.  */
static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
                                                          53 + 53, 128};
193 
/// Map a Semantics enumerator to the corresponding fltSemantics object.
///
/// The switch deliberately has no default case: every enumerator handled here
/// returns directly, and falling out of the switch is treated as unreachable.
const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
  switch (S) {
  case S_IEEEhalf:
    return IEEEhalf();
  case S_BFloat:
    return BFloat();
  case S_IEEEsingle:
    return IEEEsingle();
  case S_IEEEdouble:
    return IEEEdouble();
  case S_IEEEquad:
    return IEEEquad();
  case S_PPCDoubleDouble:
    return PPCDoubleDouble();
  case S_Float8E5M2:
    return Float8E5M2();
  case S_Float8E5M2FNUZ:
    return Float8E5M2FNUZ();
  case S_Float8E4M3:
    return Float8E4M3();
  case S_Float8E4M3FN:
    return Float8E4M3FN();
  case S_Float8E4M3FNUZ:
    return Float8E4M3FNUZ();
  case S_Float8E4M3B11FNUZ:
    return Float8E4M3B11FNUZ();
  case S_FloatTF32:
    return FloatTF32();
  case S_Float6E3M2FN:
    return Float6E3M2FN();
  case S_Float6E2M3FN:
    return Float6E2M3FN();
  case S_Float4E2M1FN:
    return Float4E2M1FN();
  case S_x87DoubleExtended:
    return x87DoubleExtended();
  }
  llvm_unreachable("Unrecognised floating semantics");
}
233 
234 APFloatBase::Semantics
SemanticsToEnum(const llvm::fltSemantics & Sem)235 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
236   if (&Sem == &llvm::APFloat::IEEEhalf())
237     return S_IEEEhalf;
238   else if (&Sem == &llvm::APFloat::BFloat())
239     return S_BFloat;
240   else if (&Sem == &llvm::APFloat::IEEEsingle())
241     return S_IEEEsingle;
242   else if (&Sem == &llvm::APFloat::IEEEdouble())
243     return S_IEEEdouble;
244   else if (&Sem == &llvm::APFloat::IEEEquad())
245     return S_IEEEquad;
246   else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
247     return S_PPCDoubleDouble;
248   else if (&Sem == &llvm::APFloat::Float8E5M2())
249     return S_Float8E5M2;
250   else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
251     return S_Float8E5M2FNUZ;
252   else if (&Sem == &llvm::APFloat::Float8E4M3())
253     return S_Float8E4M3;
254   else if (&Sem == &llvm::APFloat::Float8E4M3FN())
255     return S_Float8E4M3FN;
256   else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
257     return S_Float8E4M3FNUZ;
258   else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
259     return S_Float8E4M3B11FNUZ;
260   else if (&Sem == &llvm::APFloat::FloatTF32())
261     return S_FloatTF32;
262   else if (&Sem == &llvm::APFloat::Float6E3M2FN())
263     return S_Float6E3M2FN;
264   else if (&Sem == &llvm::APFloat::Float6E2M3FN())
265     return S_Float6E2M3FN;
266   else if (&Sem == &llvm::APFloat::Float4E2M1FN())
267     return S_Float4E2M1FN;
268   else if (&Sem == &llvm::APFloat::x87DoubleExtended())
269     return S_x87DoubleExtended;
270   else
271     llvm_unreachable("Unknown floating semantics");
272 }
273 
// Accessors for the file-local semantics objects. Each one returns a
// reference to the matching static sem* constant, so the address obtained
// through an accessor is the same on every call.
const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
const fltSemantics &APFloatBase::PPCDoubleDouble() {
  return semPPCDoubleDouble;
}
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
  return semFloat8E4M3B11FNUZ;
}
const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }
const fltSemantics &APFloatBase::x87DoubleExtended() {
  return semX87DoubleExtended;
}
const fltSemantics &APFloatBase::Bogus() { return semBogus; }
298 
// Namespace-scope definitions of APFloatBase's constexpr rounding-mode
// members. They carry no initializer here, so the values presumably come from
// the in-class declarations (not visible in this file).
// NOTE(review): since C++17 such redeclarations are redundant for static
// constexpr data members; confirm whether older language modes still need
// them before removing.
constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
constexpr RoundingMode APFloatBase::rmTowardPositive;
constexpr RoundingMode APFloatBase::rmTowardNegative;
constexpr RoundingMode APFloatBase::rmTowardZero;
constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
304 
305 /* A tight upper bound on number of parts required to hold the value
306    pow(5, power) is
307 
308      power * 815 / (351 * integerPartWidth) + 1
309 
310    However, whilst the result may require only this many parts,
311    because we are multiplying two values to get it, the
312    multiplication may require an extra part with the excess part
313    being zero (consider the trivial case of 1 * 1, tcFullMultiply
314    requires two parts to hold the single-part result).  So we add an
315    extra one to guarantee enough space whilst multiplying.  */
316 const unsigned int maxExponent = 16383;
317 const unsigned int maxPrecision = 113;
318 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
319 const unsigned int maxPowerOfFiveParts =
320     2 +
321     ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
322 
// Plain read accessors for individual fltSemantics fields.

/// Returns the `precision` field of \p semantics.
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
  return semantics.precision;
}
/// Returns the `maxExponent` field of \p semantics.
APFloatBase::ExponentType
APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
  return semantics.maxExponent;
}
/// Returns the `minExponent` field of \p semantics.
APFloatBase::ExponentType
APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
  return semantics.minExponent;
}
/// Returns the `sizeInBits` field of \p semantics.
unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
  return semantics.sizeInBits;
}
semanticsIntSizeInBits(const fltSemantics & semantics,bool isSigned)337 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
338                                                  bool isSigned) {
339   // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
340   // at least one more bit than the MaxExponent to hold the max FP value.
341   unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
342   // Extra sign bit needed.
343   if (isSigned)
344     ++MinBitWidth;
345   return MinBitWidth;
346 }
347 
isRepresentableAsNormalIn(const fltSemantics & Src,const fltSemantics & Dst)348 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
349                                             const fltSemantics &Dst) {
350   // Exponent range must be larger.
351   if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
352     return false;
353 
354   // If the mantissa is long enough, the result value could still be denormal
355   // with a larger exponent range.
356   //
357   // FIXME: This condition is probably not accurate but also shouldn't be a
358   // practical concern with existing types.
359   return Dst.precision >= Src.precision;
360 }
361 
/// Returns the `sizeInBits` field of \p Sem (the same value that
/// semanticsSizeInBits reports).
unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
  return Sem.sizeInBits;
}
365 
/// Returns one less than the minimum normalized exponent of \p semantics.
/// NOTE(review): presumably the internal exponent value assigned to zero (and
/// denormal) values -- confirm against the users of this helper.
static constexpr APFloatBase::ExponentType
exponentZero(const fltSemantics &semantics) {
  return semantics.minExponent - 1;
}

/// Returns one more than the maximum exponent of \p semantics.
/// NOTE(review): presumably the internal exponent value assigned to
/// infinities -- confirm against the users of this helper.
static constexpr APFloatBase::ExponentType
exponentInf(const fltSemantics &semantics) {
  return semantics.maxExponent + 1;
}
375 
376 static constexpr APFloatBase::ExponentType
exponentNaN(const fltSemantics & semantics)377 exponentNaN(const fltSemantics &semantics) {
378   if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
379     if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
380       return exponentZero(semantics);
381     return semantics.maxExponent;
382   }
383   return semantics.maxExponent + 1;
384 }
385 
386 /* A bunch of private, handy routines.  */
387 
/// Wraps the message \p Err in a StringError carrying
/// inconvertibleErrorCode() and returns it as an Error.
static inline Error createError(const Twine &Err) {
  return make_error<StringError>(Err, inconvertibleErrorCode());
}
391 
partCountForBits(unsigned int bits)392 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
393   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
394 }
395 
/* Maps the character code C to its decimal digit value.  Digits yield
   0U-9U; because the subtraction is unsigned, every other character yields
   a value >= 10U, which callers treat as "not a digit".  */
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zero = '0';
  return c - zero;
}
402 
403 /* Return the value of a decimal exponent of the form
404    [+-]ddddddd.
405 
406    If the exponent overflows, returns a large exponent with the
407    appropriate sign.  */
readExponent(StringRef::iterator begin,StringRef::iterator end)408 static Expected<int> readExponent(StringRef::iterator begin,
409                                   StringRef::iterator end) {
410   bool isNegative;
411   unsigned int absExponent;
412   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
413   StringRef::iterator p = begin;
414 
415   // Treat no exponent as 0 to match binutils
416   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
417     return 0;
418   }
419 
420   isNegative = (*p == '-');
421   if (*p == '-' || *p == '+') {
422     p++;
423     if (p == end)
424       return createError("Exponent has no digits");
425   }
426 
427   absExponent = decDigitValue(*p++);
428   if (absExponent >= 10U)
429     return createError("Invalid character in exponent");
430 
431   for (; p != end; ++p) {
432     unsigned int value;
433 
434     value = decDigitValue(*p);
435     if (value >= 10U)
436       return createError("Invalid character in exponent");
437 
438     absExponent = absExponent * 10U + value;
439     if (absExponent >= overlargeExponent) {
440       absExponent = overlargeExponent;
441       break;
442     }
443   }
444 
445   if (isNegative)
446     return -(int) absExponent;
447   else
448     return (int) absExponent;
449 }
450 
451 /* This is ugly and needs cleaning up, but I don't immediately see
452    how whilst remaining safe.  */
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)453 static Expected<int> totalExponent(StringRef::iterator p,
454                                    StringRef::iterator end,
455                                    int exponentAdjustment) {
456   int unsignedExponent;
457   bool negative, overflow;
458   int exponent = 0;
459 
460   if (p == end)
461     return createError("Exponent has no digits");
462 
463   negative = *p == '-';
464   if (*p == '-' || *p == '+') {
465     p++;
466     if (p == end)
467       return createError("Exponent has no digits");
468   }
469 
470   unsignedExponent = 0;
471   overflow = false;
472   for (; p != end; ++p) {
473     unsigned int value;
474 
475     value = decDigitValue(*p);
476     if (value >= 10U)
477       return createError("Invalid character in exponent");
478 
479     unsignedExponent = unsignedExponent * 10 + value;
480     if (unsignedExponent > 32767) {
481       overflow = true;
482       break;
483     }
484   }
485 
486   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
487     overflow = true;
488 
489   if (!overflow) {
490     exponent = unsignedExponent;
491     if (negative)
492       exponent = -exponent;
493     exponent += exponentAdjustment;
494     if (exponent > 32767 || exponent < -32768)
495       overflow = true;
496   }
497 
498   if (overflow)
499     exponent = negative ? -32768: 32767;
500 
501   return exponent;
502 }
503 
504 static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)505 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
506                            StringRef::iterator *dot) {
507   StringRef::iterator p = begin;
508   *dot = end;
509   while (p != end && *p == '0')
510     p++;
511 
512   if (p != end && *p == '.') {
513     *dot = p++;
514 
515     if (end - begin == 1)
516       return createError("Significand has no digits");
517 
518     while (p != end && *p == '0')
519       p++;
520   }
521 
522   return p;
523 }
524 
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  /* First character after leading zeroes and any leading decimal point.  */
  const char *firstSigDigit;
  /* Last significant digit, once trailing zeroes have been dropped.  */
  const char *lastSigDigit;
  /* Decimal exponent when the significand digits are read as an integer.  */
  int exponent;
  /* Decimal exponent when the point follows the first significant digit.  */
  int normalizedExponent;
};
544 
/* Analyse the decimal number in [BEGIN, END) and fill out D (see
   decimalInfo).  Returns an error for a second decimal point, for a
   character that is neither a digit, a '.', nor the 'e'/'E' introducing the
   exponent, for a significand without digits, or when the exponent itself
   fails to parse.  */
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  /* Step over leading zeroes and a leading decimal point, if any.  */
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  /* Scan the significand: stop at the first character that is not a digit,
     remembering the position of the (single) decimal point.  */
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  /* Anything left over must be an exponent introduced by 'e' or 'E'.  */
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    /* A lone "." in front of the exponent is not a significand either.  */
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      /* Walk p backwards over trailing zeroes; if that lands on the decimal
         point, step over it and keep dropping zeroes on its other side.  */
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}
610 
611 /* Return the trailing fraction of a hexadecimal number.
612    DIGITVALUE is the first hex digit of the fraction, P points to
613    the next digit.  */
614 static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)615 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
616                             unsigned int digitValue) {
617   unsigned int hexDigit;
618 
619   /* If the first trailing digit isn't 0 or 8 we can work out the
620      fraction immediately.  */
621   if (digitValue > 8)
622     return lfMoreThanHalf;
623   else if (digitValue < 8 && digitValue > 0)
624     return lfLessThanHalf;
625 
626   // Otherwise we need to find the first non-zero digit.
627   while (p != end && (*p == '0' || *p == '.'))
628     p++;
629 
630   if (p == end)
631     return createError("Invalid trailing hexadecimal fraction!");
632 
633   hexDigit = hexDigitValue(*p);
634 
635   /* If we ran off the end it is exactly zero or one-half, otherwise
636      a little more.  */
637   if (hexDigit == UINT_MAX)
638     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
639   else
640     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
641 }
642 
643 /* Return the fraction lost were a bignum truncated losing the least
644    significant BITS bits.  */
645 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)646 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
647                               unsigned int partCount,
648                               unsigned int bits)
649 {
650   unsigned int lsb;
651 
652   lsb = APInt::tcLSB(parts, partCount);
653 
654   /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
655   if (bits <= lsb)
656     return lfExactlyZero;
657   if (bits == lsb + 1)
658     return lfExactlyHalf;
659   if (bits <= partCount * APFloatBase::integerPartWidth &&
660       APInt::tcExtractBit(parts, bits - 1))
661     return lfMoreThanHalf;
662 
663   return lfLessThanHalf;
664 }
665 
666 /* Shift DST right BITS bits noting lost fraction.  */
667 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)668 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
669 {
670   lostFraction lost_fraction;
671 
672   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
673 
674   APInt::tcShiftRight(dst, parts, bits);
675 
676   return lost_fraction;
677 }
678 
679 /* Combine the effect of two lost fractions.  */
680 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)681 combineLostFractions(lostFraction moreSignificant,
682                      lostFraction lessSignificant)
683 {
684   if (lessSignificant != lfExactlyZero) {
685     if (moreSignificant == lfExactlyZero)
686       moreSignificant = lfLessThanHalf;
687     else if (moreSignificant == lfExactlyHalf)
688       moreSignificant = lfMoreThanHalf;
689   }
690 
691   return moreSignificant;
692 }
693 
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers whose own errors are at most HUERR1 and HUERR2 half-ulps.  The
   true error is strictly less than the returned value.  INEXACTMULTIPLY
   says whether the multiplication itself had to round.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int roundingError = inexactMultiply ? 1U : 0U;

  /* With exact inputs only the rounding of the product contributes:
     <= inexactMultiply half-ulps.  */
  if (inputError == 0)
    return roundingError * 2;

  return roundingError + 2 * inputError;
}
711 
/* The number of ulps from the boundary (zero, or half if ISNEAREST)
   when the least significant BITS are truncated.  BITS cannot be
   zero.

   When the distance cannot be expressed through the lowest part alone, an
   all-ones value ("a lot") is returned instead of the exact distance.  */
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  /* Locate the part holding the most significant truncated bit (index
     COUNT) and how many truncated bits fall inside it (PARTBITS).  */
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  /* Keep only the truncated bits of that part.  */
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  /* Within this part the boundary is the half-way bit pattern for nearest
     rounding, and zero otherwise.  */
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  if (count == 0) {
    /* Single part: the smaller of the two unsigned (wrapping) differences
       is the absolute distance.  */
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  if (part == boundary) {
    /* At or just above the boundary: every part between the top one and
       parts[0] must be zero for the distance to fit in parts[0].  */
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    /* Just below the boundary: every part between the top one and parts[0]
       must be all ones; the distance is then the negation of parts[0].  */
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}
757 
/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.

   The low three bits of POWER are served from a small table; each remaining
   bit selects one of the repeated squarings 5^8, 5^16, 5^32, ..., which are
   computed on demand and multiplied into the running result.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* Storage for the successive squarings, laid out back to back; the first
     entry is 5^8.  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  /* Number of parts occupied by the most recently computed squaring.  */
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  /* p1 holds the running result and p2 is the buffer the next product is
     written to; the two alternate between DST and SCRATCH.  */
  p1 = dst;
  p2 = scratch;

  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      /* Square the previous entry, which sits PARTSCOUNT parts before the
         current write position.  */
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* The product now lives in p2 and occupies RESULT parts; swap the
         buffers so that p1 holds the running result again and p2 becomes
         scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    pow5 += partsCount;
  }

  /* An odd number of swaps leaves the answer in SCRATCH; copy it out.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}
813 
814 /* Hexadecimal digit tables, lower and upper case.  Each has an extra
   '0' after the 'f'/'F' entry to avoid modular arithmetic when adding
   one to a digit; used when rounding up during hexadecimal output.  */
816 static const char hexDigitsLower[] = "0123456789abcdef0";
817 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of infinity and NaN.  */
818 static const char infinityL[] = "infinity";
819 static const char infinityU[] = "INFINITY";
820 static const char NaNL[] = "nan";
821 static const char NaNU[] = "NAN";
822 
823 /* Write out an integerPart in hexadecimal, starting with the most
824    significant nibble.  Write out exactly COUNT hexdigits, return
825    COUNT.  */
826 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)827 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
828            const char *hexDigitChars)
829 {
830   unsigned int result = count;
831 
832   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
833 
834   part >>= (APFloatBase::integerPartWidth - 4 * count);
835   while (count--) {
836     dst[count] = hexDigitChars[part & 0xf];
837     part >>= 4;
838   }
839 
840   return result;
841 }
842 
/* Store the decimal representation of N at DST (no terminator is
   written) and return a pointer just past the last digit.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int length = 0;

  /* Collect the digits least significant first; zero still yields "0".  */
  for (;;) {
    digits[length++] = '0' + n % 10;
    n /= 10;
    if (n == 0)
      break;
  }

  /* Copy them out in reading order.  */
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
860 
861 /* Write out a signed decimal integer.  */
862 static char *
writeSignedDecimal(char * dst,int value)863 writeSignedDecimal (char *dst, int value)
864 {
865   if (value < 0) {
866     *dst++ = '-';
867     dst = writeUnsignedDecimal(dst, -(unsigned) value);
868   } else
869     dst = writeUnsignedDecimal(dst, value);
870 
871   return dst;
872 }
873 
874 namespace detail {
875 /* Constructors.  */
initialize(const fltSemantics * ourSemantics)876 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
877   unsigned int count;
878 
879   semantics = ourSemantics;
880   count = partCount();
881   if (count > 1)
882     significand.parts = new integerPart[count];
883 }
884 
freeSignificand()885 void IEEEFloat::freeSignificand() {
886   if (needsCleanup())
887     delete [] significand.parts;
888 }
889 
assign(const IEEEFloat & rhs)890 void IEEEFloat::assign(const IEEEFloat &rhs) {
891   assert(semantics == rhs.semantics);
892 
893   sign = rhs.sign;
894   category = rhs.category;
895   exponent = rhs.exponent;
896   if (isFiniteNonZero() || category == fcNaN)
897     copySignificand(rhs);
898 }
899 
copySignificand(const IEEEFloat & rhs)900 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
901   assert(isFiniteNonZero() || category == fcNaN);
902   assert(rhs.partCount() >= partCount());
903 
904   APInt::tcAssign(significandParts(), rhs.significandParts(),
905                   partCount());
906 }
907 
908 /* Make this number a NaN, with an arbitrary but deterministic value
909    for the significand.  If double or longer, this is a signalling NaN,
910    which may not be ideal.  If float, this is QNaN(0).  */
makeNaN(bool SNaN,bool Negative,const APInt * fill)911 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
912   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
913     llvm_unreachable("This floating point format does not support NaN");
914 
915   category = fcNaN;
916   sign = Negative;
917   exponent = exponentNaN();
918 
919   integerPart *significand = significandParts();
920   unsigned numParts = partCount();
921 
922   APInt fill_storage;
923   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
924     // Finite-only types do not distinguish signalling and quiet NaN, so
925     // make them all signalling.
926     SNaN = false;
927     if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
928       sign = true;
929       fill_storage = APInt::getZero(semantics->precision - 1);
930     } else {
931       fill_storage = APInt::getAllOnes(semantics->precision - 1);
932     }
933     fill = &fill_storage;
934   }
935 
936   // Set the significand bits to the fill.
937   if (!fill || fill->getNumWords() < numParts)
938     APInt::tcSet(significand, 0, numParts);
939   if (fill) {
940     APInt::tcAssign(significand, fill->getRawData(),
941                     std::min(fill->getNumWords(), numParts));
942 
943     // Zero out the excess bits of the significand.
944     unsigned bitsToPreserve = semantics->precision - 1;
945     unsigned part = bitsToPreserve / 64;
946     bitsToPreserve %= 64;
947     significand[part] &= ((1ULL << bitsToPreserve) - 1);
948     for (part++; part != numParts; ++part)
949       significand[part] = 0;
950   }
951 
952   unsigned QNaNBit = semantics->precision - 2;
953 
954   if (SNaN) {
955     // We always have to clear the QNaN bit to make it an SNaN.
956     APInt::tcClearBit(significand, QNaNBit);
957 
958     // If there are no bits set in the payload, we have to set
959     // *something* to make it a NaN instead of an infinity;
960     // conventionally, this is the next bit down from the QNaN bit.
961     if (APInt::tcIsZero(significand, numParts))
962       APInt::tcSetBit(significand, QNaNBit - 1);
963   } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
964     // The only NaN is a quiet NaN, and it has no bits sets in the significand.
965     // Do nothing.
966   } else {
967     // We always have to set the QNaN bit to make it a QNaN.
968     APInt::tcSetBit(significand, QNaNBit);
969   }
970 
971   // For x87 extended precision, we want to make a NaN, not a
972   // pseudo-NaN.  Maybe we should expose the ability to make
973   // pseudo-NaNs?
974   if (semantics == &semX87DoubleExtended)
975     APInt::tcSetBit(significand, QNaNBit + 1);
976 }
977 
/* Copy assignment.  When RHS uses different semantics, our significand
   storage is released and set up afresh for the new semantics before the
   value is copied.  Self-assignment is a no-op.  */
IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
  if (this == &rhs)
    return *this;

  if (semantics != rhs.semantics) {
    freeSignificand();
    initialize(rhs.semantics);
  }
  assign(rhs);

  return *this;
}
989 
operator =(IEEEFloat && rhs)990 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
991   freeSignificand();
992 
993   semantics = rhs.semantics;
994   significand = rhs.significand;
995   exponent = rhs.exponent;
996   category = rhs.category;
997   sign = rhs.sign;
998 
999   rhs.semantics = &semBogus;
1000   return *this;
1001 }
1002 
isDenormal() const1003 bool IEEEFloat::isDenormal() const {
1004   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1005          (APInt::tcExtractBit(significandParts(),
1006                               semantics->precision - 1) == 0);
1007 }
1008 
isSmallest() const1009 bool IEEEFloat::isSmallest() const {
1010   // The smallest number by magnitude in our format will be the smallest
1011   // denormal, i.e. the floating point number with exponent being minimum
1012   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1013   return isFiniteNonZero() && exponent == semantics->minExponent &&
1014     significandMSB() == 0;
1015 }
1016 
isSmallestNormalized() const1017 bool IEEEFloat::isSmallestNormalized() const {
1018   return getCategory() == fcNormal && exponent == semantics->minExponent &&
1019          isSignificandAllZerosExceptMSB();
1020 }
1021 
isSignificandAllOnes() const1022 bool IEEEFloat::isSignificandAllOnes() const {
1023   // Test if the significand excluding the integral bit is all ones. This allows
1024   // us to test for binade boundaries.
1025   const integerPart *Parts = significandParts();
1026   const unsigned PartCount = partCountForBits(semantics->precision);
1027   for (unsigned i = 0; i < PartCount - 1; i++)
1028     if (~Parts[i])
1029       return false;
1030 
1031   // Set the unused high bits to all ones when we compare.
1032   const unsigned NumHighBits =
1033     PartCount*integerPartWidth - semantics->precision + 1;
1034   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1035          "Can not have more high bits to fill than integerPartWidth");
1036   const integerPart HighBitFill =
1037     ~integerPart(0) << (integerPartWidth - NumHighBits);
1038   if (~(Parts[PartCount - 1] | HighBitFill))
1039     return false;
1040 
1041   return true;
1042 }
1043 
isSignificandAllOnesExceptLSB() const1044 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1045   // Test if the significand excluding the integral bit is all ones except for
1046   // the least significant bit.
1047   const integerPart *Parts = significandParts();
1048 
1049   if (Parts[0] & 1)
1050     return false;
1051 
1052   const unsigned PartCount = partCountForBits(semantics->precision);
1053   for (unsigned i = 0; i < PartCount - 1; i++) {
1054     if (~Parts[i] & ~unsigned{!i})
1055       return false;
1056   }
1057 
1058   // Set the unused high bits to all ones when we compare.
1059   const unsigned NumHighBits =
1060       PartCount * integerPartWidth - semantics->precision + 1;
1061   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1062          "Can not have more high bits to fill than integerPartWidth");
1063   const integerPart HighBitFill = ~integerPart(0)
1064                                   << (integerPartWidth - NumHighBits);
1065   if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1066     return false;
1067 
1068   return true;
1069 }
1070 
isSignificandAllZeros() const1071 bool IEEEFloat::isSignificandAllZeros() const {
1072   // Test if the significand excluding the integral bit is all zeros. This
1073   // allows us to test for binade boundaries.
1074   const integerPart *Parts = significandParts();
1075   const unsigned PartCount = partCountForBits(semantics->precision);
1076 
1077   for (unsigned i = 0; i < PartCount - 1; i++)
1078     if (Parts[i])
1079       return false;
1080 
1081   // Compute how many bits are used in the final word.
1082   const unsigned NumHighBits =
1083     PartCount*integerPartWidth - semantics->precision + 1;
1084   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1085          "clear than integerPartWidth");
1086   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1087 
1088   if (Parts[PartCount - 1] & HighBitMask)
1089     return false;
1090 
1091   return true;
1092 }
1093 
isSignificandAllZerosExceptMSB() const1094 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1095   const integerPart *Parts = significandParts();
1096   const unsigned PartCount = partCountForBits(semantics->precision);
1097 
1098   for (unsigned i = 0; i < PartCount - 1; i++) {
1099     if (Parts[i])
1100       return false;
1101   }
1102 
1103   const unsigned NumHighBits =
1104       PartCount * integerPartWidth - semantics->precision + 1;
1105   return Parts[PartCount - 1] == integerPart(1)
1106                                      << (integerPartWidth - NumHighBits);
1107 }
1108 
isLargest() const1109 bool IEEEFloat::isLargest() const {
1110   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1111       semantics->nanEncoding == fltNanEncoding::AllOnes) {
1112     // The largest number by magnitude in our format will be the floating point
1113     // number with maximum exponent and with significand that is all ones except
1114     // the LSB.
1115     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1116            isSignificandAllOnesExceptLSB();
1117   } else {
1118     // The largest number by magnitude in our format will be the floating point
1119     // number with maximum exponent and with significand that is all ones.
1120     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1121            isSignificandAllOnes();
1122   }
1123 }
1124 
isInteger() const1125 bool IEEEFloat::isInteger() const {
1126   // This could be made more efficient; I'm going for obviously correct.
1127   if (!isFinite()) return false;
1128   IEEEFloat truncated = *this;
1129   truncated.roundToIntegral(rmTowardZero);
1130   return compare(truncated) == cmpEqual;
1131 }
1132 
bitwiseIsEqual(const IEEEFloat & rhs) const1133 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1134   if (this == &rhs)
1135     return true;
1136   if (semantics != rhs.semantics ||
1137       category != rhs.category ||
1138       sign != rhs.sign)
1139     return false;
1140   if (category==fcZero || category==fcInfinity)
1141     return true;
1142 
1143   if (isFiniteNonZero() && exponent != rhs.exponent)
1144     return false;
1145 
1146   return std::equal(significandParts(), significandParts() + partCount(),
1147                     rhs.significandParts());
1148 }
1149 
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)1150 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1151   initialize(&ourSemantics);
1152   sign = 0;
1153   category = fcNormal;
1154   zeroSignificand();
1155   exponent = ourSemantics.precision - 1;
1156   significandParts()[0] = value;
1157   normalize(rmNearestTiesToEven, lfExactlyZero);
1158 }
1159 
IEEEFloat(const fltSemantics & ourSemantics)1160 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1161   initialize(&ourSemantics);
1162   makeZero(false);
1163 }
1164 
1165 // Delegate to the previous constructor, because later copy constructor may
1166 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)1167 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1168     : IEEEFloat(ourSemantics) {}
1169 
// Copy constructor: set up storage for RHS's semantics, then copy its value.
IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
  initialize(rhs.semantics);
  assign(rhs);
}
1174 
IEEEFloat(IEEEFloat && rhs)1175 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1176   *this = std::move(rhs);
1177 }
1178 
~IEEEFloat()1179 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1180 
partCount() const1181 unsigned int IEEEFloat::partCount() const {
1182   return partCountForBits(semantics->precision + 1);
1183 }
1184 
significandParts() const1185 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1186   return const_cast<IEEEFloat *>(this)->significandParts();
1187 }
1188 
significandParts()1189 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1190   if (partCount() > 1)
1191     return significand.parts;
1192   else
1193     return &significand.part;
1194 }
1195 
zeroSignificand()1196 void IEEEFloat::zeroSignificand() {
1197   APInt::tcSet(significandParts(), 0, partCount());
1198 }
1199 
1200 /* Increment an fcNormal floating point number's significand.  */
incrementSignificand()1201 void IEEEFloat::incrementSignificand() {
1202   integerPart carry;
1203 
1204   carry = APInt::tcIncrement(significandParts(), partCount());
1205 
1206   /* Our callers should never cause us to overflow.  */
1207   assert(carry == 0);
1208   (void)carry;
1209 }
1210 
1211 /* Add the significand of the RHS.  Returns the carry flag.  */
addSignificand(const IEEEFloat & rhs)1212 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1213   integerPart *parts;
1214 
1215   parts = significandParts();
1216 
1217   assert(semantics == rhs.semantics);
1218   assert(exponent == rhs.exponent);
1219 
1220   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1221 }
1222 
1223 /* Subtract the significand of the RHS with a borrow flag.  Returns
1224    the borrow flag.  */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)1225 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1226                                                       integerPart borrow) {
1227   integerPart *parts;
1228 
1229   parts = significandParts();
1230 
1231   assert(semantics == rhs.semantics);
1232   assert(exponent == rhs.exponent);
1233 
1234   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1235                            partCount());
1236 }
1237 
1238 /* Multiply our significand by the significand of RHS.  If ADDEND is
   non-zero (addend.isNonZero()), it is added to the full-precision
   product before the result is narrowed back to our part count.  The
   exponent is updated to match; the result is not necessarily
   normalized.  Returns the lost fraction.  */
multiplySignificand(const IEEEFloat & rhs,IEEEFloat addend)1241 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1242                                             IEEEFloat addend) {
1243   unsigned int omsb;        // One, not zero, based MSB.
1244   unsigned int partsCount, newPartsCount, precision;
1245   integerPart *lhsSignificand;
1246   integerPart scratch[4];
1247   integerPart *fullSignificand;
1248   lostFraction lost_fraction;
1249   bool ignored;
1250 
1251   assert(semantics == rhs.semantics);
1252 
1253   precision = semantics->precision;
1254 
1255   // Allocate space for twice as many bits as the original significand, plus one
1256   // extra bit for the addition to overflow into.
1257   newPartsCount = partCountForBits(precision * 2 + 1);
1258 
  // Products of up to four parts use the local scratch buffer; larger ones
  // use a heap array that is deleted at the end of this function.
1259   if (newPartsCount > 4)
1260     fullSignificand = new integerPart[newPartsCount];
1261   else
1262     fullSignificand = scratch;
1263 
1264   lhsSignificand = significandParts();
1265   partsCount = partCount();
1266 
1267   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1268                         rhs.significandParts(), partsCount, partsCount);
1269 
1270   lost_fraction = lfExactlyZero;
1271   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1272   exponent += rhs.exponent;
1273 
1274   // Assume the operands involved in the multiplication are single-precision
1275   // FP, and the two multiplicands are:
1276   //   *this = a23 . a22 ... a0 * 2^e1
1277   //     rhs = b23 . b22 ... b0 * 2^e2
1278   // the result of multiplication is:
1279   //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1280   // Note that there are three significant bits at the left-hand side of the
1281   // radix point: two for the multiplication, and an overflow bit for the
1282   // addition (that will always be zero at this point). Move the radix point
1283   // toward left by two bits, and adjust exponent accordingly.
1284   exponent += 2;
1285 
1286   if (addend.isNonZero()) {
1287     // The intermediate result of the multiplication has "2 * precision"
1288     // significant bits; adjust the addend to be consistent with mul result.
1289     //
    // While the addition runs, *this temporarily adopts the extended
    // semantics and uses fullSignificand as its significand; both are
    // restored from these saved copies afterwards.
1290     Significand savedSignificand = significand;
1291     const fltSemantics *savedSemantics = semantics;
1292     fltSemantics extendedSemantics;
1293     opStatus status;
1294     unsigned int extendedPrecision;
1295 
1296     // Normalize our MSB to one below the top bit to allow for overflow.
1297     extendedPrecision = 2 * precision + 1;
1298     if (omsb != extendedPrecision - 1) {
1299       assert(extendedPrecision > omsb);
1300       APInt::tcShiftLeft(fullSignificand, newPartsCount,
1301                          (extendedPrecision - 1) - omsb);
1302       exponent -= (extendedPrecision - 1) - omsb;
1303     }
1304 
1305     /* Create new semantics.  */
1306     extendedSemantics = *semantics;
1307     extendedSemantics.precision = extendedPrecision;
1308 
1309     if (newPartsCount == 1)
1310       significand.part = fullSignificand[0];
1311     else
1312       significand.parts = fullSignificand;
1313     semantics = &extendedSemantics;
1314 
1315     // Make a copy so we can convert it to the extended semantics.
1316     // Note that we cannot convert the addend directly, as the extendedSemantics
1317     // is a local variable (which we take a reference to).
1318     IEEEFloat extendedAddend(addend);
1319     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1320     assert(status == opOK);
1321     (void)status;
1322 
1323     // Shift the significand of the addend right by one bit. This guarantees
1324     // that the high bit of the significand is zero (same as fullSignificand),
1325     // so the addition will overflow (if it does overflow at all) into the top bit.
1326     lost_fraction = extendedAddend.shiftSignificandRight(1);
1327     assert(lost_fraction == lfExactlyZero &&
1328            "Lost precision while shifting addend for fused-multiply-add.");
1329 
1330     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1331 
1332     /* Restore our state.  */
    // In the single-part case the sum lives in significand.part, so it is
    // copied back into fullSignificand before the saved state is restored.
1333     if (newPartsCount == 1)
1334       fullSignificand[0] = significand.part;
1335     significand = savedSignificand;
1336     semantics = savedSemantics;
1337 
1338     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1339   }
1340 
1341   // Convert the result having "2 * precision" significant-bits back to the one
1342   // having "precision" significant-bits. First, move the radix point from
1343   // position "2*precision - 1" to "precision - 1". The exponent needs to be
1344   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts "precision + 1", one more
  // than the figure derived above -- presumably to account for the extra
  // overflow bit reserved in the product; confirm.
1345   exponent -= precision + 1;
1346 
1347   // In case MSB resides at the left-hand side of radix point, shift the
1348   // mantissa right by some amount to make sure the MSB reside right before
1349   // the radix point (i.e. "MSB . rest-significant-bits").
1350   //
1351   // Note that the result is not normalized when "omsb < precision". So, the
1352   // caller needs to call IEEEFloat::normalize() if normalized value is
1353   // expected.
1354   if (omsb > precision) {
1355     unsigned int bits, significantParts;
1356     lostFraction lf;
1357 
1358     bits = omsb - precision;
1359     significantParts = partCountForBits(omsb);
1360     lf = shiftRight(fullSignificand, significantParts, bits);
1361     lost_fraction = combineLostFractions(lf, lost_fraction);
1362     exponent += bits;
1363   }
1364 
  // Keep only the low partsCount parts of the wide product as our result.
1365   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1366 
1367   if (newPartsCount > 4)
1368     delete [] fullSignificand;
1369 
1370   return lost_fraction;
1371 }
1372 
multiplySignificand(const IEEEFloat & rhs)1373 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1374   return multiplySignificand(rhs, IEEEFloat(*semantics));
1375 }
1376 
1377 /* Divide our significand by the significand of RHS using bit-by-bit
   long division.  The quotient replaces our significand and the exponent
   is adjusted accordingly.  Returns the lost fraction.  */
divideSignificand(const IEEEFloat & rhs)1378 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1379   unsigned int bit, i, partsCount;
1380   const integerPart *rhsSignificand;
1381   integerPart *lhsSignificand, *dividend, *divisor;
1382   integerPart scratch[4];
1383   lostFraction lost_fraction;
1384 
1385   assert(semantics == rhs.semantics);
1386 
1387   lhsSignificand = significandParts();
1388   rhsSignificand = rhs.significandParts();
1389   partsCount = partCount();
1390 
  /* One buffer holds both working copies: the dividend in the first
     partsCount parts and the divisor in the next partsCount parts.  The
     four-part scratch array is enough when partsCount is at most two.  */
1391   if (partsCount > 2)
1392     dividend = new integerPart[partsCount * 2];
1393   else
1394     dividend = scratch;
1395 
1396   divisor = dividend + partsCount;
1397 
1398   /* Copy the dividend and divisor as they will be modified in-place.  */
  /* Our own significand is cleared here; it accumulates the quotient.  */
1399   for (i = 0; i < partsCount; i++) {
1400     dividend[i] = lhsSignificand[i];
1401     divisor[i] = rhsSignificand[i];
1402     lhsSignificand[i] = 0;
1403   }
1404 
1405   exponent -= rhs.exponent;
1406 
1407   unsigned int precision = semantics->precision;
1408 
1409   /* Normalize the divisor.  */
1410   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1411   if (bit) {
1412     exponent += bit;
1413     APInt::tcShiftLeft(divisor, partsCount, bit);
1414   }
1415 
1416   /* Normalize the dividend.  */
1417   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1418   if (bit) {
1419     exponent -= bit;
1420     APInt::tcShiftLeft(dividend, partsCount, bit);
1421   }
1422 
1423   /* Ensure the dividend >= divisor initially for the loop below.
1424      Incidentally, this means that the division loop below is
1425      guaranteed to set the integer bit to one.  */
1426   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1427     exponent--;
1428     APInt::tcShiftLeft(dividend, partsCount, 1);
1429     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1430   }
1431 
1432   /* Long division.  */
  /* Produces one quotient bit per iteration, from bit precision - 1 down
     to bit 0.  */
1433   for (bit = precision; bit; bit -= 1) {
1434     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1435       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1436       APInt::tcSetBit(lhsSignificand, bit - 1);
1437     }
1438 
1439     APInt::tcShiftLeft(dividend, partsCount, 1);
1440   }
1441 
1442   /* Figure out the lost fraction.  */
  /* The loop's final shift left the remainder doubled, so comparing it
     with the divisor compares the true remainder with half the divisor.  */
1443   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1444 
1445   if (cmp > 0)
1446     lost_fraction = lfMoreThanHalf;
1447   else if (cmp == 0)
1448     lost_fraction = lfExactlyHalf;
1449   else if (APInt::tcIsZero(dividend, partsCount))
1450     lost_fraction = lfExactlyZero;
1451   else
1452     lost_fraction = lfLessThanHalf;
1453 
1454   if (partsCount > 2)
1455     delete [] dividend;
1456 
1457   return lost_fraction;
1458 }
1459 
significandMSB() const1460 unsigned int IEEEFloat::significandMSB() const {
1461   return APInt::tcMSB(significandParts(), partCount());
1462 }
1463 
significandLSB() const1464 unsigned int IEEEFloat::significandLSB() const {
1465   return APInt::tcLSB(significandParts(), partCount());
1466 }
1467 
1468 /* Note that a zero result is NOT normalized to fcZero.  */
shiftSignificandRight(unsigned int bits)1469 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1470   /* Our exponent should not overflow.  */
1471   assert((ExponentType) (exponent + bits) >= exponent);
1472 
1473   exponent += bits;
1474 
1475   return shiftRight(significandParts(), partCount(), bits);
1476 }
1477 
1478 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
shiftSignificandLeft(unsigned int bits)1479 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1480   assert(bits < semantics->precision);
1481 
1482   if (bits) {
1483     unsigned int partsCount = partCount();
1484 
1485     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1486     exponent -= bits;
1487 
1488     assert(!APInt::tcIsZero(significandParts(), partsCount));
1489   }
1490 }
1491 
1492 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1493 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1494   int compare;
1495 
1496   assert(semantics == rhs.semantics);
1497   assert(isFiniteNonZero());
1498   assert(rhs.isFiniteNonZero());
1499 
1500   compare = exponent - rhs.exponent;
1501 
1502   /* If exponents are equal, do an unsigned bignum comparison of the
1503      significands.  */
1504   if (compare == 0)
1505     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1506                                partCount());
1507 
1508   if (compare > 0)
1509     return cmpGreaterThan;
1510   else if (compare < 0)
1511     return cmpLessThan;
1512   else
1513     return cmpEqual;
1514 }
1515 
1516 /* Set the least significant BITS bits of a bignum, clear the
1517    rest.  */
tcSetLeastSignificantBits(APInt::WordType * dst,unsigned parts,unsigned bits)1518 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1519                                       unsigned bits) {
1520   unsigned i = 0;
1521   while (bits > APInt::APINT_BITS_PER_WORD) {
1522     dst[i++] = ~(APInt::WordType)0;
1523     bits -= APInt::APINT_BITS_PER_WORD;
1524   }
1525 
1526   if (bits)
1527     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1528 
1529   while (i < parts)
1530     dst[i++] = 0;
1531 }
1532 
1533 /* Handle overflow.  Sign is preserved.  We either become infinity or
1534    the largest finite number.  */
handleOverflow(roundingMode rounding_mode)1535 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1536   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1537     /* Infinity?  */
1538     if (rounding_mode == rmNearestTiesToEven ||
1539         rounding_mode == rmNearestTiesToAway ||
1540         (rounding_mode == rmTowardPositive && !sign) ||
1541         (rounding_mode == rmTowardNegative && sign)) {
1542       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1543         makeNaN(false, sign);
1544       else
1545         category = fcInfinity;
1546       return static_cast<opStatus>(opOverflow | opInexact);
1547     }
1548   }
1549 
1550   /* Otherwise we become the largest finite number.  */
1551   category = fcNormal;
1552   exponent = semantics->maxExponent;
1553   tcSetLeastSignificantBits(significandParts(), partCount(),
1554                             semantics->precision);
1555   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1556       semantics->nanEncoding == fltNanEncoding::AllOnes)
1557     APInt::tcClearBit(significandParts(), 0);
1558 
1559   return opInexact;
1560 }
1561 
1562 /* Returns TRUE if, when truncating the current number, with BIT the
1563    new LSB, with the given lost fraction and rounding mode, the result
1564    would need to be rounded away from zero (i.e., by increasing the
1565    signficand).  This routine must work for fcZero of both signs, and
1566    fcNormal numbers.  */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1567 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1568                                   lostFraction lost_fraction,
1569                                   unsigned int bit) const {
1570   /* NaNs and infinities should not have lost fractions.  */
1571   assert(isFiniteNonZero() || category == fcZero);
1572 
1573   /* Current callers never pass this so we don't handle it.  */
1574   assert(lost_fraction != lfExactlyZero);
1575 
1576   switch (rounding_mode) {
1577   case rmNearestTiesToAway:
1578     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1579 
1580   case rmNearestTiesToEven:
1581     if (lost_fraction == lfMoreThanHalf)
1582       return true;
1583 
1584     /* Our zeroes don't have a significand to test.  */
1585     if (lost_fraction == lfExactlyHalf && category != fcZero)
1586       return APInt::tcExtractBit(significandParts(), bit);
1587 
1588     return false;
1589 
1590   case rmTowardZero:
1591     return false;
1592 
1593   case rmTowardPositive:
1594     return !sign;
1595 
1596   case rmTowardNegative:
1597     return sign;
1598 
1599   default:
1600     break;
1601   }
1602   llvm_unreachable("Invalid rounding mode found");
1603 }
1604 
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1605 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1606                                          lostFraction lost_fraction) {
1607   unsigned int omsb;                /* One, not zero, based MSB.  */
1608   int exponentChange;
1609 
1610   if (!isFiniteNonZero())
1611     return opOK;
1612 
1613   /* Before rounding normalize the exponent of fcNormal numbers.  */
1614   omsb = significandMSB() + 1;
1615 
1616   if (omsb) {
1617     /* OMSB is numbered from 1.  We want to place it in the integer
1618        bit numbered PRECISION if possible, with a compensating change in
1619        the exponent.  */
1620     exponentChange = omsb - semantics->precision;
1621 
1622     /* If the resulting exponent is too high, overflow according to
1623        the rounding mode.  */
1624     if (exponent + exponentChange > semantics->maxExponent)
1625       return handleOverflow(rounding_mode);
1626 
1627     /* Subnormal numbers have exponent minExponent, and their MSB
1628        is forced based on that.  */
1629     if (exponent + exponentChange < semantics->minExponent)
1630       exponentChange = semantics->minExponent - exponent;
1631 
1632     /* Shifting left is easy as we don't lose precision.  */
1633     if (exponentChange < 0) {
1634       assert(lost_fraction == lfExactlyZero);
1635 
1636       shiftSignificandLeft(-exponentChange);
1637 
1638       return opOK;
1639     }
1640 
1641     if (exponentChange > 0) {
1642       lostFraction lf;
1643 
1644       /* Shift right and capture any new lost fraction.  */
1645       lf = shiftSignificandRight(exponentChange);
1646 
1647       lost_fraction = combineLostFractions(lf, lost_fraction);
1648 
1649       /* Keep OMSB up-to-date.  */
1650       if (omsb > (unsigned) exponentChange)
1651         omsb -= exponentChange;
1652       else
1653         omsb = 0;
1654     }
1655   }
1656 
1657   // The all-ones values is an overflow if NaN is all ones. If NaN is
1658   // represented by negative zero, then it is a valid finite value.
1659   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1660       semantics->nanEncoding == fltNanEncoding::AllOnes &&
1661       exponent == semantics->maxExponent && isSignificandAllOnes())
1662     return handleOverflow(rounding_mode);
1663 
1664   /* Now round the number according to rounding_mode given the lost
1665      fraction.  */
1666 
1667   /* As specified in IEEE 754, since we do not trap we do not report
1668      underflow for exact results.  */
1669   if (lost_fraction == lfExactlyZero) {
1670     /* Canonicalize zeroes.  */
1671     if (omsb == 0) {
1672       category = fcZero;
1673       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1674         sign = false;
1675     }
1676 
1677     return opOK;
1678   }
1679 
1680   /* Increment the significand if we're rounding away from zero.  */
1681   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1682     if (omsb == 0)
1683       exponent = semantics->minExponent;
1684 
1685     incrementSignificand();
1686     omsb = significandMSB() + 1;
1687 
1688     /* Did the significand increment overflow?  */
1689     if (omsb == (unsigned) semantics->precision + 1) {
1690       /* Renormalize by incrementing the exponent and shifting our
1691          significand right one.  However if we already have the
1692          maximum exponent we overflow to infinity.  */
1693       if (exponent == semantics->maxExponent)
1694         // Invoke overflow handling with a rounding mode that will guarantee
1695         // that the result gets turned into the correct infinity representation.
1696         // This is needed instead of just setting the category to infinity to
1697         // account for 8-bit floating point types that have no inf, only NaN.
1698         return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1699 
1700       shiftSignificandRight(1);
1701 
1702       return opInexact;
1703     }
1704 
1705     // The all-ones values is an overflow if NaN is all ones. If NaN is
1706     // represented by negative zero, then it is a valid finite value.
1707     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1708         semantics->nanEncoding == fltNanEncoding::AllOnes &&
1709         exponent == semantics->maxExponent && isSignificandAllOnes())
1710       return handleOverflow(rounding_mode);
1711   }
1712 
1713   /* The normal case - we were and are not denormal, and any
1714      significand increment above didn't overflow.  */
1715   if (omsb == semantics->precision)
1716     return opInexact;
1717 
1718   /* We have a non-zero denormal.  */
1719   assert(omsb < semantics->precision);
1720 
1721   /* Canonicalize zeroes.  */
1722   if (omsb == 0) {
1723     category = fcZero;
1724     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1725       sign = false;
1726   }
1727 
1728   /* The fcZero case is a denormal that underflowed to zero.  */
1729   return (opStatus) (opUnderflow | opInexact);
1730 }
1731 
/* Resolve the operand-category combinations of an addition (or, when
   SUBTRACT is set, a subtraction) that need no significand arithmetic.
   Returns opDivByZero as a marker for the one remaining combination, two
   normal numbers, which the caller has to compute itself.  */
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  // A NaN on the right replaces a non-NaN left operand.
  if (category != fcNaN && rhs.category == fcNaN)
    assign(rhs);

  // From here on a NaN result lives in *this; signaling NaNs are quieted
  // and reported as an invalid operation.
  if (category == fcNaN) {
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    // Not a special case at all; tell the caller to do the real work.
    return opDivByZero;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
    // The left operand already is the result.
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
    // The result is rhs, negated when subtracting.
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    // The result is rhs's infinity, negated when subtracting.
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity): {
    /* Differently signed infinities can only be validly subtracted, and
       like-signed ones only validly added.  */
    const bool signsDiffer = sign != rhs.sign;
    if (signsDiffer != subtract) {
      makeNaN();
      return opInvalidOp;
    }
    return opOK;
  }
  }
}
1787 
1788 /* Add or subtract two normal numbers.  */
addOrSubtractSignificand(const IEEEFloat & rhs,bool subtract)1789 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1790                                                  bool subtract) {
1791   integerPart carry;
1792   lostFraction lost_fraction;
1793   int bits;
1794 
1795   /* Determine if the operation on the absolute values is effectively
1796      an addition or subtraction.  */
1797   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1798 
1799   /* Are we bigger exponent-wise than the RHS?  */
1800   bits = exponent - rhs.exponent;
1801 
1802   /* Subtraction is more subtle than one might naively expect.  */
1803   if (subtract) {
1804     IEEEFloat temp_rhs(rhs);
1805 
1806     if (bits == 0)
1807       lost_fraction = lfExactlyZero;
1808     else if (bits > 0) {
1809       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1810       shiftSignificandLeft(1);
1811     } else {
1812       lost_fraction = shiftSignificandRight(-bits - 1);
1813       temp_rhs.shiftSignificandLeft(1);
1814     }
1815 
1816     // Should we reverse the subtraction.
1817     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1818       carry = temp_rhs.subtractSignificand
1819         (*this, lost_fraction != lfExactlyZero);
1820       copySignificand(temp_rhs);
1821       sign = !sign;
1822     } else {
1823       carry = subtractSignificand
1824         (temp_rhs, lost_fraction != lfExactlyZero);
1825     }
1826 
1827     /* Invert the lost fraction - it was on the RHS and
1828        subtracted.  */
1829     if (lost_fraction == lfLessThanHalf)
1830       lost_fraction = lfMoreThanHalf;
1831     else if (lost_fraction == lfMoreThanHalf)
1832       lost_fraction = lfLessThanHalf;
1833 
1834     /* The code above is intended to ensure that no borrow is
1835        necessary.  */
1836     assert(!carry);
1837     (void)carry;
1838   } else {
1839     if (bits > 0) {
1840       IEEEFloat temp_rhs(rhs);
1841 
1842       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1843       carry = addSignificand(temp_rhs);
1844     } else {
1845       lost_fraction = shiftSignificandRight(-bits);
1846       carry = addSignificand(rhs);
1847     }
1848 
1849     /* We have a guard bit; generating a carry cannot happen.  */
1850     assert(!carry);
1851     (void)carry;
1852   }
1853 
1854   return lost_fraction;
1855 }
1856 
/* Resolve the operand-category combinations of a multiplication that need
   no significand arithmetic.  For two normal numbers nothing is changed
   and opOK is returned; the caller multiplies the significands.  The
   callers in this file XOR rhs's sign into ours before calling.  */
IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  // A NaN on the right replaces a non-NaN left operand.  Clearing the sign
  // makes the XOR below leave the NaN carrying rhs's own sign.
  if (category != fcNaN && rhs.category == fcNaN) {
    assign(rhs);
    sign = false;
  }

  if (category == fcNaN) {
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    // Nothing special; the significands still have to be multiplied.
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
    category = fcZero;
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcInfinity;
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    // Zero times infinity is an invalid operation.
    makeNaN();
    return opInvalidOp;
  }
}
1900 
/* Resolve the operand-category combinations of a division that need no
   significand arithmetic.  For two normal numbers nothing is changed and
   opOK is returned; the caller divides the significands.  The caller in
   this file XORs rhs's sign into ours before calling.  */
IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  // A NaN on the right replaces a non-NaN left operand.  Clearing the sign
  // makes the XOR below leave the NaN carrying rhs's own sign.
  if (category != fcNaN && rhs.category == fcNaN) {
    assign(rhs);
    sign = false;
  }

  if (category == fcNaN) {
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    // Nothing special; the significands still have to be divided.
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    // The dividend's category already is the result's category.
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcZero;
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
    // Division of a finite non-zero value by zero.  Formats without an
    // infinity produce a NaN carrying the result sign instead.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
      makeNaN(false, sign);
    else
      category = fcInfinity;
    return opDivByZero;

  case PackCategoriesIntoKey(fcZero, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    makeNaN();
    return opInvalidOp;
  }
}
1949 
/* Resolve the operand-category combinations of mod that need no
   arithmetic.  Returns opInvalidOp when the result is a NaN produced here
   or a signaling NaN was involved, and opOK otherwise, including for two
   normal numbers, which are left for the caller to reduce.  */
IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  // A NaN on the right replaces a non-NaN left operand.
  if (category != fcNaN && rhs.category == fcNaN)
    assign(rhs);

  if (category == fcNaN) {
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  // With NaNs out of the way, an infinite dividend or a zero divisor is an
  // invalid operation.
  if (category == fcInfinity || rhs.category == fcZero) {
    makeNaN();
    return opInvalidOp;
  }

  // What is left is a zero or normal dividend with a normal or infinite
  // divisor; *this is not modified here for any of those.
  return opOK;
}
1987 
/* Resolve the operand-category combinations of remainder that need no
   arithmetic.  Returns opDivByZero as a fake status for two normal
   numbers, telling the caller that this was not a special case.  */
IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
  // A NaN on the right replaces a non-NaN left operand.
  if (category != fcNaN && rhs.category == fcNaN)
    assign(rhs);

  if (category == fcNaN) {
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  // With NaNs out of the way, an infinite dividend or a zero divisor is an
  // invalid operation.
  if (category == fcInfinity || rhs.category == fcZero) {
    makeNaN();
    return opInvalidOp;
  }

  // Two normal numbers need real work; signal that with the fake status.
  if (category == fcNormal && rhs.category == fcNormal)
    return opDivByZero;

  // A zero dividend, or a normal dividend with an infinite divisor, is
  // left unmodified.
  return opOK;
}
2025 
2026 /* Change sign.  */
changeSign()2027 void IEEEFloat::changeSign() {
2028   // With NaN-as-negative-zero, neither NaN or negative zero can change
2029   // their signs.
2030   if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2031       (isZero() || isNaN()))
2032     return;
2033   /* Look mummy, this one's easy.  */
2034   sign = !sign;
2035 }
2036 
2037 /* Normalized addition or subtraction.  */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)2038 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2039                                              roundingMode rounding_mode,
2040                                              bool subtract) {
2041   opStatus fs;
2042 
2043   fs = addOrSubtractSpecials(rhs, subtract);
2044 
2045   /* This return code means it was not a simple case.  */
2046   if (fs == opDivByZero) {
2047     lostFraction lost_fraction;
2048 
2049     lost_fraction = addOrSubtractSignificand(rhs, subtract);
2050     fs = normalize(rounding_mode, lost_fraction);
2051 
2052     /* Can only be zero if we lost no fraction.  */
2053     assert(category != fcZero || lost_fraction == lfExactlyZero);
2054   }
2055 
2056   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2057      positive zero unless rounding to minus infinity, except that
2058      adding two like-signed zeroes gives that zero.  */
2059   if (category == fcZero) {
2060     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2061       sign = (rounding_mode == rmTowardNegative);
2062     // NaN-in-negative-zero means zeros need to be normalized to +0.
2063     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2064       sign = false;
2065   }
2066 
2067   return fs;
2068 }
2069 
2070 /* Normalized addition.  */
add(const IEEEFloat & rhs,roundingMode rounding_mode)2071 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2072                                    roundingMode rounding_mode) {
2073   return addOrSubtract(rhs, rounding_mode, false);
2074 }
2075 
2076 /* Normalized subtraction.  */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)2077 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2078                                         roundingMode rounding_mode) {
2079   return addOrSubtract(rhs, rounding_mode, true);
2080 }
2081 
2082 /* Normalized multiply.  */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)2083 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2084                                         roundingMode rounding_mode) {
2085   opStatus fs;
2086 
2087   sign ^= rhs.sign;
2088   fs = multiplySpecials(rhs);
2089 
2090   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2091     sign = false;
2092   if (isFiniteNonZero()) {
2093     lostFraction lost_fraction = multiplySignificand(rhs);
2094     fs = normalize(rounding_mode, lost_fraction);
2095     if (lost_fraction != lfExactlyZero)
2096       fs = (opStatus) (fs | opInexact);
2097   }
2098 
2099   return fs;
2100 }
2101 
2102 /* Normalized divide.  */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)2103 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2104                                       roundingMode rounding_mode) {
2105   opStatus fs;
2106 
2107   sign ^= rhs.sign;
2108   fs = divideSpecials(rhs);
2109 
2110   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2111     sign = false;
2112   if (isFiniteNonZero()) {
2113     lostFraction lost_fraction = divideSignificand(rhs);
2114     fs = normalize(rounding_mode, lost_fraction);
2115     if (lost_fraction != lfExactlyZero)
2116       fs = (opStatus) (fs | opInexact);
2117   }
2118 
2119   return fs;
2120 }
2121 
2122 /* Normalized remainder.  */
remainder(const IEEEFloat & rhs)2123 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2124   opStatus fs;
2125   unsigned int origSign = sign;
2126 
2127   // First handle the special cases.
2128   fs = remainderSpecials(rhs);
2129   if (fs != opDivByZero)
2130     return fs;
2131 
2132   fs = opOK;
2133 
2134   // Make sure the current value is less than twice the denom. If the addition
2135   // did not succeed (an overflow has happened), which means that the finite
2136   // value we currently posses must be less than twice the denom (as we are
2137   // using the same semantics).
2138   IEEEFloat P2 = rhs;
2139   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2140     fs = mod(P2);
2141     assert(fs == opOK);
2142   }
2143 
2144   // Lets work with absolute numbers.
2145   IEEEFloat P = rhs;
2146   P.sign = false;
2147   sign = false;
2148 
2149   //
2150   // To calculate the remainder we use the following scheme.
2151   //
2152   // The remainder is defained as follows:
2153   //
2154   // remainder = numer - rquot * denom = x - r * p
2155   //
2156   // Where r is the result of: x/p, rounded toward the nearest integral value
2157   // (with halfway cases rounded toward the even number).
2158   //
2159   // Currently, (after x mod 2p):
2160   // r is the number of 2p's present inside x, which is inherently, an even
2161   // number of p's.
2162   //
2163   // We may split the remaining calculation into 4 options:
2164   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2165   // - if x == 0.5p then we round to the nearest even number which is 0, and we
2166   //   are done as well.
2167   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2168   //   to subtract 1p at least once.
2169   // - if x >= p then we must subtract p at least once, as x must be a
2170   //   remainder.
2171   //
2172   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2173   //
2174   // We can now split the remaining calculation to the following 3 options:
2175   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2176   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2177   //   must round up to the next even number. so we must subtract p once more.
2178   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2179   //   integral, and subtract p once more.
2180   //
2181 
2182   // Extend the semantics to prevent an overflow/underflow or inexact result.
2183   bool losesInfo;
2184   fltSemantics extendedSemantics = *semantics;
2185   extendedSemantics.maxExponent++;
2186   extendedSemantics.minExponent--;
2187   extendedSemantics.precision += 2;
2188 
2189   IEEEFloat VEx = *this;
2190   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2191   assert(fs == opOK && !losesInfo);
2192   IEEEFloat PEx = P;
2193   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2194   assert(fs == opOK && !losesInfo);
2195 
2196   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2197   // any fraction.
2198   fs = VEx.add(VEx, rmNearestTiesToEven);
2199   assert(fs == opOK);
2200 
2201   if (VEx.compare(PEx) == cmpGreaterThan) {
2202     fs = subtract(P, rmNearestTiesToEven);
2203     assert(fs == opOK);
2204 
2205     // Make VEx = this.add(this), but because we have different semantics, we do
2206     // not want to `convert` again, so we just subtract PEx twice (which equals
2207     // to the desired value).
2208     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2209     assert(fs == opOK);
2210     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2211     assert(fs == opOK);
2212 
2213     cmpResult result = VEx.compare(PEx);
2214     if (result == cmpGreaterThan || result == cmpEqual) {
2215       fs = subtract(P, rmNearestTiesToEven);
2216       assert(fs == opOK);
2217     }
2218   }
2219 
2220   if (isZero()) {
2221     sign = origSign;    // IEEE754 requires this
2222     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2223       // But some 8-bit floats only have positive 0.
2224       sign = false;
2225   }
2226 
2227   else
2228     sign ^= origSign;
2229   return fs;
2230 }
2231 
2232 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)2233 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2234   opStatus fs;
2235   fs = modSpecials(rhs);
2236   unsigned int origSign = sign;
2237 
2238   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2239          compareAbsoluteValue(rhs) != cmpLessThan) {
2240     int Exp = ilogb(*this) - ilogb(rhs);
2241     IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2242     // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2243     // check for it.
2244     if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2245       V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2246     V.sign = sign;
2247 
2248     fs = subtract(V, rmNearestTiesToEven);
2249     assert(fs==opOK);
2250   }
2251   if (isZero()) {
2252     sign = origSign; // fmod requires this
2253     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2254       sign = false;
2255   }
2256   return fs;
2257 }
2258 
2259 /* Normalized fused-multiply-add.  */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)2260 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2261                                                 const IEEEFloat &addend,
2262                                                 roundingMode rounding_mode) {
2263   opStatus fs;
2264 
2265   /* Post-multiplication sign, before addition.  */
2266   sign ^= multiplicand.sign;
2267 
2268   /* If and only if all arguments are normal do we need to do an
2269      extended-precision calculation.  */
2270   if (isFiniteNonZero() &&
2271       multiplicand.isFiniteNonZero() &&
2272       addend.isFinite()) {
2273     lostFraction lost_fraction;
2274 
2275     lost_fraction = multiplySignificand(multiplicand, addend);
2276     fs = normalize(rounding_mode, lost_fraction);
2277     if (lost_fraction != lfExactlyZero)
2278       fs = (opStatus) (fs | opInexact);
2279 
2280     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2281        positive zero unless rounding to minus infinity, except that
2282        adding two like-signed zeroes gives that zero.  */
2283     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2284       sign = (rounding_mode == rmTowardNegative);
2285       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2286         sign = false;
2287     }
2288   } else {
2289     fs = multiplySpecials(multiplicand);
2290 
2291     /* FS can only be opOK or opInvalidOp.  There is no more work
2292        to do in the latter case.  The IEEE-754R standard says it is
2293        implementation-defined in this case whether, if ADDEND is a
2294        quiet NaN, we raise invalid op; this implementation does so.
2295 
2296        If we need to do the addition we can do so with normal
2297        precision.  */
2298     if (fs == opOK)
2299       fs = addOrSubtract(addend, rounding_mode, false);
2300   }
2301 
2302   return fs;
2303 }
2304 
2305 /* Rounding-mode correct round to integral value.  */
roundToIntegral(roundingMode rounding_mode)2306 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2307   opStatus fs;
2308 
2309   if (isInfinity())
2310     // [IEEE Std 754-2008 6.1]:
2311     // The behavior of infinity in floating-point arithmetic is derived from the
2312     // limiting cases of real arithmetic with operands of arbitrarily
2313     // large magnitude, when such a limit exists.
2314     // ...
2315     // Operations on infinite operands are usually exact and therefore signal no
2316     // exceptions ...
2317     return opOK;
2318 
2319   if (isNaN()) {
2320     if (isSignaling()) {
2321       // [IEEE Std 754-2008 6.2]:
2322       // Under default exception handling, any operation signaling an invalid
2323       // operation exception and for which a floating-point result is to be
2324       // delivered shall deliver a quiet NaN.
2325       makeQuiet();
2326       // [IEEE Std 754-2008 6.2]:
2327       // Signaling NaNs shall be reserved operands that, under default exception
2328       // handling, signal the invalid operation exception(see 7.2) for every
2329       // general-computational and signaling-computational operation except for
2330       // the conversions described in 5.12.
2331       return opInvalidOp;
2332     } else {
2333       // [IEEE Std 754-2008 6.2]:
2334       // For an operation with quiet NaN inputs, other than maximum and minimum
2335       // operations, if a floating-point result is to be delivered the result
2336       // shall be a quiet NaN which should be one of the input NaNs.
2337       // ...
2338       // Every general-computational and quiet-computational operation involving
2339       // one or more input NaNs, none of them signaling, shall signal no
2340       // exception, except fusedMultiplyAdd might signal the invalid operation
2341       // exception(see 7.2).
2342       return opOK;
2343     }
2344   }
2345 
2346   if (isZero()) {
2347     // [IEEE Std 754-2008 6.3]:
2348     // ... the sign of the result of conversions, the quantize operation, the
2349     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2350     // the sign of the first or only operand.
2351     return opOK;
2352   }
2353 
2354   // If the exponent is large enough, we know that this value is already
2355   // integral, and the arithmetic below would potentially cause it to saturate
2356   // to +/-Inf.  Bail out early instead.
2357   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2358     return opOK;
2359 
2360   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2361   // precision of our format, and then subtract it back off again.  The choice
2362   // of rounding modes for the addition/subtraction determines the rounding mode
2363   // for our integral rounding as well.
2364   // NOTE: When the input value is negative, we do subtraction followed by
2365   // addition instead.
2366   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2367   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2368   IEEEFloat MagicConstant(*semantics);
2369   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2370                                       rmNearestTiesToEven);
2371   assert(fs == opOK);
2372   MagicConstant.sign = sign;
2373 
2374   // Preserve the input sign so that we can handle the case of zero result
2375   // correctly.
2376   bool inputSign = isNegative();
2377 
2378   fs = add(MagicConstant, rounding_mode);
2379 
2380   // Current value and 'MagicConstant' are both integers, so the result of the
2381   // subtraction is always exact according to Sterbenz' lemma.
2382   subtract(MagicConstant, rounding_mode);
2383 
2384   // Restore the input sign.
2385   if (inputSign != isNegative())
2386     changeSign();
2387 
2388   return fs;
2389 }
2390 
2391 
2392 /* Comparison requires normalized numbers.  */
compare(const IEEEFloat & rhs) const2393 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2394   cmpResult result;
2395 
2396   assert(semantics == rhs.semantics);
2397 
2398   switch (PackCategoriesIntoKey(category, rhs.category)) {
2399   default:
2400     llvm_unreachable(nullptr);
2401 
2402   case PackCategoriesIntoKey(fcNaN, fcZero):
2403   case PackCategoriesIntoKey(fcNaN, fcNormal):
2404   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2405   case PackCategoriesIntoKey(fcNaN, fcNaN):
2406   case PackCategoriesIntoKey(fcZero, fcNaN):
2407   case PackCategoriesIntoKey(fcNormal, fcNaN):
2408   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2409     return cmpUnordered;
2410 
2411   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2412   case PackCategoriesIntoKey(fcInfinity, fcZero):
2413   case PackCategoriesIntoKey(fcNormal, fcZero):
2414     if (sign)
2415       return cmpLessThan;
2416     else
2417       return cmpGreaterThan;
2418 
2419   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2420   case PackCategoriesIntoKey(fcZero, fcInfinity):
2421   case PackCategoriesIntoKey(fcZero, fcNormal):
2422     if (rhs.sign)
2423       return cmpGreaterThan;
2424     else
2425       return cmpLessThan;
2426 
2427   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2428     if (sign == rhs.sign)
2429       return cmpEqual;
2430     else if (sign)
2431       return cmpLessThan;
2432     else
2433       return cmpGreaterThan;
2434 
2435   case PackCategoriesIntoKey(fcZero, fcZero):
2436     return cmpEqual;
2437 
2438   case PackCategoriesIntoKey(fcNormal, fcNormal):
2439     break;
2440   }
2441 
2442   /* Two normal numbers.  Do they have the same sign?  */
2443   if (sign != rhs.sign) {
2444     if (sign)
2445       result = cmpLessThan;
2446     else
2447       result = cmpGreaterThan;
2448   } else {
2449     /* Compare absolute values; invert result if negative.  */
2450     result = compareAbsoluteValue(rhs);
2451 
2452     if (sign) {
2453       if (result == cmpLessThan)
2454         result = cmpGreaterThan;
2455       else if (result == cmpGreaterThan)
2456         result = cmpLessThan;
2457     }
2458   }
2459 
2460   return result;
2461 }
2462 
2463 /// IEEEFloat::convert - convert a value of one floating point type to another.
2464 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2465 /// records whether the transformation lost information, i.e. whether
2466 /// converting the result back to the original type will produce the
2467 /// original value (this is almost the same as return value==fsOK, but there
2468 /// are edge cases where this is not so).
2469 
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)2470 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2471                                        roundingMode rounding_mode,
2472                                        bool *losesInfo) {
2473   lostFraction lostFraction;
2474   unsigned int newPartCount, oldPartCount;
2475   opStatus fs;
2476   int shift;
2477   const fltSemantics &fromSemantics = *semantics;
2478   bool is_signaling = isSignaling();
2479 
2480   lostFraction = lfExactlyZero;
2481   newPartCount = partCountForBits(toSemantics.precision + 1);
2482   oldPartCount = partCount();
2483   shift = toSemantics.precision - fromSemantics.precision;
2484 
2485   bool X86SpecialNan = false;
2486   if (&fromSemantics == &semX87DoubleExtended &&
2487       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2488       (!(*significandParts() & 0x8000000000000000ULL) ||
2489        !(*significandParts() & 0x4000000000000000ULL))) {
2490     // x86 has some unusual NaNs which cannot be represented in any other
2491     // format; note them here.
2492     X86SpecialNan = true;
2493   }
2494 
2495   // If this is a truncation of a denormal number, and the target semantics
2496   // has larger exponent range than the source semantics (this can happen
2497   // when truncating from PowerPC double-double to double format), the
2498   // right shift could lose result mantissa bits.  Adjust exponent instead
2499   // of performing excessive shift.
2500   // Also do a similar trick in case shifting denormal would produce zero
2501   // significand as this case isn't handled correctly by normalize.
2502   if (shift < 0 && isFiniteNonZero()) {
2503     int omsb = significandMSB() + 1;
2504     int exponentChange = omsb - fromSemantics.precision;
2505     if (exponent + exponentChange < toSemantics.minExponent)
2506       exponentChange = toSemantics.minExponent - exponent;
2507     if (exponentChange < shift)
2508       exponentChange = shift;
2509     if (exponentChange < 0) {
2510       shift -= exponentChange;
2511       exponent += exponentChange;
2512     } else if (omsb <= -shift) {
2513       exponentChange = omsb + shift - 1; // leave at least one bit set
2514       shift -= exponentChange;
2515       exponent += exponentChange;
2516     }
2517   }
2518 
2519   // If this is a truncation, perform the shift before we narrow the storage.
2520   if (shift < 0 && (isFiniteNonZero() ||
2521                     (category == fcNaN && semantics->nonFiniteBehavior !=
2522                                               fltNonfiniteBehavior::NanOnly)))
2523     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2524 
2525   // Fix the storage so it can hold to new value.
2526   if (newPartCount > oldPartCount) {
2527     // The new type requires more storage; make it available.
2528     integerPart *newParts;
2529     newParts = new integerPart[newPartCount];
2530     APInt::tcSet(newParts, 0, newPartCount);
2531     if (isFiniteNonZero() || category==fcNaN)
2532       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2533     freeSignificand();
2534     significand.parts = newParts;
2535   } else if (newPartCount == 1 && oldPartCount != 1) {
2536     // Switch to built-in storage for a single part.
2537     integerPart newPart = 0;
2538     if (isFiniteNonZero() || category==fcNaN)
2539       newPart = significandParts()[0];
2540     freeSignificand();
2541     significand.part = newPart;
2542   }
2543 
2544   // Now that we have the right storage, switch the semantics.
2545   semantics = &toSemantics;
2546 
2547   // If this is an extension, perform the shift now that the storage is
2548   // available.
2549   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2550     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2551 
2552   if (isFiniteNonZero()) {
2553     fs = normalize(rounding_mode, lostFraction);
2554     *losesInfo = (fs != opOK);
2555   } else if (category == fcNaN) {
2556     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2557       *losesInfo =
2558           fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2559       makeNaN(false, sign);
2560       return is_signaling ? opInvalidOp : opOK;
2561     }
2562 
2563     // If NaN is negative zero, we need to create a new NaN to avoid converting
2564     // NaN to -Inf.
2565     if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2566         semantics->nanEncoding != fltNanEncoding::NegativeZero)
2567       makeNaN(false, false);
2568 
2569     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2570 
2571     // For x87 extended precision, we want to make a NaN, not a special NaN if
2572     // the input wasn't special either.
2573     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2574       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2575 
2576     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2577     // This also guarantees that a sNaN does not become Inf on a truncation
2578     // that loses all payload bits.
2579     if (is_signaling) {
2580       makeQuiet();
2581       fs = opInvalidOp;
2582     } else {
2583       fs = opOK;
2584     }
2585   } else if (category == fcInfinity &&
2586              semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2587     makeNaN(false, sign);
2588     *losesInfo = true;
2589     fs = opInexact;
2590   } else if (category == fcZero &&
2591              semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2592     // Negative zero loses info, but positive zero doesn't.
2593     *losesInfo =
2594         fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2595     fs = *losesInfo ? opInexact : opOK;
2596     // NaN is negative zero means -0 -> +0, which can lose information
2597     sign = false;
2598   } else {
2599     *losesInfo = false;
2600     fs = opOK;
2601   }
2602 
2603   return fs;
2604 }
2605 
2606 /* Convert a floating point number to an integer according to the
2607    rounding mode.  If the rounded integer value is out of range this
2608    returns an invalid operation exception and the contents of the
2609    destination parts are unspecified.  If the rounded value is in
2610    range but the floating point number is not the exact integer, the C
2611    standard doesn't require an inexact exception to be raised.  IEEE
2612    854 does require it so we do that.
2613 
2614    Note that for conversions to integer type the C standard requires
2615    round-to-zero to always be used.  */
convertToSignExtendedInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2616 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2617     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2618     roundingMode rounding_mode, bool *isExact) const {
2619   lostFraction lost_fraction;
2620   const integerPart *src;
2621   unsigned int dstPartsCount, truncatedBits;
2622 
2623   *isExact = false;
2624 
2625   /* Handle the three special cases first.  */
2626   if (category == fcInfinity || category == fcNaN)
2627     return opInvalidOp;
2628 
2629   dstPartsCount = partCountForBits(width);
2630   assert(dstPartsCount <= parts.size() && "Integer too big");
2631 
2632   if (category == fcZero) {
2633     APInt::tcSet(parts.data(), 0, dstPartsCount);
2634     // Negative zero can't be represented as an int.
2635     *isExact = !sign;
2636     return opOK;
2637   }
2638 
2639   src = significandParts();
2640 
2641   /* Step 1: place our absolute value, with any fraction truncated, in
2642      the destination.  */
2643   if (exponent < 0) {
2644     /* Our absolute value is less than one; truncate everything.  */
2645     APInt::tcSet(parts.data(), 0, dstPartsCount);
2646     /* For exponent -1 the integer bit represents .5, look at that.
2647        For smaller exponents leftmost truncated bit is 0. */
2648     truncatedBits = semantics->precision -1U - exponent;
2649   } else {
2650     /* We want the most significant (exponent + 1) bits; the rest are
2651        truncated.  */
2652     unsigned int bits = exponent + 1U;
2653 
2654     /* Hopelessly large in magnitude?  */
2655     if (bits > width)
2656       return opInvalidOp;
2657 
2658     if (bits < semantics->precision) {
2659       /* We truncate (semantics->precision - bits) bits.  */
2660       truncatedBits = semantics->precision - bits;
2661       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2662     } else {
2663       /* We want at least as many bits as are available.  */
2664       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2665                        0);
2666       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2667                          bits - semantics->precision);
2668       truncatedBits = 0;
2669     }
2670   }
2671 
2672   /* Step 2: work out any lost fraction, and increment the absolute
2673      value if we would round away from zero.  */
2674   if (truncatedBits) {
2675     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2676                                                   truncatedBits);
2677     if (lost_fraction != lfExactlyZero &&
2678         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2679       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2680         return opInvalidOp;     /* Overflow.  */
2681     }
2682   } else {
2683     lost_fraction = lfExactlyZero;
2684   }
2685 
2686   /* Step 3: check if we fit in the destination.  */
2687   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2688 
2689   if (sign) {
2690     if (!isSigned) {
2691       /* Negative numbers cannot be represented as unsigned.  */
2692       if (omsb != 0)
2693         return opInvalidOp;
2694     } else {
2695       /* It takes omsb bits to represent the unsigned integer value.
2696          We lose a bit for the sign, but care is needed as the
2697          maximally negative integer is a special case.  */
2698       if (omsb == width &&
2699           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2700         return opInvalidOp;
2701 
2702       /* This case can happen because of rounding.  */
2703       if (omsb > width)
2704         return opInvalidOp;
2705     }
2706 
2707     APInt::tcNegate (parts.data(), dstPartsCount);
2708   } else {
2709     if (omsb >= width + !isSigned)
2710       return opInvalidOp;
2711   }
2712 
2713   if (lost_fraction == lfExactlyZero) {
2714     *isExact = true;
2715     return opOK;
2716   } else
2717     return opInexact;
2718 }
2719 
2720 /* Same as convertToSignExtendedInteger, except we provide
2721    deterministic values in case of an invalid operation exception,
2722    namely zero for NaNs and the minimal or maximal value respectively
2723    for underflow or overflow.
2724    The *isExact output tells whether the result is exact, in the sense
2725    that converting it back to the original floating point type produces
2726    the original value.  This is almost equivalent to result==opOK,
2727    except for negative zeroes.
2728 */
2729 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2730 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2731                             unsigned int width, bool isSigned,
2732                             roundingMode rounding_mode, bool *isExact) const {
2733   opStatus fs;
2734 
2735   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2736                                     isExact);
2737 
2738   if (fs == opInvalidOp) {
2739     unsigned int bits, dstPartsCount;
2740 
2741     dstPartsCount = partCountForBits(width);
2742     assert(dstPartsCount <= parts.size() && "Integer too big");
2743 
2744     if (category == fcNaN)
2745       bits = 0;
2746     else if (sign)
2747       bits = isSigned;
2748     else
2749       bits = width - isSigned;
2750 
2751     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2752     if (sign && isSigned)
2753       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2754   }
2755 
2756   return fs;
2757 }
2758 
2759 /* Convert an unsigned integer SRC to a floating point number,
2760    rounding according to ROUNDING_MODE.  The sign of the floating
2761    point number is not modified.  */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2762 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2763     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2764   unsigned int omsb, precision, dstCount;
2765   integerPart *dst;
2766   lostFraction lost_fraction;
2767 
2768   category = fcNormal;
2769   omsb = APInt::tcMSB(src, srcCount) + 1;
2770   dst = significandParts();
2771   dstCount = partCount();
2772   precision = semantics->precision;
2773 
2774   /* We want the most significant PRECISION bits of SRC.  There may not
2775      be that many; extract what we can.  */
2776   if (precision <= omsb) {
2777     exponent = omsb - 1;
2778     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2779                                                   omsb - precision);
2780     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2781   } else {
2782     exponent = precision - 1;
2783     lost_fraction = lfExactlyZero;
2784     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2785   }
2786 
2787   return normalize(rounding_mode, lost_fraction);
2788 }
2789 
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2790 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2791                                                 roundingMode rounding_mode) {
2792   unsigned int partCount = Val.getNumWords();
2793   APInt api = Val;
2794 
2795   sign = false;
2796   if (isSigned && api.isNegative()) {
2797     sign = true;
2798     api = -api;
2799   }
2800 
2801   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2802 }
2803 
2804 /* Convert a two's complement integer SRC to a floating point number,
2805    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2806    integer is signed, in which case it must be sign-extended.  */
2807 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2808 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2809                                           unsigned int srcCount, bool isSigned,
2810                                           roundingMode rounding_mode) {
2811   opStatus status;
2812 
2813   if (isSigned &&
2814       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2815     integerPart *copy;
2816 
2817     /* If we're signed and negative negate a copy.  */
2818     sign = true;
2819     copy = new integerPart[srcCount];
2820     APInt::tcAssign(copy, src, srcCount);
2821     APInt::tcNegate(copy, srcCount);
2822     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2823     delete [] copy;
2824   } else {
2825     sign = false;
2826     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2827   }
2828 
2829   return status;
2830 }
2831 
2832 /* FIXME: should this just take a const APInt reference?  */
2833 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2834 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2835                                           unsigned int width, bool isSigned,
2836                                           roundingMode rounding_mode) {
2837   unsigned int partCount = partCountForBits(width);
2838   APInt api = APInt(width, ArrayRef(parts, partCount));
2839 
2840   sign = false;
2841   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2842     sign = true;
2843     api = -api;
2844   }
2845 
2846   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2847 }
2848 
2849 Expected<IEEEFloat::opStatus>
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2850 IEEEFloat::convertFromHexadecimalString(StringRef s,
2851                                         roundingMode rounding_mode) {
2852   lostFraction lost_fraction = lfExactlyZero;
2853 
2854   category = fcNormal;
2855   zeroSignificand();
2856   exponent = 0;
2857 
2858   integerPart *significand = significandParts();
2859   unsigned partsCount = partCount();
2860   unsigned bitPos = partsCount * integerPartWidth;
2861   bool computedTrailingFraction = false;
2862 
2863   // Skip leading zeroes and any (hexa)decimal point.
2864   StringRef::iterator begin = s.begin();
2865   StringRef::iterator end = s.end();
2866   StringRef::iterator dot;
2867   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2868   if (!PtrOrErr)
2869     return PtrOrErr.takeError();
2870   StringRef::iterator p = *PtrOrErr;
2871   StringRef::iterator firstSignificantDigit = p;
2872 
2873   while (p != end) {
2874     integerPart hex_value;
2875 
2876     if (*p == '.') {
2877       if (dot != end)
2878         return createError("String contains multiple dots");
2879       dot = p++;
2880       continue;
2881     }
2882 
2883     hex_value = hexDigitValue(*p);
2884     if (hex_value == UINT_MAX)
2885       break;
2886 
2887     p++;
2888 
2889     // Store the number while we have space.
2890     if (bitPos) {
2891       bitPos -= 4;
2892       hex_value <<= bitPos % integerPartWidth;
2893       significand[bitPos / integerPartWidth] |= hex_value;
2894     } else if (!computedTrailingFraction) {
2895       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2896       if (!FractOrErr)
2897         return FractOrErr.takeError();
2898       lost_fraction = *FractOrErr;
2899       computedTrailingFraction = true;
2900     }
2901   }
2902 
2903   /* Hex floats require an exponent but not a hexadecimal point.  */
2904   if (p == end)
2905     return createError("Hex strings require an exponent");
2906   if (*p != 'p' && *p != 'P')
2907     return createError("Invalid character in significand");
2908   if (p == begin)
2909     return createError("Significand has no digits");
2910   if (dot != end && p - begin == 1)
2911     return createError("Significand has no digits");
2912 
2913   /* Ignore the exponent if we are zero.  */
2914   if (p != firstSignificantDigit) {
2915     int expAdjustment;
2916 
2917     /* Implicit hexadecimal point?  */
2918     if (dot == end)
2919       dot = p;
2920 
2921     /* Calculate the exponent adjustment implicit in the number of
2922        significant digits.  */
2923     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2924     if (expAdjustment < 0)
2925       expAdjustment++;
2926     expAdjustment = expAdjustment * 4 - 1;
2927 
2928     /* Adjust for writing the significand starting at the most
2929        significant nibble.  */
2930     expAdjustment += semantics->precision;
2931     expAdjustment -= partsCount * integerPartWidth;
2932 
2933     /* Adjust for the given exponent.  */
2934     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2935     if (!ExpOrErr)
2936       return ExpOrErr.takeError();
2937     exponent = *ExpOrErr;
2938   }
2939 
2940   return normalize(rounding_mode, lost_fraction);
2941 }
2942 
2943 IEEEFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2944 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2945                                         unsigned sigPartCount, int exp,
2946                                         roundingMode rounding_mode) {
2947   unsigned int parts, pow5PartCount;
2948   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2949   integerPart pow5Parts[maxPowerOfFiveParts];
2950   bool isNearest;
2951 
2952   isNearest = (rounding_mode == rmNearestTiesToEven ||
2953                rounding_mode == rmNearestTiesToAway);
2954 
2955   parts = partCountForBits(semantics->precision + 11);
2956 
2957   /* Calculate pow(5, abs(exp)).  */
2958   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2959 
2960   for (;; parts *= 2) {
2961     opStatus sigStatus, powStatus;
2962     unsigned int excessPrecision, truncatedBits;
2963 
2964     calcSemantics.precision = parts * integerPartWidth - 1;
2965     excessPrecision = calcSemantics.precision - semantics->precision;
2966     truncatedBits = excessPrecision;
2967 
2968     IEEEFloat decSig(calcSemantics, uninitialized);
2969     decSig.makeZero(sign);
2970     IEEEFloat pow5(calcSemantics);
2971 
2972     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2973                                                 rmNearestTiesToEven);
2974     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2975                                               rmNearestTiesToEven);
2976     /* Add exp, as 10^n = 5^n * 2^n.  */
2977     decSig.exponent += exp;
2978 
2979     lostFraction calcLostFraction;
2980     integerPart HUerr, HUdistance;
2981     unsigned int powHUerr;
2982 
2983     if (exp >= 0) {
2984       /* multiplySignificand leaves the precision-th bit set to 1.  */
2985       calcLostFraction = decSig.multiplySignificand(pow5);
2986       powHUerr = powStatus != opOK;
2987     } else {
2988       calcLostFraction = decSig.divideSignificand(pow5);
2989       /* Denormal numbers have less precision.  */
2990       if (decSig.exponent < semantics->minExponent) {
2991         excessPrecision += (semantics->minExponent - decSig.exponent);
2992         truncatedBits = excessPrecision;
2993         if (excessPrecision > calcSemantics.precision)
2994           excessPrecision = calcSemantics.precision;
2995       }
2996       /* Extra half-ulp lost in reciprocal of exponent.  */
2997       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2998     }
2999 
3000     /* Both multiplySignificand and divideSignificand return the
3001        result with the integer bit set.  */
3002     assert(APInt::tcExtractBit
3003            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
3004 
3005     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
3006                        powHUerr);
3007     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
3008                                       excessPrecision, isNearest);
3009 
3010     /* Are we guaranteed to round correctly if we truncate?  */
3011     if (HUdistance >= HUerr) {
3012       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
3013                        calcSemantics.precision - excessPrecision,
3014                        excessPrecision);
3015       /* Take the exponent of decSig.  If we tcExtract-ed less bits
3016          above we must adjust our exponent to compensate for the
3017          implicit right shift.  */
3018       exponent = (decSig.exponent + semantics->precision
3019                   - (calcSemantics.precision - excessPrecision));
3020       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
3021                                                        decSig.partCount(),
3022                                                        truncatedBits);
3023       return normalize(rounding_mode, calcLostFraction);
3024     }
3025   }
3026 }
3027 
3028 Expected<IEEEFloat::opStatus>
convertFromDecimalString(StringRef str,roundingMode rounding_mode)3029 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3030   decimalInfo D;
3031   opStatus fs;
3032 
3033   /* Scan the text.  */
3034   StringRef::iterator p = str.begin();
3035   if (Error Err = interpretDecimal(p, str.end(), &D))
3036     return std::move(Err);
3037 
3038   /* Handle the quick cases.  First the case of no significant digits,
3039      i.e. zero, and then exponents that are obviously too large or too
3040      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
3041      definitely overflows if
3042 
3043            (exp - 1) * L >= maxExponent
3044 
3045      and definitely underflows to zero where
3046 
3047            (exp + 1) * L <= minExponent - precision
3048 
3049      With integer arithmetic the tightest bounds for L are
3050 
3051            93/28 < L < 196/59            [ numerator <= 256 ]
3052            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
3053   */
3054 
3055   // Test if we have a zero number allowing for strings with no null terminators
3056   // and zero decimals with non-zero exponents.
3057   //
3058   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3059   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3060   // be at most one dot. On the other hand, if we have a zero with a non-zero
3061   // exponent, then we know that D.firstSigDigit will be non-numeric.
3062   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3063     category = fcZero;
3064     fs = opOK;
3065     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3066       sign = false;
3067 
3068     /* Check whether the normalized exponent is high enough to overflow
3069        max during the log-rebasing in the max-exponent check below. */
3070   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3071     fs = handleOverflow(rounding_mode);
3072 
3073   /* If it wasn't, then it also wasn't high enough to overflow max
3074      during the log-rebasing in the min-exponent check.  Check that it
3075      won't overflow min in either check, then perform the min-exponent
3076      check. */
3077   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3078              (D.normalizedExponent + 1) * 28738 <=
3079                8651 * (semantics->minExponent - (int) semantics->precision)) {
3080     /* Underflow to zero and round.  */
3081     category = fcNormal;
3082     zeroSignificand();
3083     fs = normalize(rounding_mode, lfLessThanHalf);
3084 
3085   /* We can finally safely perform the max-exponent check. */
3086   } else if ((D.normalizedExponent - 1) * 42039
3087              >= 12655 * semantics->maxExponent) {
3088     /* Overflow and round.  */
3089     fs = handleOverflow(rounding_mode);
3090   } else {
3091     integerPart *decSignificand;
3092     unsigned int partCount;
3093 
3094     /* A tight upper bound on number of bits required to hold an
3095        N-digit decimal integer is N * 196 / 59.  Allocate enough space
3096        to hold the full significand, and an extra part required by
3097        tcMultiplyPart.  */
3098     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3099     partCount = partCountForBits(1 + 196 * partCount / 59);
3100     decSignificand = new integerPart[partCount + 1];
3101     partCount = 0;
3102 
3103     /* Convert to binary efficiently - we do almost all multiplication
3104        in an integerPart.  When this would overflow do we do a single
3105        bignum multiplication, and then revert again to multiplication
3106        in an integerPart.  */
3107     do {
3108       integerPart decValue, val, multiplier;
3109 
3110       val = 0;
3111       multiplier = 1;
3112 
3113       do {
3114         if (*p == '.') {
3115           p++;
3116           if (p == str.end()) {
3117             break;
3118           }
3119         }
3120         decValue = decDigitValue(*p++);
3121         if (decValue >= 10U) {
3122           delete[] decSignificand;
3123           return createError("Invalid character in significand");
3124         }
3125         multiplier *= 10;
3126         val = val * 10 + decValue;
3127         /* The maximum number that can be multiplied by ten with any
3128            digit added without overflowing an integerPart.  */
3129       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3130 
3131       /* Multiply out the current part.  */
3132       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3133                             partCount, partCount + 1, false);
3134 
3135       /* If we used another part (likely but not guaranteed), increase
3136          the count.  */
3137       if (decSignificand[partCount])
3138         partCount++;
3139     } while (p <= D.lastSigDigit);
3140 
3141     category = fcNormal;
3142     fs = roundSignificandWithExponent(decSignificand, partCount,
3143                                       D.exponent, rounding_mode);
3144 
3145     delete [] decSignificand;
3146   }
3147 
3148   return fs;
3149 }
3150 
convertFromStringSpecials(StringRef str)3151 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3152   const size_t MIN_NAME_SIZE = 3;
3153 
3154   if (str.size() < MIN_NAME_SIZE)
3155     return false;
3156 
3157   if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3158     makeInf(false);
3159     return true;
3160   }
3161 
3162   bool IsNegative = str.front() == '-';
3163   if (IsNegative) {
3164     str = str.drop_front();
3165     if (str.size() < MIN_NAME_SIZE)
3166       return false;
3167 
3168     if (str == "inf" || str == "INFINITY" || str == "Inf") {
3169       makeInf(true);
3170       return true;
3171     }
3172   }
3173 
3174   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3175   bool IsSignaling = str.front() == 's' || str.front() == 'S';
3176   if (IsSignaling) {
3177     str = str.drop_front();
3178     if (str.size() < MIN_NAME_SIZE)
3179       return false;
3180   }
3181 
3182   if (str.starts_with("nan") || str.starts_with("NaN")) {
3183     str = str.drop_front(3);
3184 
3185     // A NaN without payload.
3186     if (str.empty()) {
3187       makeNaN(IsSignaling, IsNegative);
3188       return true;
3189     }
3190 
3191     // Allow the payload to be inside parentheses.
3192     if (str.front() == '(') {
3193       // Parentheses should be balanced (and not empty).
3194       if (str.size() <= 2 || str.back() != ')')
3195         return false;
3196 
3197       str = str.slice(1, str.size() - 1);
3198     }
3199 
3200     // Determine the payload number's radix.
3201     unsigned Radix = 10;
3202     if (str[0] == '0') {
3203       if (str.size() > 1 && tolower(str[1]) == 'x') {
3204         str = str.drop_front(2);
3205         Radix = 16;
3206       } else
3207         Radix = 8;
3208     }
3209 
3210     // Parse the payload and make the NaN.
3211     APInt Payload;
3212     if (!str.getAsInteger(Radix, Payload)) {
3213       makeNaN(IsSignaling, IsNegative, &Payload);
3214       return true;
3215     }
3216   }
3217 
3218   return false;
3219 }
3220 
3221 Expected<IEEEFloat::opStatus>
convertFromString(StringRef str,roundingMode rounding_mode)3222 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3223   if (str.empty())
3224     return createError("Invalid string length");
3225 
3226   // Handle special cases.
3227   if (convertFromStringSpecials(str))
3228     return opOK;
3229 
3230   /* Handle a leading minus sign.  */
3231   StringRef::iterator p = str.begin();
3232   size_t slen = str.size();
3233   sign = *p == '-' ? 1 : 0;
3234   if (*p == '-' || *p == '+') {
3235     p++;
3236     slen--;
3237     if (!slen)
3238       return createError("String has no digits");
3239   }
3240 
3241   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3242     if (slen == 2)
3243       return createError("Invalid string");
3244     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3245                                         rounding_mode);
3246   }
3247 
3248   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3249 }
3250 
3251 /* Write out a hexadecimal representation of the floating point value
3252    to DST, which must be of sufficient size, in the C99 form
3253    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
3254    excluding the terminating NUL.
3255 
3256    If UPPERCASE, the output is in upper case, otherwise in lower case.
3257 
3258    HEXDIGITS digits appear altogether, rounding the value if
3259    necessary.  If HEXDIGITS is 0, the minimal precision to display the
3260    number precisely is used instead.  If nothing would appear after
3261    the decimal point it is suppressed.
3262 
3263    The decimal exponent is always printed and has at least one digit.
3264    Zero values display an exponent of zero.  Infinities and NaNs
3265    appear as "infinity" or "nan" respectively.
3266 
3267    The above rules are as specified by C99.  There is ambiguity about
3268    what the leading hexadecimal digit should be.  This implementation
3269    uses whatever is necessary so that the exponent is displayed as
3270    stored.  This implies the exponent will fall within the IEEE format
3271    range, and the leading hexadecimal digit will be 0 (for denormals),
3272    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3273    any other digits zero).
3274 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const3275 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3276                                            bool upperCase,
3277                                            roundingMode rounding_mode) const {
3278   char *p;
3279 
3280   p = dst;
3281   if (sign)
3282     *dst++ = '-';
3283 
3284   switch (category) {
3285   case fcInfinity:
3286     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3287     dst += sizeof infinityL - 1;
3288     break;
3289 
3290   case fcNaN:
3291     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3292     dst += sizeof NaNU - 1;
3293     break;
3294 
3295   case fcZero:
3296     *dst++ = '0';
3297     *dst++ = upperCase ? 'X': 'x';
3298     *dst++ = '0';
3299     if (hexDigits > 1) {
3300       *dst++ = '.';
3301       memset (dst, '0', hexDigits - 1);
3302       dst += hexDigits - 1;
3303     }
3304     *dst++ = upperCase ? 'P': 'p';
3305     *dst++ = '0';
3306     break;
3307 
3308   case fcNormal:
3309     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3310     break;
3311   }
3312 
3313   *dst = 0;
3314 
3315   return static_cast<unsigned int>(dst - p);
3316 }
3317 
3318 /* Does the hard work of outputting the correctly rounded hexadecimal
3319    form of a normal floating point number with the specified number of
3320    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
3321    digits necessary to print the value precisely is output.  */
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const3322 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3323                                           bool upperCase,
3324                                           roundingMode rounding_mode) const {
3325   unsigned int count, valueBits, shift, partsCount, outputDigits;
3326   const char *hexDigitChars;
3327   const integerPart *significand;
3328   char *p;
3329   bool roundUp;
3330 
3331   *dst++ = '0';
3332   *dst++ = upperCase ? 'X': 'x';
3333 
3334   roundUp = false;
3335   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3336 
3337   significand = significandParts();
3338   partsCount = partCount();
3339 
3340   /* +3 because the first digit only uses the single integer bit, so
3341      we have 3 virtual zero most-significant-bits.  */
3342   valueBits = semantics->precision + 3;
3343   shift = integerPartWidth - valueBits % integerPartWidth;
3344 
3345   /* The natural number of digits required ignoring trailing
3346      insignificant zeroes.  */
3347   outputDigits = (valueBits - significandLSB () + 3) / 4;
3348 
3349   /* hexDigits of zero means use the required number for the
3350      precision.  Otherwise, see if we are truncating.  If we are,
3351      find out if we need to round away from zero.  */
3352   if (hexDigits) {
3353     if (hexDigits < outputDigits) {
3354       /* We are dropping non-zero bits, so need to check how to round.
3355          "bits" is the number of dropped bits.  */
3356       unsigned int bits;
3357       lostFraction fraction;
3358 
3359       bits = valueBits - hexDigits * 4;
3360       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3361       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3362     }
3363     outputDigits = hexDigits;
3364   }
3365 
3366   /* Write the digits consecutively, and start writing in the location
3367      of the hexadecimal point.  We move the most significant digit
3368      left and add the hexadecimal point later.  */
3369   p = ++dst;
3370 
3371   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3372 
3373   while (outputDigits && count) {
3374     integerPart part;
3375 
3376     /* Put the most significant integerPartWidth bits in "part".  */
3377     if (--count == partsCount)
3378       part = 0;  /* An imaginary higher zero part.  */
3379     else
3380       part = significand[count] << shift;
3381 
3382     if (count && shift)
3383       part |= significand[count - 1] >> (integerPartWidth - shift);
3384 
3385     /* Convert as much of "part" to hexdigits as we can.  */
3386     unsigned int curDigits = integerPartWidth / 4;
3387 
3388     if (curDigits > outputDigits)
3389       curDigits = outputDigits;
3390     dst += partAsHex (dst, part, curDigits, hexDigitChars);
3391     outputDigits -= curDigits;
3392   }
3393 
3394   if (roundUp) {
3395     char *q = dst;
3396 
3397     /* Note that hexDigitChars has a trailing '0'.  */
3398     do {
3399       q--;
3400       *q = hexDigitChars[hexDigitValue (*q) + 1];
3401     } while (*q == '0');
3402     assert(q >= p);
3403   } else {
3404     /* Add trailing zeroes.  */
3405     memset (dst, '0', outputDigits);
3406     dst += outputDigits;
3407   }
3408 
3409   /* Move the most significant digit to before the point, and if there
3410      is something after the decimal point add it.  This must come
3411      after rounding above.  */
3412   p[-1] = p[0];
3413   if (dst -1 == p)
3414     dst--;
3415   else
3416     p[0] = '.';
3417 
3418   /* Finally output the exponent.  */
3419   *dst++ = upperCase ? 'P': 'p';
3420 
3421   return writeSignedDecimal (dst, exponent);
3422 }
3423 
hash_value(const IEEEFloat & Arg)3424 hash_code hash_value(const IEEEFloat &Arg) {
3425   if (!Arg.isFiniteNonZero())
3426     return hash_combine((uint8_t)Arg.category,
3427                         // NaN has no sign, fix it at zero.
3428                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3429                         Arg.semantics->precision);
3430 
3431   // Normal floats need their exponent and significand hashed.
3432   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3433                       Arg.semantics->precision, Arg.exponent,
3434                       hash_combine_range(
3435                         Arg.significandParts(),
3436                         Arg.significandParts() + Arg.partCount()));
3437 }
3438 
3439 // Conversion from APFloat to/from host float/double.  It may eventually be
3440 // possible to eliminate these and have everybody deal with APFloats, but that
3441 // will take a while.  This approach will not easily extend to long double.
3442 // Current implementation requires integerPartWidth==64, which is correct at
3443 // the moment but could be made more general.
3444 
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3447 
convertF80LongDoubleAPFloatToAPInt() const3448 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3449   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3450   assert(partCount()==2);
3451 
3452   uint64_t myexponent, mysignificand;
3453 
3454   if (isFiniteNonZero()) {
3455     myexponent = exponent+16383; //bias
3456     mysignificand = significandParts()[0];
3457     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3458       myexponent = 0;   // denormal
3459   } else if (category==fcZero) {
3460     myexponent = 0;
3461     mysignificand = 0;
3462   } else if (category==fcInfinity) {
3463     myexponent = 0x7fff;
3464     mysignificand = 0x8000000000000000ULL;
3465   } else {
3466     assert(category == fcNaN && "Unknown category");
3467     myexponent = 0x7fff;
3468     mysignificand = significandParts()[0];
3469   }
3470 
3471   uint64_t words[2];
3472   words[0] = mysignificand;
3473   words[1] =  ((uint64_t)(sign & 1) << 15) |
3474               (myexponent & 0x7fffLL);
3475   return APInt(80, words);
3476 }
3477 
convertPPCDoubleDoubleAPFloatToAPInt() const3478 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3479   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3480   assert(partCount()==2);
3481 
3482   uint64_t words[2];
3483   opStatus fs;
3484   bool losesInfo;
3485 
3486   // Convert number to double.  To avoid spurious underflows, we re-
3487   // normalize against the "double" minExponent first, and only *then*
3488   // truncate the mantissa.  The result of that second conversion
3489   // may be inexact, but should never underflow.
3490   // Declare fltSemantics before APFloat that uses it (and
3491   // saves pointer to it) to ensure correct destruction order.
3492   fltSemantics extendedSemantics = *semantics;
3493   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3494   IEEEFloat extended(*this);
3495   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3496   assert(fs == opOK && !losesInfo);
3497   (void)fs;
3498 
3499   IEEEFloat u(extended);
3500   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3501   assert(fs == opOK || fs == opInexact);
3502   (void)fs;
3503   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3504 
3505   // If conversion was exact or resulted in a special case, we're done;
3506   // just set the second double to zero.  Otherwise, re-convert back to
3507   // the extended format and compute the difference.  This now should
3508   // convert exactly to double.
3509   if (u.isFiniteNonZero() && losesInfo) {
3510     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3511     assert(fs == opOK && !losesInfo);
3512     (void)fs;
3513 
3514     IEEEFloat v(extended);
3515     v.subtract(u, rmNearestTiesToEven);
3516     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3517     assert(fs == opOK && !losesInfo);
3518     (void)fs;
3519     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3520   } else {
3521     words[1] = 0;
3522   }
3523 
3524   return APInt(128, words);
3525 }
3526 
3527 template <const fltSemantics &S>
convertIEEEFloatToAPInt() const3528 APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3529   assert(semantics == &S);
3530 
3531   constexpr int bias = -(S.minExponent - 1);
3532   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3533   constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3534   constexpr integerPart integer_bit =
3535       integerPart{1} << (trailing_significand_bits % integerPartWidth);
3536   constexpr uint64_t significand_mask = integer_bit - 1;
3537   constexpr unsigned int exponent_bits =
3538       S.sizeInBits - 1 - trailing_significand_bits;
3539   static_assert(exponent_bits < 64);
3540   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3541 
3542   uint64_t myexponent;
3543   std::array<integerPart, partCountForBits(trailing_significand_bits)>
3544       mysignificand;
3545 
3546   if (isFiniteNonZero()) {
3547     myexponent = exponent + bias;
3548     std::copy_n(significandParts(), mysignificand.size(),
3549                 mysignificand.begin());
3550     if (myexponent == 1 &&
3551         !(significandParts()[integer_bit_part] & integer_bit))
3552       myexponent = 0; // denormal
3553   } else if (category == fcZero) {
3554     myexponent = ::exponentZero(S) + bias;
3555     mysignificand.fill(0);
3556   } else if (category == fcInfinity) {
3557     if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3558         S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3559       llvm_unreachable("semantics don't support inf!");
3560     myexponent = ::exponentInf(S) + bias;
3561     mysignificand.fill(0);
3562   } else {
3563     assert(category == fcNaN && "Unknown category!");
3564     if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3565       llvm_unreachable("semantics don't support NaN!");
3566     myexponent = ::exponentNaN(S) + bias;
3567     std::copy_n(significandParts(), mysignificand.size(),
3568                 mysignificand.begin());
3569   }
3570   std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3571   auto words_iter =
3572       std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3573   if constexpr (significand_mask != 0) {
3574     // Clear the integer bit.
3575     words[mysignificand.size() - 1] &= significand_mask;
3576   }
3577   std::fill(words_iter, words.end(), uint64_t{0});
3578   constexpr size_t last_word = words.size() - 1;
3579   uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3580                           << ((S.sizeInBits - 1) % 64);
3581   words[last_word] |= shifted_sign;
3582   uint64_t shifted_exponent = (myexponent & exponent_mask)
3583                               << (trailing_significand_bits % 64);
3584   words[last_word] |= shifted_exponent;
3585   if constexpr (last_word == 0) {
3586     return APInt(S.sizeInBits, words[0]);
3587   }
3588   return APInt(S.sizeInBits, words);
3589 }
3590 
convertQuadrupleAPFloatToAPInt() const3591 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3592   assert(partCount() == 2);
3593   return convertIEEEFloatToAPInt<semIEEEquad>();
3594 }
3595 
convertDoubleAPFloatToAPInt() const3596 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3597   assert(partCount()==1);
3598   return convertIEEEFloatToAPInt<semIEEEdouble>();
3599 }
3600 
convertFloatAPFloatToAPInt() const3601 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3602   assert(partCount()==1);
3603   return convertIEEEFloatToAPInt<semIEEEsingle>();
3604 }
3605 
convertBFloatAPFloatToAPInt() const3606 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3607   assert(partCount() == 1);
3608   return convertIEEEFloatToAPInt<semBFloat>();
3609 }
3610 
convertHalfAPFloatToAPInt() const3611 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3612   assert(partCount()==1);
3613   return convertIEEEFloatToAPInt<semIEEEhalf>();
3614 }
3615 
convertFloat8E5M2APFloatToAPInt() const3616 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3617   assert(partCount() == 1);
3618   return convertIEEEFloatToAPInt<semFloat8E5M2>();
3619 }
3620 
convertFloat8E5M2FNUZAPFloatToAPInt() const3621 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3622   assert(partCount() == 1);
3623   return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3624 }
3625 
convertFloat8E4M3APFloatToAPInt() const3626 APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3627   assert(partCount() == 1);
3628   return convertIEEEFloatToAPInt<semFloat8E4M3>();
3629 }
3630 
convertFloat8E4M3FNAPFloatToAPInt() const3631 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3632   assert(partCount() == 1);
3633   return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3634 }
3635 
convertFloat8E4M3FNUZAPFloatToAPInt() const3636 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3637   assert(partCount() == 1);
3638   return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3639 }
3640 
convertFloat8E4M3B11FNUZAPFloatToAPInt() const3641 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3642   assert(partCount() == 1);
3643   return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3644 }
3645 
convertFloatTF32APFloatToAPInt() const3646 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3647   assert(partCount() == 1);
3648   return convertIEEEFloatToAPInt<semFloatTF32>();
3649 }
3650 
convertFloat6E3M2FNAPFloatToAPInt() const3651 APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3652   assert(partCount() == 1);
3653   return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
3654 }
3655 
convertFloat6E2M3FNAPFloatToAPInt() const3656 APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3657   assert(partCount() == 1);
3658   return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
3659 }
3660 
convertFloat4E2M1FNAPFloatToAPInt() const3661 APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3662   assert(partCount() == 1);
3663   return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
3664 }
3665 
3666 // This function creates an APInt that is just a bit map of the floating
3667 // point constant as it would appear in memory.  It is not a conversion,
3668 // and treating the result as a normal integer is unlikely to be useful.
3669 
bitcastToAPInt() const3670 APInt IEEEFloat::bitcastToAPInt() const {
3671   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3672     return convertHalfAPFloatToAPInt();
3673 
3674   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3675     return convertBFloatAPFloatToAPInt();
3676 
3677   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3678     return convertFloatAPFloatToAPInt();
3679 
3680   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3681     return convertDoubleAPFloatToAPInt();
3682 
3683   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3684     return convertQuadrupleAPFloatToAPInt();
3685 
3686   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3687     return convertPPCDoubleDoubleAPFloatToAPInt();
3688 
3689   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3690     return convertFloat8E5M2APFloatToAPInt();
3691 
3692   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3693     return convertFloat8E5M2FNUZAPFloatToAPInt();
3694 
3695   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3696     return convertFloat8E4M3APFloatToAPInt();
3697 
3698   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3699     return convertFloat8E4M3FNAPFloatToAPInt();
3700 
3701   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3702     return convertFloat8E4M3FNUZAPFloatToAPInt();
3703 
3704   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3705     return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3706 
3707   if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3708     return convertFloatTF32APFloatToAPInt();
3709 
3710   if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3711     return convertFloat6E3M2FNAPFloatToAPInt();
3712 
3713   if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3714     return convertFloat6E2M3FNAPFloatToAPInt();
3715 
3716   if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
3717     return convertFloat4E2M1FNAPFloatToAPInt();
3718 
3719   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3720          "unknown format!");
3721   return convertF80LongDoubleAPFloatToAPInt();
3722 }
3723 
convertToFloat() const3724 float IEEEFloat::convertToFloat() const {
3725   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3726          "Float semantics are not IEEEsingle");
3727   APInt api = bitcastToAPInt();
3728   return api.bitsToFloat();
3729 }
3730 
convertToDouble() const3731 double IEEEFloat::convertToDouble() const {
3732   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3733          "Float semantics are not IEEEdouble");
3734   APInt api = bitcastToAPInt();
3735   return api.bitsToDouble();
3736 }
3737 
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128 by reinterpreting its bit pattern.
/// Only valid for values with semIEEEquad semantics.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == &semIEEEquad && "Float semantics are not IEEEquads");
  return bitcastToAPInt().bitsToQuad();
}
#endif
3746 
3747 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3748 /// does not support these bit patterns:
3749 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3750 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3751 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3752 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3753 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3754 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3755   uint64_t i1 = api.getRawData()[0];
3756   uint64_t i2 = api.getRawData()[1];
3757   uint64_t myexponent = (i2 & 0x7fff);
3758   uint64_t mysignificand = i1;
3759   uint8_t myintegerbit = mysignificand >> 63;
3760 
3761   initialize(&semX87DoubleExtended);
3762   assert(partCount()==2);
3763 
3764   sign = static_cast<unsigned int>(i2>>15);
3765   if (myexponent == 0 && mysignificand == 0) {
3766     makeZero(sign);
3767   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3768     makeInf(sign);
3769   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3770              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3771     category = fcNaN;
3772     exponent = exponentNaN();
3773     significandParts()[0] = mysignificand;
3774     significandParts()[1] = 0;
3775   } else {
3776     category = fcNormal;
3777     exponent = myexponent - 16383;
3778     significandParts()[0] = mysignificand;
3779     significandParts()[1] = 0;
3780     if (myexponent==0)          // denormal
3781       exponent = -16382;
3782   }
3783 }
3784 
initFromPPCDoubleDoubleAPInt(const APInt & api)3785 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3786   uint64_t i1 = api.getRawData()[0];
3787   uint64_t i2 = api.getRawData()[1];
3788   opStatus fs;
3789   bool losesInfo;
3790 
3791   // Get the first double and convert to our format.
3792   initFromDoubleAPInt(APInt(64, i1));
3793   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3794   assert(fs == opOK && !losesInfo);
3795   (void)fs;
3796 
3797   // Unless we have a special case, add in second double.
3798   if (isFiniteNonZero()) {
3799     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3800     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3801     assert(fs == opOK && !losesInfo);
3802     (void)fs;
3803 
3804     add(v, rmNearestTiesToEven);
3805   }
3806 }
3807 
3808 template <const fltSemantics &S>
initFromIEEEAPInt(const APInt & api)3809 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3810   assert(api.getBitWidth() == S.sizeInBits);
3811   constexpr integerPart integer_bit = integerPart{1}
3812                                       << ((S.precision - 1) % integerPartWidth);
3813   constexpr uint64_t significand_mask = integer_bit - 1;
3814   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3815   constexpr unsigned int stored_significand_parts =
3816       partCountForBits(trailing_significand_bits);
3817   constexpr unsigned int exponent_bits =
3818       S.sizeInBits - 1 - trailing_significand_bits;
3819   static_assert(exponent_bits < 64);
3820   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3821   constexpr int bias = -(S.minExponent - 1);
3822 
3823   // Copy the bits of the significand. We need to clear out the exponent and
3824   // sign bit in the last word.
3825   std::array<integerPart, stored_significand_parts> mysignificand;
3826   std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3827   if constexpr (significand_mask != 0) {
3828     mysignificand[mysignificand.size() - 1] &= significand_mask;
3829   }
3830 
3831   // We assume the last word holds the sign bit, the exponent, and potentially
3832   // some of the trailing significand field.
3833   uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3834   uint64_t myexponent =
3835       (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3836 
3837   initialize(&S);
3838   assert(partCount() == mysignificand.size());
3839 
3840   sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3841 
3842   bool all_zero_significand =
3843       llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3844 
3845   bool is_zero = myexponent == 0 && all_zero_significand;
3846 
3847   if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3848     if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3849       makeInf(sign);
3850       return;
3851     }
3852   }
3853 
3854   bool is_nan = false;
3855 
3856   if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3857     is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3858   } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3859     bool all_ones_significand =
3860         std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3861                     [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3862         (!significand_mask ||
3863          mysignificand[mysignificand.size() - 1] == significand_mask);
3864     is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3865   } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3866     is_nan = is_zero && sign;
3867   }
3868 
3869   if (is_nan) {
3870     category = fcNaN;
3871     exponent = ::exponentNaN(S);
3872     std::copy_n(mysignificand.begin(), mysignificand.size(),
3873                 significandParts());
3874     return;
3875   }
3876 
3877   if (is_zero) {
3878     makeZero(sign);
3879     return;
3880   }
3881 
3882   category = fcNormal;
3883   exponent = myexponent - bias;
3884   std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3885   if (myexponent == 0) // denormal
3886     exponent = S.minExponent;
3887   else
3888     significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3889 }
3890 
initFromQuadrupleAPInt(const APInt & api)3891 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3892   initFromIEEEAPInt<semIEEEquad>(api);
3893 }
3894 
initFromDoubleAPInt(const APInt & api)3895 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3896   initFromIEEEAPInt<semIEEEdouble>(api);
3897 }
3898 
initFromFloatAPInt(const APInt & api)3899 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3900   initFromIEEEAPInt<semIEEEsingle>(api);
3901 }
3902 
initFromBFloatAPInt(const APInt & api)3903 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3904   initFromIEEEAPInt<semBFloat>(api);
3905 }
3906 
initFromHalfAPInt(const APInt & api)3907 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3908   initFromIEEEAPInt<semIEEEhalf>(api);
3909 }
3910 
initFromFloat8E5M2APInt(const APInt & api)3911 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3912   initFromIEEEAPInt<semFloat8E5M2>(api);
3913 }
3914 
initFromFloat8E5M2FNUZAPInt(const APInt & api)3915 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3916   initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
3917 }
3918 
initFromFloat8E4M3APInt(const APInt & api)3919 void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3920   initFromIEEEAPInt<semFloat8E4M3>(api);
3921 }
3922 
initFromFloat8E4M3FNAPInt(const APInt & api)3923 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3924   initFromIEEEAPInt<semFloat8E4M3FN>(api);
3925 }
3926 
initFromFloat8E4M3FNUZAPInt(const APInt & api)3927 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3928   initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
3929 }
3930 
initFromFloat8E4M3B11FNUZAPInt(const APInt & api)3931 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3932   initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
3933 }
3934 
initFromFloatTF32APInt(const APInt & api)3935 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3936   initFromIEEEAPInt<semFloatTF32>(api);
3937 }
3938 
initFromFloat6E3M2FNAPInt(const APInt & api)3939 void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3940   initFromIEEEAPInt<semFloat6E3M2FN>(api);
3941 }
3942 
initFromFloat6E2M3FNAPInt(const APInt & api)3943 void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3944   initFromIEEEAPInt<semFloat6E2M3FN>(api);
3945 }
3946 
initFromFloat4E2M1FNAPInt(const APInt & api)3947 void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3948   initFromIEEEAPInt<semFloat4E2M1FN>(api);
3949 }
3950 
3951 /// Treat api as containing the bits of a floating point number.
initFromAPInt(const fltSemantics * Sem,const APInt & api)3952 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3953   assert(api.getBitWidth() == Sem->sizeInBits);
3954   if (Sem == &semIEEEhalf)
3955     return initFromHalfAPInt(api);
3956   if (Sem == &semBFloat)
3957     return initFromBFloatAPInt(api);
3958   if (Sem == &semIEEEsingle)
3959     return initFromFloatAPInt(api);
3960   if (Sem == &semIEEEdouble)
3961     return initFromDoubleAPInt(api);
3962   if (Sem == &semX87DoubleExtended)
3963     return initFromF80LongDoubleAPInt(api);
3964   if (Sem == &semIEEEquad)
3965     return initFromQuadrupleAPInt(api);
3966   if (Sem == &semPPCDoubleDoubleLegacy)
3967     return initFromPPCDoubleDoubleAPInt(api);
3968   if (Sem == &semFloat8E5M2)
3969     return initFromFloat8E5M2APInt(api);
3970   if (Sem == &semFloat8E5M2FNUZ)
3971     return initFromFloat8E5M2FNUZAPInt(api);
3972   if (Sem == &semFloat8E4M3)
3973     return initFromFloat8E4M3APInt(api);
3974   if (Sem == &semFloat8E4M3FN)
3975     return initFromFloat8E4M3FNAPInt(api);
3976   if (Sem == &semFloat8E4M3FNUZ)
3977     return initFromFloat8E4M3FNUZAPInt(api);
3978   if (Sem == &semFloat8E4M3B11FNUZ)
3979     return initFromFloat8E4M3B11FNUZAPInt(api);
3980   if (Sem == &semFloatTF32)
3981     return initFromFloatTF32APInt(api);
3982   if (Sem == &semFloat6E3M2FN)
3983     return initFromFloat6E3M2FNAPInt(api);
3984   if (Sem == &semFloat6E2M3FN)
3985     return initFromFloat6E2M3FNAPInt(api);
3986   if (Sem == &semFloat4E2M1FN)
3987     return initFromFloat4E2M1FNAPInt(api);
3988 
3989   llvm_unreachable(nullptr);
3990 }
3991 
3992 /// Make this number the largest magnitude normal number in the given
3993 /// semantics.
makeLargest(bool Negative)3994 void IEEEFloat::makeLargest(bool Negative) {
3995   // We want (in interchange format):
3996   //   sign = {Negative}
3997   //   exponent = 1..10
3998   //   significand = 1..1
3999   category = fcNormal;
4000   sign = Negative;
4001   exponent = semantics->maxExponent;
4002 
4003   // Use memset to set all but the highest integerPart to all ones.
4004   integerPart *significand = significandParts();
4005   unsigned PartCount = partCount();
4006   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4007 
4008   // Set the high integerPart especially setting all unused top bits for
4009   // internal consistency.
4010   const unsigned NumUnusedHighBits =
4011     PartCount*integerPartWidth - semantics->precision;
4012   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4013                                    ? (~integerPart(0) >> NumUnusedHighBits)
4014                                    : 0;
4015 
4016   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4017       semantics->nanEncoding == fltNanEncoding::AllOnes)
4018     significand[0] &= ~integerPart(1);
4019 }
4020 
4021 /// Make this number the smallest magnitude denormal number in the given
4022 /// semantics.
makeSmallest(bool Negative)4023 void IEEEFloat::makeSmallest(bool Negative) {
4024   // We want (in interchange format):
4025   //   sign = {Negative}
4026   //   exponent = 0..0
4027   //   significand = 0..01
4028   category = fcNormal;
4029   sign = Negative;
4030   exponent = semantics->minExponent;
4031   APInt::tcSet(significandParts(), 1, partCount());
4032 }
4033 
makeSmallestNormalized(bool Negative)4034 void IEEEFloat::makeSmallestNormalized(bool Negative) {
4035   // We want (in interchange format):
4036   //   sign = {Negative}
4037   //   exponent = 0..0
4038   //   significand = 10..0
4039 
4040   category = fcNormal;
4041   zeroSignificand();
4042   sign = Negative;
4043   exponent = semantics->minExponent;
4044   APInt::tcSetBit(significandParts(), semantics->precision - 1);
4045 }
4046 
IEEEFloat(const fltSemantics & Sem,const APInt & API)4047 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4048   initFromAPInt(&Sem, API);
4049 }
4050 
IEEEFloat(float f)4051 IEEEFloat::IEEEFloat(float f) {
4052   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
4053 }
4054 
IEEEFloat(double d)4055 IEEEFloat::IEEEFloat(double d) {
4056   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
4057 }
4058 
4059 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)4060   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4061     Buffer.append(Str.begin(), Str.end());
4062   }
4063 
4064   /// Removes data from the given significand until it is no more
4065   /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)4066   void AdjustToPrecision(APInt &significand,
4067                          int &exp, unsigned FormatPrecision) {
4068     unsigned bits = significand.getActiveBits();
4069 
4070     // 196/59 is a very slight overestimate of lg_2(10).
4071     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4072 
4073     if (bits <= bitsRequired) return;
4074 
4075     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4076     if (!tensRemovable) return;
4077 
4078     exp += tensRemovable;
4079 
4080     APInt divisor(significand.getBitWidth(), 1);
4081     APInt powten(significand.getBitWidth(), 10);
4082     while (true) {
4083       if (tensRemovable & 1)
4084         divisor *= powten;
4085       tensRemovable >>= 1;
4086       if (!tensRemovable) break;
4087       powten *= powten;
4088     }
4089 
4090     significand = significand.udiv(divisor);
4091 
4092     // Truncate the significand down to its active bit count.
4093     significand = significand.trunc(significand.getActiveBits());
4094   }
4095 
4096 
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)4097   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4098                          int &exp, unsigned FormatPrecision) {
4099     unsigned N = buffer.size();
4100     if (N <= FormatPrecision) return;
4101 
4102     // The most significant figures are the last ones in the buffer.
4103     unsigned FirstSignificant = N - FormatPrecision;
4104 
4105     // Round.
4106     // FIXME: this probably shouldn't use 'round half up'.
4107 
4108     // Rounding down is just a truncation, except we also want to drop
4109     // trailing zeros from the new result.
4110     if (buffer[FirstSignificant - 1] < '5') {
4111       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4112         FirstSignificant++;
4113 
4114       exp += FirstSignificant;
4115       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4116       return;
4117     }
4118 
4119     // Rounding up requires a decimal add-with-carry.  If we continue
4120     // the carry, the newly-introduced zeros will just be truncated.
4121     for (unsigned I = FirstSignificant; I != N; ++I) {
4122       if (buffer[I] == '9') {
4123         FirstSignificant++;
4124       } else {
4125         buffer[I]++;
4126         break;
4127       }
4128     }
4129 
4130     // If we carried through, we have exactly one digit of precision.
4131     if (FirstSignificant == N) {
4132       exp += FirstSignificant;
4133       buffer.clear();
4134       buffer.push_back('1');
4135       return;
4136     }
4137 
4138     exp += FirstSignificant;
4139     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4140   }
4141 
toStringImpl(SmallVectorImpl<char> & Str,const bool isNeg,int exp,APInt significand,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero)4142   void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4143                     APInt significand, unsigned FormatPrecision,
4144                     unsigned FormatMaxPadding, bool TruncateZero) {
4145     const int semanticsPrecision = significand.getBitWidth();
4146 
4147     if (isNeg)
4148       Str.push_back('-');
4149 
4150     // Set FormatPrecision if zero.  We want to do this before we
4151     // truncate trailing zeros, as those are part of the precision.
4152     if (!FormatPrecision) {
4153       // We use enough digits so the number can be round-tripped back to an
4154       // APFloat. The formula comes from "How to Print Floating-Point Numbers
4155       // Accurately" by Steele and White.
4156       // FIXME: Using a formula based purely on the precision is conservative;
4157       // we can print fewer digits depending on the actual value being printed.
4158 
4159       // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4160       FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4161     }
4162 
4163     // Ignore trailing binary zeros.
4164     int trailingZeros = significand.countr_zero();
4165     exp += trailingZeros;
4166     significand.lshrInPlace(trailingZeros);
4167 
4168     // Change the exponent from 2^e to 10^e.
4169     if (exp == 0) {
4170       // Nothing to do.
4171     } else if (exp > 0) {
4172       // Just shift left.
4173       significand = significand.zext(semanticsPrecision + exp);
4174       significand <<= exp;
4175       exp = 0;
4176     } else { /* exp < 0 */
4177       int texp = -exp;
4178 
4179       // We transform this using the identity:
4180       //   (N)(2^-e) == (N)(5^e)(10^-e)
4181       // This means we have to multiply N (the significand) by 5^e.
4182       // To avoid overflow, we have to operate on numbers large
4183       // enough to store N * 5^e:
4184       //   log2(N * 5^e) == log2(N) + e * log2(5)
4185       //                 <= semantics->precision + e * 137 / 59
4186       //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4187 
4188       unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4189 
4190       // Multiply significand by 5^e.
4191       //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4192       significand = significand.zext(precision);
4193       APInt five_to_the_i(precision, 5);
4194       while (true) {
4195         if (texp & 1)
4196           significand *= five_to_the_i;
4197 
4198         texp >>= 1;
4199         if (!texp)
4200           break;
4201         five_to_the_i *= five_to_the_i;
4202       }
4203     }
4204 
4205     AdjustToPrecision(significand, exp, FormatPrecision);
4206 
4207     SmallVector<char, 256> buffer;
4208 
4209     // Fill the buffer.
4210     unsigned precision = significand.getBitWidth();
4211     if (precision < 4) {
4212       // We need enough precision to store the value 10.
4213       precision = 4;
4214       significand = significand.zext(precision);
4215     }
4216     APInt ten(precision, 10);
4217     APInt digit(precision, 0);
4218 
4219     bool inTrail = true;
4220     while (significand != 0) {
4221       // digit <- significand % 10
4222       // significand <- significand / 10
4223       APInt::udivrem(significand, ten, significand, digit);
4224 
4225       unsigned d = digit.getZExtValue();
4226 
4227       // Drop trailing zeros.
4228       if (inTrail && !d)
4229         exp++;
4230       else {
4231         buffer.push_back((char) ('0' + d));
4232         inTrail = false;
4233       }
4234     }
4235 
4236     assert(!buffer.empty() && "no characters in buffer!");
4237 
4238     // Drop down to FormatPrecision.
4239     // TODO: don't do more precise calculations above than are required.
4240     AdjustToPrecision(buffer, exp, FormatPrecision);
4241 
4242     unsigned NDigits = buffer.size();
4243 
4244     // Check whether we should use scientific notation.
4245     bool FormatScientific;
4246     if (!FormatMaxPadding)
4247       FormatScientific = true;
4248     else {
4249       if (exp >= 0) {
4250         // 765e3 --> 765000
4251         //              ^^^
4252         // But we shouldn't make the number look more precise than it is.
4253         FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4254                             NDigits + (unsigned) exp > FormatPrecision);
4255       } else {
4256         // Power of the most significant digit.
4257         int MSD = exp + (int) (NDigits - 1);
4258         if (MSD >= 0) {
4259           // 765e-2 == 7.65
4260           FormatScientific = false;
4261         } else {
4262           // 765e-5 == 0.00765
4263           //           ^ ^^
4264           FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4265         }
4266       }
4267     }
4268 
4269     // Scientific formatting is pretty straightforward.
4270     if (FormatScientific) {
4271       exp += (NDigits - 1);
4272 
4273       Str.push_back(buffer[NDigits-1]);
4274       Str.push_back('.');
4275       if (NDigits == 1 && TruncateZero)
4276         Str.push_back('0');
4277       else
4278         for (unsigned I = 1; I != NDigits; ++I)
4279           Str.push_back(buffer[NDigits-1-I]);
4280       // Fill with zeros up to FormatPrecision.
4281       if (!TruncateZero && FormatPrecision > NDigits - 1)
4282         Str.append(FormatPrecision - NDigits + 1, '0');
4283       // For !TruncateZero we use lower 'e'.
4284       Str.push_back(TruncateZero ? 'E' : 'e');
4285 
4286       Str.push_back(exp >= 0 ? '+' : '-');
4287       if (exp < 0)
4288         exp = -exp;
4289       SmallVector<char, 6> expbuf;
4290       do {
4291         expbuf.push_back((char) ('0' + (exp % 10)));
4292         exp /= 10;
4293       } while (exp);
4294       // Exponent always at least two digits if we do not truncate zeros.
4295       if (!TruncateZero && expbuf.size() < 2)
4296         expbuf.push_back('0');
4297       for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4298         Str.push_back(expbuf[E-1-I]);
4299       return;
4300     }
4301 
4302     // Non-scientific, positive exponents.
4303     if (exp >= 0) {
4304       for (unsigned I = 0; I != NDigits; ++I)
4305         Str.push_back(buffer[NDigits-1-I]);
4306       for (unsigned I = 0; I != (unsigned) exp; ++I)
4307         Str.push_back('0');
4308       return;
4309     }
4310 
4311     // Non-scientific, negative exponents.
4312 
4313     // The number of digits to the left of the decimal point.
4314     int NWholeDigits = exp + (int) NDigits;
4315 
4316     unsigned I = 0;
4317     if (NWholeDigits > 0) {
4318       for (; I != (unsigned) NWholeDigits; ++I)
4319         Str.push_back(buffer[NDigits-I-1]);
4320       Str.push_back('.');
4321     } else {
4322       unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4323 
4324       Str.push_back('0');
4325       Str.push_back('.');
4326       for (unsigned Z = 1; Z != NZeros; ++Z)
4327         Str.push_back('0');
4328     }
4329 
4330     for (; I != NDigits; ++I)
4331       Str.push_back(buffer[NDigits-I-1]);
4332 
4333   }
4334 } // namespace
4335 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4336 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4337                          unsigned FormatMaxPadding, bool TruncateZero) const {
4338   switch (category) {
4339   case fcInfinity:
4340     if (isNegative())
4341       return append(Str, "-Inf");
4342     else
4343       return append(Str, "+Inf");
4344 
4345   case fcNaN: return append(Str, "NaN");
4346 
4347   case fcZero:
4348     if (isNegative())
4349       Str.push_back('-');
4350 
4351     if (!FormatMaxPadding) {
4352       if (TruncateZero)
4353         append(Str, "0.0E+0");
4354       else {
4355         append(Str, "0.0");
4356         if (FormatPrecision > 1)
4357           Str.append(FormatPrecision - 1, '0');
4358         append(Str, "e+00");
4359       }
4360     } else
4361       Str.push_back('0');
4362     return;
4363 
4364   case fcNormal:
4365     break;
4366   }
4367 
4368   // Decompose the number into an APInt and an exponent.
4369   int exp = exponent - ((int) semantics->precision - 1);
4370   APInt significand(
4371       semantics->precision,
4372       ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4373 
4374   toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4375                FormatMaxPadding, TruncateZero);
4376 
4377 }
4378 
getExactInverse(APFloat * inv) const4379 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4380   // Special floats and denormals have no exact inverse.
4381   if (!isFiniteNonZero())
4382     return false;
4383 
4384   // Check that the number is a power of two by making sure that only the
4385   // integer bit is set in the significand.
4386   if (significandLSB() != semantics->precision - 1)
4387     return false;
4388 
4389   // Get the inverse.
4390   IEEEFloat reciprocal(*semantics, 1ULL);
4391   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4392     return false;
4393 
4394   // Avoid multiplication with a denormal, it is not safe on all platforms and
4395   // may be slower than a normal division.
4396   if (reciprocal.isDenormal())
4397     return false;
4398 
4399   assert(reciprocal.isFiniteNonZero() &&
4400          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4401 
4402   if (inv)
4403     *inv = APFloat(reciprocal, *semantics);
4404 
4405   return true;
4406 }
4407 
getExactLog2Abs() const4408 int IEEEFloat::getExactLog2Abs() const {
4409   if (!isFinite() || isZero())
4410     return INT_MIN;
4411 
4412   const integerPart *Parts = significandParts();
4413   const int PartCount = partCountForBits(semantics->precision);
4414 
4415   int PopCount = 0;
4416   for (int i = 0; i < PartCount; ++i) {
4417     PopCount += llvm::popcount(Parts[i]);
4418     if (PopCount > 1)
4419       return INT_MIN;
4420   }
4421 
4422   if (exponent != semantics->minExponent)
4423     return exponent;
4424 
4425   int CountrParts = 0;
4426   for (int i = 0; i < PartCount;
4427        ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4428     if (Parts[i] != 0) {
4429       return exponent - semantics->precision + CountrParts +
4430              llvm::countr_zero(Parts[i]) + 1;
4431     }
4432   }
4433 
4434   llvm_unreachable("didn't find the set bit");
4435 }
4436 
isSignaling() const4437 bool IEEEFloat::isSignaling() const {
4438   if (!isNaN())
4439     return false;
4440   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4441       semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4442     return false;
4443 
4444   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4445   // first bit of the trailing significand being 0.
4446   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4447 }
4448 
4449 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4450 ///
4451 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4452 /// appropriate sign switching before/after the computation.
next(bool nextDown)4453 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4454   // If we are performing nextDown, swap sign so we have -x.
4455   if (nextDown)
4456     changeSign();
4457 
4458   // Compute nextUp(x)
4459   opStatus result = opOK;
4460 
4461   // Handle each float category separately.
4462   switch (category) {
4463   case fcInfinity:
4464     // nextUp(+inf) = +inf
4465     if (!isNegative())
4466       break;
4467     // nextUp(-inf) = -getLargest()
4468     makeLargest(true);
4469     break;
4470   case fcNaN:
4471     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4472     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4473     //                     change the payload.
4474     if (isSignaling()) {
4475       result = opInvalidOp;
4476       // For consistency, propagate the sign of the sNaN to the qNaN.
4477       makeNaN(false, isNegative(), nullptr);
4478     }
4479     break;
4480   case fcZero:
4481     // nextUp(pm 0) = +getSmallest()
4482     makeSmallest(false);
4483     break;
4484   case fcNormal:
4485     // nextUp(-getSmallest()) = -0
4486     if (isSmallest() && isNegative()) {
4487       APInt::tcSet(significandParts(), 0, partCount());
4488       category = fcZero;
4489       exponent = 0;
4490       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4491         sign = false;
4492       break;
4493     }
4494 
4495     if (isLargest() && !isNegative()) {
4496       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4497         // nextUp(getLargest()) == NAN
4498         makeNaN();
4499         break;
4500       } else if (semantics->nonFiniteBehavior ==
4501                  fltNonfiniteBehavior::FiniteOnly) {
4502         // nextUp(getLargest()) == getLargest()
4503         break;
4504       } else {
4505         // nextUp(getLargest()) == INFINITY
4506         APInt::tcSet(significandParts(), 0, partCount());
4507         category = fcInfinity;
4508         exponent = semantics->maxExponent + 1;
4509         break;
4510       }
4511     }
4512 
4513     // nextUp(normal) == normal + inc.
4514     if (isNegative()) {
4515       // If we are negative, we need to decrement the significand.
4516 
4517       // We only cross a binade boundary that requires adjusting the exponent
4518       // if:
4519       //   1. exponent != semantics->minExponent. This implies we are not in the
4520       //   smallest binade or are dealing with denormals.
4521       //   2. Our significand excluding the integral bit is all zeros.
4522       bool WillCrossBinadeBoundary =
4523         exponent != semantics->minExponent && isSignificandAllZeros();
4524 
4525       // Decrement the significand.
4526       //
4527       // We always do this since:
4528       //   1. If we are dealing with a non-binade decrement, by definition we
4529       //   just decrement the significand.
4530       //   2. If we are dealing with a normal -> normal binade decrement, since
4531       //   we have an explicit integral bit the fact that all bits but the
4532       //   integral bit are zero implies that subtracting one will yield a
4533       //   significand with 0 integral bit and 1 in all other spots. Thus we
4534       //   must just adjust the exponent and set the integral bit to 1.
4535       //   3. If we are dealing with a normal -> denormal binade decrement,
4536       //   since we set the integral bit to 0 when we represent denormals, we
4537       //   just decrement the significand.
4538       integerPart *Parts = significandParts();
4539       APInt::tcDecrement(Parts, partCount());
4540 
4541       if (WillCrossBinadeBoundary) {
4542         // Our result is a normal number. Do the following:
4543         // 1. Set the integral bit to 1.
4544         // 2. Decrement the exponent.
4545         APInt::tcSetBit(Parts, semantics->precision - 1);
4546         exponent--;
4547       }
4548     } else {
4549       // If we are positive, we need to increment the significand.
4550 
4551       // We only cross a binade boundary that requires adjusting the exponent if
4552       // the input is not a denormal and all of said input's significand bits
4553       // are set. If all of said conditions are true: clear the significand, set
4554       // the integral bit to 1, and increment the exponent. If we have a
4555       // denormal always increment since moving denormals and the numbers in the
4556       // smallest normal binade have the same exponent in our representation.
4557       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4558 
4559       if (WillCrossBinadeBoundary) {
4560         integerPart *Parts = significandParts();
4561         APInt::tcSet(Parts, 0, partCount());
4562         APInt::tcSetBit(Parts, semantics->precision - 1);
4563         assert(exponent != semantics->maxExponent &&
4564                "We can not increment an exponent beyond the maxExponent allowed"
4565                " by the given floating point semantics.");
4566         exponent++;
4567       } else {
4568         incrementSignificand();
4569       }
4570     }
4571     break;
4572   }
4573 
4574   // If we are performing nextDown, swap sign so we have -nextUp(-x)
4575   if (nextDown)
4576     changeSign();
4577 
4578   return result;
4579 }
4580 
exponentNaN() const4581 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4582   return ::exponentNaN(*semantics);
4583 }
4584 
exponentInf() const4585 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4586   return ::exponentInf(*semantics);
4587 }
4588 
exponentZero() const4589 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4590   return ::exponentZero(*semantics);
4591 }
4592 
makeInf(bool Negative)4593 void IEEEFloat::makeInf(bool Negative) {
4594   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4595     llvm_unreachable("This floating point format does not support Inf");
4596 
4597   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4598     // There is no Inf, so make NaN instead.
4599     makeNaN(false, Negative);
4600     return;
4601   }
4602   category = fcInfinity;
4603   sign = Negative;
4604   exponent = exponentInf();
4605   APInt::tcSet(significandParts(), 0, partCount());
4606 }
4607 
makeZero(bool Negative)4608 void IEEEFloat::makeZero(bool Negative) {
4609   category = fcZero;
4610   sign = Negative;
4611   if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4612     // Merge negative zero to positive because 0b10000...000 is used for NaN
4613     sign = false;
4614   }
4615   exponent = exponentZero();
4616   APInt::tcSet(significandParts(), 0, partCount());
4617 }
4618 
makeQuiet()4619 void IEEEFloat::makeQuiet() {
4620   assert(isNaN());
4621   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4622     APInt::tcSetBit(significandParts(), semantics->precision - 2);
4623 }
4624 
ilogb(const IEEEFloat & Arg)4625 int ilogb(const IEEEFloat &Arg) {
4626   if (Arg.isNaN())
4627     return IEEEFloat::IEK_NaN;
4628   if (Arg.isZero())
4629     return IEEEFloat::IEK_Zero;
4630   if (Arg.isInfinity())
4631     return IEEEFloat::IEK_Inf;
4632   if (!Arg.isDenormal())
4633     return Arg.exponent;
4634 
4635   IEEEFloat Normalized(Arg);
4636   int SignificandBits = Arg.getSemantics().precision - 1;
4637 
4638   Normalized.exponent += SignificandBits;
4639   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4640   return Normalized.exponent - SignificandBits;
4641 }
4642 
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)4643 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4644   auto MaxExp = X.getSemantics().maxExponent;
4645   auto MinExp = X.getSemantics().minExponent;
4646 
4647   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4648   // overflow; clamp it to a safe range before adding, but ensure that the range
4649   // is large enough that the clamp does not change the result. The range we
4650   // need to support is the difference between the largest possible exponent and
4651   // the normalized exponent of half the smallest denormal.
4652 
4653   int SignificandBits = X.getSemantics().precision - 1;
4654   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4655 
4656   // Clamp to one past the range ends to let normalize handle overlflow.
4657   X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4658   X.normalize(RoundingMode, lfExactlyZero);
4659   if (X.isNaN())
4660     X.makeQuiet();
4661   return X;
4662 }
4663 
/// Split \p Val into a fraction and a power of two. The fraction is returned
/// and the exponent is stored in \p Exp.
///
/// For NaN and infinity \p Exp keeps the ilogb marker value; a NaN is
/// returned after makeQuiet and an infinity is returned unchanged. For zero
/// \p Exp is 0.
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
  Exp = ilogb(Val);

  switch (Exp) {
  case IEEEFloat::IEK_NaN: {
    // Quiet signalling nans.
    IEEEFloat Quieted(Val);
    Quieted.makeQuiet();
    return Quieted;
  }
  case IEEEFloat::IEK_Inf:
    return Val;
  case IEEEFloat::IEK_Zero:
    Exp = 0;
    break;
  default:
    // frexp is defined to return a normalized fraction in +/-[0.5, 1.0)
    // rather than the usual +/-[1.0, 2.0), hence the extra 1.
    Exp += 1;
    break;
  }

  return scalbn(Val, -Exp, RM);
}
4682 
DoubleAPFloat(const fltSemantics & S)4683 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4684     : Semantics(&S),
4685       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4686   assert(Semantics == &semPPCDoubleDouble);
4687 }
4688 
DoubleAPFloat(const fltSemantics & S,uninitializedTag)4689 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4690     : Semantics(&S),
4691       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4692                             APFloat(semIEEEdouble, uninitialized)}) {
4693   assert(Semantics == &semPPCDoubleDouble);
4694 }
4695 
DoubleAPFloat(const fltSemantics & S,integerPart I)4696 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4697     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4698                                            APFloat(semIEEEdouble)}) {
4699   assert(Semantics == &semPPCDoubleDouble);
4700 }
4701 
DoubleAPFloat(const fltSemantics & S,const APInt & I)4702 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4703     : Semantics(&S),
4704       Floats(new APFloat[2]{
4705           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4706           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4707   assert(Semantics == &semPPCDoubleDouble);
4708 }
4709 
DoubleAPFloat(const fltSemantics & S,APFloat && First,APFloat && Second)4710 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4711                              APFloat &&Second)
4712     : Semantics(&S),
4713       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4714   assert(Semantics == &semPPCDoubleDouble);
4715   assert(&Floats[0].getSemantics() == &semIEEEdouble);
4716   assert(&Floats[1].getSemantics() == &semIEEEdouble);
4717 }
4718 
DoubleAPFloat(const DoubleAPFloat & RHS)4719 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4720     : Semantics(RHS.Semantics),
4721       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4722                                          APFloat(RHS.Floats[1])}
4723                         : nullptr) {
4724   assert(Semantics == &semPPCDoubleDouble);
4725 }
4726 
DoubleAPFloat(DoubleAPFloat && RHS)4727 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4728     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4729   RHS.Semantics = &semBogus;
4730   assert(Semantics == &semPPCDoubleDouble);
4731 }
4732 
/// Copy-assign from \p RHS. When both objects share the same semantics and
/// \p RHS holds components, the two components are assigned in place.
/// Otherwise, unless this is a self-assignment, the object is destroyed and
/// copy-constructed again from \p RHS.
DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
  const bool AssignInPlace = Semantics == RHS.Semantics && RHS.Floats;
  if (AssignInPlace) {
    for (unsigned Idx = 0; Idx != 2; ++Idx)
      Floats[Idx] = RHS.Floats[Idx];
    return *this;
  }

  if (this == &RHS)
    return *this;

  // Rebuild the object from scratch as a copy of RHS.
  this->~DoubleAPFloat();
  new (this) DoubleAPFloat(RHS);
  return *this;
}
4743 
4744 // Implement addition, subtraction, multiplication and division based on:
4745 // "Software for Doubled-Precision Floating-Point Computations",
4746 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
addImpl(const APFloat & a,const APFloat & aa,const APFloat & c,const APFloat & cc,roundingMode RM)4747 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4748                                          const APFloat &c, const APFloat &cc,
4749                                          roundingMode RM) {
4750   int Status = opOK;
4751   APFloat z = a;
4752   Status |= z.add(c, RM);
4753   if (!z.isFinite()) {
4754     if (!z.isInfinity()) {
4755       Floats[0] = std::move(z);
4756       Floats[1].makeZero(/* Neg = */ false);
4757       return (opStatus)Status;
4758     }
4759     Status = opOK;
4760     auto AComparedToC = a.compareAbsoluteValue(c);
4761     z = cc;
4762     Status |= z.add(aa, RM);
4763     if (AComparedToC == APFloat::cmpGreaterThan) {
4764       // z = cc + aa + c + a;
4765       Status |= z.add(c, RM);
4766       Status |= z.add(a, RM);
4767     } else {
4768       // z = cc + aa + a + c;
4769       Status |= z.add(a, RM);
4770       Status |= z.add(c, RM);
4771     }
4772     if (!z.isFinite()) {
4773       Floats[0] = std::move(z);
4774       Floats[1].makeZero(/* Neg = */ false);
4775       return (opStatus)Status;
4776     }
4777     Floats[0] = z;
4778     APFloat zz = aa;
4779     Status |= zz.add(cc, RM);
4780     if (AComparedToC == APFloat::cmpGreaterThan) {
4781       // Floats[1] = a - z + c + zz;
4782       Floats[1] = a;
4783       Status |= Floats[1].subtract(z, RM);
4784       Status |= Floats[1].add(c, RM);
4785       Status |= Floats[1].add(zz, RM);
4786     } else {
4787       // Floats[1] = c - z + a + zz;
4788       Floats[1] = c;
4789       Status |= Floats[1].subtract(z, RM);
4790       Status |= Floats[1].add(a, RM);
4791       Status |= Floats[1].add(zz, RM);
4792     }
4793   } else {
4794     // q = a - z;
4795     APFloat q = a;
4796     Status |= q.subtract(z, RM);
4797 
4798     // zz = q + c + (a - (q + z)) + aa + cc;
4799     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4800     auto zz = q;
4801     Status |= zz.add(c, RM);
4802     Status |= q.add(z, RM);
4803     Status |= q.subtract(a, RM);
4804     q.changeSign();
4805     Status |= zz.add(q, RM);
4806     Status |= zz.add(aa, RM);
4807     Status |= zz.add(cc, RM);
4808     if (zz.isZero() && !zz.isNegative()) {
4809       Floats[0] = std::move(z);
4810       Floats[1].makeZero(/* Neg = */ false);
4811       return opOK;
4812     }
4813     Floats[0] = z;
4814     Status |= Floats[0].add(zz, RM);
4815     if (!Floats[0].isFinite()) {
4816       Floats[1].makeZero(/* Neg = */ false);
4817       return (opStatus)Status;
4818     }
4819     Floats[1] = std::move(z);
4820     Status |= Floats[1].subtract(Floats[0], RM);
4821     Status |= Floats[1].add(zz, RM);
4822   }
4823   return (opStatus)Status;
4824 }
4825 
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)4826 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4827                                                 const DoubleAPFloat &RHS,
4828                                                 DoubleAPFloat &Out,
4829                                                 roundingMode RM) {
4830   if (LHS.getCategory() == fcNaN) {
4831     Out = LHS;
4832     return opOK;
4833   }
4834   if (RHS.getCategory() == fcNaN) {
4835     Out = RHS;
4836     return opOK;
4837   }
4838   if (LHS.getCategory() == fcZero) {
4839     Out = RHS;
4840     return opOK;
4841   }
4842   if (RHS.getCategory() == fcZero) {
4843     Out = LHS;
4844     return opOK;
4845   }
4846   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4847       LHS.isNegative() != RHS.isNegative()) {
4848     Out.makeNaN(false, Out.isNegative(), nullptr);
4849     return opInvalidOp;
4850   }
4851   if (LHS.getCategory() == fcInfinity) {
4852     Out = LHS;
4853     return opOK;
4854   }
4855   if (RHS.getCategory() == fcInfinity) {
4856     Out = RHS;
4857     return opOK;
4858   }
4859   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4860 
4861   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4862       CC(RHS.Floats[1]);
4863   assert(&A.getSemantics() == &semIEEEdouble);
4864   assert(&AA.getSemantics() == &semIEEEdouble);
4865   assert(&C.getSemantics() == &semIEEEdouble);
4866   assert(&CC.getSemantics() == &semIEEEdouble);
4867   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4868   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4869   return Out.addImpl(A, AA, C, CC, RM);
4870 }
4871 
add(const DoubleAPFloat & RHS,roundingMode RM)4872 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4873                                      roundingMode RM) {
4874   return addWithSpecial(*this, RHS, *this, RM);
4875 }
4876 
subtract(const DoubleAPFloat & RHS,roundingMode RM)4877 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4878                                           roundingMode RM) {
4879   changeSign();
4880   auto Ret = add(RHS, RM);
4881   changeSign();
4882   return Ret;
4883 }
4884 
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4885 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4886                                           APFloat::roundingMode RM) {
4887   const auto &LHS = *this;
4888   auto &Out = *this;
4889   /* Interesting observation: For special categories, finding the lowest
4890      common ancestor of the following layered graph gives the correct
4891      return category:
4892 
4893         NaN
4894        /   \
4895      Zero  Inf
4896        \   /
4897        Normal
4898 
4899      e.g. NaN * NaN = NaN
4900           Zero * Inf = NaN
4901           Normal * Zero = Zero
4902           Normal * Inf = Inf
4903   */
4904   if (LHS.getCategory() == fcNaN) {
4905     Out = LHS;
4906     return opOK;
4907   }
4908   if (RHS.getCategory() == fcNaN) {
4909     Out = RHS;
4910     return opOK;
4911   }
4912   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4913       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4914     Out.makeNaN(false, false, nullptr);
4915     return opOK;
4916   }
4917   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4918     Out = LHS;
4919     return opOK;
4920   }
4921   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4922     Out = RHS;
4923     return opOK;
4924   }
4925   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4926          "Special cases not handled exhaustively");
4927 
4928   int Status = opOK;
4929   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4930   // t = a * c
4931   APFloat T = A;
4932   Status |= T.multiply(C, RM);
4933   if (!T.isFiniteNonZero()) {
4934     Floats[0] = T;
4935     Floats[1].makeZero(/* Neg = */ false);
4936     return (opStatus)Status;
4937   }
4938 
4939   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4940   APFloat Tau = A;
4941   T.changeSign();
4942   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4943   T.changeSign();
4944   {
4945     // v = a * d
4946     APFloat V = A;
4947     Status |= V.multiply(D, RM);
4948     // w = b * c
4949     APFloat W = B;
4950     Status |= W.multiply(C, RM);
4951     Status |= V.add(W, RM);
4952     // tau += v + w
4953     Status |= Tau.add(V, RM);
4954   }
4955   // u = t + tau
4956   APFloat U = T;
4957   Status |= U.add(Tau, RM);
4958 
4959   Floats[0] = U;
4960   if (!U.isFinite()) {
4961     Floats[1].makeZero(/* Neg = */ false);
4962   } else {
4963     // Floats[1] = (t - u) + tau
4964     Status |= T.subtract(U, RM);
4965     Status |= T.add(Tau, RM);
4966     Floats[1] = T;
4967   }
4968   return (opStatus)Status;
4969 }
4970 
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4971 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4972                                         APFloat::roundingMode RM) {
4973   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4974   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4975   auto Ret =
4976       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4977   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4978   return Ret;
4979 }
4980 
remainder(const DoubleAPFloat & RHS)4981 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4982   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4983   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4984   auto Ret =
4985       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4986   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4987   return Ret;
4988 }
4989 
mod(const DoubleAPFloat & RHS)4990 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4991   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4992   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4993   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4994   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4995   return Ret;
4996 }
4997 
4998 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4999 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
5000                                 const DoubleAPFloat &Addend,
5001                                 APFloat::roundingMode RM) {
5002   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5003   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5004   auto Ret = Tmp.fusedMultiplyAdd(
5005       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
5006       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
5007   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5008   return Ret;
5009 }
5010 
roundToIntegral(APFloat::roundingMode RM)5011 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
5012   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5013   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5014   auto Ret = Tmp.roundToIntegral(RM);
5015   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5016   return Ret;
5017 }
5018 
changeSign()5019 void DoubleAPFloat::changeSign() {
5020   Floats[0].changeSign();
5021   Floats[1].changeSign();
5022 }
5023 
5024 APFloat::cmpResult
compareAbsoluteValue(const DoubleAPFloat & RHS) const5025 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
5026   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5027   if (Result != cmpEqual)
5028     return Result;
5029   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5030   if (Result == cmpLessThan || Result == cmpGreaterThan) {
5031     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
5032     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
5033     if (Against && !RHSAgainst)
5034       return cmpLessThan;
5035     if (!Against && RHSAgainst)
5036       return cmpGreaterThan;
5037     if (!Against && !RHSAgainst)
5038       return Result;
5039     if (Against && RHSAgainst)
5040       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
5041   }
5042   return Result;
5043 }
5044 
getCategory() const5045 APFloat::fltCategory DoubleAPFloat::getCategory() const {
5046   return Floats[0].getCategory();
5047 }
5048 
isNegative() const5049 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5050 
makeInf(bool Neg)5051 void DoubleAPFloat::makeInf(bool Neg) {
5052   Floats[0].makeInf(Neg);
5053   Floats[1].makeZero(/* Neg = */ false);
5054 }
5055 
makeZero(bool Neg)5056 void DoubleAPFloat::makeZero(bool Neg) {
5057   Floats[0].makeZero(Neg);
5058   Floats[1].makeZero(/* Neg = */ false);
5059 }
5060 
makeLargest(bool Neg)5061 void DoubleAPFloat::makeLargest(bool Neg) {
5062   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5063   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5064   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5065   if (Neg)
5066     changeSign();
5067 }
5068 
makeSmallest(bool Neg)5069 void DoubleAPFloat::makeSmallest(bool Neg) {
5070   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5071   Floats[0].makeSmallest(Neg);
5072   Floats[1].makeZero(/* Neg = */ false);
5073 }
5074 
makeSmallestNormalized(bool Neg)5075 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
5076   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5077   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5078   if (Neg)
5079     Floats[0].changeSign();
5080   Floats[1].makeZero(/* Neg = */ false);
5081 }
5082 
makeNaN(bool SNaN,bool Neg,const APInt * fill)5083 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5084   Floats[0].makeNaN(SNaN, Neg, fill);
5085   Floats[1].makeZero(/* Neg = */ false);
5086 }
5087 
compare(const DoubleAPFloat & RHS) const5088 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
5089   auto Result = Floats[0].compare(RHS.Floats[0]);
5090   // |Float[0]| > |Float[1]|
5091   if (Result == APFloat::cmpEqual)
5092     return Floats[1].compare(RHS.Floats[1]);
5093   return Result;
5094 }
5095 
bitwiseIsEqual(const DoubleAPFloat & RHS) const5096 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
5097   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5098          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5099 }
5100 
hash_value(const DoubleAPFloat & Arg)5101 hash_code hash_value(const DoubleAPFloat &Arg) {
5102   if (Arg.Floats)
5103     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5104   return hash_combine(Arg.Semantics);
5105 }
5106 
bitcastToAPInt() const5107 APInt DoubleAPFloat::bitcastToAPInt() const {
5108   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5109   uint64_t Data[] = {
5110       Floats[0].bitcastToAPInt().getRawData()[0],
5111       Floats[1].bitcastToAPInt().getRawData()[0],
5112   };
5113   return APInt(128, 2, Data);
5114 }
5115 
convertFromString(StringRef S,roundingMode RM)5116 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
5117                                                              roundingMode RM) {
5118   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5119   APFloat Tmp(semPPCDoubleDoubleLegacy);
5120   auto Ret = Tmp.convertFromString(S, RM);
5121   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5122   return Ret;
5123 }
5124 
next(bool nextDown)5125 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5126   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5127   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5128   auto Ret = Tmp.next(nextDown);
5129   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5130   return Ret;
5131 }
5132 
5133 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const5134 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5135                                 unsigned int Width, bool IsSigned,
5136                                 roundingMode RM, bool *IsExact) const {
5137   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5138   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5139       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5140 }
5141 
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)5142 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5143                                                   bool IsSigned,
5144                                                   roundingMode RM) {
5145   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5146   APFloat Tmp(semPPCDoubleDoubleLegacy);
5147   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5148   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5149   return Ret;
5150 }
5151 
5152 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)5153 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5154                                               unsigned int InputSize,
5155                                               bool IsSigned, roundingMode RM) {
5156   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5157   APFloat Tmp(semPPCDoubleDoubleLegacy);
5158   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5159   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5160   return Ret;
5161 }
5162 
5163 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)5164 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5165                                               unsigned int InputSize,
5166                                               bool IsSigned, roundingMode RM) {
5167   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5168   APFloat Tmp(semPPCDoubleDoubleLegacy);
5169   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5170   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5171   return Ret;
5172 }
5173 
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const5174 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5175                                                unsigned int HexDigits,
5176                                                bool UpperCase,
5177                                                roundingMode RM) const {
5178   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5179   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5180       .convertToHexString(DST, HexDigits, UpperCase, RM);
5181 }
5182 
isDenormal() const5183 bool DoubleAPFloat::isDenormal() const {
5184   return getCategory() == fcNormal &&
5185          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5186           // (double)(Hi + Lo) == Hi defines a normal number.
5187           Floats[0] != Floats[0] + Floats[1]);
5188 }
5189 
isSmallest() const5190 bool DoubleAPFloat::isSmallest() const {
5191   if (getCategory() != fcNormal)
5192     return false;
5193   DoubleAPFloat Tmp(*this);
5194   Tmp.makeSmallest(this->isNegative());
5195   return Tmp.compare(*this) == cmpEqual;
5196 }
5197 
isSmallestNormalized() const5198 bool DoubleAPFloat::isSmallestNormalized() const {
5199   if (getCategory() != fcNormal)
5200     return false;
5201 
5202   DoubleAPFloat Tmp(*this);
5203   Tmp.makeSmallestNormalized(this->isNegative());
5204   return Tmp.compare(*this) == cmpEqual;
5205 }
5206 
isLargest() const5207 bool DoubleAPFloat::isLargest() const {
5208   if (getCategory() != fcNormal)
5209     return false;
5210   DoubleAPFloat Tmp(*this);
5211   Tmp.makeLargest(this->isNegative());
5212   return Tmp.compare(*this) == cmpEqual;
5213 }
5214 
isInteger() const5215 bool DoubleAPFloat::isInteger() const {
5216   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5217   return Floats[0].isInteger() && Floats[1].isInteger();
5218 }
5219 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const5220 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5221                              unsigned FormatPrecision,
5222                              unsigned FormatMaxPadding,
5223                              bool TruncateZero) const {
5224   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5225   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5226       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5227 }
5228 
getExactInverse(APFloat * inv) const5229 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5230   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5231   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5232   if (!inv)
5233     return Tmp.getExactInverse(nullptr);
5234   APFloat Inv(semPPCDoubleDoubleLegacy);
5235   auto Ret = Tmp.getExactInverse(&Inv);
5236   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5237   return Ret;
5238 }
5239 
getExactLog2() const5240 int DoubleAPFloat::getExactLog2() const {
5241   // TODO: Implement me
5242   return INT_MIN;
5243 }
5244 
getExactLog2Abs() const5245 int DoubleAPFloat::getExactLog2Abs() const {
5246   // TODO: Implement me
5247   return INT_MIN;
5248 }
5249 
/// Applies scalbn with the same \p Exp and \p RM to each component of
/// \p Arg and pairs the results.
DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
                     APFloat::roundingMode RM) {
  assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat ScaledHi = scalbn(Arg.Floats[0], Exp, RM);
  APFloat ScaledLo = scalbn(Arg.Floats[1], Exp, RM);
  return DoubleAPFloat(semPPCDoubleDouble, std::move(ScaledHi),
                       std::move(ScaledLo));
}
5256 
/// Splits \p Arg with frexp on its first component, storing the exponent in
/// \p Exp. For a normal-category value the second component is scaled by
/// -Exp; otherwise it is carried over unchanged.
DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
                    APFloat::roundingMode RM) {
  assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  // Exp must be produced before the second component can be rescaled.
  APFloat Hi = frexp(Arg.Floats[0], Exp, RM);
  const bool IsNormal = Arg.getCategory() == APFloat::fcNormal;
  APFloat Lo = IsNormal ? scalbn(Arg.Floats[1], -Exp, RM) : Arg.Floats[1];
  return DoubleAPFloat(semPPCDoubleDouble, std::move(Hi), std::move(Lo));
}
5266 
5267 } // namespace detail
5268 
Storage(IEEEFloat F,const fltSemantics & Semantics)5269 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5270   if (usesLayout<IEEEFloat>(Semantics)) {
5271     new (&IEEE) IEEEFloat(std::move(F));
5272     return;
5273   }
5274   if (usesLayout<DoubleAPFloat>(Semantics)) {
5275     const fltSemantics& S = F.getSemantics();
5276     new (&Double)
5277         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5278                       APFloat(semIEEEdouble));
5279     return;
5280   }
5281   llvm_unreachable("Unexpected semantics");
5282 }
5283 
convertFromString(StringRef Str,roundingMode RM)5284 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5285                                                        roundingMode RM) {
5286   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5287 }
5288 
hash_value(const APFloat & Arg)5289 hash_code hash_value(const APFloat &Arg) {
5290   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5291     return hash_value(Arg.U.IEEE);
5292   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5293     return hash_value(Arg.U.Double);
5294   llvm_unreachable("Unexpected semantics");
5295 }
5296 
APFloat(const fltSemantics & Semantics,StringRef S)5297 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5298     : APFloat(Semantics) {
5299   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5300   assert(StatusOrErr && "Invalid floating point representation");
5301   consumeError(StatusOrErr.takeError());
5302 }
5303 
classify() const5304 FPClassTest APFloat::classify() const {
5305   if (isZero())
5306     return isNegative() ? fcNegZero : fcPosZero;
5307   if (isNormal())
5308     return isNegative() ? fcNegNormal : fcPosNormal;
5309   if (isDenormal())
5310     return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5311   if (isInfinity())
5312     return isNegative() ? fcNegInf : fcPosInf;
5313   assert(isNaN() && "Other class of FP constant");
5314   return isSignaling() ? fcSNan : fcQNan;
5315 }
5316 
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)5317 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5318                                    roundingMode RM, bool *losesInfo) {
5319   if (&getSemantics() == &ToSemantics) {
5320     *losesInfo = false;
5321     return opOK;
5322   }
5323   if (usesLayout<IEEEFloat>(getSemantics()) &&
5324       usesLayout<IEEEFloat>(ToSemantics))
5325     return U.IEEE.convert(ToSemantics, RM, losesInfo);
5326   if (usesLayout<IEEEFloat>(getSemantics()) &&
5327       usesLayout<DoubleAPFloat>(ToSemantics)) {
5328     assert(&ToSemantics == &semPPCDoubleDouble);
5329     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5330     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5331     return Ret;
5332   }
5333   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5334       usesLayout<IEEEFloat>(ToSemantics)) {
5335     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5336     *this = APFloat(std::move(getIEEE()), ToSemantics);
5337     return Ret;
5338   }
5339   llvm_unreachable("Unexpected semantics");
5340 }
5341 
getAllOnesValue(const fltSemantics & Semantics)5342 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5343   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5344 }
5345 
print(raw_ostream & OS) const5346 void APFloat::print(raw_ostream &OS) const {
5347   SmallVector<char, 16> Buffer;
5348   toString(Buffer);
5349   OS << Buffer << "\n";
5350 }
5351 
5352 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const5353 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5354 #endif
5355 
Profile(FoldingSetNodeID & NID) const5356 void APFloat::Profile(FoldingSetNodeID &NID) const {
5357   NID.Add(bitcastToAPInt());
5358 }
5359 
5360 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5361    an APSInt, whose initial bit-width and signed-ness are used to determine the
5362    precision of the conversion.
5363  */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const5364 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5365                                             roundingMode rounding_mode,
5366                                             bool *isExact) const {
5367   unsigned bitWidth = result.getBitWidth();
5368   SmallVector<uint64_t, 4> parts(result.getNumWords());
5369   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5370                                      rounding_mode, isExact);
5371   // Keeps the original signed-ness.
5372   result = APInt(bitWidth, parts);
5373   return status;
5374 }
5375 
convertToDouble() const5376 double APFloat::convertToDouble() const {
5377   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5378     return getIEEE().convertToDouble();
5379   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5380          "Float semantics is not representable by IEEEdouble");
5381   APFloat Temp = *this;
5382   bool LosesInfo;
5383   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5384   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5385   (void)St;
5386   return Temp.getIEEE().convertToDouble();
5387 }
5388 
#ifdef HAS_IEE754_FLOAT128
/// Returns *this as a host float128. Values that are not already semIEEEquad
/// are converted on a copy; the conversion is asserted to be exact.
float128 APFloat::convertToQuad() const {
  const bool AlreadyQuad =
      &getSemantics() == static_cast<const llvm::fltSemantics *>(&semIEEEquad);
  if (AlreadyQuad)
    return getIEEE().convertToQuad();
  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");
  APFloat Converted(*this);
  bool LosesInfo;
  const opStatus Status =
      Converted.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
  assert(!(Status & opInexact) && !LosesInfo && "Unexpected imprecision");
  (void)Status;
  return Converted.getIEEE().convertToQuad();
}
#endif
5403 
convertToFloat() const5404 float APFloat::convertToFloat() const {
5405   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5406     return getIEEE().convertToFloat();
5407   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5408          "Float semantics is not representable by IEEEsingle");
5409   APFloat Temp = *this;
5410   bool LosesInfo;
5411   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5412   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5413   (void)St;
5414   return Temp.getIEEE().convertToFloat();
5415 }
5416 
5417 } // namespace llvm
5418 
5419 #undef APFLOAT_DISPATCH_ON_SEMANTICS
5420