1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cstring>
29 #include <limits.h>
30
/// Forwards METHOD_CALL to the member of `U` that matches the layout selected
/// by getSemantics(): `U.IEEE` when the semantics use the IEEEFloat layout,
/// `U.Double` when they use the DoubleAPFloat layout.
///
/// The expansion `return`s the result of the call, so the macro is only usable
/// as the final statement of a function. Any other layout reaches
/// llvm_unreachable.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
39
40 using namespace llvm;
41
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is `_lhs * 4 + _rhs`; distinct pairs yield distinct keys provided
/// every category value lies in the range [0, 4).
///
/// TODO: This file already uses constexpr elsewhere; consider replacing this
/// macro with a static constexpr inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49
/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings: every integerPart must hold a whole number of
   4-bit hexadecimal digits.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54 namespace llvm {
55
// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise.
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in the Float6E3M2FN, Float6E2M3FN, and
  // Float4E2M1FN types, which do not support Inf or NaN values.
  FiniteOnly,
};
76
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
101
102 /* Represents floating point arithmetic semantics. */
103 struct fltSemantics {
104 /* The largest E such that 2^E is representable; this matches the
105 definition of IEEE 754. */
106 APFloatBase::ExponentType maxExponent;
107
108 /* The smallest E such that 2^E is a normalized number; this
109 matches the definition of IEEE 754. */
110 APFloatBase::ExponentType minExponent;
111
112 /* Number of bits in the significand. This includes the integer
113 bit. */
114 unsigned int precision;
115
116 /* Number of bits actually used in the semantics. */
117 unsigned int sizeInBits;
118
119 fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
120
121 fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
122 // Returns true if any number described by this semantics can be precisely
123 // represented by the specified semantics. Does not take into account
124 // the value of fltNonfiniteBehavior.
isRepresentableByllvm::fltSemantics125 bool isRepresentableBy(const fltSemantics &S) const {
126 return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
127 precision <= S.precision;
128 }
129 };
130
/* The built-in semantics.  Each initializer lists, in order:
   {maxExponent, minExponent, precision, sizeInBits
    [, nonFiniteBehavior [, nanEncoding]]}.
   Entries that omit the last two fields get the IEEE defaults declared in
   fltSemantics.  */
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semFloat6E3M2FN = {
    4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat6E2M3FN = {
    2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat4E2M1FN = {
    2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
/* All-zero placeholder semantics; exposed through APFloatBase::Bogus().  */
static constexpr fltSemantics semBogus = {0, 0, 0, 0};
155
/* The IBM double-double semantics. Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   The exponent and precision fields below (maxExponent = -1,
   minExponent = 0, precision = 0) are placeholders rather than a real
   description of the format; only sizeInBits (128) is meaningful.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.  */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
166
/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   The initializer gives maxExponent = 1023, minExponent = -1022 + 53 and
   precision = 53 + 53 (the 106 consecutive mantissa bits mentioned above).

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.  */
static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
                                                          53 + 53, 128};
193
EnumToSemantics(Semantics S)194 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
195 switch (S) {
196 case S_IEEEhalf:
197 return IEEEhalf();
198 case S_BFloat:
199 return BFloat();
200 case S_IEEEsingle:
201 return IEEEsingle();
202 case S_IEEEdouble:
203 return IEEEdouble();
204 case S_IEEEquad:
205 return IEEEquad();
206 case S_PPCDoubleDouble:
207 return PPCDoubleDouble();
208 case S_Float8E5M2:
209 return Float8E5M2();
210 case S_Float8E5M2FNUZ:
211 return Float8E5M2FNUZ();
212 case S_Float8E4M3:
213 return Float8E4M3();
214 case S_Float8E4M3FN:
215 return Float8E4M3FN();
216 case S_Float8E4M3FNUZ:
217 return Float8E4M3FNUZ();
218 case S_Float8E4M3B11FNUZ:
219 return Float8E4M3B11FNUZ();
220 case S_FloatTF32:
221 return FloatTF32();
222 case S_Float6E3M2FN:
223 return Float6E3M2FN();
224 case S_Float6E2M3FN:
225 return Float6E2M3FN();
226 case S_Float4E2M1FN:
227 return Float4E2M1FN();
228 case S_x87DoubleExtended:
229 return x87DoubleExtended();
230 }
231 llvm_unreachable("Unrecognised floating semantics");
232 }
233
234 APFloatBase::Semantics
SemanticsToEnum(const llvm::fltSemantics & Sem)235 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
236 if (&Sem == &llvm::APFloat::IEEEhalf())
237 return S_IEEEhalf;
238 else if (&Sem == &llvm::APFloat::BFloat())
239 return S_BFloat;
240 else if (&Sem == &llvm::APFloat::IEEEsingle())
241 return S_IEEEsingle;
242 else if (&Sem == &llvm::APFloat::IEEEdouble())
243 return S_IEEEdouble;
244 else if (&Sem == &llvm::APFloat::IEEEquad())
245 return S_IEEEquad;
246 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
247 return S_PPCDoubleDouble;
248 else if (&Sem == &llvm::APFloat::Float8E5M2())
249 return S_Float8E5M2;
250 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
251 return S_Float8E5M2FNUZ;
252 else if (&Sem == &llvm::APFloat::Float8E4M3())
253 return S_Float8E4M3;
254 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
255 return S_Float8E4M3FN;
256 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
257 return S_Float8E4M3FNUZ;
258 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
259 return S_Float8E4M3B11FNUZ;
260 else if (&Sem == &llvm::APFloat::FloatTF32())
261 return S_FloatTF32;
262 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
263 return S_Float6E3M2FN;
264 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
265 return S_Float6E2M3FN;
266 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
267 return S_Float4E2M1FN;
268 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
269 return S_x87DoubleExtended;
270 else
271 llvm_unreachable("Unknown floating semantics");
272 }
273
// Accessors for the file-local semantics objects. Each returns a reference to
// one fixed static object, so the address of the result identifies the
// semantics; SemanticsToEnum relies on exactly that.
const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
const fltSemantics &APFloatBase::PPCDoubleDouble() {
  return semPPCDoubleDouble;
}
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
  return semFloat8E4M3B11FNUZ;
}
const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }
const fltSemantics &APFloatBase::x87DoubleExtended() {
  return semX87DoubleExtended;
}
// The placeholder semantics; not one of the cases handled by EnumToSemantics
// or SemanticsToEnum.
const fltSemantics &APFloatBase::Bogus() { return semBogus; }
298
// Namespace-scope definitions of the static constexpr rounding-mode members
// of APFloatBase. NOTE(review): these are presumably kept so the members can
// be odr-used when building as pre-C++17; from C++17 on, static constexpr
// data members are implicitly inline and such redeclarations are redundant.
constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
constexpr RoundingMode APFloatBase::rmTowardPositive;
constexpr RoundingMode APFloatBase::rmTowardNegative;
constexpr RoundingMode APFloatBase::rmTowardZero;
constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
304
/* A tight upper bound on number of parts required to hold the value
   pow(5, power) is

     power * 815 / (351 * integerPartWidth) + 1

   However, whilst the result may require only this many parts,
   because we are multiplying two values to get it, the
   multiplication may require an extra part with the excess part
   being zero (consider the trivial case of 1 * 1, tcFullMultiply
   requires two parts to hold the single-part result).  So we add an
   extra one to guarantee enough space whilst multiplying.  */
/* Largest maxExponent among the semantics defined above (IEEE quad and
   x87 double extended).  */
const unsigned int maxExponent = 16383;
/* Largest precision among the semantics defined above (IEEE quad).  */
const unsigned int maxPrecision = 113;
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
/* The bound from the comment above, evaluated at maxPowerOfFiveExponent;
   the leading 2 is the formula's "+ 1" plus the extra multiplication part.  */
const unsigned int maxPowerOfFiveParts =
    2 +
    ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
322
/// Returns the number of significand bits (including the integer bit) of the
/// given semantics.
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
  return semantics.precision;
}
/// Returns the largest exponent E such that 2^E is representable in the given
/// semantics.
APFloatBase::ExponentType
APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
  return semantics.maxExponent;
}
/// Returns the smallest exponent E such that 2^E is a normalized number in
/// the given semantics.
APFloatBase::ExponentType
APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
  return semantics.minExponent;
}
/// Returns the number of bits actually used by the given semantics.
unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
  return semantics.sizeInBits;
}
semanticsIntSizeInBits(const fltSemantics & semantics,bool isSigned)337 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
338 bool isSigned) {
339 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
340 // at least one more bit than the MaxExponent to hold the max FP value.
341 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
342 // Extra sign bit needed.
343 if (isSigned)
344 ++MinBitWidth;
345 return MinBitWidth;
346 }
347
isRepresentableAsNormalIn(const fltSemantics & Src,const fltSemantics & Dst)348 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
349 const fltSemantics &Dst) {
350 // Exponent range must be larger.
351 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
352 return false;
353
354 // If the mantissa is long enough, the result value could still be denormal
355 // with a larger exponent range.
356 //
357 // FIXME: This condition is probably not accurate but also shouldn't be a
358 // practical concern with existing types.
359 return Dst.precision >= Src.precision;
360 }
361
/// Returns the number of bits actually used by the given semantics (the
/// sizeInBits field).
unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
  return Sem.sizeInBits;
}
365
/* Returns one less than the smallest normalized exponent of SEMANTICS.
   Going by the name, this is the exponent value that marks zero; it is
   also the exponent used for NaN in the NegativeZero encoding (see
   exponentNaN).  */
static constexpr APFloatBase::ExponentType
exponentZero(const fltSemantics &semantics) {
  return semantics.minExponent - 1;
}

/* Returns one more than the largest exponent of SEMANTICS.  Going by the
   name, this is the exponent value that marks infinity.  */
static constexpr APFloatBase::ExponentType
exponentInf(const fltSemantics &semantics) {
  return semantics.maxExponent + 1;
}
375
376 static constexpr APFloatBase::ExponentType
exponentNaN(const fltSemantics & semantics)377 exponentNaN(const fltSemantics &semantics) {
378 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
379 if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
380 return exponentZero(semantics);
381 return semantics.maxExponent;
382 }
383 return semantics.maxExponent + 1;
384 }
385
386 /* A bunch of private, handy routines. */
387
/* Wraps the message ERR in a StringError carrying
   inconvertibleErrorCode().  */
static inline Error createError(const Twine &Err) {
  return make_error<StringError>(Err, inconvertibleErrorCode());
}
391
partCountForBits(unsigned int bits)392 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
393 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
394 }
395
/* Maps the character code C to its decimal digit value.  '0'..'9' give
   0U-9U; every other input gives a value >= 10U (codes below '0' wrap
   around in unsigned arithmetic), so callers test the result against 10U
   to detect non-digits.  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
402
403 /* Return the value of a decimal exponent of the form
404 [+-]ddddddd.
405
406 If the exponent overflows, returns a large exponent with the
407 appropriate sign. */
readExponent(StringRef::iterator begin,StringRef::iterator end)408 static Expected<int> readExponent(StringRef::iterator begin,
409 StringRef::iterator end) {
410 bool isNegative;
411 unsigned int absExponent;
412 const unsigned int overlargeExponent = 24000; /* FIXME. */
413 StringRef::iterator p = begin;
414
415 // Treat no exponent as 0 to match binutils
416 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
417 return 0;
418 }
419
420 isNegative = (*p == '-');
421 if (*p == '-' || *p == '+') {
422 p++;
423 if (p == end)
424 return createError("Exponent has no digits");
425 }
426
427 absExponent = decDigitValue(*p++);
428 if (absExponent >= 10U)
429 return createError("Invalid character in exponent");
430
431 for (; p != end; ++p) {
432 unsigned int value;
433
434 value = decDigitValue(*p);
435 if (value >= 10U)
436 return createError("Invalid character in exponent");
437
438 absExponent = absExponent * 10U + value;
439 if (absExponent >= overlargeExponent) {
440 absExponent = overlargeExponent;
441 break;
442 }
443 }
444
445 if (isNegative)
446 return -(int) absExponent;
447 else
448 return (int) absExponent;
449 }
450
451 /* This is ugly and needs cleaning up, but I don't immediately see
452 how whilst remaining safe. */
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)453 static Expected<int> totalExponent(StringRef::iterator p,
454 StringRef::iterator end,
455 int exponentAdjustment) {
456 int unsignedExponent;
457 bool negative, overflow;
458 int exponent = 0;
459
460 if (p == end)
461 return createError("Exponent has no digits");
462
463 negative = *p == '-';
464 if (*p == '-' || *p == '+') {
465 p++;
466 if (p == end)
467 return createError("Exponent has no digits");
468 }
469
470 unsignedExponent = 0;
471 overflow = false;
472 for (; p != end; ++p) {
473 unsigned int value;
474
475 value = decDigitValue(*p);
476 if (value >= 10U)
477 return createError("Invalid character in exponent");
478
479 unsignedExponent = unsignedExponent * 10 + value;
480 if (unsignedExponent > 32767) {
481 overflow = true;
482 break;
483 }
484 }
485
486 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
487 overflow = true;
488
489 if (!overflow) {
490 exponent = unsignedExponent;
491 if (negative)
492 exponent = -exponent;
493 exponent += exponentAdjustment;
494 if (exponent > 32767 || exponent < -32768)
495 overflow = true;
496 }
497
498 if (overflow)
499 exponent = negative ? -32768: 32767;
500
501 return exponent;
502 }
503
504 static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)505 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
506 StringRef::iterator *dot) {
507 StringRef::iterator p = begin;
508 *dot = end;
509 while (p != end && *p == '0')
510 p++;
511
512 if (p != end && *p == '.') {
513 *dot = p++;
514
515 if (end - begin == 1)
516 return createError("Significand has no digits");
517
518 while (p != end && *p == '0')
519 p++;
520 }
521
522 return p;
523 }
524
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  /* First significant digit, i.e. the position reached after skipping
     leading zeroes and any decimal point.  */
  const char *firstSigDigit;
  /* Last significant digit, with insignificant trailing zeroes dropped.  */
  const char *lastSigDigit;
  /* Decimal exponent when the significand is read as an integer.  */
  int exponent;
  /* Decimal exponent when the point follows the first significant digit.  */
  int normalizedExponent;
};
544
/* Parse the decimal number in [begin, end) and fill out *D (see
   decimalInfo).  Returns an error if the text contains more than one
   decimal point, has no significand digits before an exponent marker,
   contains a character that is neither a digit, '.', 'e' nor 'E' in the
   significand, or if the exponent part is rejected by readExponent.  */
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  /* Position of the decimal point; END means "none seen yet".  */
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  /* Advance p over the significand digits, recording a single decimal
     point on the way; stop at the first character that is not a digit.  */
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  /* Anything left over must be an exponent introduced by 'e' or 'E'.  */
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    /* A dot was seen and only one character precedes the exponent marker,
       so that character is the dot itself.  */
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  The inner loop steps p back
       over '0' characters; the outer loop repeats that if p has landed on
       the decimal point, so zeroes on both sides of the point are
       skipped.  */
    if (p != begin) {
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  The boolean terms
       subtract one when the point itself lies inside the span being
       measured, so that it is not counted as a digit.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}
610
611 /* Return the trailing fraction of a hexadecimal number.
612 DIGITVALUE is the first hex digit of the fraction, P points to
613 the next digit. */
614 static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)615 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
616 unsigned int digitValue) {
617 unsigned int hexDigit;
618
619 /* If the first trailing digit isn't 0 or 8 we can work out the
620 fraction immediately. */
621 if (digitValue > 8)
622 return lfMoreThanHalf;
623 else if (digitValue < 8 && digitValue > 0)
624 return lfLessThanHalf;
625
626 // Otherwise we need to find the first non-zero digit.
627 while (p != end && (*p == '0' || *p == '.'))
628 p++;
629
630 if (p == end)
631 return createError("Invalid trailing hexadecimal fraction!");
632
633 hexDigit = hexDigitValue(*p);
634
635 /* If we ran off the end it is exactly zero or one-half, otherwise
636 a little more. */
637 if (hexDigit == UINT_MAX)
638 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
639 else
640 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
641 }
642
643 /* Return the fraction lost were a bignum truncated losing the least
644 significant BITS bits. */
645 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)646 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
647 unsigned int partCount,
648 unsigned int bits)
649 {
650 unsigned int lsb;
651
652 lsb = APInt::tcLSB(parts, partCount);
653
654 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
655 if (bits <= lsb)
656 return lfExactlyZero;
657 if (bits == lsb + 1)
658 return lfExactlyHalf;
659 if (bits <= partCount * APFloatBase::integerPartWidth &&
660 APInt::tcExtractBit(parts, bits - 1))
661 return lfMoreThanHalf;
662
663 return lfLessThanHalf;
664 }
665
666 /* Shift DST right BITS bits noting lost fraction. */
667 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)668 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
669 {
670 lostFraction lost_fraction;
671
672 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
673
674 APInt::tcShiftRight(dst, parts, bits);
675
676 return lost_fraction;
677 }
678
679 /* Combine the effect of two lost fractions. */
680 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)681 combineLostFractions(lostFraction moreSignificant,
682 lostFraction lessSignificant)
683 {
684 if (lessSignificant != lfExactlyZero) {
685 if (moreSignificant == lfExactlyZero)
686 moreSignificant = lfLessThanHalf;
687 else if (moreSignificant == lfExactlyHalf)
688 moreSignificant = lfMoreThanHalf;
689 }
690
691 return moreSignificant;
692 }
693
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUE1 and HUE2 half-ulps, is strictly less
   than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int operandError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  /* With exact operands the only error is that of the multiplication
     itself: <= inexactMultiply half-ulps.  */
  if (operandError == 0)
    return multiplyError * 2;

  return multiplyError + 2 * operandError;
}
711
/* The number of ulps from the boundary (zero, or half if ISNEAREST)
   when the least significant BITS are truncated.  BITS cannot be
   zero.

   Only small distances are computed exactly; whenever the distance would
   not fit in the single low part, the all-ones value ("a lot") is
   returned instead.  */
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  /* COUNT is the index of the part holding the most significant truncated
     bit; PARTBITS is how many truncated bits that part contains.  */
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  /* The truncated bits that live in the top part.  */
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  /* For round-to-nearest the boundary is one half, i.e. only the most
     significant truncated bit set; otherwise it is zero.  */
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  /* All truncated bits are in one part: return the smaller of the two
     differences.  This relies on wraparound of the subtraction
     (integerPart is presumably an unsigned type).  */
  if (count == 0) {
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  if (part == boundary) {
    /* At or just above the boundary: every part between the top one and
       part 0 must be zero for the distance to fit in parts[0].  */
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    /* Just below the boundary: every part between the top one and part 0
       must be all ones; the distance is then the negation of parts[0].  */
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}
757
/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.

   The low three bits of POWER are handled by a small lookup table.  The
   remaining bits are handled by square-and-multiply: the repeated squares
   5^8, 5^16, 5^32, ... are computed on demand and stored back to back in a
   local buffer, and the running product alternates between DST and a
   scratch buffer.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* Storage for the repeated squares; each one starts where the previous
     one ends.  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5; /* 5^8, the first square.  */

  /* Number of parts in the square that POW5 currently points at.  */
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  /* Number of parts in the running product held in P1.  */
  unsigned int result;
  assert(power <= maxExponent);

  p1 = dst;
  p2 = scratch;

  /* Seed the product with 5^(power mod 8).  */
  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  The previous square
       sits immediately before POW5 and is multiplied by itself.  */
    if (n != 0) {
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      /* Drop the top part if the full multiply left it zero.  */
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* After the swap below the product is in p1 with RESULT parts and
         p2 is scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    /* Step past the current square so the next one is stored after it.  */
    pow5 += partsCount;
  }

  /* The product may have ended up in the scratch buffer; copy it out.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}
813
/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of infinity and NaN.  NOTE(review):
   their users are outside this excerpt; presumably the textual output
   routines.  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
822
823 /* Write out an integerPart in hexadecimal, starting with the most
824 significant nibble. Write out exactly COUNT hexdigits, return
825 COUNT. */
826 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)827 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
828 const char *hexDigitChars)
829 {
830 unsigned int result = count;
831
832 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
833
834 part >>= (APFloatBase::integerPartWidth - 4 * count);
835 while (count--) {
836 dst[count] = hexDigitChars[part & 0xf];
837 part >>= 4;
838 }
839
840 return result;
841 }
842
/* Write out an unsigned decimal integer.  The digits of N are stored at
   DST without a terminating NUL; the return value points one past the
   last digit written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  /* Digits are produced least significant first, so collect them in a
     temporary buffer and emit them in reverse.  */
  char reversed[40];
  unsigned int length = 0;

  do {
    reversed[length++] = static_cast<char>('0' + n % 10);
    n /= 10;
  } while (n != 0);

  while (length != 0)
    *dst++ = reversed[--length];

  return dst;
}
860
861 /* Write out a signed decimal integer. */
862 static char *
writeSignedDecimal(char * dst,int value)863 writeSignedDecimal (char *dst, int value)
864 {
865 if (value < 0) {
866 *dst++ = '-';
867 dst = writeUnsignedDecimal(dst, -(unsigned) value);
868 } else
869 dst = writeUnsignedDecimal(dst, value);
870
871 return dst;
872 }
873
874 namespace detail {
875 /* Constructors. */
initialize(const fltSemantics * ourSemantics)876 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
877 unsigned int count;
878
879 semantics = ourSemantics;
880 count = partCount();
881 if (count > 1)
882 significand.parts = new integerPart[count];
883 }
884
freeSignificand()885 void IEEEFloat::freeSignificand() {
886 if (needsCleanup())
887 delete [] significand.parts;
888 }
889
assign(const IEEEFloat & rhs)890 void IEEEFloat::assign(const IEEEFloat &rhs) {
891 assert(semantics == rhs.semantics);
892
893 sign = rhs.sign;
894 category = rhs.category;
895 exponent = rhs.exponent;
896 if (isFiniteNonZero() || category == fcNaN)
897 copySignificand(rhs);
898 }
899
copySignificand(const IEEEFloat & rhs)900 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
901 assert(isFiniteNonZero() || category == fcNaN);
902 assert(rhs.partCount() >= partCount());
903
904 APInt::tcAssign(significandParts(), rhs.significandParts(),
905 partCount());
906 }
907
908 /* Make this number a NaN, with an arbitrary but deterministic value
909 for the significand. If double or longer, this is a signalling NaN,
910 which may not be ideal. If float, this is QNaN(0). */
makeNaN(bool SNaN,bool Negative,const APInt * fill)911 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
912 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
913 llvm_unreachable("This floating point format does not support NaN");
914
915 category = fcNaN;
916 sign = Negative;
917 exponent = exponentNaN();
918
919 integerPart *significand = significandParts();
920 unsigned numParts = partCount();
921
922 APInt fill_storage;
923 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
924 // Finite-only types do not distinguish signalling and quiet NaN, so
925 // make them all signalling.
926 SNaN = false;
927 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
928 sign = true;
929 fill_storage = APInt::getZero(semantics->precision - 1);
930 } else {
931 fill_storage = APInt::getAllOnes(semantics->precision - 1);
932 }
933 fill = &fill_storage;
934 }
935
936 // Set the significand bits to the fill.
937 if (!fill || fill->getNumWords() < numParts)
938 APInt::tcSet(significand, 0, numParts);
939 if (fill) {
940 APInt::tcAssign(significand, fill->getRawData(),
941 std::min(fill->getNumWords(), numParts));
942
943 // Zero out the excess bits of the significand.
944 unsigned bitsToPreserve = semantics->precision - 1;
945 unsigned part = bitsToPreserve / 64;
946 bitsToPreserve %= 64;
947 significand[part] &= ((1ULL << bitsToPreserve) - 1);
948 for (part++; part != numParts; ++part)
949 significand[part] = 0;
950 }
951
952 unsigned QNaNBit = semantics->precision - 2;
953
954 if (SNaN) {
955 // We always have to clear the QNaN bit to make it an SNaN.
956 APInt::tcClearBit(significand, QNaNBit);
957
958 // If there are no bits set in the payload, we have to set
959 // *something* to make it a NaN instead of an infinity;
960 // conventionally, this is the next bit down from the QNaN bit.
961 if (APInt::tcIsZero(significand, numParts))
962 APInt::tcSetBit(significand, QNaNBit - 1);
963 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
964 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
965 // Do nothing.
966 } else {
967 // We always have to set the QNaN bit to make it a QNaN.
968 APInt::tcSetBit(significand, QNaNBit);
969 }
970
971 // For x87 extended precision, we want to make a NaN, not a
972 // pseudo-NaN. Maybe we should expose the ability to make
973 // pseudo-NaNs?
974 if (semantics == &semX87DoubleExtended)
975 APInt::tcSetBit(significand, QNaNBit + 1);
976 }
977
/* Copy assignment.  Self-assignment is a no-op.  When the semantics
   differ, the significand storage is released and set up again for RHS's
   semantics before the value is copied.  */
IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
  if (this == &rhs)
    return *this;

  if (semantics != rhs.semantics) {
    freeSignificand();
    initialize(rhs.semantics);
  }
  assign(rhs);

  return *this;
}
989
operator =(IEEEFloat && rhs)990 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
991 freeSignificand();
992
993 semantics = rhs.semantics;
994 significand = rhs.significand;
995 exponent = rhs.exponent;
996 category = rhs.category;
997 sign = rhs.sign;
998
999 rhs.semantics = &semBogus;
1000 return *this;
1001 }
1002
isDenormal() const1003 bool IEEEFloat::isDenormal() const {
1004 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1005 (APInt::tcExtractBit(significandParts(),
1006 semantics->precision - 1) == 0);
1007 }
1008
isSmallest() const1009 bool IEEEFloat::isSmallest() const {
1010 // The smallest number by magnitude in our format will be the smallest
1011 // denormal, i.e. the floating point number with exponent being minimum
1012 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1013 return isFiniteNonZero() && exponent == semantics->minExponent &&
1014 significandMSB() == 0;
1015 }
1016
isSmallestNormalized() const1017 bool IEEEFloat::isSmallestNormalized() const {
1018 return getCategory() == fcNormal && exponent == semantics->minExponent &&
1019 isSignificandAllZerosExceptMSB();
1020 }
1021
isSignificandAllOnes() const1022 bool IEEEFloat::isSignificandAllOnes() const {
1023 // Test if the significand excluding the integral bit is all ones. This allows
1024 // us to test for binade boundaries.
1025 const integerPart *Parts = significandParts();
1026 const unsigned PartCount = partCountForBits(semantics->precision);
1027 for (unsigned i = 0; i < PartCount - 1; i++)
1028 if (~Parts[i])
1029 return false;
1030
1031 // Set the unused high bits to all ones when we compare.
1032 const unsigned NumHighBits =
1033 PartCount*integerPartWidth - semantics->precision + 1;
1034 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1035 "Can not have more high bits to fill than integerPartWidth");
1036 const integerPart HighBitFill =
1037 ~integerPart(0) << (integerPartWidth - NumHighBits);
1038 if (~(Parts[PartCount - 1] | HighBitFill))
1039 return false;
1040
1041 return true;
1042 }
1043
isSignificandAllOnesExceptLSB() const1044 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1045 // Test if the significand excluding the integral bit is all ones except for
1046 // the least significant bit.
1047 const integerPart *Parts = significandParts();
1048
1049 if (Parts[0] & 1)
1050 return false;
1051
1052 const unsigned PartCount = partCountForBits(semantics->precision);
1053 for (unsigned i = 0; i < PartCount - 1; i++) {
1054 if (~Parts[i] & ~unsigned{!i})
1055 return false;
1056 }
1057
1058 // Set the unused high bits to all ones when we compare.
1059 const unsigned NumHighBits =
1060 PartCount * integerPartWidth - semantics->precision + 1;
1061 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1062 "Can not have more high bits to fill than integerPartWidth");
1063 const integerPart HighBitFill = ~integerPart(0)
1064 << (integerPartWidth - NumHighBits);
1065 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1066 return false;
1067
1068 return true;
1069 }
1070
isSignificandAllZeros() const1071 bool IEEEFloat::isSignificandAllZeros() const {
1072 // Test if the significand excluding the integral bit is all zeros. This
1073 // allows us to test for binade boundaries.
1074 const integerPart *Parts = significandParts();
1075 const unsigned PartCount = partCountForBits(semantics->precision);
1076
1077 for (unsigned i = 0; i < PartCount - 1; i++)
1078 if (Parts[i])
1079 return false;
1080
1081 // Compute how many bits are used in the final word.
1082 const unsigned NumHighBits =
1083 PartCount*integerPartWidth - semantics->precision + 1;
1084 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1085 "clear than integerPartWidth");
1086 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1087
1088 if (Parts[PartCount - 1] & HighBitMask)
1089 return false;
1090
1091 return true;
1092 }
1093
isSignificandAllZerosExceptMSB() const1094 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1095 const integerPart *Parts = significandParts();
1096 const unsigned PartCount = partCountForBits(semantics->precision);
1097
1098 for (unsigned i = 0; i < PartCount - 1; i++) {
1099 if (Parts[i])
1100 return false;
1101 }
1102
1103 const unsigned NumHighBits =
1104 PartCount * integerPartWidth - semantics->precision + 1;
1105 return Parts[PartCount - 1] == integerPart(1)
1106 << (integerPartWidth - NumHighBits);
1107 }
1108
isLargest() const1109 bool IEEEFloat::isLargest() const {
1110 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1111 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1112 // The largest number by magnitude in our format will be the floating point
1113 // number with maximum exponent and with significand that is all ones except
1114 // the LSB.
1115 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1116 isSignificandAllOnesExceptLSB();
1117 } else {
1118 // The largest number by magnitude in our format will be the floating point
1119 // number with maximum exponent and with significand that is all ones.
1120 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1121 isSignificandAllOnes();
1122 }
1123 }
1124
isInteger() const1125 bool IEEEFloat::isInteger() const {
1126 // This could be made more efficient; I'm going for obviously correct.
1127 if (!isFinite()) return false;
1128 IEEEFloat truncated = *this;
1129 truncated.roundToIntegral(rmTowardZero);
1130 return compare(truncated) == cmpEqual;
1131 }
1132
bitwiseIsEqual(const IEEEFloat & rhs) const1133 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1134 if (this == &rhs)
1135 return true;
1136 if (semantics != rhs.semantics ||
1137 category != rhs.category ||
1138 sign != rhs.sign)
1139 return false;
1140 if (category==fcZero || category==fcInfinity)
1141 return true;
1142
1143 if (isFiniteNonZero() && exponent != rhs.exponent)
1144 return false;
1145
1146 return std::equal(significandParts(), significandParts() + partCount(),
1147 rhs.significandParts());
1148 }
1149
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)1150 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1151 initialize(&ourSemantics);
1152 sign = 0;
1153 category = fcNormal;
1154 zeroSignificand();
1155 exponent = ourSemantics.precision - 1;
1156 significandParts()[0] = value;
1157 normalize(rmNearestTiesToEven, lfExactlyZero);
1158 }
1159
IEEEFloat(const fltSemantics & ourSemantics)1160 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1161 initialize(&ourSemantics);
1162 makeZero(false);
1163 }
1164
1165 // Delegate to the previous constructor, because later copy constructor may
1166 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)1167 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1168 : IEEEFloat(ourSemantics) {}
1169
/* Copy constructor: set up storage for RHS's semantics, then copy its
   value.  */
IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
  initialize(rhs.semantics);

  assign(rhs);
}
1174
IEEEFloat(IEEEFloat && rhs)1175 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1176 *this = std::move(rhs);
1177 }
1178
~IEEEFloat()1179 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1180
partCount() const1181 unsigned int IEEEFloat::partCount() const {
1182 return partCountForBits(semantics->precision + 1);
1183 }
1184
significandParts() const1185 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1186 return const_cast<IEEEFloat *>(this)->significandParts();
1187 }
1188
significandParts()1189 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1190 if (partCount() > 1)
1191 return significand.parts;
1192 else
1193 return &significand.part;
1194 }
1195
zeroSignificand()1196 void IEEEFloat::zeroSignificand() {
1197 APInt::tcSet(significandParts(), 0, partCount());
1198 }
1199
1200 /* Increment an fcNormal floating point number's significand. */
incrementSignificand()1201 void IEEEFloat::incrementSignificand() {
1202 integerPart carry;
1203
1204 carry = APInt::tcIncrement(significandParts(), partCount());
1205
1206 /* Our callers should never cause us to overflow. */
1207 assert(carry == 0);
1208 (void)carry;
1209 }
1210
1211 /* Add the significand of the RHS. Returns the carry flag. */
addSignificand(const IEEEFloat & rhs)1212 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1213 integerPart *parts;
1214
1215 parts = significandParts();
1216
1217 assert(semantics == rhs.semantics);
1218 assert(exponent == rhs.exponent);
1219
1220 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1221 }
1222
1223 /* Subtract the significand of the RHS with a borrow flag. Returns
1224 the borrow flag. */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)1225 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1226 integerPart borrow) {
1227 integerPart *parts;
1228
1229 parts = significandParts();
1230
1231 assert(semantics == rhs.semantics);
1232 assert(exponent == rhs.exponent);
1233
1234 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1235 partCount());
1236 }
1237
1238 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1239 on to the full-precision result of the multiplication. Returns the
1240 lost fraction. */
multiplySignificand(const IEEEFloat & rhs,IEEEFloat addend)1241 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1242 IEEEFloat addend) {
1243 unsigned int omsb; // One, not zero, based MSB.
1244 unsigned int partsCount, newPartsCount, precision;
1245 integerPart *lhsSignificand;
1246 integerPart scratch[4];
1247 integerPart *fullSignificand;
1248 lostFraction lost_fraction;
1249 bool ignored;
1250
1251 assert(semantics == rhs.semantics);
1252
1253 precision = semantics->precision;
1254
1255 // Allocate space for twice as many bits as the original significand, plus one
1256 // extra bit for the addition to overflow into.
1257 newPartsCount = partCountForBits(precision * 2 + 1);
1258
1259 if (newPartsCount > 4)
1260 fullSignificand = new integerPart[newPartsCount];
1261 else
1262 fullSignificand = scratch;
1263
1264 lhsSignificand = significandParts();
1265 partsCount = partCount();
1266
1267 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1268 rhs.significandParts(), partsCount, partsCount);
1269
1270 lost_fraction = lfExactlyZero;
1271 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1272 exponent += rhs.exponent;
1273
1274 // Assume the operands involved in the multiplication are single-precision
1275 // FP, and the two multiplicants are:
1276 // *this = a23 . a22 ... a0 * 2^e1
1277 // rhs = b23 . b22 ... b0 * 2^e2
1278 // the result of multiplication is:
1279 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1280 // Note that there are three significant bits at the left-hand side of the
1281 // radix point: two for the multiplication, and an overflow bit for the
1282 // addition (that will always be zero at this point). Move the radix point
1283 // toward left by two bits, and adjust exponent accordingly.
1284 exponent += 2;
1285
1286 if (addend.isNonZero()) {
1287 // The intermediate result of the multiplication has "2 * precision"
1288 // signicant bit; adjust the addend to be consistent with mul result.
1289 //
1290 Significand savedSignificand = significand;
1291 const fltSemantics *savedSemantics = semantics;
1292 fltSemantics extendedSemantics;
1293 opStatus status;
1294 unsigned int extendedPrecision;
1295
1296 // Normalize our MSB to one below the top bit to allow for overflow.
1297 extendedPrecision = 2 * precision + 1;
1298 if (omsb != extendedPrecision - 1) {
1299 assert(extendedPrecision > omsb);
1300 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1301 (extendedPrecision - 1) - omsb);
1302 exponent -= (extendedPrecision - 1) - omsb;
1303 }
1304
1305 /* Create new semantics. */
1306 extendedSemantics = *semantics;
1307 extendedSemantics.precision = extendedPrecision;
1308
1309 if (newPartsCount == 1)
1310 significand.part = fullSignificand[0];
1311 else
1312 significand.parts = fullSignificand;
1313 semantics = &extendedSemantics;
1314
1315 // Make a copy so we can convert it to the extended semantics.
1316 // Note that we cannot convert the addend directly, as the extendedSemantics
1317 // is a local variable (which we take a reference to).
1318 IEEEFloat extendedAddend(addend);
1319 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1320 assert(status == opOK);
1321 (void)status;
1322
1323 // Shift the significand of the addend right by one bit. This guarantees
1324 // that the high bit of the significand is zero (same as fullSignificand),
1325 // so the addition will overflow (if it does overflow at all) into the top bit.
1326 lost_fraction = extendedAddend.shiftSignificandRight(1);
1327 assert(lost_fraction == lfExactlyZero &&
1328 "Lost precision while shifting addend for fused-multiply-add.");
1329
1330 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1331
1332 /* Restore our state. */
1333 if (newPartsCount == 1)
1334 fullSignificand[0] = significand.part;
1335 significand = savedSignificand;
1336 semantics = savedSemantics;
1337
1338 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1339 }
1340
1341 // Convert the result having "2 * precision" significant-bits back to the one
1342 // having "precision" significant-bits. First, move the radix point from
1343 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1344 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1345 exponent -= precision + 1;
1346
1347 // In case MSB resides at the left-hand side of radix point, shift the
1348 // mantissa right by some amount to make sure the MSB reside right before
1349 // the radix point (i.e. "MSB . rest-significant-bits").
1350 //
1351 // Note that the result is not normalized when "omsb < precision". So, the
1352 // caller needs to call IEEEFloat::normalize() if normalized value is
1353 // expected.
1354 if (omsb > precision) {
1355 unsigned int bits, significantParts;
1356 lostFraction lf;
1357
1358 bits = omsb - precision;
1359 significantParts = partCountForBits(omsb);
1360 lf = shiftRight(fullSignificand, significantParts, bits);
1361 lost_fraction = combineLostFractions(lf, lost_fraction);
1362 exponent += bits;
1363 }
1364
1365 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1366
1367 if (newPartsCount > 4)
1368 delete [] fullSignificand;
1369
1370 return lost_fraction;
1371 }
1372
multiplySignificand(const IEEEFloat & rhs)1373 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1374 return multiplySignificand(rhs, IEEEFloat(*semantics));
1375 }
1376
1377 /* Multiply the significands of LHS and RHS to DST. */
divideSignificand(const IEEEFloat & rhs)1378 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1379 unsigned int bit, i, partsCount;
1380 const integerPart *rhsSignificand;
1381 integerPart *lhsSignificand, *dividend, *divisor;
1382 integerPart scratch[4];
1383 lostFraction lost_fraction;
1384
1385 assert(semantics == rhs.semantics);
1386
1387 lhsSignificand = significandParts();
1388 rhsSignificand = rhs.significandParts();
1389 partsCount = partCount();
1390
1391 if (partsCount > 2)
1392 dividend = new integerPart[partsCount * 2];
1393 else
1394 dividend = scratch;
1395
1396 divisor = dividend + partsCount;
1397
1398 /* Copy the dividend and divisor as they will be modified in-place. */
1399 for (i = 0; i < partsCount; i++) {
1400 dividend[i] = lhsSignificand[i];
1401 divisor[i] = rhsSignificand[i];
1402 lhsSignificand[i] = 0;
1403 }
1404
1405 exponent -= rhs.exponent;
1406
1407 unsigned int precision = semantics->precision;
1408
1409 /* Normalize the divisor. */
1410 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1411 if (bit) {
1412 exponent += bit;
1413 APInt::tcShiftLeft(divisor, partsCount, bit);
1414 }
1415
1416 /* Normalize the dividend. */
1417 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1418 if (bit) {
1419 exponent -= bit;
1420 APInt::tcShiftLeft(dividend, partsCount, bit);
1421 }
1422
1423 /* Ensure the dividend >= divisor initially for the loop below.
1424 Incidentally, this means that the division loop below is
1425 guaranteed to set the integer bit to one. */
1426 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1427 exponent--;
1428 APInt::tcShiftLeft(dividend, partsCount, 1);
1429 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1430 }
1431
1432 /* Long division. */
1433 for (bit = precision; bit; bit -= 1) {
1434 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1435 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1436 APInt::tcSetBit(lhsSignificand, bit - 1);
1437 }
1438
1439 APInt::tcShiftLeft(dividend, partsCount, 1);
1440 }
1441
1442 /* Figure out the lost fraction. */
1443 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1444
1445 if (cmp > 0)
1446 lost_fraction = lfMoreThanHalf;
1447 else if (cmp == 0)
1448 lost_fraction = lfExactlyHalf;
1449 else if (APInt::tcIsZero(dividend, partsCount))
1450 lost_fraction = lfExactlyZero;
1451 else
1452 lost_fraction = lfLessThanHalf;
1453
1454 if (partsCount > 2)
1455 delete [] dividend;
1456
1457 return lost_fraction;
1458 }
1459
significandMSB() const1460 unsigned int IEEEFloat::significandMSB() const {
1461 return APInt::tcMSB(significandParts(), partCount());
1462 }
1463
significandLSB() const1464 unsigned int IEEEFloat::significandLSB() const {
1465 return APInt::tcLSB(significandParts(), partCount());
1466 }
1467
1468 /* Note that a zero result is NOT normalized to fcZero. */
shiftSignificandRight(unsigned int bits)1469 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1470 /* Our exponent should not overflow. */
1471 assert((ExponentType) (exponent + bits) >= exponent);
1472
1473 exponent += bits;
1474
1475 return shiftRight(significandParts(), partCount(), bits);
1476 }
1477
1478 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
shiftSignificandLeft(unsigned int bits)1479 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1480 assert(bits < semantics->precision);
1481
1482 if (bits) {
1483 unsigned int partsCount = partCount();
1484
1485 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1486 exponent -= bits;
1487
1488 assert(!APInt::tcIsZero(significandParts(), partsCount));
1489 }
1490 }
1491
1492 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1493 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1494 int compare;
1495
1496 assert(semantics == rhs.semantics);
1497 assert(isFiniteNonZero());
1498 assert(rhs.isFiniteNonZero());
1499
1500 compare = exponent - rhs.exponent;
1501
1502 /* If exponents are equal, do an unsigned bignum comparison of the
1503 significands. */
1504 if (compare == 0)
1505 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1506 partCount());
1507
1508 if (compare > 0)
1509 return cmpGreaterThan;
1510 else if (compare < 0)
1511 return cmpLessThan;
1512 else
1513 return cmpEqual;
1514 }
1515
1516 /* Set the least significant BITS bits of a bignum, clear the
1517 rest. */
tcSetLeastSignificantBits(APInt::WordType * dst,unsigned parts,unsigned bits)1518 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1519 unsigned bits) {
1520 unsigned i = 0;
1521 while (bits > APInt::APINT_BITS_PER_WORD) {
1522 dst[i++] = ~(APInt::WordType)0;
1523 bits -= APInt::APINT_BITS_PER_WORD;
1524 }
1525
1526 if (bits)
1527 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1528
1529 while (i < parts)
1530 dst[i++] = 0;
1531 }
1532
1533 /* Handle overflow. Sign is preserved. We either become infinity or
1534 the largest finite number. */
handleOverflow(roundingMode rounding_mode)1535 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1536 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1537 /* Infinity? */
1538 if (rounding_mode == rmNearestTiesToEven ||
1539 rounding_mode == rmNearestTiesToAway ||
1540 (rounding_mode == rmTowardPositive && !sign) ||
1541 (rounding_mode == rmTowardNegative && sign)) {
1542 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1543 makeNaN(false, sign);
1544 else
1545 category = fcInfinity;
1546 return static_cast<opStatus>(opOverflow | opInexact);
1547 }
1548 }
1549
1550 /* Otherwise we become the largest finite number. */
1551 category = fcNormal;
1552 exponent = semantics->maxExponent;
1553 tcSetLeastSignificantBits(significandParts(), partCount(),
1554 semantics->precision);
1555 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1556 semantics->nanEncoding == fltNanEncoding::AllOnes)
1557 APInt::tcClearBit(significandParts(), 0);
1558
1559 return opInexact;
1560 }
1561
1562 /* Returns TRUE if, when truncating the current number, with BIT the
1563 new LSB, with the given lost fraction and rounding mode, the result
1564 would need to be rounded away from zero (i.e., by increasing the
1565 signficand). This routine must work for fcZero of both signs, and
1566 fcNormal numbers. */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1567 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1568 lostFraction lost_fraction,
1569 unsigned int bit) const {
1570 /* NaNs and infinities should not have lost fractions. */
1571 assert(isFiniteNonZero() || category == fcZero);
1572
1573 /* Current callers never pass this so we don't handle it. */
1574 assert(lost_fraction != lfExactlyZero);
1575
1576 switch (rounding_mode) {
1577 case rmNearestTiesToAway:
1578 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1579
1580 case rmNearestTiesToEven:
1581 if (lost_fraction == lfMoreThanHalf)
1582 return true;
1583
1584 /* Our zeroes don't have a significand to test. */
1585 if (lost_fraction == lfExactlyHalf && category != fcZero)
1586 return APInt::tcExtractBit(significandParts(), bit);
1587
1588 return false;
1589
1590 case rmTowardZero:
1591 return false;
1592
1593 case rmTowardPositive:
1594 return !sign;
1595
1596 case rmTowardNegative:
1597 return sign;
1598
1599 default:
1600 break;
1601 }
1602 llvm_unreachable("Invalid rounding mode found");
1603 }
1604
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1605 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1606 lostFraction lost_fraction) {
1607 unsigned int omsb; /* One, not zero, based MSB. */
1608 int exponentChange;
1609
1610 if (!isFiniteNonZero())
1611 return opOK;
1612
1613 /* Before rounding normalize the exponent of fcNormal numbers. */
1614 omsb = significandMSB() + 1;
1615
1616 if (omsb) {
1617 /* OMSB is numbered from 1. We want to place it in the integer
1618 bit numbered PRECISION if possible, with a compensating change in
1619 the exponent. */
1620 exponentChange = omsb - semantics->precision;
1621
1622 /* If the resulting exponent is too high, overflow according to
1623 the rounding mode. */
1624 if (exponent + exponentChange > semantics->maxExponent)
1625 return handleOverflow(rounding_mode);
1626
1627 /* Subnormal numbers have exponent minExponent, and their MSB
1628 is forced based on that. */
1629 if (exponent + exponentChange < semantics->minExponent)
1630 exponentChange = semantics->minExponent - exponent;
1631
1632 /* Shifting left is easy as we don't lose precision. */
1633 if (exponentChange < 0) {
1634 assert(lost_fraction == lfExactlyZero);
1635
1636 shiftSignificandLeft(-exponentChange);
1637
1638 return opOK;
1639 }
1640
1641 if (exponentChange > 0) {
1642 lostFraction lf;
1643
1644 /* Shift right and capture any new lost fraction. */
1645 lf = shiftSignificandRight(exponentChange);
1646
1647 lost_fraction = combineLostFractions(lf, lost_fraction);
1648
1649 /* Keep OMSB up-to-date. */
1650 if (omsb > (unsigned) exponentChange)
1651 omsb -= exponentChange;
1652 else
1653 omsb = 0;
1654 }
1655 }
1656
1657 // The all-ones values is an overflow if NaN is all ones. If NaN is
1658 // represented by negative zero, then it is a valid finite value.
1659 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1660 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1661 exponent == semantics->maxExponent && isSignificandAllOnes())
1662 return handleOverflow(rounding_mode);
1663
1664 /* Now round the number according to rounding_mode given the lost
1665 fraction. */
1666
1667 /* As specified in IEEE 754, since we do not trap we do not report
1668 underflow for exact results. */
1669 if (lost_fraction == lfExactlyZero) {
1670 /* Canonicalize zeroes. */
1671 if (omsb == 0) {
1672 category = fcZero;
1673 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1674 sign = false;
1675 }
1676
1677 return opOK;
1678 }
1679
1680 /* Increment the significand if we're rounding away from zero. */
1681 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1682 if (omsb == 0)
1683 exponent = semantics->minExponent;
1684
1685 incrementSignificand();
1686 omsb = significandMSB() + 1;
1687
1688 /* Did the significand increment overflow? */
1689 if (omsb == (unsigned) semantics->precision + 1) {
1690 /* Renormalize by incrementing the exponent and shifting our
1691 significand right one. However if we already have the
1692 maximum exponent we overflow to infinity. */
1693 if (exponent == semantics->maxExponent)
1694 // Invoke overflow handling with a rounding mode that will guarantee
1695 // that the result gets turned into the correct infinity representation.
1696 // This is needed instead of just setting the category to infinity to
1697 // account for 8-bit floating point types that have no inf, only NaN.
1698 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1699
1700 shiftSignificandRight(1);
1701
1702 return opInexact;
1703 }
1704
1705 // The all-ones values is an overflow if NaN is all ones. If NaN is
1706 // represented by negative zero, then it is a valid finite value.
1707 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1708 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1709 exponent == semantics->maxExponent && isSignificandAllOnes())
1710 return handleOverflow(rounding_mode);
1711 }
1712
1713 /* The normal case - we were and are not denormal, and any
1714 significand increment above didn't overflow. */
1715 if (omsb == semantics->precision)
1716 return opInexact;
1717
1718 /* We have a non-zero denormal. */
1719 assert(omsb < semantics->precision);
1720
1721 /* Canonicalize zeroes. */
1722 if (omsb == 0) {
1723 category = fcZero;
1724 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1725 sign = false;
1726 }
1727
1728 /* The fcZero case is a denormal that underflowed to zero. */
1729 return (opStatus) (opUnderflow | opInexact);
1730 }
1731
/* Handle the category combinations of an addition (or, if SUBTRACT, a
   subtraction) of RHS that can be settled without arithmetic on the
   significands.  For two fcNormal operands nothing is done and
   opDivByZero is returned.  */
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    /* The left operand already is the result.  */
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
    /* The result is RHS, negated when subtracting.  */
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    /* The result is an infinity with RHS's sign, flipped when
       subtracting.  */
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity): {
    /* Differently signed infinities can only be validly subtracted.  */
    const bool signsDiffer = (sign ^ rhs.sign) != 0;
    if (signsDiffer != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;
  }

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    /* Only RHS is a NaN: take it over, then treat it like our own.  */
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    /* A signalling NaN on either side is an invalid operation; our own is
       made quiet.  */
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  default:
    llvm_unreachable(nullptr);
  }
}
1787
/* Add or subtract the significands of two normal numbers.

   RHS is the right-hand operand and SUBTRACT selects subtraction of RHS
   rather than addition.  The resulting significand is left in *this, and
   the sign of *this is flipped when a subtraction has to be reversed because
   the aligned RHS compares larger in absolute value.  Returns the fraction
   lost while aligning the operands.  */
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
                                                 bool subtract) {
  integerPart carry;
  lostFraction lost_fraction;
  int bits;

  /* Determine if the operation on the absolute values is effectively
     an addition or subtraction.  After this, SUBTRACT is true exactly when
     the requested operation and the operand signs combine into a subtraction
     of magnitudes.  */
  subtract ^= static_cast<bool>(sign ^ rhs.sign);

  /* Are we bigger exponent-wise than the RHS?  BITS is positive when *this
     has the larger exponent.  */
  bits = exponent - rhs.exponent;

  /* Subtraction is more subtle than one might naively expect.  */
  if (subtract) {
    /* Work on a copy so that RHS itself is never modified.  */
    IEEEFloat temp_rhs(rhs);

    /* The operand with the smaller exponent is shifted right by one less
       than the exponent difference and the other operand is shifted left by
       one, so the two end up aligned.  */
    if (bits == 0)
      lost_fraction = lfExactlyZero;
    else if (bits > 0) {
      lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
      shiftSignificandLeft(1);
    } else {
      lost_fraction = shiftSignificandRight(-bits - 1);
      temp_rhs.shiftSignificandLeft(1);
    }

    // Should we reverse the subtraction?  If the aligned RHS is larger in
    // absolute value we compute RHS - *this instead and flip our sign.
    // NOTE(review): the second argument of subtractSignificand is presumably
    // an incoming borrow for the lost fraction - confirm against its
    // definition.
    if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
      carry = temp_rhs.subtractSignificand
        (*this, lost_fraction != lfExactlyZero);
      copySignificand(temp_rhs);
      sign = !sign;
    } else {
      carry = subtractSignificand
        (temp_rhs, lost_fraction != lfExactlyZero);
    }

    /* Invert the lost fraction - it was on the RHS and
       subtracted.  An exactly-zero or exactly-half fraction is unchanged.  */
    if (lost_fraction == lfLessThanHalf)
      lost_fraction = lfMoreThanHalf;
    else if (lost_fraction == lfMoreThanHalf)
      lost_fraction = lfLessThanHalf;

    /* The code above is intended to ensure that no borrow is
       necessary.  */
    assert(!carry);
    (void)carry;
  } else {
    /* Addition: shift whichever operand has the smaller exponent right by
       the full exponent difference, then add.  */
    if (bits > 0) {
      IEEEFloat temp_rhs(rhs);

      lost_fraction = temp_rhs.shiftSignificandRight(bits);
      carry = addSignificand(temp_rhs);
    } else {
      lost_fraction = shiftSignificandRight(-bits);
      carry = addSignificand(rhs);
    }

    /* We have a guard bit; generating a carry cannot happen.  */
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}
1856
/// Resolve a multiplication whose result is fixed by the operand categories.
///
/// The NaN path below undoes an XOR of rhs.sign into sign, so callers are
/// expected to have applied that XOR before calling.
///
/// \param rhs the right-hand operand.
/// \returns opInvalidOp for a signaling NaN operand or zero times infinity,
/// opOK otherwise (including the fcNormal * fcNormal case, which this
/// function leaves untouched).
IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  // Any NaN operand: keep our own NaN if we are one, otherwise adopt rhs.
  if (category == fcNaN || rhs.category == fcNaN) {
    if (category != fcNaN) {
      assign(rhs);
      sign = false;
    }
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  const bool anyInfinity =
      category == fcInfinity || rhs.category == fcInfinity;
  const bool anyZero = category == fcZero || rhs.category == fcZero;

  // Zero times infinity has no meaningful value.
  if (anyInfinity && anyZero) {
    makeNaN();
    return opInvalidOp;
  }

  // Otherwise an infinite operand makes the product infinite and a zero
  // operand makes it zero.  Two normal operands fall through unchanged.
  if (anyInfinity)
    category = fcInfinity;
  else if (anyZero)
    category = fcZero;

  return opOK;
}
1900
/// Resolve a division whose result is fixed by the operand categories.
///
/// The NaN path below undoes an XOR of rhs.sign into sign, so callers are
/// expected to have applied that XOR before calling.
///
/// \param rhs the divisor.
/// \returns opInvalidOp for a signaling NaN operand, infinity / infinity or
/// zero / zero; opDivByZero for a finite non-zero value divided by zero;
/// opOK otherwise (including the fcNormal / fcNormal case, which this
/// function leaves untouched).
IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  // Any NaN operand: keep our own NaN if we are one, otherwise adopt rhs.
  if (category == fcNaN || rhs.category == fcNaN) {
    if (category != fcNaN) {
      assign(rhs);
      sign = false;
    }
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  if (category == rhs.category) {
    // normal / normal is left for the caller.
    if (category == fcNormal)
      return opOK;
    // infinity / infinity and zero / zero are invalid.
    makeNaN();
    return opInvalidOp;
  }

  // From here on the categories differ.  An infinite or zero dividend keeps
  // its category whatever the divisor is.
  if (category != fcNormal)
    return opOK;

  // Finite non-zero dividend over infinity gives zero.
  if (rhs.category == fcInfinity) {
    category = fcZero;
    return opOK;
  }

  // Finite non-zero dividend over zero: infinity, or NaN in formats whose
  // only non-finite values are NaNs.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
    makeNaN(false, sign);
  else
    category = fcInfinity;
  return opDivByZero;
}
1949
/// Resolve a mod operation for the operand-category combinations that are
/// decided without any arithmetic.
///
/// \param rhs the divisor.
/// \returns opInvalidOp for a signaling NaN operand, an infinite dividend or
/// a zero divisor; opOK otherwise, in which case *this is left unchanged.
IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  // Any NaN operand: keep our own NaN if we are one, otherwise adopt rhs.
  const bool lhsIsNaN = category == fcNaN;
  if (lhsIsNaN || rhs.category == fcNaN) {
    if (!lhsIsNaN)
      assign(rhs);
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  // An infinite dividend or a zero divisor makes the operation invalid.
  if (category == fcInfinity || rhs.category == fcZero) {
    makeNaN();
    return opInvalidOp;
  }

  // Everything else (zero or normal dividend, normal or infinite divisor)
  // needs no change here.
  return opOK;
}
1987
/// Resolve a remainder operation for the operand-category combinations that
/// are decided without any arithmetic.
///
/// \param rhs the divisor.
/// \returns opInvalidOp for a signaling NaN operand, an infinite dividend or
/// a zero divisor; opDivByZero as a sentinel when both operands are fcNormal
/// and the real computation is still to be done; opOK otherwise.
IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
  // Any NaN operand: keep our own NaN if we are one, otherwise adopt rhs.
  const bool lhsIsNaN = category == fcNaN;
  if (lhsIsNaN || rhs.category == fcNaN) {
    if (!lhsIsNaN)
      assign(rhs);
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  // An infinite dividend or a zero divisor makes the operation invalid.
  if (category == fcInfinity || rhs.category == fcZero) {
    makeNaN();
    return opInvalidOp;
  }

  // fake status, indicating this is not a special case
  if (category == fcNormal && rhs.category == fcNormal)
    return opDivByZero;

  // Zero dividend, or normal dividend with an infinite divisor: *this is
  // left unchanged.
  return opOK;
}
2025
2026 /* Change sign. */
changeSign()2027 void IEEEFloat::changeSign() {
2028 // With NaN-as-negative-zero, neither NaN or negative zero can change
2029 // their signs.
2030 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2031 (isZero() || isNaN()))
2032 return;
2033 /* Look mummy, this one's easy. */
2034 sign = !sign;
2035 }
2036
2037 /* Normalized addition or subtraction. */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)2038 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2039 roundingMode rounding_mode,
2040 bool subtract) {
2041 opStatus fs;
2042
2043 fs = addOrSubtractSpecials(rhs, subtract);
2044
2045 /* This return code means it was not a simple case. */
2046 if (fs == opDivByZero) {
2047 lostFraction lost_fraction;
2048
2049 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2050 fs = normalize(rounding_mode, lost_fraction);
2051
2052 /* Can only be zero if we lost no fraction. */
2053 assert(category != fcZero || lost_fraction == lfExactlyZero);
2054 }
2055
2056 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2057 positive zero unless rounding to minus infinity, except that
2058 adding two like-signed zeroes gives that zero. */
2059 if (category == fcZero) {
2060 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2061 sign = (rounding_mode == rmTowardNegative);
2062 // NaN-in-negative-zero means zeros need to be normalized to +0.
2063 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2064 sign = false;
2065 }
2066
2067 return fs;
2068 }
2069
2070 /* Normalized addition. */
add(const IEEEFloat & rhs,roundingMode rounding_mode)2071 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2072 roundingMode rounding_mode) {
2073 return addOrSubtract(rhs, rounding_mode, false);
2074 }
2075
2076 /* Normalized subtraction. */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)2077 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2078 roundingMode rounding_mode) {
2079 return addOrSubtract(rhs, rounding_mode, true);
2080 }
2081
2082 /* Normalized multiply. */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)2083 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2084 roundingMode rounding_mode) {
2085 opStatus fs;
2086
2087 sign ^= rhs.sign;
2088 fs = multiplySpecials(rhs);
2089
2090 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2091 sign = false;
2092 if (isFiniteNonZero()) {
2093 lostFraction lost_fraction = multiplySignificand(rhs);
2094 fs = normalize(rounding_mode, lost_fraction);
2095 if (lost_fraction != lfExactlyZero)
2096 fs = (opStatus) (fs | opInexact);
2097 }
2098
2099 return fs;
2100 }
2101
2102 /* Normalized divide. */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)2103 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2104 roundingMode rounding_mode) {
2105 opStatus fs;
2106
2107 sign ^= rhs.sign;
2108 fs = divideSpecials(rhs);
2109
2110 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2111 sign = false;
2112 if (isFiniteNonZero()) {
2113 lostFraction lost_fraction = divideSignificand(rhs);
2114 fs = normalize(rounding_mode, lost_fraction);
2115 if (lost_fraction != lfExactlyZero)
2116 fs = (opStatus) (fs | opInexact);
2117 }
2118
2119 return fs;
2120 }
2121
/* Normalized remainder.

   Replaces *this with the remainder of *this divided by RHS, where the
   implied quotient is rounded to the nearest integral value with halfway
   cases going to the even one (see the scheme described below).  Returns the
   status of the special-case handling when that settles the result, and the
   status of the last arithmetic step otherwise.  */
IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
  opStatus fs;
  // Remember the sign of the dividend; the work below is done on absolute
  // values and the sign is re-applied at the end.
  unsigned int origSign = sign;

  // First handle the special cases.  opDivByZero is the sentinel meaning
  // "both operands are normal, carry on".
  fs = remainderSpecials(rhs);
  if (fs != opDivByZero)
    return fs;

  fs = opOK;

  // Make sure the current value is less than twice the denom. If the addition
  // did not succeed (an overflow has happened), the finite value we currently
  // possess must already be less than twice the denom (as we are using the
  // same semantics).
  IEEEFloat P2 = rhs;
  if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
    fs = mod(P2);
    assert(fs == opOK);
  }

  // Let's work with absolute numbers.
  IEEEFloat P = rhs;
  P.sign = false;
  sign = false;

  //
  // To calculate the remainder we use the following scheme.
  //
  // The remainder is defined as follows:
  //
  // remainder = numer - rquot * denom = x - r * p
  //
  // Where r is the result of: x/p, rounded toward the nearest integral value
  // (with halfway cases rounded toward the even number).
  //
  // Currently, (after x mod 2p):
  // r is the number of 2p's present inside x, which is inherently an even
  // number of p's.
  //
  // We may split the remaining calculation into 4 options:
  // - if x < 0.5p then we round to the nearest number, which is 0, and are
  //   done.
  // - if x == 0.5p then we round to the nearest even number, which is 0, and
  //   we are done as well.
  // - if 0.5p < x < p then we round to the nearest number, which is 1, and we
  //   have to subtract 1p at least once.
  // - if x >= p then we must subtract p at least once, as x must be a
  //   remainder.
  //
  // By now we are either done, or we have added 1 to r, which is therefore
  // now an odd number.
  //
  // We can now split the remaining calculation into the following 3 options:
  // - if x < 0.5p then we round to the nearest number, which is 0, and are
  //   done.
  // - if x == 0.5p then we round to the nearest even number. As r is odd, we
  //   must round up to the next even number, so we must subtract p once more.
  // - if x > 0.5p (and inherently x < p) then we must round r up to the next
  //   integral, and subtract p once more.
  //

  // Extend the semantics to prevent an overflow/underflow or inexact result.
  bool losesInfo;
  fltSemantics extendedSemantics = *semantics;
  extendedSemantics.maxExponent++;
  extendedSemantics.minExponent--;
  extendedSemantics.precision += 2;

  // VEx and PEx are copies of the current value and of |rhs| in the extended
  // semantics; they are used only for the comparisons below.
  IEEEFloat VEx = *this;
  fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  IEEEFloat PEx = P;
  fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);

  // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
  // any fraction.
  fs = VEx.add(VEx, rmNearestTiesToEven);
  assert(fs == opOK);

  // 2x > p, i.e. x > 0.5p: subtract p once.
  if (VEx.compare(PEx) == cmpGreaterThan) {
    fs = subtract(P, rmNearestTiesToEven);
    assert(fs == opOK);

    // Make VEx = this.add(this), but because we have different semantics, we do
    // not want to `convert` again, so we just subtract PEx twice (which equals
    // to the desired value).
    fs = VEx.subtract(PEx, rmNearestTiesToEven);
    assert(fs == opOK);
    fs = VEx.subtract(PEx, rmNearestTiesToEven);
    assert(fs == opOK);

    // 2x >= p, i.e. x >= 0.5p with an odd r: subtract p once more.
    cmpResult result = VEx.compare(PEx);
    if (result == cmpGreaterThan || result == cmpEqual) {
      fs = subtract(P, rmNearestTiesToEven);
      assert(fs == opOK);
    }
  }

  if (isZero()) {
    sign = origSign;    // IEEE754 requires this
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      // But some 8-bit floats only have positive 0.
      sign = false;
  }

  else
    // Re-apply the dividend's sign to the result computed on absolute values.
    sign ^= origSign;
  return fs;
}
2231
2232 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)2233 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2234 opStatus fs;
2235 fs = modSpecials(rhs);
2236 unsigned int origSign = sign;
2237
2238 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2239 compareAbsoluteValue(rhs) != cmpLessThan) {
2240 int Exp = ilogb(*this) - ilogb(rhs);
2241 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2242 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2243 // check for it.
2244 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2245 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2246 V.sign = sign;
2247
2248 fs = subtract(V, rmNearestTiesToEven);
2249 assert(fs==opOK);
2250 }
2251 if (isZero()) {
2252 sign = origSign; // fmod requires this
2253 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2254 sign = false;
2255 }
2256 return fs;
2257 }
2258
2259 /* Normalized fused-multiply-add. */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)2260 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2261 const IEEEFloat &addend,
2262 roundingMode rounding_mode) {
2263 opStatus fs;
2264
2265 /* Post-multiplication sign, before addition. */
2266 sign ^= multiplicand.sign;
2267
2268 /* If and only if all arguments are normal do we need to do an
2269 extended-precision calculation. */
2270 if (isFiniteNonZero() &&
2271 multiplicand.isFiniteNonZero() &&
2272 addend.isFinite()) {
2273 lostFraction lost_fraction;
2274
2275 lost_fraction = multiplySignificand(multiplicand, addend);
2276 fs = normalize(rounding_mode, lost_fraction);
2277 if (lost_fraction != lfExactlyZero)
2278 fs = (opStatus) (fs | opInexact);
2279
2280 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2281 positive zero unless rounding to minus infinity, except that
2282 adding two like-signed zeroes gives that zero. */
2283 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2284 sign = (rounding_mode == rmTowardNegative);
2285 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2286 sign = false;
2287 }
2288 } else {
2289 fs = multiplySpecials(multiplicand);
2290
2291 /* FS can only be opOK or opInvalidOp. There is no more work
2292 to do in the latter case. The IEEE-754R standard says it is
2293 implementation-defined in this case whether, if ADDEND is a
2294 quiet NaN, we raise invalid op; this implementation does so.
2295
2296 If we need to do the addition we can do so with normal
2297 precision. */
2298 if (fs == opOK)
2299 fs = addOrSubtract(addend, rounding_mode, false);
2300 }
2301
2302 return fs;
2303 }
2304
2305 /* Rounding-mode correct round to integral value. */
roundToIntegral(roundingMode rounding_mode)2306 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2307 opStatus fs;
2308
2309 if (isInfinity())
2310 // [IEEE Std 754-2008 6.1]:
2311 // The behavior of infinity in floating-point arithmetic is derived from the
2312 // limiting cases of real arithmetic with operands of arbitrarily
2313 // large magnitude, when such a limit exists.
2314 // ...
2315 // Operations on infinite operands are usually exact and therefore signal no
2316 // exceptions ...
2317 return opOK;
2318
2319 if (isNaN()) {
2320 if (isSignaling()) {
2321 // [IEEE Std 754-2008 6.2]:
2322 // Under default exception handling, any operation signaling an invalid
2323 // operation exception and for which a floating-point result is to be
2324 // delivered shall deliver a quiet NaN.
2325 makeQuiet();
2326 // [IEEE Std 754-2008 6.2]:
2327 // Signaling NaNs shall be reserved operands that, under default exception
2328 // handling, signal the invalid operation exception(see 7.2) for every
2329 // general-computational and signaling-computational operation except for
2330 // the conversions described in 5.12.
2331 return opInvalidOp;
2332 } else {
2333 // [IEEE Std 754-2008 6.2]:
2334 // For an operation with quiet NaN inputs, other than maximum and minimum
2335 // operations, if a floating-point result is to be delivered the result
2336 // shall be a quiet NaN which should be one of the input NaNs.
2337 // ...
2338 // Every general-computational and quiet-computational operation involving
2339 // one or more input NaNs, none of them signaling, shall signal no
2340 // exception, except fusedMultiplyAdd might signal the invalid operation
2341 // exception(see 7.2).
2342 return opOK;
2343 }
2344 }
2345
2346 if (isZero()) {
2347 // [IEEE Std 754-2008 6.3]:
2348 // ... the sign of the result of conversions, the quantize operation, the
2349 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2350 // the sign of the first or only operand.
2351 return opOK;
2352 }
2353
2354 // If the exponent is large enough, we know that this value is already
2355 // integral, and the arithmetic below would potentially cause it to saturate
2356 // to +/-Inf. Bail out early instead.
2357 if (exponent+1 >= (int)semanticsPrecision(*semantics))
2358 return opOK;
2359
2360 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2361 // precision of our format, and then subtract it back off again. The choice
2362 // of rounding modes for the addition/subtraction determines the rounding mode
2363 // for our integral rounding as well.
2364 // NOTE: When the input value is negative, we do subtraction followed by
2365 // addition instead.
2366 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2367 IntegerConstant <<= semanticsPrecision(*semantics)-1;
2368 IEEEFloat MagicConstant(*semantics);
2369 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2370 rmNearestTiesToEven);
2371 assert(fs == opOK);
2372 MagicConstant.sign = sign;
2373
2374 // Preserve the input sign so that we can handle the case of zero result
2375 // correctly.
2376 bool inputSign = isNegative();
2377
2378 fs = add(MagicConstant, rounding_mode);
2379
2380 // Current value and 'MagicConstant' are both integers, so the result of the
2381 // subtraction is always exact according to Sterbenz' lemma.
2382 subtract(MagicConstant, rounding_mode);
2383
2384 // Restore the input sign.
2385 if (inputSign != isNegative())
2386 changeSign();
2387
2388 return fs;
2389 }
2390
2391
2392 /* Comparison requires normalized numbers. */
compare(const IEEEFloat & rhs) const2393 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2394 cmpResult result;
2395
2396 assert(semantics == rhs.semantics);
2397
2398 switch (PackCategoriesIntoKey(category, rhs.category)) {
2399 default:
2400 llvm_unreachable(nullptr);
2401
2402 case PackCategoriesIntoKey(fcNaN, fcZero):
2403 case PackCategoriesIntoKey(fcNaN, fcNormal):
2404 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2405 case PackCategoriesIntoKey(fcNaN, fcNaN):
2406 case PackCategoriesIntoKey(fcZero, fcNaN):
2407 case PackCategoriesIntoKey(fcNormal, fcNaN):
2408 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2409 return cmpUnordered;
2410
2411 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2412 case PackCategoriesIntoKey(fcInfinity, fcZero):
2413 case PackCategoriesIntoKey(fcNormal, fcZero):
2414 if (sign)
2415 return cmpLessThan;
2416 else
2417 return cmpGreaterThan;
2418
2419 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2420 case PackCategoriesIntoKey(fcZero, fcInfinity):
2421 case PackCategoriesIntoKey(fcZero, fcNormal):
2422 if (rhs.sign)
2423 return cmpGreaterThan;
2424 else
2425 return cmpLessThan;
2426
2427 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2428 if (sign == rhs.sign)
2429 return cmpEqual;
2430 else if (sign)
2431 return cmpLessThan;
2432 else
2433 return cmpGreaterThan;
2434
2435 case PackCategoriesIntoKey(fcZero, fcZero):
2436 return cmpEqual;
2437
2438 case PackCategoriesIntoKey(fcNormal, fcNormal):
2439 break;
2440 }
2441
2442 /* Two normal numbers. Do they have the same sign? */
2443 if (sign != rhs.sign) {
2444 if (sign)
2445 result = cmpLessThan;
2446 else
2447 result = cmpGreaterThan;
2448 } else {
2449 /* Compare absolute values; invert result if negative. */
2450 result = compareAbsoluteValue(rhs);
2451
2452 if (sign) {
2453 if (result == cmpLessThan)
2454 result = cmpGreaterThan;
2455 else if (result == cmpGreaterThan)
2456 result = cmpLessThan;
2457 }
2458 }
2459
2460 return result;
2461 }
2462
/// IEEEFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==opOK, but there
/// are edge cases where this is not so).
///
/// \param toSemantics the format *this is converted to, in place.
/// \param rounding_mode rounding used when a finite non-zero value is
/// normalized in the new format.
/// \param losesInfo output flag; it is dereferenced unconditionally and
/// written on every path, so it must not be null.

IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
                                       roundingMode rounding_mode,
                                       bool *losesInfo) {
  lostFraction lostFraction;
  unsigned int newPartCount, oldPartCount;
  opStatus fs;
  int shift;
  const fltSemantics &fromSemantics = *semantics;
  // Sampled before the significand is shifted or the semantics are switched.
  bool is_signaling = isSignaling();

  lostFraction = lfExactlyZero;
  newPartCount = partCountForBits(toSemantics.precision + 1);
  oldPartCount = partCount();
  // Positive when the target has more precision (extension), negative when
  // it has less (truncation).
  shift = toSemantics.precision - fromSemantics.precision;

  bool X86SpecialNan = false;
  if (&fromSemantics == &semX87DoubleExtended &&
      &toSemantics != &semX87DoubleExtended && category == fcNaN &&
      (!(*significandParts() & 0x8000000000000000ULL) ||
       !(*significandParts() & 0x4000000000000000ULL))) {
    // x86 has some unusual NaNs which cannot be represented in any other
    // format; note them here.
    X86SpecialNan = true;
  }

  // If this is a truncation of a denormal number, and the target semantics
  // has larger exponent range than the source semantics (this can happen
  // when truncating from PowerPC double-double to double format), the
  // right shift could lose result mantissa bits.  Adjust exponent instead
  // of performing excessive shift.
  // Also do a similar trick in case shifting denormal would produce zero
  // significand as this case isn't handled correctly by normalize.
  if (shift < 0 && isFiniteNonZero()) {
    // omsb is the one-based position of the most significant set bit.
    int omsb = significandMSB() + 1;
    int exponentChange = omsb - fromSemantics.precision;
    // Do not move the exponent below the target's minimum, and never absorb
    // more than the whole shift.
    if (exponent + exponentChange < toSemantics.minExponent)
      exponentChange = toSemantics.minExponent - exponent;
    if (exponentChange < shift)
      exponentChange = shift;
    if (exponentChange < 0) {
      shift -= exponentChange;
      exponent += exponentChange;
    } else if (omsb <= -shift) {
      exponentChange = omsb + shift - 1; // leave at least one bit set
      shift -= exponentChange;
      exponent += exponentChange;
    }
  }

  // If this is a truncation, perform the shift before we narrow the storage.
  if (shift < 0 && (isFiniteNonZero() ||
                    (category == fcNaN && semantics->nonFiniteBehavior !=
                                              fltNonfiniteBehavior::NanOnly)))
    lostFraction = shiftRight(significandParts(), oldPartCount, -shift);

  // Fix the storage so it can hold to new value.
  if (newPartCount > oldPartCount) {
    // The new type requires more storage; make it available.
    integerPart *newParts;
    newParts = new integerPart[newPartCount];
    APInt::tcSet(newParts, 0, newPartCount);
    // Only finite non-zero values and NaNs carry significand bits worth
    // copying.
    if (isFiniteNonZero() || category==fcNaN)
      APInt::tcAssign(newParts, significandParts(), oldPartCount);
    freeSignificand();
    significand.parts = newParts;
  } else if (newPartCount == 1 && oldPartCount != 1) {
    // Switch to built-in storage for a single part.
    integerPart newPart = 0;
    if (isFiniteNonZero() || category==fcNaN)
      newPart = significandParts()[0];
    freeSignificand();
    significand.part = newPart;
  }

  // Now that we have the right storage, switch the semantics.
  semantics = &toSemantics;

  // If this is an extension, perform the shift now that the storage is
  // available.
  if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
    APInt::tcShiftLeft(significandParts(), newPartCount, shift);

  if (isFiniteNonZero()) {
    // Any non-opOK status from normalization counts as losing information.
    fs = normalize(rounding_mode, lostFraction);
    *losesInfo = (fs != opOK);
  } else if (category == fcNaN) {
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
      // The target is a NanOnly format: build a fresh NaN with our sign.
      // Information is reported lost unless the source was NanOnly too.
      *losesInfo =
          fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
      makeNaN(false, sign);
      return is_signaling ? opInvalidOp : opOK;
    }

    // If NaN is negative zero, we need to create a new NaN to avoid converting
    // NaN to -Inf.
    if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
        semantics->nanEncoding != fltNanEncoding::NegativeZero)
      makeNaN(false, false);

    *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;

    // For x87 extended precision, we want to make a NaN, not a special NaN if
    // the input wasn't special either.
    if (!X86SpecialNan && semantics == &semX87DoubleExtended)
      APInt::tcSetBit(significandParts(), semantics->precision - 1);

    // Convert of sNaN creates qNaN and raises an exception (invalid op).
    // This also guarantees that a sNaN does not become Inf on a truncation
    // that loses all payload bits.
    if (is_signaling) {
      makeQuiet();
      fs = opInvalidOp;
    } else {
      fs = opOK;
    }
  } else if (category == fcInfinity &&
             semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // The target is a NanOnly format: an infinity becomes a NaN with the
    // same sign, and the conversion is reported as inexact.
    makeNaN(false, sign);
    *losesInfo = true;
    fs = opInexact;
  } else if (category == fcZero &&
             semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Negative zero loses info, but positive zero doesn't.
    *losesInfo =
        fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
    fs = *losesInfo ? opInexact : opOK;
    // NaN is negative zero means -0 -> +0, which can lose information
    sign = false;
  } else {
    // Remaining infinities and zeros convert exactly.
    *losesInfo = false;
    fs = opOK;
  }

  return fs;
}
2605
/* Convert a floating point number to an integer according to the
   rounding mode.  If the rounded integer value is out of range this
   returns an invalid operation exception and the contents of the
   destination parts are unspecified.  If the rounded value is in
   range but the floating point number is not the exact integer, the C
   standard doesn't require an inexact exception to be raised.  IEEE
   854 does require it so we do that.

   Note that for conversions to integer type the C standard requires
   round-to-zero to always be used.

   PARTS must hold at least partCountForBits(WIDTH) parts (asserted
   below).  A negative result is produced by negating the magnitude
   across all of those parts with APInt::tcNegate.

   *ISEXACT is cleared on entry.  It is set to true only when no
   fraction was lost, and to !sign for a zero input.  Infinities and
   NaNs return opInvalidOp before PARTS is written.

   Returns opOK for an exact conversion, opInexact when a fraction was
   discarded, and opInvalidOp when the value cannot be represented.  */
IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
    MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
    roundingMode rounding_mode, bool *isExact) const {
  lostFraction lost_fraction;
  const integerPart *src;
  unsigned int dstPartsCount, truncatedBits;

  *isExact = false;

  /* Handle the three special cases first (infinity, NaN and zero).  */
  if (category == fcInfinity || category == fcNaN)
    return opInvalidOp;

  dstPartsCount = partCountForBits(width);
  assert(dstPartsCount <= parts.size() && "Integer too big");

  if (category == fcZero) {
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    // Negative zero can't be represented as an int.
    *isExact = !sign;
    return opOK;
  }

  src = significandParts();

  /* Step 1: place our absolute value, with any fraction truncated, in
     the destination.  */
  if (exponent < 0) {
    /* Our absolute value is less than one; truncate everything.  */
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    /* For exponent -1 the integer bit represents .5, look at that.
       For smaller exponents leftmost truncated bit is 0.  */
    truncatedBits = semantics->precision -1U - exponent;
  } else {
    /* We want the most significant (exponent + 1) bits; the rest are
       truncated.  */
    unsigned int bits = exponent + 1U;

    /* Hopelessly large in magnitude?  */
    if (bits > width)
      return opInvalidOp;

    if (bits < semantics->precision) {
      /* We truncate (semantics->precision - bits) bits.  */
      truncatedBits = semantics->precision - bits;
      APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
    } else {
      /* We want at least as many bits as are available: copy the whole
         significand and shift it up to its integer position.  */
      APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
                       0);
      APInt::tcShiftLeft(parts.data(), dstPartsCount,
                         bits - semantics->precision);
      truncatedBits = 0;
    }
  }

  /* Step 2: work out any lost fraction, and increment the absolute
     value if we would round away from zero.  */
  if (truncatedBits) {
    lost_fraction = lostFractionThroughTruncation(src, partCount(),
                                                  truncatedBits);
    if (lost_fraction != lfExactlyZero &&
        roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
      /* A carry out of the top destination part means the rounded
         magnitude does not fit.  */
      if (APInt::tcIncrement(parts.data(), dstPartsCount))
        return opInvalidOp;     /* Overflow.  */
    }
  } else {
    lost_fraction = lfExactlyZero;
  }

  /* Step 3: check if we fit in the destination.  omsb is the one-based
     index of the most significant set bit of the magnitude; the test
     below for a negative value converted to unsigned relies on it
     being 0 when the magnitude is zero.  */
  unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;

  if (sign) {
    if (!isSigned) {
      /* Negative numbers cannot be represented as unsigned.  Only a
         magnitude that rounded to zero is accepted.  */
      if (omsb != 0)
        return opInvalidOp;
    } else {
      /* It takes omsb bits to represent the unsigned integer value.
         We lose a bit for the sign, but care is needed as the
         maximally negative integer is a special case: a magnitude
         with exactly one bit set, at position WIDTH, is allowed.  */
      if (omsb == width &&
          APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
        return opInvalidOp;

      /* This case can happen because of rounding.  */
      if (omsb > width)
        return opInvalidOp;
    }

    APInt::tcNegate (parts.data(), dstPartsCount);
  } else {
    /* A non-negative value may use WIDTH bits when unsigned, but only
       WIDTH - 1 bits when signed.  */
    if (omsb >= width + !isSigned)
      return opInvalidOp;
  }

  if (lost_fraction == lfExactlyZero) {
    *isExact = true;
    return opOK;
  } else
    return opInexact;
}
2719
2720 /* Same as convertToSignExtendedInteger, except we provide
2721 deterministic values in case of an invalid operation exception,
2722 namely zero for NaNs and the minimal or maximal value respectively
2723 for underflow or overflow.
2724 The *isExact output tells whether the result is exact, in the sense
2725 that converting it back to the original floating point type produces
2726 the original value. This is almost equivalent to result==opOK,
2727 except for negative zeroes.
2728 */
2729 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2730 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2731 unsigned int width, bool isSigned,
2732 roundingMode rounding_mode, bool *isExact) const {
2733 opStatus fs;
2734
2735 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2736 isExact);
2737
2738 if (fs == opInvalidOp) {
2739 unsigned int bits, dstPartsCount;
2740
2741 dstPartsCount = partCountForBits(width);
2742 assert(dstPartsCount <= parts.size() && "Integer too big");
2743
2744 if (category == fcNaN)
2745 bits = 0;
2746 else if (sign)
2747 bits = isSigned;
2748 else
2749 bits = width - isSigned;
2750
2751 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2752 if (sign && isSigned)
2753 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2754 }
2755
2756 return fs;
2757 }
2758
2759 /* Convert an unsigned integer SRC to a floating point number,
2760 rounding according to ROUNDING_MODE. The sign of the floating
2761 point number is not modified. */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2762 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2763 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2764 unsigned int omsb, precision, dstCount;
2765 integerPart *dst;
2766 lostFraction lost_fraction;
2767
2768 category = fcNormal;
2769 omsb = APInt::tcMSB(src, srcCount) + 1;
2770 dst = significandParts();
2771 dstCount = partCount();
2772 precision = semantics->precision;
2773
2774 /* We want the most significant PRECISION bits of SRC. There may not
2775 be that many; extract what we can. */
2776 if (precision <= omsb) {
2777 exponent = omsb - 1;
2778 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2779 omsb - precision);
2780 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2781 } else {
2782 exponent = precision - 1;
2783 lost_fraction = lfExactlyZero;
2784 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2785 }
2786
2787 return normalize(rounding_mode, lost_fraction);
2788 }
2789
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2790 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2791 roundingMode rounding_mode) {
2792 unsigned int partCount = Val.getNumWords();
2793 APInt api = Val;
2794
2795 sign = false;
2796 if (isSigned && api.isNegative()) {
2797 sign = true;
2798 api = -api;
2799 }
2800
2801 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2802 }
2803
2804 /* Convert a two's complement integer SRC to a floating point number,
2805 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2806 integer is signed, in which case it must be sign-extended. */
2807 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2808 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2809 unsigned int srcCount, bool isSigned,
2810 roundingMode rounding_mode) {
2811 opStatus status;
2812
2813 if (isSigned &&
2814 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2815 integerPart *copy;
2816
2817 /* If we're signed and negative negate a copy. */
2818 sign = true;
2819 copy = new integerPart[srcCount];
2820 APInt::tcAssign(copy, src, srcCount);
2821 APInt::tcNegate(copy, srcCount);
2822 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2823 delete [] copy;
2824 } else {
2825 sign = false;
2826 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2827 }
2828
2829 return status;
2830 }
2831
2832 /* FIXME: should this just take a const APInt reference? */
2833 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2834 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2835 unsigned int width, bool isSigned,
2836 roundingMode rounding_mode) {
2837 unsigned int partCount = partCountForBits(width);
2838 APInt api = APInt(width, ArrayRef(parts, partCount));
2839
2840 sign = false;
2841 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2842 sign = true;
2843 api = -api;
2844 }
2845
2846 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2847 }
2848
/* Parse the hexadecimal significand and binary exponent in S and set
   this value accordingly, rounding according to ROUNDING_MODE.  The
   caller in this file (convertFromString) passes the text that follows
   the "0x"/"0X" prefix.  This routine does not assign the sign.

   Digits are packed into the significand from its most significant
   nibble downwards.  Once the significand is full, the remaining
   digits only contribute to the lost fraction.

   Returns the status of normalize() on success, or an Error for
   malformed input: a second '.', a missing 'p'/'P' exponent, an
   unexpected character, no significand digits, or whatever the helper
   parsers report.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromHexadecimalString(StringRef s,
                                        roundingMode rounding_mode) {
  lostFraction lost_fraction = lfExactlyZero;

  category = fcNormal;
  zeroSignificand();
  exponent = 0;

  integerPart *significand = significandParts();
  unsigned partsCount = partCount();
  /* Bit position just above the next nibble to be written; counts down
     from the top of the significand storage to zero.  */
  unsigned bitPos = partsCount * integerPartWidth;
  bool computedTrailingFraction = false;

  // Skip leading zeroes and any (hexa)decimal point.
  StringRef::iterator begin = s.begin();
  StringRef::iterator end = s.end();
  StringRef::iterator dot;
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;
  StringRef::iterator firstSignificantDigit = p;

  while (p != end) {
    integerPart hex_value;

    if (*p == '.') {
      /* The code below uses dot == end to mean that no point has been
         seen yet.  */
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      continue;
    }

    /* Stop at the first character that is not a hex digit; it is
       validated as the exponent marker after the loop.  */
    hex_value = hexDigitValue(*p);
    if (hex_value == UINT_MAX)
      break;

    p++;

    // Store the number while we have space.
    if (bitPos) {
      bitPos -= 4;
      hex_value <<= bitPos % integerPartWidth;
      significand[bitPos / integerPartWidth] |= hex_value;
    } else if (!computedTrailingFraction) {
      /* First digit that no longer fits: classify it together with
         everything after it, once.  Later digits are still consumed by
         this loop but otherwise ignored.  */
      auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
      if (!FractOrErr)
        return FractOrErr.takeError();
      lost_fraction = *FractOrErr;
      computedTrailingFraction = true;
    }
  }

  /* Hex floats require an exponent but not a hexadecimal point.  */
  if (p == end)
    return createError("Hex strings require an exponent");
  if (*p != 'p' && *p != 'P')
    return createError("Invalid character in significand");
  if (p == begin)
    return createError("Significand has no digits");
  /* A lone '.' before the exponent is not a significand either.  */
  if (dot != end && p - begin == 1)
    return createError("Significand has no digits");

  /* Ignore the exponent if we are zero.  */
  if (p != firstSignificantDigit) {
    int expAdjustment;

    /* Implicit hexadecimal point?  */
    if (dot == end)
      dot = p;

    /* Calculate the exponent adjustment implicit in the number of
       significant digits.  */
    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
    /* When the point precedes the first significant digit the distance
       also counts the '.' character itself; discount it.  */
    if (expAdjustment < 0)
      expAdjustment++;
    expAdjustment = expAdjustment * 4 - 1;

    /* Adjust for writing the significand starting at the most
       significant nibble.  */
    expAdjustment += semantics->precision;
    expAdjustment -= partsCount * integerPartWidth;

    /* Adjust for the given exponent.  */
    auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    exponent = *ExpOrErr;
  }

  return normalize(rounding_mode, lost_fraction);
}
2942
/* Set this value to the integer in DECSIGPARTS (SIGPARTCOUNT parts)
   multiplied by 10^EXP, rounded according to ROUNDING_MODE.

   The product is computed as significand * 5^|EXP| (or divided by it
   for negative EXP) in a temporary format whose precision starts a
   little wider than ours and doubles on every retry.  After each
   attempt the accumulated error bound, in half-ulps, is compared with
   the distance of the result from the nearest rounding boundary.  The
   loop exits as soon as truncating the wide result is guaranteed to
   round correctly.

   Returns the status of the final normalize().  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  /* Scratch semantics for the wide calculation; its precision is
     filled in per iteration below.  */
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    /* Use every bit of PARTS parts except one for the working
       precision.  */
    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    /* Either conversion may itself be inexact at this precision; the
       statuses feed the error bound below.  */
    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        /* truncatedBits keeps the unclamped count; only
           excessPrecision is limited to the bits actually present.  */
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
    /* Otherwise fall through and retry with twice as many parts.  */
  }
}
3027
/* Parse the decimal number in STR and set this value accordingly,
   rounding according to ROUNDING_MODE.  This routine does not set the
   sign, except that a zero result has its sign cleared when the
   semantics use the NegativeZero NaN encoding.

   Zero, certain overflow and certain underflow are decided from the
   normalized exponent alone.  Otherwise the significand digits are
   accumulated into a bignum and passed to
   roundSignificandWithExponent.

   Returns the resulting status, or an Error if interpretDecimal
   rejects the text or a non-digit is found in the significand.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  decimalInfo D;
  opStatus fs;

  /* Scan the text.  */
  StringRef::iterator p = str.begin();
  if (Error Err = interpretDecimal(p, str.end(), &D))
    return std::move(Err);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
     definitely overflows if

           (exp - 1) * L >= maxExponent

     and definitely underflows to zero where

           (exp + 1) * L <= minExponent - precision

     With integer arithmetic the tightest bounds for L are

           93/28 < L < 196/59            [ numerator <= 256 ]
           42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
  */

  // Test if we have a zero number allowing for strings with no null terminators
  // and zero decimals with non-zero exponents.
  //
  // We computed firstSigDigit by ignoring all zeros and dots. Thus if
  // D.firstSigDigit equals str.end(), every digit must be a zero and there can
  // be at most one dot. On the other hand, if we have a zero with a non-zero
  // exponent, then we know that D.firstSigDigit will be non-numeric.
  if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
    category = fcZero;
    fs = opOK;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;

    /* Check whether the normalized exponent is high enough to overflow
       max during the log-rebasing in the max-exponent check below. */
  } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
    fs = handleOverflow(rounding_mode);

    /* If it wasn't, then it also wasn't high enough to overflow max
       during the log-rebasing in the min-exponent check.  Check that it
       won't overflow min in either check, then perform the min-exponent
       check. */
  } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
             (D.normalizedExponent + 1) * 28738 <=
               8651 * (semantics->minExponent - (int) semantics->precision)) {
    /* Underflow to zero and round.  A zero significand with a non-zero
       lost fraction lets normalize() apply the rounding mode.  */
    category = fcNormal;
    zeroSignificand();
    fs = normalize(rounding_mode, lfLessThanHalf);

    /* We can finally safely perform the max-exponent check. */
  } else if ((D.normalizedExponent - 1) * 42039
             >= 12655 * semantics->maxExponent) {
    /* Overflow and round.  */
    fs = handleOverflow(rounding_mode);
  } else {
    integerPart *decSignificand;
    unsigned int partCount;

    /* A tight upper bound on number of bits required to hold an
       N-digit decimal integer is N * 196 / 59.  Allocate enough space
       to hold the full significand, and an extra part required by
       tcMultiplyPart.  The buffer is owned by this branch and must be
       freed on every exit from it.  */
    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
    partCount = partCountForBits(1 + 196 * partCount / 59);
    decSignificand = new integerPart[partCount + 1];
    /* From here on partCount is the number of parts in use.  */
    partCount = 0;

    /* Convert to binary efficiently - we do almost all multiplication
       in an integerPart.  When this would overflow do we do a single
       bignum multiplication, and then revert again to multiplication
       in an integerPart.  */
    do {
      integerPart decValue, val, multiplier;

      val = 0;
      multiplier = 1;

      do {
        /* Step over the decimal point; nothing follows it if it was
           the last character.  */
        if (*p == '.') {
          p++;
          if (p == str.end()) {
            break;
          }
        }
        decValue = decDigitValue(*p++);
        if (decValue >= 10U) {
          delete[] decSignificand;
          return createError("Invalid character in significand");
        }
        multiplier *= 10;
        val = val * 10 + decValue;
        /* The maximum number that can be multiplied by ten with any
           digit added without overflowing an integerPart.  */
      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);

      /* Multiply out the current part.  */
      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
                            partCount, partCount + 1, false);

      /* If we used another part (likely but not guaranteed), increase
         the count.  */
      if (decSignificand[partCount])
        partCount++;
    } while (p <= D.lastSigDigit);

    category = fcNormal;
    fs = roundSignificandWithExponent(decSignificand, partCount,
                                      D.exponent, rounding_mode);

    delete [] decSignificand;
  }

  return fs;
}
3150
convertFromStringSpecials(StringRef str)3151 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3152 const size_t MIN_NAME_SIZE = 3;
3153
3154 if (str.size() < MIN_NAME_SIZE)
3155 return false;
3156
3157 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3158 makeInf(false);
3159 return true;
3160 }
3161
3162 bool IsNegative = str.front() == '-';
3163 if (IsNegative) {
3164 str = str.drop_front();
3165 if (str.size() < MIN_NAME_SIZE)
3166 return false;
3167
3168 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3169 makeInf(true);
3170 return true;
3171 }
3172 }
3173
3174 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3175 bool IsSignaling = str.front() == 's' || str.front() == 'S';
3176 if (IsSignaling) {
3177 str = str.drop_front();
3178 if (str.size() < MIN_NAME_SIZE)
3179 return false;
3180 }
3181
3182 if (str.starts_with("nan") || str.starts_with("NaN")) {
3183 str = str.drop_front(3);
3184
3185 // A NaN without payload.
3186 if (str.empty()) {
3187 makeNaN(IsSignaling, IsNegative);
3188 return true;
3189 }
3190
3191 // Allow the payload to be inside parentheses.
3192 if (str.front() == '(') {
3193 // Parentheses should be balanced (and not empty).
3194 if (str.size() <= 2 || str.back() != ')')
3195 return false;
3196
3197 str = str.slice(1, str.size() - 1);
3198 }
3199
3200 // Determine the payload number's radix.
3201 unsigned Radix = 10;
3202 if (str[0] == '0') {
3203 if (str.size() > 1 && tolower(str[1]) == 'x') {
3204 str = str.drop_front(2);
3205 Radix = 16;
3206 } else
3207 Radix = 8;
3208 }
3209
3210 // Parse the payload and make the NaN.
3211 APInt Payload;
3212 if (!str.getAsInteger(Radix, Payload)) {
3213 makeNaN(IsSignaling, IsNegative, &Payload);
3214 return true;
3215 }
3216 }
3217
3218 return false;
3219 }
3220
3221 Expected<IEEEFloat::opStatus>
convertFromString(StringRef str,roundingMode rounding_mode)3222 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3223 if (str.empty())
3224 return createError("Invalid string length");
3225
3226 // Handle special cases.
3227 if (convertFromStringSpecials(str))
3228 return opOK;
3229
3230 /* Handle a leading minus sign. */
3231 StringRef::iterator p = str.begin();
3232 size_t slen = str.size();
3233 sign = *p == '-' ? 1 : 0;
3234 if (*p == '-' || *p == '+') {
3235 p++;
3236 slen--;
3237 if (!slen)
3238 return createError("String has no digits");
3239 }
3240
3241 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3242 if (slen == 2)
3243 return createError("Invalid string");
3244 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3245 rounding_mode);
3246 }
3247
3248 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3249 }
3250
3251 /* Write out a hexadecimal representation of the floating point value
3252 to DST, which must be of sufficient size, in the C99 form
3253 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3254 excluding the terminating NUL.
3255
3256 If UPPERCASE, the output is in upper case, otherwise in lower case.
3257
3258 HEXDIGITS digits appear altogether, rounding the value if
3259 necessary. If HEXDIGITS is 0, the minimal precision to display the
3260 number precisely is used instead. If nothing would appear after
3261 the decimal point it is suppressed.
3262
3263 The decimal exponent is always printed and has at least one digit.
3264 Zero values display an exponent of zero. Infinities and NaNs
3265 appear as "infinity" or "nan" respectively.
3266
3267 The above rules are as specified by C99. There is ambiguity about
3268 what the leading hexadecimal digit should be. This implementation
3269 uses whatever is necessary so that the exponent is displayed as
3270 stored. This implies the exponent will fall within the IEEE format
3271 range, and the leading hexadecimal digit will be 0 (for denormals),
3272 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3273 any other digits zero).
3274 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const3275 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3276 bool upperCase,
3277 roundingMode rounding_mode) const {
3278 char *p;
3279
3280 p = dst;
3281 if (sign)
3282 *dst++ = '-';
3283
3284 switch (category) {
3285 case fcInfinity:
3286 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3287 dst += sizeof infinityL - 1;
3288 break;
3289
3290 case fcNaN:
3291 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3292 dst += sizeof NaNU - 1;
3293 break;
3294
3295 case fcZero:
3296 *dst++ = '0';
3297 *dst++ = upperCase ? 'X': 'x';
3298 *dst++ = '0';
3299 if (hexDigits > 1) {
3300 *dst++ = '.';
3301 memset (dst, '0', hexDigits - 1);
3302 dst += hexDigits - 1;
3303 }
3304 *dst++ = upperCase ? 'P': 'p';
3305 *dst++ = '0';
3306 break;
3307
3308 case fcNormal:
3309 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3310 break;
3311 }
3312
3313 *dst = 0;
3314
3315 return static_cast<unsigned int>(dst - p);
3316 }
3317
/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.

   Writes "0x" (or "0X"), the digits with an optional point, 'p' (or
   'P') and the signed decimal exponent starting at DST.  Returns the
   pointer that writeSignedDecimal returns for the exponent.  No
   terminating NUL is written here; convertToHexString adds it.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  /* Left shift that moves the top value bit of a part to the top bit
     of an integerPart.  */
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  /* Number of integerParts needed to cover valueBits.  This can be one
     more than partsCount, which the loop treats as a zero part.  */
  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    /* Fill the vacated low bits from the next lower part.  */
    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Increment the written digits from the least significant end,
       continuing while a digit wraps to '0'.
       Note that hexDigitChars has a trailing '0'.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}
3423
hash_value(const IEEEFloat & Arg)3424 hash_code hash_value(const IEEEFloat &Arg) {
3425 if (!Arg.isFiniteNonZero())
3426 return hash_combine((uint8_t)Arg.category,
3427 // NaN has no sign, fix it at zero.
3428 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3429 Arg.semantics->precision);
3430
3431 // Normal floats need their exponent and significand hashed.
3432 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3433 Arg.semantics->precision, Arg.exponent,
3434 hash_combine_range(
3435 Arg.significandParts(),
3436 Arg.significandParts() + Arg.partCount()));
3437 }
3438
3439 // Conversion from APFloat to/from host float/double. It may eventually be
3440 // possible to eliminate these and have everybody deal with APFloats, but that
3441 // will take a while. This approach will not easily extend to long double.
3442 // Current implementation requires integerPartWidth==64, which is correct at
3443 // the moment but could be made more general.
3444
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3447
convertF80LongDoubleAPFloatToAPInt() const3448 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3449 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3450 assert(partCount()==2);
3451
3452 uint64_t myexponent, mysignificand;
3453
3454 if (isFiniteNonZero()) {
3455 myexponent = exponent+16383; //bias
3456 mysignificand = significandParts()[0];
3457 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3458 myexponent = 0; // denormal
3459 } else if (category==fcZero) {
3460 myexponent = 0;
3461 mysignificand = 0;
3462 } else if (category==fcInfinity) {
3463 myexponent = 0x7fff;
3464 mysignificand = 0x8000000000000000ULL;
3465 } else {
3466 assert(category == fcNaN && "Unknown category");
3467 myexponent = 0x7fff;
3468 mysignificand = significandParts()[0];
3469 }
3470
3471 uint64_t words[2];
3472 words[0] = mysignificand;
3473 words[1] = ((uint64_t)(sign & 1) << 15) |
3474 (myexponent & 0x7fffLL);
3475 return APInt(80, words);
3476 }
3477
/// Encode this legacy PPC double-double value as a 128-bit APInt made of two
/// IEEE double bit patterns. Word 0 is this value rounded to double. Word 1
/// is the remainder (this value minus word 0) converted to double, or zero
/// when the first conversion lost nothing or produced a special value.
APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
  assert(partCount()==2);

  uint64_t words[2];
  opStatus fs;
  bool losesInfo;

  // Convert number to double.  To avoid spurious underflows, we re-
  // normalize against the "double" minExponent first, and only *then*
  // truncate the mantissa.  The result of that second conversion
  // may be inexact, but should never underflow.
  // Declare fltSemantics before APFloat that uses it (and
  // saves pointer to it) to ensure correct destruction order.
  fltSemantics extendedSemantics = *semantics;
  extendedSemantics.minExponent = semIEEEdouble.minExponent;
  IEEEFloat extended(*this);
  fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  (void)fs; // Only read by the assert; silences unused warnings in NDEBUG.

  // High double: the value rounded to double precision.
  IEEEFloat u(extended);
  fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK || fs == opInexact);
  (void)fs;
  words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();

  // If conversion was exact or resulted in a special case, we're done;
  // just set the second double to zero.  Otherwise, re-convert back to
  // the extended format and compute the difference.  This now should
  // convert exactly to double.
  if (u.isFiniteNonZero() && losesInfo) {
    fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    // Low double: what rounding to the high double left behind.
    IEEEFloat v(extended);
    v.subtract(u, rmNearestTiesToEven);
    fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;
    words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
  } else {
    words[1] = 0;
  }

  return APInt(128, words);
}
3526
/// Encode this value as the bit pattern of the interchange format described
/// by \p S and return it as an APInt of S.sizeInBits bits.
///
/// The trailing significand (without the integer bit) fills the low bits.
/// The biased exponent and the sign are OR-ed into the last 64-bit word.
/// Asserts that this value actually uses semantics \p S.
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
  assert(semantics == &S);

  // Layout constants derived from the semantics at compile time.
  constexpr int bias = -(S.minExponent - 1);
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  // Part index and mask locating the integer bit within the significand.
  constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
  constexpr integerPart integer_bit =
      integerPart{1} << (trailing_significand_bits % integerPartWidth);
  // Mask for the bits below the integer bit within its part.
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;

  uint64_t myexponent;
  std::array<integerPart, partCountForBits(trailing_significand_bits)>
      mysignificand;

  if (isFiniteNonZero()) {
    myexponent = exponent + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
    // A value at the minimum exponent whose integer bit is clear is a
    // denormal, which is encoded with a biased exponent of zero.
    if (myexponent == 1 &&
        !(significandParts()[integer_bit_part] & integer_bit))
      myexponent = 0; // denormal
  } else if (category == fcZero) {
    myexponent = ::exponentZero(S) + bias;
    mysignificand.fill(0);
  } else if (category == fcInfinity) {
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
        S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support inf!");
    myexponent = ::exponentInf(S) + bias;
    mysignificand.fill(0);
  } else {
    assert(category == fcNaN && "Unknown category!");
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support NaN!");
    myexponent = ::exponentNaN(S) + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
  }
  // Assemble the output words: the significand first, then zero fill.
  std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
  auto words_iter =
      std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
  if constexpr (significand_mask != 0) {
    // Clear the integer bit.
    words[mysignificand.size() - 1] &= significand_mask;
  }
  std::fill(words_iter, words.end(), uint64_t{0});
  constexpr size_t last_word = words.size() - 1;
  // The sign occupies the top bit of the format.
  uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
                          << ((S.sizeInBits - 1) % 64);
  words[last_word] |= shifted_sign;
  // The exponent field sits directly above the trailing significand.
  uint64_t shifted_exponent = (myexponent & exponent_mask)
                              << (trailing_significand_bits % 64);
  words[last_word] |= shifted_exponent;
  // Formats that fit in one word use the single-value APInt constructor.
  if constexpr (last_word == 0) {
    return APInt(S.sizeInBits, words[0]);
  }
  return APInt(S.sizeInBits, words);
}
3590
convertQuadrupleAPFloatToAPInt() const3591 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3592 assert(partCount() == 2);
3593 return convertIEEEFloatToAPInt<semIEEEquad>();
3594 }
3595
convertDoubleAPFloatToAPInt() const3596 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3597 assert(partCount()==1);
3598 return convertIEEEFloatToAPInt<semIEEEdouble>();
3599 }
3600
convertFloatAPFloatToAPInt() const3601 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3602 assert(partCount()==1);
3603 return convertIEEEFloatToAPInt<semIEEEsingle>();
3604 }
3605
convertBFloatAPFloatToAPInt() const3606 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3607 assert(partCount() == 1);
3608 return convertIEEEFloatToAPInt<semBFloat>();
3609 }
3610
convertHalfAPFloatToAPInt() const3611 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3612 assert(partCount()==1);
3613 return convertIEEEFloatToAPInt<semIEEEhalf>();
3614 }
3615
convertFloat8E5M2APFloatToAPInt() const3616 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3617 assert(partCount() == 1);
3618 return convertIEEEFloatToAPInt<semFloat8E5M2>();
3619 }
3620
convertFloat8E5M2FNUZAPFloatToAPInt() const3621 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3622 assert(partCount() == 1);
3623 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3624 }
3625
convertFloat8E4M3APFloatToAPInt() const3626 APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3627 assert(partCount() == 1);
3628 return convertIEEEFloatToAPInt<semFloat8E4M3>();
3629 }
3630
convertFloat8E4M3FNAPFloatToAPInt() const3631 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3632 assert(partCount() == 1);
3633 return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3634 }
3635
convertFloat8E4M3FNUZAPFloatToAPInt() const3636 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3637 assert(partCount() == 1);
3638 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3639 }
3640
convertFloat8E4M3B11FNUZAPFloatToAPInt() const3641 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3642 assert(partCount() == 1);
3643 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3644 }
3645
convertFloatTF32APFloatToAPInt() const3646 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3647 assert(partCount() == 1);
3648 return convertIEEEFloatToAPInt<semFloatTF32>();
3649 }
3650
convertFloat6E3M2FNAPFloatToAPInt() const3651 APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3652 assert(partCount() == 1);
3653 return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
3654 }
3655
convertFloat6E2M3FNAPFloatToAPInt() const3656 APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3657 assert(partCount() == 1);
3658 return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
3659 }
3660
convertFloat4E2M1FNAPFloatToAPInt() const3661 APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3662 assert(partCount() == 1);
3663 return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
3664 }
3665
3666 // This function creates an APInt that is just a bit map of the floating
3667 // point constant as it would appear in memory. It is not a conversion,
3668 // and treating the result as a normal integer is unlikely to be useful.
3669
bitcastToAPInt() const3670 APInt IEEEFloat::bitcastToAPInt() const {
3671 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3672 return convertHalfAPFloatToAPInt();
3673
3674 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3675 return convertBFloatAPFloatToAPInt();
3676
3677 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3678 return convertFloatAPFloatToAPInt();
3679
3680 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3681 return convertDoubleAPFloatToAPInt();
3682
3683 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3684 return convertQuadrupleAPFloatToAPInt();
3685
3686 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3687 return convertPPCDoubleDoubleAPFloatToAPInt();
3688
3689 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3690 return convertFloat8E5M2APFloatToAPInt();
3691
3692 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3693 return convertFloat8E5M2FNUZAPFloatToAPInt();
3694
3695 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3696 return convertFloat8E4M3APFloatToAPInt();
3697
3698 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3699 return convertFloat8E4M3FNAPFloatToAPInt();
3700
3701 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3702 return convertFloat8E4M3FNUZAPFloatToAPInt();
3703
3704 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3705 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3706
3707 if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3708 return convertFloatTF32APFloatToAPInt();
3709
3710 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3711 return convertFloat6E3M2FNAPFloatToAPInt();
3712
3713 if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3714 return convertFloat6E2M3FNAPFloatToAPInt();
3715
3716 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
3717 return convertFloat4E2M1FNAPFloatToAPInt();
3718
3719 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3720 "unknown format!");
3721 return convertF80LongDoubleAPFloatToAPInt();
3722 }
3723
convertToFloat() const3724 float IEEEFloat::convertToFloat() const {
3725 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3726 "Float semantics are not IEEEsingle");
3727 APInt api = bitcastToAPInt();
3728 return api.bitsToFloat();
3729 }
3730
convertToDouble() const3731 double IEEEFloat::convertToDouble() const {
3732 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3733 "Float semantics are not IEEEdouble");
3734 APInt api = bitcastToAPInt();
3735 return api.bitsToDouble();
3736 }
3737
#ifdef HAS_IEE754_FLOAT128
/// Returns this value as a host `float128` by reinterpreting its bit pattern.
/// The semantics must be semIEEEquad (asserted). Only compiled when
/// HAS_IEE754_FLOAT128 is defined.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
         "Float semantics are not IEEEquads");
  return bitcastToAPInt().bitsToQuad();
}
#endif
3746
3747 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3748 /// does not support these bit patterns:
3749 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3750 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3751 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3752 /// exponent = 0, integer bit 1 ("pseudodenormal")
3753 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3754 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3755 uint64_t i1 = api.getRawData()[0];
3756 uint64_t i2 = api.getRawData()[1];
3757 uint64_t myexponent = (i2 & 0x7fff);
3758 uint64_t mysignificand = i1;
3759 uint8_t myintegerbit = mysignificand >> 63;
3760
3761 initialize(&semX87DoubleExtended);
3762 assert(partCount()==2);
3763
3764 sign = static_cast<unsigned int>(i2>>15);
3765 if (myexponent == 0 && mysignificand == 0) {
3766 makeZero(sign);
3767 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3768 makeInf(sign);
3769 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3770 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3771 category = fcNaN;
3772 exponent = exponentNaN();
3773 significandParts()[0] = mysignificand;
3774 significandParts()[1] = 0;
3775 } else {
3776 category = fcNormal;
3777 exponent = myexponent - 16383;
3778 significandParts()[0] = mysignificand;
3779 significandParts()[1] = 0;
3780 if (myexponent==0) // denormal
3781 exponent = -16382;
3782 }
3783 }
3784
initFromPPCDoubleDoubleAPInt(const APInt & api)3785 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3786 uint64_t i1 = api.getRawData()[0];
3787 uint64_t i2 = api.getRawData()[1];
3788 opStatus fs;
3789 bool losesInfo;
3790
3791 // Get the first double and convert to our format.
3792 initFromDoubleAPInt(APInt(64, i1));
3793 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3794 assert(fs == opOK && !losesInfo);
3795 (void)fs;
3796
3797 // Unless we have a special case, add in second double.
3798 if (isFiniteNonZero()) {
3799 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3800 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3801 assert(fs == opOK && !losesInfo);
3802 (void)fs;
3803
3804 add(v, rmNearestTiesToEven);
3805 }
3806 }
3807
3808 template <const fltSemantics &S>
initFromIEEEAPInt(const APInt & api)3809 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3810 assert(api.getBitWidth() == S.sizeInBits);
3811 constexpr integerPart integer_bit = integerPart{1}
3812 << ((S.precision - 1) % integerPartWidth);
3813 constexpr uint64_t significand_mask = integer_bit - 1;
3814 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3815 constexpr unsigned int stored_significand_parts =
3816 partCountForBits(trailing_significand_bits);
3817 constexpr unsigned int exponent_bits =
3818 S.sizeInBits - 1 - trailing_significand_bits;
3819 static_assert(exponent_bits < 64);
3820 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3821 constexpr int bias = -(S.minExponent - 1);
3822
3823 // Copy the bits of the significand. We need to clear out the exponent and
3824 // sign bit in the last word.
3825 std::array<integerPart, stored_significand_parts> mysignificand;
3826 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3827 if constexpr (significand_mask != 0) {
3828 mysignificand[mysignificand.size() - 1] &= significand_mask;
3829 }
3830
3831 // We assume the last word holds the sign bit, the exponent, and potentially
3832 // some of the trailing significand field.
3833 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3834 uint64_t myexponent =
3835 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3836
3837 initialize(&S);
3838 assert(partCount() == mysignificand.size());
3839
3840 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3841
3842 bool all_zero_significand =
3843 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3844
3845 bool is_zero = myexponent == 0 && all_zero_significand;
3846
3847 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3848 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3849 makeInf(sign);
3850 return;
3851 }
3852 }
3853
3854 bool is_nan = false;
3855
3856 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3857 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3858 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3859 bool all_ones_significand =
3860 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3861 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3862 (!significand_mask ||
3863 mysignificand[mysignificand.size() - 1] == significand_mask);
3864 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3865 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3866 is_nan = is_zero && sign;
3867 }
3868
3869 if (is_nan) {
3870 category = fcNaN;
3871 exponent = ::exponentNaN(S);
3872 std::copy_n(mysignificand.begin(), mysignificand.size(),
3873 significandParts());
3874 return;
3875 }
3876
3877 if (is_zero) {
3878 makeZero(sign);
3879 return;
3880 }
3881
3882 category = fcNormal;
3883 exponent = myexponent - bias;
3884 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3885 if (myexponent == 0) // denormal
3886 exponent = S.minExponent;
3887 else
3888 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3889 }
3890
initFromQuadrupleAPInt(const APInt & api)3891 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3892 initFromIEEEAPInt<semIEEEquad>(api);
3893 }
3894
initFromDoubleAPInt(const APInt & api)3895 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3896 initFromIEEEAPInt<semIEEEdouble>(api);
3897 }
3898
initFromFloatAPInt(const APInt & api)3899 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3900 initFromIEEEAPInt<semIEEEsingle>(api);
3901 }
3902
initFromBFloatAPInt(const APInt & api)3903 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3904 initFromIEEEAPInt<semBFloat>(api);
3905 }
3906
initFromHalfAPInt(const APInt & api)3907 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3908 initFromIEEEAPInt<semIEEEhalf>(api);
3909 }
3910
initFromFloat8E5M2APInt(const APInt & api)3911 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3912 initFromIEEEAPInt<semFloat8E5M2>(api);
3913 }
3914
initFromFloat8E5M2FNUZAPInt(const APInt & api)3915 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3916 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
3917 }
3918
initFromFloat8E4M3APInt(const APInt & api)3919 void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3920 initFromIEEEAPInt<semFloat8E4M3>(api);
3921 }
3922
initFromFloat8E4M3FNAPInt(const APInt & api)3923 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3924 initFromIEEEAPInt<semFloat8E4M3FN>(api);
3925 }
3926
initFromFloat8E4M3FNUZAPInt(const APInt & api)3927 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3928 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
3929 }
3930
initFromFloat8E4M3B11FNUZAPInt(const APInt & api)3931 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3932 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
3933 }
3934
initFromFloatTF32APInt(const APInt & api)3935 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3936 initFromIEEEAPInt<semFloatTF32>(api);
3937 }
3938
initFromFloat6E3M2FNAPInt(const APInt & api)3939 void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3940 initFromIEEEAPInt<semFloat6E3M2FN>(api);
3941 }
3942
initFromFloat6E2M3FNAPInt(const APInt & api)3943 void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3944 initFromIEEEAPInt<semFloat6E2M3FN>(api);
3945 }
3946
initFromFloat4E2M1FNAPInt(const APInt & api)3947 void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3948 initFromIEEEAPInt<semFloat4E2M1FN>(api);
3949 }
3950
3951 /// Treat api as containing the bits of a floating point number.
initFromAPInt(const fltSemantics * Sem,const APInt & api)3952 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3953 assert(api.getBitWidth() == Sem->sizeInBits);
3954 if (Sem == &semIEEEhalf)
3955 return initFromHalfAPInt(api);
3956 if (Sem == &semBFloat)
3957 return initFromBFloatAPInt(api);
3958 if (Sem == &semIEEEsingle)
3959 return initFromFloatAPInt(api);
3960 if (Sem == &semIEEEdouble)
3961 return initFromDoubleAPInt(api);
3962 if (Sem == &semX87DoubleExtended)
3963 return initFromF80LongDoubleAPInt(api);
3964 if (Sem == &semIEEEquad)
3965 return initFromQuadrupleAPInt(api);
3966 if (Sem == &semPPCDoubleDoubleLegacy)
3967 return initFromPPCDoubleDoubleAPInt(api);
3968 if (Sem == &semFloat8E5M2)
3969 return initFromFloat8E5M2APInt(api);
3970 if (Sem == &semFloat8E5M2FNUZ)
3971 return initFromFloat8E5M2FNUZAPInt(api);
3972 if (Sem == &semFloat8E4M3)
3973 return initFromFloat8E4M3APInt(api);
3974 if (Sem == &semFloat8E4M3FN)
3975 return initFromFloat8E4M3FNAPInt(api);
3976 if (Sem == &semFloat8E4M3FNUZ)
3977 return initFromFloat8E4M3FNUZAPInt(api);
3978 if (Sem == &semFloat8E4M3B11FNUZ)
3979 return initFromFloat8E4M3B11FNUZAPInt(api);
3980 if (Sem == &semFloatTF32)
3981 return initFromFloatTF32APInt(api);
3982 if (Sem == &semFloat6E3M2FN)
3983 return initFromFloat6E3M2FNAPInt(api);
3984 if (Sem == &semFloat6E2M3FN)
3985 return initFromFloat6E2M3FNAPInt(api);
3986 if (Sem == &semFloat4E2M1FN)
3987 return initFromFloat4E2M1FNAPInt(api);
3988
3989 llvm_unreachable(nullptr);
3990 }
3991
3992 /// Make this number the largest magnitude normal number in the given
3993 /// semantics.
makeLargest(bool Negative)3994 void IEEEFloat::makeLargest(bool Negative) {
3995 // We want (in interchange format):
3996 // sign = {Negative}
3997 // exponent = 1..10
3998 // significand = 1..1
3999 category = fcNormal;
4000 sign = Negative;
4001 exponent = semantics->maxExponent;
4002
4003 // Use memset to set all but the highest integerPart to all ones.
4004 integerPart *significand = significandParts();
4005 unsigned PartCount = partCount();
4006 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4007
4008 // Set the high integerPart especially setting all unused top bits for
4009 // internal consistency.
4010 const unsigned NumUnusedHighBits =
4011 PartCount*integerPartWidth - semantics->precision;
4012 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4013 ? (~integerPart(0) >> NumUnusedHighBits)
4014 : 0;
4015
4016 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4017 semantics->nanEncoding == fltNanEncoding::AllOnes)
4018 significand[0] &= ~integerPart(1);
4019 }
4020
4021 /// Make this number the smallest magnitude denormal number in the given
4022 /// semantics.
makeSmallest(bool Negative)4023 void IEEEFloat::makeSmallest(bool Negative) {
4024 // We want (in interchange format):
4025 // sign = {Negative}
4026 // exponent = 0..0
4027 // significand = 0..01
4028 category = fcNormal;
4029 sign = Negative;
4030 exponent = semantics->minExponent;
4031 APInt::tcSet(significandParts(), 1, partCount());
4032 }
4033
makeSmallestNormalized(bool Negative)4034 void IEEEFloat::makeSmallestNormalized(bool Negative) {
4035 // We want (in interchange format):
4036 // sign = {Negative}
4037 // exponent = 0..0
4038 // significand = 10..0
4039
4040 category = fcNormal;
4041 zeroSignificand();
4042 sign = Negative;
4043 exponent = semantics->minExponent;
4044 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4045 }
4046
IEEEFloat(const fltSemantics & Sem,const APInt & API)4047 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4048 initFromAPInt(&Sem, API);
4049 }
4050
IEEEFloat(float f)4051 IEEEFloat::IEEEFloat(float f) {
4052 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
4053 }
4054
IEEEFloat(double d)4055 IEEEFloat::IEEEFloat(double d) {
4056 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
4057 }
4058
4059 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)4060 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4061 Buffer.append(Str.begin(), Str.end());
4062 }
4063
4064 /// Removes data from the given significand until it is no more
4065 /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)4066 void AdjustToPrecision(APInt &significand,
4067 int &exp, unsigned FormatPrecision) {
4068 unsigned bits = significand.getActiveBits();
4069
4070 // 196/59 is a very slight overestimate of lg_2(10).
4071 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4072
4073 if (bits <= bitsRequired) return;
4074
4075 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4076 if (!tensRemovable) return;
4077
4078 exp += tensRemovable;
4079
4080 APInt divisor(significand.getBitWidth(), 1);
4081 APInt powten(significand.getBitWidth(), 10);
4082 while (true) {
4083 if (tensRemovable & 1)
4084 divisor *= powten;
4085 tensRemovable >>= 1;
4086 if (!tensRemovable) break;
4087 powten *= powten;
4088 }
4089
4090 significand = significand.udiv(divisor);
4091
4092 // Truncate the significand down to its active bit count.
4093 significand = significand.trunc(significand.getActiveBits());
4094 }
4095
4096
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)4097 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4098 int &exp, unsigned FormatPrecision) {
4099 unsigned N = buffer.size();
4100 if (N <= FormatPrecision) return;
4101
4102 // The most significant figures are the last ones in the buffer.
4103 unsigned FirstSignificant = N - FormatPrecision;
4104
4105 // Round.
4106 // FIXME: this probably shouldn't use 'round half up'.
4107
4108 // Rounding down is just a truncation, except we also want to drop
4109 // trailing zeros from the new result.
4110 if (buffer[FirstSignificant - 1] < '5') {
4111 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4112 FirstSignificant++;
4113
4114 exp += FirstSignificant;
4115 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4116 return;
4117 }
4118
4119 // Rounding up requires a decimal add-with-carry. If we continue
4120 // the carry, the newly-introduced zeros will just be truncated.
4121 for (unsigned I = FirstSignificant; I != N; ++I) {
4122 if (buffer[I] == '9') {
4123 FirstSignificant++;
4124 } else {
4125 buffer[I]++;
4126 break;
4127 }
4128 }
4129
4130 // If we carried through, we have exactly one digit of precision.
4131 if (FirstSignificant == N) {
4132 exp += FirstSignificant;
4133 buffer.clear();
4134 buffer.push_back('1');
4135 return;
4136 }
4137
4138 exp += FirstSignificant;
4139 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4140 }
4141
toStringImpl(SmallVectorImpl<char> & Str,const bool isNeg,int exp,APInt significand,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero)4142 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4143 APInt significand, unsigned FormatPrecision,
4144 unsigned FormatMaxPadding, bool TruncateZero) {
4145 const int semanticsPrecision = significand.getBitWidth();
4146
4147 if (isNeg)
4148 Str.push_back('-');
4149
4150 // Set FormatPrecision if zero. We want to do this before we
4151 // truncate trailing zeros, as those are part of the precision.
4152 if (!FormatPrecision) {
4153 // We use enough digits so the number can be round-tripped back to an
4154 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4155 // Accurately" by Steele and White.
4156 // FIXME: Using a formula based purely on the precision is conservative;
4157 // we can print fewer digits depending on the actual value being printed.
4158
4159 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4160 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4161 }
4162
4163 // Ignore trailing binary zeros.
4164 int trailingZeros = significand.countr_zero();
4165 exp += trailingZeros;
4166 significand.lshrInPlace(trailingZeros);
4167
4168 // Change the exponent from 2^e to 10^e.
4169 if (exp == 0) {
4170 // Nothing to do.
4171 } else if (exp > 0) {
4172 // Just shift left.
4173 significand = significand.zext(semanticsPrecision + exp);
4174 significand <<= exp;
4175 exp = 0;
4176 } else { /* exp < 0 */
4177 int texp = -exp;
4178
4179 // We transform this using the identity:
4180 // (N)(2^-e) == (N)(5^e)(10^-e)
4181 // This means we have to multiply N (the significand) by 5^e.
4182 // To avoid overflow, we have to operate on numbers large
4183 // enough to store N * 5^e:
4184 // log2(N * 5^e) == log2(N) + e * log2(5)
4185 // <= semantics->precision + e * 137 / 59
4186 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4187
4188 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4189
4190 // Multiply significand by 5^e.
4191 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4192 significand = significand.zext(precision);
4193 APInt five_to_the_i(precision, 5);
4194 while (true) {
4195 if (texp & 1)
4196 significand *= five_to_the_i;
4197
4198 texp >>= 1;
4199 if (!texp)
4200 break;
4201 five_to_the_i *= five_to_the_i;
4202 }
4203 }
4204
4205 AdjustToPrecision(significand, exp, FormatPrecision);
4206
4207 SmallVector<char, 256> buffer;
4208
4209 // Fill the buffer.
4210 unsigned precision = significand.getBitWidth();
4211 if (precision < 4) {
4212 // We need enough precision to store the value 10.
4213 precision = 4;
4214 significand = significand.zext(precision);
4215 }
4216 APInt ten(precision, 10);
4217 APInt digit(precision, 0);
4218
4219 bool inTrail = true;
4220 while (significand != 0) {
4221 // digit <- significand % 10
4222 // significand <- significand / 10
4223 APInt::udivrem(significand, ten, significand, digit);
4224
4225 unsigned d = digit.getZExtValue();
4226
4227 // Drop trailing zeros.
4228 if (inTrail && !d)
4229 exp++;
4230 else {
4231 buffer.push_back((char) ('0' + d));
4232 inTrail = false;
4233 }
4234 }
4235
4236 assert(!buffer.empty() && "no characters in buffer!");
4237
4238 // Drop down to FormatPrecision.
4239 // TODO: don't do more precise calculations above than are required.
4240 AdjustToPrecision(buffer, exp, FormatPrecision);
4241
4242 unsigned NDigits = buffer.size();
4243
4244 // Check whether we should use scientific notation.
4245 bool FormatScientific;
4246 if (!FormatMaxPadding)
4247 FormatScientific = true;
4248 else {
4249 if (exp >= 0) {
4250 // 765e3 --> 765000
4251 // ^^^
4252 // But we shouldn't make the number look more precise than it is.
4253 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4254 NDigits + (unsigned) exp > FormatPrecision);
4255 } else {
4256 // Power of the most significant digit.
4257 int MSD = exp + (int) (NDigits - 1);
4258 if (MSD >= 0) {
4259 // 765e-2 == 7.65
4260 FormatScientific = false;
4261 } else {
4262 // 765e-5 == 0.00765
4263 // ^ ^^
4264 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4265 }
4266 }
4267 }
4268
4269 // Scientific formatting is pretty straightforward.
4270 if (FormatScientific) {
4271 exp += (NDigits - 1);
4272
4273 Str.push_back(buffer[NDigits-1]);
4274 Str.push_back('.');
4275 if (NDigits == 1 && TruncateZero)
4276 Str.push_back('0');
4277 else
4278 for (unsigned I = 1; I != NDigits; ++I)
4279 Str.push_back(buffer[NDigits-1-I]);
4280 // Fill with zeros up to FormatPrecision.
4281 if (!TruncateZero && FormatPrecision > NDigits - 1)
4282 Str.append(FormatPrecision - NDigits + 1, '0');
4283 // For !TruncateZero we use lower 'e'.
4284 Str.push_back(TruncateZero ? 'E' : 'e');
4285
4286 Str.push_back(exp >= 0 ? '+' : '-');
4287 if (exp < 0)
4288 exp = -exp;
4289 SmallVector<char, 6> expbuf;
4290 do {
4291 expbuf.push_back((char) ('0' + (exp % 10)));
4292 exp /= 10;
4293 } while (exp);
4294 // Exponent always at least two digits if we do not truncate zeros.
4295 if (!TruncateZero && expbuf.size() < 2)
4296 expbuf.push_back('0');
4297 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4298 Str.push_back(expbuf[E-1-I]);
4299 return;
4300 }
4301
4302 // Non-scientific, positive exponents.
4303 if (exp >= 0) {
4304 for (unsigned I = 0; I != NDigits; ++I)
4305 Str.push_back(buffer[NDigits-1-I]);
4306 for (unsigned I = 0; I != (unsigned) exp; ++I)
4307 Str.push_back('0');
4308 return;
4309 }
4310
4311 // Non-scientific, negative exponents.
4312
4313 // The number of digits to the left of the decimal point.
4314 int NWholeDigits = exp + (int) NDigits;
4315
4316 unsigned I = 0;
4317 if (NWholeDigits > 0) {
4318 for (; I != (unsigned) NWholeDigits; ++I)
4319 Str.push_back(buffer[NDigits-I-1]);
4320 Str.push_back('.');
4321 } else {
4322 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4323
4324 Str.push_back('0');
4325 Str.push_back('.');
4326 for (unsigned Z = 1; Z != NZeros; ++Z)
4327 Str.push_back('0');
4328 }
4329
4330 for (; I != NDigits; ++I)
4331 Str.push_back(buffer[NDigits-I-1]);
4332
4333 }
4334 } // namespace
4335
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4336 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4337 unsigned FormatMaxPadding, bool TruncateZero) const {
4338 switch (category) {
4339 case fcInfinity:
4340 if (isNegative())
4341 return append(Str, "-Inf");
4342 else
4343 return append(Str, "+Inf");
4344
4345 case fcNaN: return append(Str, "NaN");
4346
4347 case fcZero:
4348 if (isNegative())
4349 Str.push_back('-');
4350
4351 if (!FormatMaxPadding) {
4352 if (TruncateZero)
4353 append(Str, "0.0E+0");
4354 else {
4355 append(Str, "0.0");
4356 if (FormatPrecision > 1)
4357 Str.append(FormatPrecision - 1, '0');
4358 append(Str, "e+00");
4359 }
4360 } else
4361 Str.push_back('0');
4362 return;
4363
4364 case fcNormal:
4365 break;
4366 }
4367
4368 // Decompose the number into an APInt and an exponent.
4369 int exp = exponent - ((int) semantics->precision - 1);
4370 APInt significand(
4371 semantics->precision,
4372 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4373
4374 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4375 FormatMaxPadding, TruncateZero);
4376
4377 }
4378
getExactInverse(APFloat * inv) const4379 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4380 // Special floats and denormals have no exact inverse.
4381 if (!isFiniteNonZero())
4382 return false;
4383
4384 // Check that the number is a power of two by making sure that only the
4385 // integer bit is set in the significand.
4386 if (significandLSB() != semantics->precision - 1)
4387 return false;
4388
4389 // Get the inverse.
4390 IEEEFloat reciprocal(*semantics, 1ULL);
4391 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4392 return false;
4393
4394 // Avoid multiplication with a denormal, it is not safe on all platforms and
4395 // may be slower than a normal division.
4396 if (reciprocal.isDenormal())
4397 return false;
4398
4399 assert(reciprocal.isFiniteNonZero() &&
4400 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4401
4402 if (inv)
4403 *inv = APFloat(reciprocal, *semantics);
4404
4405 return true;
4406 }
4407
getExactLog2Abs() const4408 int IEEEFloat::getExactLog2Abs() const {
4409 if (!isFinite() || isZero())
4410 return INT_MIN;
4411
4412 const integerPart *Parts = significandParts();
4413 const int PartCount = partCountForBits(semantics->precision);
4414
4415 int PopCount = 0;
4416 for (int i = 0; i < PartCount; ++i) {
4417 PopCount += llvm::popcount(Parts[i]);
4418 if (PopCount > 1)
4419 return INT_MIN;
4420 }
4421
4422 if (exponent != semantics->minExponent)
4423 return exponent;
4424
4425 int CountrParts = 0;
4426 for (int i = 0; i < PartCount;
4427 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4428 if (Parts[i] != 0) {
4429 return exponent - semantics->precision + CountrParts +
4430 llvm::countr_zero(Parts[i]) + 1;
4431 }
4432 }
4433
4434 llvm_unreachable("didn't find the set bit");
4435 }
4436
isSignaling() const4437 bool IEEEFloat::isSignaling() const {
4438 if (!isNaN())
4439 return false;
4440 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4441 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4442 return false;
4443
4444 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4445 // first bit of the trailing significand being 0.
4446 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4447 }
4448
/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// Replaces this value in place with its neighbour in the requested
/// direction: nextUp when \p nextDown is false, nextDown when it is true.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
///
/// \returns opInvalidOp when the input is a signaling NaN, opOK otherwise.
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Formats that use the negative-zero bit pattern for NaN get +0 instead.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      break;
    }

    // Stepping up from the largest positive finite value leaves the finite
    // range; what it becomes depends on the format's non-finite behavior.
    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else if (semantics->nonFiniteBehavior ==
                 fltNonfiniteBehavior::FiniteOnly) {
        // nextUp(getLargest()) == getLargest()
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}
4580
/// Forwards to the file-scope ::exponentNaN helper with this value's
/// semantics (presumably the exponent stored for NaNs in that format).
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}
4584
/// Forwards to the file-scope ::exponentInf helper with this value's
/// semantics; makeInf() stores the result as the exponent of an infinity.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}
4588
/// Forwards to the file-scope ::exponentZero helper with this value's
/// semantics; makeZero() stores the result as the exponent of a zero.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}
4592
makeInf(bool Negative)4593 void IEEEFloat::makeInf(bool Negative) {
4594 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4595 llvm_unreachable("This floating point format does not support Inf");
4596
4597 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4598 // There is no Inf, so make NaN instead.
4599 makeNaN(false, Negative);
4600 return;
4601 }
4602 category = fcInfinity;
4603 sign = Negative;
4604 exponent = exponentInf();
4605 APInt::tcSet(significandParts(), 0, partCount());
4606 }
4607
makeZero(bool Negative)4608 void IEEEFloat::makeZero(bool Negative) {
4609 category = fcZero;
4610 sign = Negative;
4611 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4612 // Merge negative zero to positive because 0b10000...000 is used for NaN
4613 sign = false;
4614 }
4615 exponent = exponentZero();
4616 APInt::tcSet(significandParts(), 0, partCount());
4617 }
4618
makeQuiet()4619 void IEEEFloat::makeQuiet() {
4620 assert(isNaN());
4621 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4622 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4623 }
4624
ilogb(const IEEEFloat & Arg)4625 int ilogb(const IEEEFloat &Arg) {
4626 if (Arg.isNaN())
4627 return IEEEFloat::IEK_NaN;
4628 if (Arg.isZero())
4629 return IEEEFloat::IEK_Zero;
4630 if (Arg.isInfinity())
4631 return IEEEFloat::IEK_Inf;
4632 if (!Arg.isDenormal())
4633 return Arg.exponent;
4634
4635 IEEEFloat Normalized(Arg);
4636 int SignificandBits = Arg.getSemantics().precision - 1;
4637
4638 Normalized.exponent += SignificandBits;
4639 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4640 return Normalized.exponent - SignificandBits;
4641 }
4642
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)4643 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4644 auto MaxExp = X.getSemantics().maxExponent;
4645 auto MinExp = X.getSemantics().minExponent;
4646
4647 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4648 // overflow; clamp it to a safe range before adding, but ensure that the range
4649 // is large enough that the clamp does not change the result. The range we
4650 // need to support is the difference between the largest possible exponent and
4651 // the normalized exponent of half the smallest denormal.
4652
4653 int SignificandBits = X.getSemantics().precision - 1;
4654 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4655
4656 // Clamp to one past the range ends to let normalize handle overlflow.
4657 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4658 X.normalize(RoundingMode, lfExactlyZero);
4659 if (X.isNaN())
4660 X.makeQuiet();
4661 return X;
4662 }
4663
/// Split \p Val into a fraction in +/-[0.5, 1.0) and a power of two stored in
/// \p Exp.
///
/// For NaN and infinity, \p Exp receives IEK_NaN / IEK_Inf and the (quieted)
/// input is returned. For zero, \p Exp is set to 0.
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
  Exp = ilogb(Val);

  // Infinity passes through unchanged.
  if (Exp == IEEEFloat::IEK_Inf)
    return Val;

  // Quiet signalling nans.
  if (Exp == IEEEFloat::IEK_NaN) {
    IEEEFloat Quieted(Val);
    Quieted.makeQuiet();
    return Quieted;
  }

  // 1 is added because frexp is defined to return a normalized fraction in
  // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
  if (Exp == IEEEFloat::IEK_Zero)
    Exp = 0;
  else
    Exp = Exp + 1;

  return scalbn(Val, -Exp, RM);
}
4682
/// Construct a double-double whose two halves are each built as
/// APFloat(semIEEEdouble). \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4688
/// Construct a double-double whose two halves are built with the
/// `uninitialized` tag, i.e. without giving them a defined value.
/// \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
                            APFloat(semIEEEdouble, uninitialized)}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4695
/// Construct a double-double from an integerPart: the first half is built as
/// APFloat(semIEEEdouble, I) and the second half as APFloat(semIEEEdouble).
/// \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
                                           APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4701
/// Construct a double-double from a raw bit pattern: raw word 0 of \p I is
/// the 64-bit pattern of the first half and raw word 1 that of the second.
/// \p S must be semPPCDoubleDouble.
// NOTE(review): reads I.getRawData()[1], so this assumes I holds at least two
// 64-bit words (e.g. a 128-bit value) -- confirm against callers.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4709
/// Construct a double-double by taking ownership of two existing halves.
/// \p S must be semPPCDoubleDouble and both \p First and \p Second must use
/// semIEEEdouble semantics (checked by assertions).
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
                             APFloat &&Second)
    : Semantics(&S),
      Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
  assert(Semantics == &semPPCDoubleDouble);
  assert(&Floats[0].getSemantics() == &semIEEEdouble);
  assert(&Floats[1].getSemantics() == &semIEEEdouble);
}
4718
/// Copy constructor. Deep-copies both halves of \p RHS; if \p RHS has no
/// storage (Floats is null) the copy has none either.
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
    : Semantics(RHS.Semantics),
      Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
                                         APFloat(RHS.Floats[1])}
                        : nullptr) {
  assert(Semantics == &semPPCDoubleDouble);
}
4726
/// Move constructor. Takes over the storage of \p RHS and tags \p RHS with
/// semBogus semantics.
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
    : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
  RHS.Semantics = &semBogus;
  assert(Semantics == &semPPCDoubleDouble);
}
4732
/// Copy assignment.
///
/// When both sides share the same semantics and \p RHS has storage, the two
/// halves are assigned element-wise into the existing storage. Otherwise
/// (and unless this is a self-assignment) the object is destroyed and
/// copy-constructed in place from \p RHS.
DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
  if (Semantics == RHS.Semantics && RHS.Floats) {
    Floats[0] = RHS.Floats[0];
    Floats[1] = RHS.Floats[1];
  } else if (this != &RHS) {
    // Rebuild from scratch: run the destructor, then placement-new a copy.
    this->~DoubleAPFloat();
    new (this) DoubleAPFloat(RHS);
  }
  return *this;
}
4743
// Implement addition, subtraction, multiplication and division based on:
// "Software for Doubled-Precision Floating-Point Computations",
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.

/// Compute (a + aa) + (c + cc) and store the result in this object, where
/// (a, aa) and (c, cc) are the high/low halves of the two operands.
///
/// The parameters are taken by reference but the caller in this file passes
/// copies, so they do not alias Floats[0] / Floats[1], which are overwritten
/// here. Returns the OR of the statuses of the component operations, except
/// on the early exit for an exactly +0 correction term, which returns opOK.
APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
                                         const APFloat &c, const APFloat &cc,
                                         roundingMode RM) {
  int Status = opOK;
  APFloat z = a;
  Status |= z.add(c, RM);
  if (!z.isFinite()) {
    // a + c is not finite. If it is not an infinity either (i.e. NaN), that
    // is the final result.
    if (!z.isInfinity()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // a + c overflowed to infinity: discard that status and retry, adding
    // the low parts first and the larger-magnitude high part last.
    Status = opOK;
    auto AComparedToC = a.compareAbsoluteValue(c);
    z = cc;
    Status |= z.add(aa, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // z = cc + aa + c + a;
      Status |= z.add(c, RM);
      Status |= z.add(a, RM);
    } else {
      // z = cc + aa + a + c;
      Status |= z.add(a, RM);
      Status |= z.add(c, RM);
    }
    if (!z.isFinite()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    Floats[0] = z;
    APFloat zz = aa;
    Status |= zz.add(cc, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // Floats[1] = a - z + c + zz;
      Floats[1] = a;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(c, RM);
      Status |= Floats[1].add(zz, RM);
    } else {
      // Floats[1] = c - z + a + zz;
      Floats[1] = c;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(a, RM);
      Status |= Floats[1].add(zz, RM);
    }
  } else {
    // q = a - z;
    APFloat q = a;
    Status |= q.subtract(z, RM);

    // zz = q + c + (a - (q + z)) + aa + cc;
    // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
    auto zz = q;
    Status |= zz.add(c, RM);
    Status |= q.add(z, RM);
    Status |= q.subtract(a, RM);
    q.changeSign();
    Status |= zz.add(q, RM);
    Status |= zz.add(aa, RM);
    Status |= zz.add(cc, RM);
    // A correction term of exactly +0 means z is already the full result.
    if (zz.isZero() && !zz.isNegative()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return opOK;
    }
    Floats[0] = z;
    Status |= Floats[0].add(zz, RM);
    if (!Floats[0].isFinite()) {
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // Low half = (z - high) + zz, i.e. what the high half could not hold.
    Floats[1] = std::move(z);
    Status |= Floats[1].subtract(Floats[0], RM);
    Status |= Floats[1].add(zz, RM);
  }
  return (opStatus)Status;
}
4825
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)4826 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4827 const DoubleAPFloat &RHS,
4828 DoubleAPFloat &Out,
4829 roundingMode RM) {
4830 if (LHS.getCategory() == fcNaN) {
4831 Out = LHS;
4832 return opOK;
4833 }
4834 if (RHS.getCategory() == fcNaN) {
4835 Out = RHS;
4836 return opOK;
4837 }
4838 if (LHS.getCategory() == fcZero) {
4839 Out = RHS;
4840 return opOK;
4841 }
4842 if (RHS.getCategory() == fcZero) {
4843 Out = LHS;
4844 return opOK;
4845 }
4846 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4847 LHS.isNegative() != RHS.isNegative()) {
4848 Out.makeNaN(false, Out.isNegative(), nullptr);
4849 return opInvalidOp;
4850 }
4851 if (LHS.getCategory() == fcInfinity) {
4852 Out = LHS;
4853 return opOK;
4854 }
4855 if (RHS.getCategory() == fcInfinity) {
4856 Out = RHS;
4857 return opOK;
4858 }
4859 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4860
4861 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4862 CC(RHS.Floats[1]);
4863 assert(&A.getSemantics() == &semIEEEdouble);
4864 assert(&AA.getSemantics() == &semIEEEdouble);
4865 assert(&C.getSemantics() == &semIEEEdouble);
4866 assert(&CC.getSemantics() == &semIEEEdouble);
4867 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4868 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4869 return Out.addImpl(A, AA, C, CC, RM);
4870 }
4871
/// In-place addition: *this = *this + RHS, rounded with \p RM. Delegates to
/// addWithSpecial() with *this as both the left operand and the output.
APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
                                     roundingMode RM) {
  return addWithSpecial(*this, RHS, *this, RM);
}
4876
subtract(const DoubleAPFloat & RHS,roundingMode RM)4877 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4878 roundingMode RM) {
4879 changeSign();
4880 auto Ret = add(RHS, RM);
4881 changeSign();
4882 return Ret;
4883 }
4884
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4885 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4886 APFloat::roundingMode RM) {
4887 const auto &LHS = *this;
4888 auto &Out = *this;
4889 /* Interesting observation: For special categories, finding the lowest
4890 common ancestor of the following layered graph gives the correct
4891 return category:
4892
4893 NaN
4894 / \
4895 Zero Inf
4896 \ /
4897 Normal
4898
4899 e.g. NaN * NaN = NaN
4900 Zero * Inf = NaN
4901 Normal * Zero = Zero
4902 Normal * Inf = Inf
4903 */
4904 if (LHS.getCategory() == fcNaN) {
4905 Out = LHS;
4906 return opOK;
4907 }
4908 if (RHS.getCategory() == fcNaN) {
4909 Out = RHS;
4910 return opOK;
4911 }
4912 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4913 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4914 Out.makeNaN(false, false, nullptr);
4915 return opOK;
4916 }
4917 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4918 Out = LHS;
4919 return opOK;
4920 }
4921 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4922 Out = RHS;
4923 return opOK;
4924 }
4925 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4926 "Special cases not handled exhaustively");
4927
4928 int Status = opOK;
4929 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4930 // t = a * c
4931 APFloat T = A;
4932 Status |= T.multiply(C, RM);
4933 if (!T.isFiniteNonZero()) {
4934 Floats[0] = T;
4935 Floats[1].makeZero(/* Neg = */ false);
4936 return (opStatus)Status;
4937 }
4938
4939 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4940 APFloat Tau = A;
4941 T.changeSign();
4942 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4943 T.changeSign();
4944 {
4945 // v = a * d
4946 APFloat V = A;
4947 Status |= V.multiply(D, RM);
4948 // w = b * c
4949 APFloat W = B;
4950 Status |= W.multiply(C, RM);
4951 Status |= V.add(W, RM);
4952 // tau += v + w
4953 Status |= Tau.add(V, RM);
4954 }
4955 // u = t + tau
4956 APFloat U = T;
4957 Status |= U.add(Tau, RM);
4958
4959 Floats[0] = U;
4960 if (!U.isFinite()) {
4961 Floats[1].makeZero(/* Neg = */ false);
4962 } else {
4963 // Floats[1] = (t - u) + tau
4964 Status |= T.subtract(U, RM);
4965 Status |= T.add(Tau, RM);
4966 Floats[1] = T;
4967 }
4968 return (opStatus)Status;
4969 }
4970
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4971 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4972 APFloat::roundingMode RM) {
4973 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4974 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4975 auto Ret =
4976 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4977 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4978 return Ret;
4979 }
4980
remainder(const DoubleAPFloat & RHS)4981 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4982 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4983 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4984 auto Ret =
4985 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4986 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4987 return Ret;
4988 }
4989
mod(const DoubleAPFloat & RHS)4990 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4991 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4992 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4993 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4994 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4995 return Ret;
4996 }
4997
4998 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4999 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
5000 const DoubleAPFloat &Addend,
5001 APFloat::roundingMode RM) {
5002 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5003 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5004 auto Ret = Tmp.fusedMultiplyAdd(
5005 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
5006 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
5007 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5008 return Ret;
5009 }
5010
roundToIntegral(APFloat::roundingMode RM)5011 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
5012 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5013 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5014 auto Ret = Tmp.roundToIntegral(RM);
5015 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5016 return Ret;
5017 }
5018
changeSign()5019 void DoubleAPFloat::changeSign() {
5020 Floats[0].changeSign();
5021 Floats[1].changeSign();
5022 }
5023
/// Compare the magnitudes of *this and \p RHS.
///
/// The high halves are compared first. Only when they have equal magnitude
/// do the low halves matter; their contribution depends on whether each low
/// half has the opposite sign of its high half.
APFloat::cmpResult
DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
  auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
  if (Result != cmpEqual)
    return Result;
  Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
  if (Result == cmpLessThan || Result == cmpGreaterThan) {
    // "Against" is true when the low half has the opposite sign of the high
    // half, i.e. it reduces the overall magnitude.
    auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
    auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
    // Only our low half subtracts: we are the smaller magnitude.
    if (Against && !RHSAgainst)
      return cmpLessThan;
    // Only RHS's low half subtracts: we are the larger magnitude.
    if (!Against && RHSAgainst)
      return cmpGreaterThan;
    // Both low halves add: the low-half comparison stands.
    if (!Against && !RHSAgainst)
      return Result;
    // Both low halves subtract: the low-half comparison is inverted.
    if (Against && RHSAgainst)
      return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
  }
  return Result;
}
5044
/// The category of a double-double is the category of its first half.
APFloat::fltCategory DoubleAPFloat::getCategory() const {
  return Floats[0].getCategory();
}
5048
/// The sign of a double-double is the sign of its first half.
bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5050
/// Make this value an infinity with the given sign: the first half becomes
/// the infinity and the second half becomes +0.
void DoubleAPFloat::makeInf(bool Neg) {
  Floats[0].makeInf(Neg);
  Floats[1].makeZero(/* Neg = */ false);
}
5055
/// Make this value a zero with the given sign: the first half carries the
/// sign and the second half is always +0.
void DoubleAPFloat::makeZero(bool Neg) {
  Floats[0].makeZero(Neg);
  Floats[1].makeZero(/* Neg = */ false);
}
5060
makeLargest(bool Neg)5061 void DoubleAPFloat::makeLargest(bool Neg) {
5062 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5063 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5064 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5065 if (Neg)
5066 changeSign();
5067 }
5068
/// Set this value to the smallest-magnitude non-zero double-double with the
/// given sign: the first half is the smallest double, the second half is +0.
void DoubleAPFloat::makeSmallest(bool Neg) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  Floats[0].makeSmallest(Neg);
  Floats[1].makeZero(/* Neg = */ false);
}
5074
makeSmallestNormalized(bool Neg)5075 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
5076 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5077 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5078 if (Neg)
5079 Floats[0].changeSign();
5080 Floats[1].makeZero(/* Neg = */ false);
5081 }
5082
/// Make this value a NaN: the first half becomes the NaN described by
/// \p SNaN, \p Neg and \p fill, and the second half becomes +0.
void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
  Floats[0].makeNaN(SNaN, Neg, fill);
  Floats[1].makeZero(/* Neg = */ false);
}
5087
compare(const DoubleAPFloat & RHS) const5088 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
5089 auto Result = Floats[0].compare(RHS.Floats[0]);
5090 // |Float[0]| > |Float[1]|
5091 if (Result == APFloat::cmpEqual)
5092 return Floats[1].compare(RHS.Floats[1]);
5093 return Result;
5094 }
5095
bitwiseIsEqual(const DoubleAPFloat & RHS) const5096 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
5097 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5098 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5099 }
5100
hash_value(const DoubleAPFloat & Arg)5101 hash_code hash_value(const DoubleAPFloat &Arg) {
5102 if (Arg.Floats)
5103 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5104 return hash_combine(Arg.Semantics);
5105 }
5106
bitcastToAPInt() const5107 APInt DoubleAPFloat::bitcastToAPInt() const {
5108 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5109 uint64_t Data[] = {
5110 Floats[0].bitcastToAPInt().getRawData()[0],
5111 Floats[1].bitcastToAPInt().getRawData()[0],
5112 };
5113 return APInt(128, 2, Data);
5114 }
5115
convertFromString(StringRef S,roundingMode RM)5116 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
5117 roundingMode RM) {
5118 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5119 APFloat Tmp(semPPCDoubleDoubleLegacy);
5120 auto Ret = Tmp.convertFromString(S, RM);
5121 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5122 return Ret;
5123 }
5124
next(bool nextDown)5125 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5126 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5127 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5128 auto Ret = Tmp.next(nextDown);
5129 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5130 return Ret;
5131 }
5132
5133 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const5134 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5135 unsigned int Width, bool IsSigned,
5136 roundingMode RM, bool *IsExact) const {
5137 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5138 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5139 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5140 }
5141
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)5142 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5143 bool IsSigned,
5144 roundingMode RM) {
5145 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5146 APFloat Tmp(semPPCDoubleDoubleLegacy);
5147 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5148 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5149 return Ret;
5150 }
5151
5152 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)5153 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5154 unsigned int InputSize,
5155 bool IsSigned, roundingMode RM) {
5156 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5157 APFloat Tmp(semPPCDoubleDoubleLegacy);
5158 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5159 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5160 return Ret;
5161 }
5162
5163 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)5164 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5165 unsigned int InputSize,
5166 bool IsSigned, roundingMode RM) {
5167 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5168 APFloat Tmp(semPPCDoubleDoubleLegacy);
5169 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5170 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5171 return Ret;
5172 }
5173
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const5174 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5175 unsigned int HexDigits,
5176 bool UpperCase,
5177 roundingMode RM) const {
5178 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5179 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5180 .convertToHexString(DST, HexDigits, UpperCase, RM);
5181 }
5182
isDenormal() const5183 bool DoubleAPFloat::isDenormal() const {
5184 return getCategory() == fcNormal &&
5185 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5186 // (double)(Hi + Lo) == Hi defines a normal number.
5187 Floats[0] != Floats[0] + Floats[1]);
5188 }
5189
isSmallest() const5190 bool DoubleAPFloat::isSmallest() const {
5191 if (getCategory() != fcNormal)
5192 return false;
5193 DoubleAPFloat Tmp(*this);
5194 Tmp.makeSmallest(this->isNegative());
5195 return Tmp.compare(*this) == cmpEqual;
5196 }
5197
isSmallestNormalized() const5198 bool DoubleAPFloat::isSmallestNormalized() const {
5199 if (getCategory() != fcNormal)
5200 return false;
5201
5202 DoubleAPFloat Tmp(*this);
5203 Tmp.makeSmallestNormalized(this->isNegative());
5204 return Tmp.compare(*this) == cmpEqual;
5205 }
5206
isLargest() const5207 bool DoubleAPFloat::isLargest() const {
5208 if (getCategory() != fcNormal)
5209 return false;
5210 DoubleAPFloat Tmp(*this);
5211 Tmp.makeLargest(this->isNegative());
5212 return Tmp.compare(*this) == cmpEqual;
5213 }
5214
isInteger() const5215 bool DoubleAPFloat::isInteger() const {
5216 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5217 return Floats[0].isInteger() && Floats[1].isInteger();
5218 }
5219
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const5220 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5221 unsigned FormatPrecision,
5222 unsigned FormatMaxPadding,
5223 bool TruncateZero) const {
5224 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5225 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5226 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5227 }
5228
getExactInverse(APFloat * inv) const5229 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5230 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5231 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5232 if (!inv)
5233 return Tmp.getExactInverse(nullptr);
5234 APFloat Inv(semPPCDoubleDoubleLegacy);
5235 auto Ret = Tmp.getExactInverse(&Inv);
5236 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5237 return Ret;
5238 }
5239
/// Not implemented for double-double values: always returns INT_MIN, the
/// same value IEEEFloat::getExactLog2Abs() uses for "no exact log2".
int DoubleAPFloat::getExactLog2() const {
  // TODO: Implement me
  return INT_MIN;
}
5244
/// Not implemented for double-double values: always returns INT_MIN, the
/// same value IEEEFloat::getExactLog2Abs() uses for "no exact log2".
int DoubleAPFloat::getExactLog2Abs() const {
  // TODO: Implement me
  return INT_MIN;
}
5249
/// Return \p Arg scaled by 2^\p Exp: both halves are scaled independently
/// with the same exponent and rounding mode.
DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
                     APFloat::roundingMode RM) {
  assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");

  APFloat High = scalbn(Arg.Floats[0], Exp, RM);
  APFloat Low = scalbn(Arg.Floats[1], Exp, RM);
  return DoubleAPFloat(semPPCDoubleDouble, std::move(High), std::move(Low));
}
5256
/// Split \p Arg into a fraction and a power of two stored in \p Exp.
///
/// The first half goes through frexp(), which also determines \p Exp. For
/// values in the fcNormal category the second half is scaled by the same
/// amount; otherwise it is carried over unchanged.
DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
                    APFloat::roundingMode RM) {
  assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");

  // Must run first: it sets Exp, which the low half's scaling depends on.
  APFloat High = frexp(Arg.Floats[0], Exp, RM);

  const bool IsNormal = Arg.getCategory() == APFloat::fcNormal;
  APFloat Low = IsNormal ? scalbn(Arg.Floats[1], -Exp, RM) : Arg.Floats[1];

  return DoubleAPFloat(semPPCDoubleDouble, std::move(High), std::move(Low));
}
5266
5267 } // namespace detail
5268
/// Construct the storage from an IEEEFloat according to the layout that
/// \p Semantics uses.
///
/// For the IEEEFloat layout, \p F is moved into the IEEE member. For the
/// DoubleAPFloat layout, \p F becomes the first half and the second half is
/// built as APFloat(semIEEEdouble).
APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
  if (usesLayout<IEEEFloat>(Semantics)) {
    new (&IEEE) IEEEFloat(std::move(F));
    return;
  }
  if (usesLayout<DoubleAPFloat>(Semantics)) {
    // Grab F's semantics before F is moved from in the same expression below.
    const fltSemantics& S = F.getSemantics();
    new (&Double)
        DoubleAPFloat(Semantics, APFloat(std::move(F), S),
                      APFloat(semIEEEdouble));
    return;
  }
  llvm_unreachable("Unexpected semantics");
}
5283
/// Parse \p Str into this value using \p RM, dispatching to the IEEEFloat or
/// DoubleAPFloat implementation that matches the current semantics.
Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
                                                       roundingMode RM) {
  // The macro returns from this function on every path.
  APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
}
5288
hash_value(const APFloat & Arg)5289 hash_code hash_value(const APFloat &Arg) {
5290 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5291 return hash_value(Arg.U.IEEE);
5292 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5293 return hash_value(Arg.U.Double);
5294 llvm_unreachable("Unexpected semantics");
5295 }
5296
APFloat(const fltSemantics & Semantics,StringRef S)5297 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5298 : APFloat(Semantics) {
5299 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5300 assert(StatusOrErr && "Invalid floating point representation");
5301 consumeError(StatusOrErr.takeError());
5302 }
5303
classify() const5304 FPClassTest APFloat::classify() const {
5305 if (isZero())
5306 return isNegative() ? fcNegZero : fcPosZero;
5307 if (isNormal())
5308 return isNegative() ? fcNegNormal : fcPosNormal;
5309 if (isDenormal())
5310 return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5311 if (isInfinity())
5312 return isNegative() ? fcNegInf : fcPosInf;
5313 assert(isNaN() && "Other class of FP constant");
5314 return isSignaling() ? fcSNan : fcQNan;
5315 }
5316
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)5317 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5318 roundingMode RM, bool *losesInfo) {
5319 if (&getSemantics() == &ToSemantics) {
5320 *losesInfo = false;
5321 return opOK;
5322 }
5323 if (usesLayout<IEEEFloat>(getSemantics()) &&
5324 usesLayout<IEEEFloat>(ToSemantics))
5325 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5326 if (usesLayout<IEEEFloat>(getSemantics()) &&
5327 usesLayout<DoubleAPFloat>(ToSemantics)) {
5328 assert(&ToSemantics == &semPPCDoubleDouble);
5329 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5330 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5331 return Ret;
5332 }
5333 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5334 usesLayout<IEEEFloat>(ToSemantics)) {
5335 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5336 *this = APFloat(std::move(getIEEE()), ToSemantics);
5337 return Ret;
5338 }
5339 llvm_unreachable("Unexpected semantics");
5340 }
5341
getAllOnesValue(const fltSemantics & Semantics)5342 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5343 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5344 }
5345
print(raw_ostream & OS) const5346 void APFloat::print(raw_ostream &OS) const {
5347 SmallVector<char, 16> Buffer;
5348 toString(Buffer);
5349 OS << Buffer << "\n";
5350 }
5351
5352 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const5353 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5354 #endif
5355
Profile(FoldingSetNodeID & NID) const5356 void APFloat::Profile(FoldingSetNodeID &NID) const {
5357 NID.Add(bitcastToAPInt());
5358 }
5359
5360 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5361 an APSInt, whose initial bit-width and signed-ness are used to determine the
5362 precision of the conversion.
5363 */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const5364 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5365 roundingMode rounding_mode,
5366 bool *isExact) const {
5367 unsigned bitWidth = result.getBitWidth();
5368 SmallVector<uint64_t, 4> parts(result.getNumWords());
5369 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5370 rounding_mode, isExact);
5371 // Keeps the original signed-ness.
5372 result = APInt(bitWidth, parts);
5373 return status;
5374 }
5375
convertToDouble() const5376 double APFloat::convertToDouble() const {
5377 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5378 return getIEEE().convertToDouble();
5379 assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5380 "Float semantics is not representable by IEEEdouble");
5381 APFloat Temp = *this;
5382 bool LosesInfo;
5383 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5384 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5385 (void)St;
5386 return Temp.getIEEE().convertToDouble();
5387 }
5388
#ifdef HAS_IEE754_FLOAT128
/// Returns this value as a host float128. Only compiled when
/// HAS_IEE754_FLOAT128 is defined.
///
/// A value that already uses semIEEEquad is read directly. Any other
/// semantics must be representable by IEEEquad (asserted); a copy is then
/// converted with round-to-nearest-even, and that conversion is asserted to
/// be exact and lossless.
float128 APFloat::convertToQuad() const {
  const fltSemantics &QuadSem = semIEEEquad;
  if (&getSemantics() == &QuadSem)
    return getIEEE().convertToQuad();

  assert(getSemantics().isRepresentableBy(QuadSem) &&
         "Float semantics is not representable by IEEEquad");
  // Work on a copy so *this is left untouched.
  APFloat Converted(*this);
  bool LosesInfo;
  const opStatus Status =
      Converted.convert(QuadSem, rmNearestTiesToEven, &LosesInfo);
  assert(!(Status & opInexact) && !LosesInfo && "Unexpected imprecision");
  (void)Status;
  return Converted.getIEEE().convertToQuad();
}
#endif
5403
convertToFloat() const5404 float APFloat::convertToFloat() const {
5405 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5406 return getIEEE().convertToFloat();
5407 assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5408 "Float semantics is not representable by IEEEsingle");
5409 APFloat Temp = *this;
5410 bool LosesInfo;
5411 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5412 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5413 (void)St;
5414 return Temp.getIEEE().convertToFloat();
5415 }
5416
5417 } // namespace llvm
5418
5419 #undef APFLOAT_DISPATCH_ON_SEMANTICS
5420