xref: /freebsd/contrib/llvm-project/libc/src/__support/FPUtil/FPBits.h (revision bb722a7d0f1642bff6487f943ad0427799a6e5bf)
1 //===-- Abstract class for bit manipulation of float numbers. ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // -----------------------------------------------------------------------------
10 //                               **** WARNING ****
11 // This file is shared with libc++. You should also be careful when adding
12 // dependencies to this file, since it needs to build for all libc++ targets.
13 // -----------------------------------------------------------------------------
14 
15 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H
16 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H
17 
18 #include "src/__support/CPP/bit.h"
19 #include "src/__support/CPP/type_traits.h"
20 #include "src/__support/common.h"
21 #include "src/__support/libc_assert.h"       // LIBC_ASSERT
22 #include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR
23 #include "src/__support/macros/config.h"
24 #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_FLOAT128
25 #include "src/__support/math_extras.h"             // mask_trailing_ones
26 #include "src/__support/sign.h"                    // Sign
27 #include "src/__support/uint128.h"
28 
29 #include <stdint.h>
30 
31 namespace LIBC_NAMESPACE_DECL {
32 namespace fputil {
33 
// Identifies each floating point encoding this header can manipulate. Every
// enumerator selects the matching 'FPLayout' specialization.
enum class FPType {
  // IEEE 754 16-bit binary format.
  IEEE754_Binary16,
  // IEEE 754 32-bit binary format.
  IEEE754_Binary32,
  // IEEE 754 64-bit binary format.
  IEEE754_Binary64,
  // IEEE 754 128-bit binary format.
  IEEE754_Binary128,
  // X86 80-bit extended precision format.
  X86_Binary80,
  // 16-bit "brain" floating point format.
  BFloat16,
};
43 
44 // The classes hierarchy is as follows:
45 //
46 //             ┌───────────────────┐
47 //             │ FPLayout<FPType>  │
48 //             └─────────▲─────────┘
49 //                       │
50 //             ┌─────────┴─────────┐
51 //             │ FPStorage<FPType> │
52 //             └─────────▲─────────┘
53 //                       │
54 //          ┌────────────┴─────────────┐
55 //          │                          │
56 // ┌────────┴─────────┐ ┌──────────────┴──────────────────┐
57 // │ FPRepSem<FPType> │ │  FPRepSem<FPType::X86_Binary80> │
58 // └────────▲─────────┘ └──────────────▲──────────────────┘
59 //          │                          │
60 //          └────────────┬─────────────┘
61 //                       │
62 //               ┌───────┴───────┐
63 //               │  FPRepImpl<T> │
64 //               └───────▲───────┘
65 //                       │
66 //              ┌────────┴────────┐
67 //        ┌─────┴─────┐     ┌─────┴─────┐
68 //        │  FPRep<T> │     │ FPBits<T> │
69 //        └───────────┘     └───────────┘
70 //
71 // - 'FPLayout' defines only a few constants, namely the 'StorageType' and
72 //   length of the sign, the exponent, fraction and significand parts.
73 // - 'FPStorage' builds more constants on top of those from 'FPLayout' like
74 //   exponent bias and masks. It also holds the bit representation of the
75 //   floating point as a 'StorageType' type and defines tools to assemble or
76 //   test these parts.
77 // - 'FPRepSem' defines functions to interact semantically with the floating
78 //   point representation. The default implementation is the one for 'IEEE754',
79 //   a specialization is provided for X86 Extended Precision.
80 // - 'FPRepImpl' derives from 'FPRepSem' and adds functions that are common to
81 //   all implementations or build on the ones in 'FPRepSem'.
82 // - 'FPRep' exposes all functions from 'FPRepImpl' and returns 'FPRep'
83 //   instances when using Builders (static functions to create values).
84 // - 'FPBits' exposes all the functions from 'FPRepImpl' but operates on the
85 //   native C++ floating point type instead of 'FPType'. An additional 'get_val'
86 //   function allows getting the C++ floating point type value back. Builders
87 //   called from 'FPBits' return 'FPBits' instances.
88 
89 namespace internal {
90 
91 // Defines the layout (sign, exponent, significand) of a floating point type in
92 // memory. It also defines its associated StorageType, i.e., the unsigned
93 // integer type used to manipulate its representation.
94 // Additionally we provide the fractional part length, i.e., the number of bits
95 // after the decimal dot when the number is in normal form.
96 template <FPType> struct FPLayout {};
97 
98 template <> struct FPLayout<FPType::IEEE754_Binary16> {
99   using StorageType = uint16_t;
100   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
101   LIBC_INLINE_VAR static constexpr int EXP_LEN = 5;
102   LIBC_INLINE_VAR static constexpr int SIG_LEN = 10;
103   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
104 };
105 
106 template <> struct FPLayout<FPType::IEEE754_Binary32> {
107   using StorageType = uint32_t;
108   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
109   LIBC_INLINE_VAR static constexpr int EXP_LEN = 8;
110   LIBC_INLINE_VAR static constexpr int SIG_LEN = 23;
111   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
112 };
113 
114 template <> struct FPLayout<FPType::IEEE754_Binary64> {
115   using StorageType = uint64_t;
116   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
117   LIBC_INLINE_VAR static constexpr int EXP_LEN = 11;
118   LIBC_INLINE_VAR static constexpr int SIG_LEN = 52;
119   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
120 };
121 
122 template <> struct FPLayout<FPType::IEEE754_Binary128> {
123   using StorageType = UInt128;
124   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
125   LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
126   LIBC_INLINE_VAR static constexpr int SIG_LEN = 112;
127   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
128 };
129 
130 template <> struct FPLayout<FPType::X86_Binary80> {
131 #if __SIZEOF_LONG_DOUBLE__ == 12
132   using StorageType = UInt<__SIZEOF_LONG_DOUBLE__ * CHAR_BIT>;
133 #else
134   using StorageType = UInt128;
135 #endif
136   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
137   LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
138   LIBC_INLINE_VAR static constexpr int SIG_LEN = 64;
139   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - 1;
140 };
141 
142 template <> struct FPLayout<FPType::BFloat16> {
143   using StorageType = uint16_t;
144   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
145   LIBC_INLINE_VAR static constexpr int EXP_LEN = 8;
146   LIBC_INLINE_VAR static constexpr int SIG_LEN = 7;
147   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
148 };
149 
150 // FPStorage derives useful constants from the FPLayout above.
151 template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> {
152   using UP = FPLayout<fp_type>;
153 
154   using UP::EXP_LEN;  // The number of bits for the *exponent* part
155   using UP::SIG_LEN;  // The number of bits for the *significand* part
156   using UP::SIGN_LEN; // The number of bits for the *sign* part
157   // For convenience, the sum of `SIG_LEN`, `EXP_LEN`, and `SIGN_LEN`.
158   LIBC_INLINE_VAR static constexpr int TOTAL_LEN = SIGN_LEN + EXP_LEN + SIG_LEN;
159 
160   // The number of bits after the decimal dot when the number is in normal form.
161   using UP::FRACTION_LEN;
162 
163   // An unsigned integer that is wide enough to contain all of the floating
164   // point bits.
165   using StorageType = typename UP::StorageType;
166 
167   // The number of bits in StorageType.
168   LIBC_INLINE_VAR static constexpr int STORAGE_LEN =
169       sizeof(StorageType) * CHAR_BIT;
170   static_assert(STORAGE_LEN >= TOTAL_LEN);
171 
172   // The exponent bias. Always positive.
173   LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS =
174       (1U << (EXP_LEN - 1U)) - 1U;
175   static_assert(EXP_BIAS > 0);
176 
177   // The bit pattern that keeps only the *significand* part.
178   LIBC_INLINE_VAR static constexpr StorageType SIG_MASK =
179       mask_trailing_ones<StorageType, SIG_LEN>();
180   // The bit pattern that keeps only the *exponent* part.
181   LIBC_INLINE_VAR static constexpr StorageType EXP_MASK =
182       mask_trailing_ones<StorageType, EXP_LEN>() << SIG_LEN;
183   // The bit pattern that keeps only the *sign* part.
184   LIBC_INLINE_VAR static constexpr StorageType SIGN_MASK =
185       mask_trailing_ones<StorageType, SIGN_LEN>() << (EXP_LEN + SIG_LEN);
186   // The bit pattern that keeps only the *exponent + significand* part.
187   LIBC_INLINE_VAR static constexpr StorageType EXP_SIG_MASK =
188       mask_trailing_ones<StorageType, EXP_LEN + SIG_LEN>();
189   // The bit pattern that keeps only the *sign + exponent + significand* part.
190   LIBC_INLINE_VAR static constexpr StorageType FP_MASK =
191       mask_trailing_ones<StorageType, TOTAL_LEN>();
192   // The bit pattern that keeps only the *fraction* part.
193   // i.e., the *significand* without the leading one.
194   LIBC_INLINE_VAR static constexpr StorageType FRACTION_MASK =
195       mask_trailing_ones<StorageType, FRACTION_LEN>();
196 
197   static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint");
198   static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover");
199 
200 protected:
201   // Merge bits from 'a' and 'b' values according to 'mask'.
202   // Use 'a' bits when corresponding 'mask' bits are zeroes and 'b' bits when
203   // corresponding bits are ones.
204   LIBC_INLINE static constexpr StorageType merge(StorageType a, StorageType b,
205                                                  StorageType mask) {
206     // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
207     return a ^ ((a ^ b) & mask);
208   }
209 
210   // A stongly typed integer that prevents mixing and matching integers with
211   // different semantics.
212   template <typename T> struct TypedInt {
213     using value_type = T;
214     LIBC_INLINE constexpr explicit TypedInt(T value) : value(value) {}
215     LIBC_INLINE constexpr TypedInt(const TypedInt &value) = default;
216     LIBC_INLINE constexpr TypedInt &operator=(const TypedInt &value) = default;
217 
218     LIBC_INLINE constexpr explicit operator T() const { return value; }
219 
220     LIBC_INLINE constexpr StorageType to_storage_type() const {
221       return StorageType(value);
222     }
223 
224     LIBC_INLINE friend constexpr bool operator==(TypedInt a, TypedInt b) {
225       return a.value == b.value;
226     }
227     LIBC_INLINE friend constexpr bool operator!=(TypedInt a, TypedInt b) {
228       return a.value != b.value;
229     }
230 
231   protected:
232     T value;
233   };
234 
235   // An opaque type to store a floating point exponent.
236   // We define special values but it is valid to create arbitrary values as long
237   // as they are in the range [min, max].
238   struct Exponent : public TypedInt<int32_t> {
239     using UP = TypedInt<int32_t>;
240     using UP::UP;
241     LIBC_INLINE static constexpr auto subnormal() {
242       return Exponent(-EXP_BIAS);
243     }
244     LIBC_INLINE static constexpr auto min() { return Exponent(1 - EXP_BIAS); }
245     LIBC_INLINE static constexpr auto zero() { return Exponent(0); }
246     LIBC_INLINE static constexpr auto max() { return Exponent(EXP_BIAS); }
247     LIBC_INLINE static constexpr auto inf() { return Exponent(EXP_BIAS + 1); }
248   };
249 
250   // An opaque type to store a floating point biased exponent.
251   // We define special values but it is valid to create arbitrary values as long
252   // as they are in the range [zero, bits_all_ones].
253   // Values greater than bits_all_ones are truncated.
254   struct BiasedExponent : public TypedInt<uint32_t> {
255     using UP = TypedInt<uint32_t>;
256     using UP::UP;
257 
258     LIBC_INLINE constexpr BiasedExponent(Exponent exp)
259         : UP(static_cast<uint32_t>(static_cast<int32_t>(exp) + EXP_BIAS)) {}
260 
261     // Cast operator to get convert from BiasedExponent to Exponent.
262     LIBC_INLINE constexpr operator Exponent() const {
263       return Exponent(static_cast<int32_t>(UP::value - EXP_BIAS));
264     }
265 
266     LIBC_INLINE constexpr BiasedExponent &operator++() {
267       LIBC_ASSERT(*this != BiasedExponent(Exponent::inf()));
268       ++UP::value;
269       return *this;
270     }
271 
272     LIBC_INLINE constexpr BiasedExponent &operator--() {
273       LIBC_ASSERT(*this != BiasedExponent(Exponent::subnormal()));
274       --UP::value;
275       return *this;
276     }
277   };
278 
279   // An opaque type to store a floating point significand.
280   // We define special values but it is valid to create arbitrary values as long
281   // as they are in the range [zero, bits_all_ones].
282   // Note that the semantics of the Significand are implementation dependent.
283   // Values greater than bits_all_ones are truncated.
284   struct Significand : public TypedInt<StorageType> {
285     using UP = TypedInt<StorageType>;
286     using UP::UP;
287 
288     LIBC_INLINE friend constexpr Significand operator|(const Significand a,
289                                                        const Significand b) {
290       return Significand(
291           StorageType(a.to_storage_type() | b.to_storage_type()));
292     }
293     LIBC_INLINE friend constexpr Significand operator^(const Significand a,
294                                                        const Significand b) {
295       return Significand(
296           StorageType(a.to_storage_type() ^ b.to_storage_type()));
297     }
298     LIBC_INLINE friend constexpr Significand operator>>(const Significand a,
299                                                         int shift) {
300       return Significand(StorageType(a.to_storage_type() >> shift));
301     }
302 
303     LIBC_INLINE static constexpr auto zero() {
304       return Significand(StorageType(0));
305     }
306     LIBC_INLINE static constexpr auto lsb() {
307       return Significand(StorageType(1));
308     }
309     LIBC_INLINE static constexpr auto msb() {
310       return Significand(StorageType(1) << (SIG_LEN - 1));
311     }
312     LIBC_INLINE static constexpr auto bits_all_ones() {
313       return Significand(SIG_MASK);
314     }
315   };
316 
317   LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp) {
318     return (exp.to_storage_type() << SIG_LEN) & EXP_MASK;
319   }
320 
321   LIBC_INLINE static constexpr StorageType encode(Significand value) {
322     return value.to_storage_type() & SIG_MASK;
323   }
324 
325   LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp,
326                                                   Significand sig) {
327     return encode(exp) | encode(sig);
328   }
329 
330   LIBC_INLINE static constexpr StorageType encode(Sign sign, BiasedExponent exp,
331                                                   Significand sig) {
332     if (sign.is_neg())
333       return SIGN_MASK | encode(exp, sig);
334     return encode(exp, sig);
335   }
336 
337   // The floating point number representation as an unsigned integer.
338   StorageType bits{};
339 
340   LIBC_INLINE constexpr FPStorage() : bits(0) {}
341   LIBC_INLINE constexpr FPStorage(StorageType value) : bits(value) {}
342 
343   // Observers
344   LIBC_INLINE constexpr StorageType exp_bits() const { return bits & EXP_MASK; }
345   LIBC_INLINE constexpr StorageType sig_bits() const { return bits & SIG_MASK; }
346   LIBC_INLINE constexpr StorageType exp_sig_bits() const {
347     return bits & EXP_SIG_MASK;
348   }
349 
350   // Parts
351   LIBC_INLINE constexpr BiasedExponent biased_exponent() const {
352     return BiasedExponent(static_cast<uint32_t>(exp_bits() >> SIG_LEN));
353   }
354   LIBC_INLINE constexpr void set_biased_exponent(BiasedExponent biased) {
355     bits = merge(bits, encode(biased), EXP_MASK);
356   }
357 
358 public:
359   LIBC_INLINE constexpr Sign sign() const {
360     return (bits & SIGN_MASK) ? Sign::NEG : Sign::POS;
361   }
362   LIBC_INLINE constexpr void set_sign(Sign signVal) {
363     if (sign() != signVal)
364       bits ^= SIGN_MASK;
365   }
366 };
367 
368 // This layer defines all functions that are specific to how the the floating
369 // point type is encoded. It enables constructions, modification and observation
370 // of values manipulated as 'StorageType'.
371 template <FPType fp_type, typename RetT>
372 struct FPRepSem : public FPStorage<fp_type> {
373   using UP = FPStorage<fp_type>;
374   using typename UP::StorageType;
375   using UP::FRACTION_LEN;
376   using UP::FRACTION_MASK;
377 
378 protected:
379   using typename UP::Exponent;
380   using typename UP::Significand;
381   using UP::bits;
382   using UP::encode;
383   using UP::exp_bits;
384   using UP::exp_sig_bits;
385   using UP::sig_bits;
386   using UP::UP;
387 
388 public:
389   // Builders
390   LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) {
391     return RetT(encode(sign, Exponent::subnormal(), Significand::zero()));
392   }
393   LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) {
394     return RetT(encode(sign, Exponent::zero(), Significand::zero()));
395   }
396   LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) {
397     return RetT(encode(sign, Exponent::subnormal(), Significand::lsb()));
398   }
399   LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) {
400     return RetT(
401         encode(sign, Exponent::subnormal(), Significand::bits_all_ones()));
402   }
403   LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) {
404     return RetT(encode(sign, Exponent::min(), Significand::zero()));
405   }
406   LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) {
407     return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones()));
408   }
409   LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) {
410     return RetT(encode(sign, Exponent::inf(), Significand::zero()));
411   }
412   LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS,
413                                                   StorageType v = 0) {
414     return RetT(encode(sign, Exponent::inf(),
415                        (v ? Significand(v) : (Significand::msb() >> 1))));
416   }
417   LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS,
418                                               StorageType v = 0) {
419     return RetT(
420         encode(sign, Exponent::inf(), Significand::msb() | Significand(v)));
421   }
422 
423   // Observers
424   LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; }
425   LIBC_INLINE constexpr bool is_nan() const {
426     return exp_sig_bits() > encode(Exponent::inf(), Significand::zero());
427   }
428   LIBC_INLINE constexpr bool is_quiet_nan() const {
429     return exp_sig_bits() >= encode(Exponent::inf(), Significand::msb());
430   }
431   LIBC_INLINE constexpr bool is_signaling_nan() const {
432     return is_nan() && !is_quiet_nan();
433   }
434   LIBC_INLINE constexpr bool is_inf() const {
435     return exp_sig_bits() == encode(Exponent::inf(), Significand::zero());
436   }
437   LIBC_INLINE constexpr bool is_finite() const {
438     return exp_bits() != encode(Exponent::inf());
439   }
440   LIBC_INLINE
441   constexpr bool is_subnormal() const {
442     return exp_bits() == encode(Exponent::subnormal());
443   }
444   LIBC_INLINE constexpr bool is_normal() const {
445     return is_finite() && !is_subnormal();
446   }
447   LIBC_INLINE constexpr RetT next_toward_inf() const {
448     if (is_finite())
449       return RetT(bits + StorageType(1));
450     return RetT(bits);
451   }
452 
453   // Returns the mantissa with the implicit bit set iff the current
454   // value is a valid normal number.
455   LIBC_INLINE constexpr StorageType get_explicit_mantissa() const {
456     if (is_subnormal())
457       return sig_bits();
458     return (StorageType(1) << UP::SIG_LEN) | sig_bits();
459   }
460 };
461 
462 // Specialization for the X86 Extended Precision type.
463 template <typename RetT>
464 struct FPRepSem<FPType::X86_Binary80, RetT>
465     : public FPStorage<FPType::X86_Binary80> {
466   using UP = FPStorage<FPType::X86_Binary80>;
467   using typename UP::StorageType;
468   using UP::FRACTION_LEN;
469   using UP::FRACTION_MASK;
470 
471   // The x86 80 bit float represents the leading digit of the mantissa
472   // explicitly. This is the mask for that bit.
473   static constexpr StorageType EXPLICIT_BIT_MASK = StorageType(1)
474                                                    << FRACTION_LEN;
475   // The X80 significand is made of an explicit bit and the fractional part.
476   static_assert((EXPLICIT_BIT_MASK & FRACTION_MASK) == 0,
477                 "the explicit bit and the fractional part should not overlap");
478   static_assert((EXPLICIT_BIT_MASK | FRACTION_MASK) == SIG_MASK,
479                 "the explicit bit and the fractional part should cover the "
480                 "whole significand");
481 
482 protected:
483   using typename UP::Exponent;
484   using typename UP::Significand;
485   using UP::encode;
486   using UP::UP;
487 
488 public:
489   // Builders
490   LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) {
491     return RetT(encode(sign, Exponent::subnormal(), Significand::zero()));
492   }
493   LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) {
494     return RetT(encode(sign, Exponent::zero(), Significand::msb()));
495   }
496   LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) {
497     return RetT(encode(sign, Exponent::subnormal(), Significand::lsb()));
498   }
499   LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) {
500     return RetT(encode(sign, Exponent::subnormal(),
501                        Significand::bits_all_ones() ^ Significand::msb()));
502   }
503   LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) {
504     return RetT(encode(sign, Exponent::min(), Significand::msb()));
505   }
506   LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) {
507     return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones()));
508   }
509   LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) {
510     return RetT(encode(sign, Exponent::inf(), Significand::msb()));
511   }
512   LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS,
513                                                   StorageType v = 0) {
514     return RetT(encode(sign, Exponent::inf(),
515                        Significand::msb() |
516                            (v ? Significand(v) : (Significand::msb() >> 2))));
517   }
518   LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS,
519                                               StorageType v = 0) {
520     return RetT(encode(sign, Exponent::inf(),
521                        Significand::msb() | (Significand::msb() >> 1) |
522                            Significand(v)));
523   }
524 
525   // Observers
526   LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; }
527   LIBC_INLINE constexpr bool is_nan() const {
528     // Most encoding forms from the table found in
529     // https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format
530     // are interpreted as NaN.
531     // More precisely :
532     // - Pseudo-Infinity
533     // - Pseudo Not a Number
534     // - Signalling Not a Number
535     // - Floating-point Indefinite
536     // - Quiet Not a Number
537     // - Unnormal
538     // This can be reduced to the following logic:
539     if (exp_bits() == encode(Exponent::inf()))
540       return !is_inf();
541     if (exp_bits() != encode(Exponent::subnormal()))
542       return (sig_bits() & encode(Significand::msb())) == 0;
543     return false;
544   }
545   LIBC_INLINE constexpr bool is_quiet_nan() const {
546     return exp_sig_bits() >=
547            encode(Exponent::inf(),
548                   Significand::msb() | (Significand::msb() >> 1));
549   }
550   LIBC_INLINE constexpr bool is_signaling_nan() const {
551     return is_nan() && !is_quiet_nan();
552   }
553   LIBC_INLINE constexpr bool is_inf() const {
554     return exp_sig_bits() == encode(Exponent::inf(), Significand::msb());
555   }
556   LIBC_INLINE constexpr bool is_finite() const {
557     return !is_inf() && !is_nan();
558   }
559   LIBC_INLINE
560   constexpr bool is_subnormal() const {
561     return exp_bits() == encode(Exponent::subnormal());
562   }
563   LIBC_INLINE constexpr bool is_normal() const {
564     const auto exp = exp_bits();
565     if (exp == encode(Exponent::subnormal()) || exp == encode(Exponent::inf()))
566       return false;
567     return get_implicit_bit();
568   }
569   LIBC_INLINE constexpr RetT next_toward_inf() const {
570     if (is_finite()) {
571       if (exp_sig_bits() == max_normal().uintval()) {
572         return inf(sign());
573       } else if (exp_sig_bits() == max_subnormal().uintval()) {
574         return min_normal(sign());
575       } else if (sig_bits() == SIG_MASK) {
576         return RetT(encode(sign(), ++biased_exponent(), Significand::zero()));
577       } else {
578         return RetT(bits + StorageType(1));
579       }
580     }
581     return RetT(bits);
582   }
583 
584   LIBC_INLINE constexpr StorageType get_explicit_mantissa() const {
585     return sig_bits();
586   }
587 
588   // This functions is specific to FPRepSem<FPType::X86_Binary80>.
589   // TODO: Remove if possible.
590   LIBC_INLINE constexpr bool get_implicit_bit() const {
591     return static_cast<bool>(bits & EXPLICIT_BIT_MASK);
592   }
593 
594   // This functions is specific to FPRepSem<FPType::X86_Binary80>.
595   // TODO: Remove if possible.
596   LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) {
597     if (get_implicit_bit() != implicitVal)
598       bits ^= EXPLICIT_BIT_MASK;
599   }
600 };
601 
602 // 'FPRepImpl' is the bottom of the class hierarchy that only deals with
603 // 'FPType'. The operations dealing with specific float semantics are
604 // implemented by 'FPRepSem' above and specialized when needed.
605 //
606 // The 'RetT' type is being propagated up to 'FPRepSem' so that the functions
607 // creating new values (Builders) can return the appropriate type. That is, when
608 // creating a value through 'FPBits' below the builder will return an 'FPBits'
609 // value.
610 // FPBits<float>::zero(); // returns an FPBits<>
611 //
612 // When we don't care about specific C++ floating point type we can use
613 // 'FPRep' and specify the 'FPType' directly.
614 // FPRep<FPType::IEEE754_Binary32:>::zero() // returns an FPRep<>
615 template <FPType fp_type, typename RetT>
616 struct FPRepImpl : public FPRepSem<fp_type, RetT> {
617   using UP = FPRepSem<fp_type, RetT>;
618   using StorageType = typename UP::StorageType;
619 
620 protected:
621   using UP::bits;
622   using UP::encode;
623   using UP::exp_bits;
624   using UP::exp_sig_bits;
625 
626   using typename UP::BiasedExponent;
627   using typename UP::Exponent;
628   using typename UP::Significand;
629 
630   using UP::FP_MASK;
631 
632 public:
633   // Constants.
634   using UP::EXP_BIAS;
635   using UP::EXP_MASK;
636   using UP::FRACTION_MASK;
637   using UP::SIG_LEN;
638   using UP::SIG_MASK;
639   using UP::SIGN_MASK;
640   LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
641       (1 << UP::EXP_LEN) - 1;
642 
643   // CTors
644   LIBC_INLINE constexpr FPRepImpl() = default;
645   LIBC_INLINE constexpr explicit FPRepImpl(StorageType x) : UP(x) {}
646 
647   // Comparison
648   LIBC_INLINE constexpr friend bool operator==(FPRepImpl a, FPRepImpl b) {
649     return a.uintval() == b.uintval();
650   }
651   LIBC_INLINE constexpr friend bool operator!=(FPRepImpl a, FPRepImpl b) {
652     return a.uintval() != b.uintval();
653   }
654 
655   // Representation
656   LIBC_INLINE constexpr StorageType uintval() const { return bits & FP_MASK; }
657   LIBC_INLINE constexpr void set_uintval(StorageType value) {
658     bits = (value & FP_MASK);
659   }
660 
661   // Builders
662   using UP::inf;
663   using UP::max_normal;
664   using UP::max_subnormal;
665   using UP::min_normal;
666   using UP::min_subnormal;
667   using UP::one;
668   using UP::quiet_nan;
669   using UP::signaling_nan;
670   using UP::zero;
671 
672   // Modifiers
673   LIBC_INLINE constexpr RetT abs() const {
674     return RetT(static_cast<StorageType>(bits & UP::EXP_SIG_MASK));
675   }
676 
677   // Observers
678   using UP::get_explicit_mantissa;
679   using UP::is_finite;
680   using UP::is_inf;
681   using UP::is_nan;
682   using UP::is_normal;
683   using UP::is_quiet_nan;
684   using UP::is_signaling_nan;
685   using UP::is_subnormal;
686   using UP::is_zero;
687   using UP::next_toward_inf;
688   using UP::sign;
689   LIBC_INLINE constexpr bool is_inf_or_nan() const { return !is_finite(); }
690   LIBC_INLINE constexpr bool is_neg() const { return sign().is_neg(); }
691   LIBC_INLINE constexpr bool is_pos() const { return sign().is_pos(); }
692 
693   LIBC_INLINE constexpr uint16_t get_biased_exponent() const {
694     return static_cast<uint16_t>(static_cast<uint32_t>(UP::biased_exponent()));
695   }
696 
697   LIBC_INLINE constexpr void set_biased_exponent(StorageType biased) {
698     UP::set_biased_exponent(BiasedExponent(static_cast<uint32_t>(biased)));
699   }
700 
701   LIBC_INLINE constexpr int get_exponent() const {
702     return static_cast<int32_t>(Exponent(UP::biased_exponent()));
703   }
704 
705   // If the number is subnormal, the exponent is treated as if it were the
706   // minimum exponent for a normal number. This is to keep continuity between
707   // the normal and subnormal ranges, but it causes problems for functions where
708   // values are calculated from the exponent, since just subtracting the bias
709   // will give a slightly incorrect result. Additionally, zero has an exponent
710   // of zero, and that should actually be treated as zero.
711   LIBC_INLINE constexpr int get_explicit_exponent() const {
712     Exponent exponent(UP::biased_exponent());
713     if (is_zero())
714       exponent = Exponent::zero();
715     if (exponent == Exponent::subnormal())
716       exponent = Exponent::min();
717     return static_cast<int32_t>(exponent);
718   }
719 
720   LIBC_INLINE constexpr StorageType get_mantissa() const {
721     return bits & FRACTION_MASK;
722   }
723 
724   LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) {
725     bits = UP::merge(bits, mantVal, FRACTION_MASK);
726   }
727 
728   LIBC_INLINE constexpr void set_significand(StorageType sigVal) {
729     bits = UP::merge(bits, sigVal, SIG_MASK);
730   }
731   // Unsafe function to create a floating point representation.
732   // It simply packs the sign, biased exponent and mantissa values without
733   // checking bound nor normalization.
734   //
735   // WARNING: For X86 Extended Precision, implicit bit needs to be set correctly
736   // in the 'mantissa' by the caller.  This function will not check for its
737   // validity.
738   //
739   // FIXME: Use an uint32_t for 'biased_exp'.
740   LIBC_INLINE static constexpr RetT
741   create_value(Sign sign, StorageType biased_exp, StorageType mantissa) {
742     return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)),
743                        Significand(mantissa)));
744   }
745 
746   // The function converts integer number and unbiased exponent to proper
747   // float T type:
748   //   Result = number * 2^(ep+1 - exponent_bias)
749   // Be careful!
750   //   1) "ep" is the raw exponent value.
751   //   2) The function adds +1 to ep for seamless normalized to denormalized
752   //      transition.
753   //   3) The function does not check exponent high limit.
754   //   4) "number" zero value is not processed correctly.
755   //   5) Number is unsigned, so the result can be only positive.
756   LIBC_INLINE static constexpr RetT make_value(StorageType number, int ep) {
757     FPRepImpl result(0);
758     int lz =
759         UP::FRACTION_LEN + 1 - (UP::STORAGE_LEN - cpp::countl_zero(number));
760 
761     number <<= lz;
762     ep -= lz;
763 
764     if (LIBC_LIKELY(ep >= 0)) {
765       // Implicit number bit will be removed by mask
766       result.set_significand(number);
767       result.set_biased_exponent(static_cast<StorageType>(ep + 1));
768     } else {
769       result.set_significand(number >> static_cast<unsigned>(-ep));
770     }
771     return RetT(result.uintval());
772   }
773 };
774 
775 // A generic class to manipulate floating point formats.
776 // It derives its functionality to FPRepImpl above.
777 template <FPType fp_type>
778 struct FPRep : public FPRepImpl<fp_type, FPRep<fp_type>> {
779   using UP = FPRepImpl<fp_type, FPRep<fp_type>>;
780   using StorageType = typename UP::StorageType;
781   using UP::UP;
782 
783   LIBC_INLINE constexpr explicit operator StorageType() const {
784     return UP::uintval();
785   }
786 };
787 
788 } // namespace internal
789 
790 // Returns the FPType corresponding to C++ type T on the host.
791 template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() {
792   using UnqualT = cpp::remove_cv_t<T>;
793   if constexpr (cpp::is_same_v<UnqualT, float> && __FLT_MANT_DIG__ == 24)
794     return FPType::IEEE754_Binary32;
795   else if constexpr (cpp::is_same_v<UnqualT, double> && __DBL_MANT_DIG__ == 53)
796     return FPType::IEEE754_Binary64;
797   else if constexpr (cpp::is_same_v<UnqualT, long double>) {
798     if constexpr (__LDBL_MANT_DIG__ == 53)
799       return FPType::IEEE754_Binary64;
800     else if constexpr (__LDBL_MANT_DIG__ == 64)
801       return FPType::X86_Binary80;
802     else if constexpr (__LDBL_MANT_DIG__ == 113)
803       return FPType::IEEE754_Binary128;
804   }
805 #if defined(LIBC_TYPES_HAS_FLOAT16)
806   else if constexpr (cpp::is_same_v<UnqualT, float16>)
807     return FPType::IEEE754_Binary16;
808 #endif
809 #if defined(LIBC_TYPES_HAS_FLOAT128)
810   else if constexpr (cpp::is_same_v<UnqualT, float128>)
811     return FPType::IEEE754_Binary128;
812 #endif
813   else if constexpr (cpp::is_same_v<UnqualT, bfloat16>)
814     return FPType::BFloat16;
815   else
816     static_assert(cpp::always_false<UnqualT>, "Unsupported type");
817 }
818 
819 // -----------------------------------------------------------------------------
820 //                               **** WARNING ****
821 // This interface is shared with libc++, if you change this interface you need
822 // to update it in both libc and libc++. You should also be careful when adding
823 // dependencies to this file, since it needs to build for all libc++ targets.
824 // -----------------------------------------------------------------------------
825 // A generic class to manipulate C++ floating point formats.
826 // It derives its functionality to FPRepImpl above.
827 template <typename T>
828 struct FPBits final : public internal::FPRepImpl<get_fp_type<T>(), FPBits<T>> {
829   static_assert(cpp::is_floating_point_v<T>,
830                 "FPBits instantiated with invalid type.");
831   using UP = internal::FPRepImpl<get_fp_type<T>(), FPBits<T>>;
832   using StorageType = typename UP::StorageType;
833 
834   // Constructors.
835   LIBC_INLINE constexpr FPBits() = default;
836 
837   template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) {
838     using Unqual = typename cpp::remove_cv_t<XType>;
839     if constexpr (cpp::is_same_v<Unqual, T>) {
840       UP::bits = cpp::bit_cast<StorageType>(x);
841     } else if constexpr (cpp::is_same_v<Unqual, StorageType>) {
842       UP::bits = x;
843     } else {
844       // We don't want accidental type promotions/conversions, so we require
845       // exact type match.
846       static_assert(cpp::always_false<XType>);
847     }
848   }
849 
850   // Floating-point conversions.
851   LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(UP::bits); }
852 };
853 
854 } // namespace fputil
855 } // namespace LIBC_NAMESPACE_DECL
856 
857 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H
858