1*bb722a7dSDimitry Andric //===-- Abstract class for bit manipulation of float numbers. ---*- C++ -*-===// 2*bb722a7dSDimitry Andric // 3*bb722a7dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*bb722a7dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*bb722a7dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*bb722a7dSDimitry Andric // 7*bb722a7dSDimitry Andric //===----------------------------------------------------------------------===// 8*bb722a7dSDimitry Andric 9*bb722a7dSDimitry Andric // ----------------------------------------------------------------------------- 10*bb722a7dSDimitry Andric // **** WARNING **** 11*bb722a7dSDimitry Andric // This file is shared with libc++. You should also be careful when adding 12*bb722a7dSDimitry Andric // dependencies to this file, since it needs to build for all libc++ targets. 13*bb722a7dSDimitry Andric // ----------------------------------------------------------------------------- 14*bb722a7dSDimitry Andric 15*bb722a7dSDimitry Andric #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 16*bb722a7dSDimitry Andric #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 17*bb722a7dSDimitry Andric 18*bb722a7dSDimitry Andric #include "src/__support/CPP/bit.h" 19*bb722a7dSDimitry Andric #include "src/__support/CPP/type_traits.h" 20*bb722a7dSDimitry Andric #include "src/__support/common.h" 21*bb722a7dSDimitry Andric #include "src/__support/libc_assert.h" // LIBC_ASSERT 22*bb722a7dSDimitry Andric #include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR 23*bb722a7dSDimitry Andric #include "src/__support/macros/config.h" 24*bb722a7dSDimitry Andric #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_FLOAT128 25*bb722a7dSDimitry Andric #include "src/__support/math_extras.h" // mask_trailing_ones 26*bb722a7dSDimitry Andric #include "src/__support/sign.h" // Sign 27*bb722a7dSDimitry Andric #include "src/__support/uint128.h" 28*bb722a7dSDimitry Andric 29*bb722a7dSDimitry Andric #include <stdint.h> 30*bb722a7dSDimitry Andric 31*bb722a7dSDimitry Andric namespace LIBC_NAMESPACE_DECL { 32*bb722a7dSDimitry Andric namespace fputil { 33*bb722a7dSDimitry Andric 34*bb722a7dSDimitry Andric // The supported floating point types. 35*bb722a7dSDimitry Andric enum class FPType { 36*bb722a7dSDimitry Andric IEEE754_Binary16, 37*bb722a7dSDimitry Andric IEEE754_Binary32, 38*bb722a7dSDimitry Andric IEEE754_Binary64, 39*bb722a7dSDimitry Andric IEEE754_Binary128, 40*bb722a7dSDimitry Andric X86_Binary80, 41*bb722a7dSDimitry Andric BFloat16 42*bb722a7dSDimitry Andric }; 43*bb722a7dSDimitry Andric 44*bb722a7dSDimitry Andric // The classes hierarchy is as follows: 45*bb722a7dSDimitry Andric // 46*bb722a7dSDimitry Andric // ┌───────────────────┐ 47*bb722a7dSDimitry Andric // │ FPLayout<FPType> │ 48*bb722a7dSDimitry Andric // └─────────▲─────────┘ 49*bb722a7dSDimitry Andric // │ 50*bb722a7dSDimitry Andric // ┌─────────┴─────────┐ 51*bb722a7dSDimitry Andric // │ FPStorage<FPType> │ 52*bb722a7dSDimitry Andric // └─────────▲─────────┘ 53*bb722a7dSDimitry Andric // │ 54*bb722a7dSDimitry Andric // ┌────────────┴─────────────┐ 55*bb722a7dSDimitry Andric // │ │ 56*bb722a7dSDimitry Andric // ┌────────┴─────────┐ ┌──────────────┴──────────────────┐ 57*bb722a7dSDimitry Andric // │ FPRepSem<FPType> │ │ FPRepSem<FPType::X86_Binary80 │ 58*bb722a7dSDimitry Andric // └────────▲─────────┘ └──────────────▲──────────────────┘ 59*bb722a7dSDimitry Andric // │ │ 60*bb722a7dSDimitry Andric // └────────────┬─────────────┘ 61*bb722a7dSDimitry Andric // │ 62*bb722a7dSDimitry Andric // ┌───────┴───────┐ 63*bb722a7dSDimitry Andric // │ FPRepImpl<T> │ 64*bb722a7dSDimitry Andric // └───────▲───────┘ 65*bb722a7dSDimitry Andric // │ 66*bb722a7dSDimitry Andric // ┌────────┴────────┐ 67*bb722a7dSDimitry Andric // ┌─────┴─────┐ ┌─────┴─────┐ 68*bb722a7dSDimitry Andric // │ FPRep<T> │ │ FPBits<T> │ 69*bb722a7dSDimitry Andric // └───────────┘ └───────────┘ 70*bb722a7dSDimitry Andric // 71*bb722a7dSDimitry Andric // - 'FPLayout' defines only a few constants, namely the 'StorageType' and 72*bb722a7dSDimitry Andric // length of the sign, the exponent, fraction and significand parts. 73*bb722a7dSDimitry Andric // - 'FPStorage' builds more constants on top of those from 'FPLayout' like 74*bb722a7dSDimitry Andric // exponent bias and masks. It also holds the bit representation of the 75*bb722a7dSDimitry Andric // floating point as a 'StorageType' type and defines tools to assemble or 76*bb722a7dSDimitry Andric // test these parts. 77*bb722a7dSDimitry Andric // - 'FPRepSem' defines functions to interact semantically with the floating 78*bb722a7dSDimitry Andric // point representation. The default implementation is the one for 'IEEE754', 79*bb722a7dSDimitry Andric // a specialization is provided for X86 Extended Precision. 80*bb722a7dSDimitry Andric // - 'FPRepImpl' derives from 'FPRepSem' and adds functions that are common to 81*bb722a7dSDimitry Andric // all implementations or build on the ones in 'FPRepSem'. 82*bb722a7dSDimitry Andric // - 'FPRep' exposes all functions from 'FPRepImpl' and returns 'FPRep' 83*bb722a7dSDimitry Andric // instances when using Builders (static functions to create values). 84*bb722a7dSDimitry Andric // - 'FPBits' exposes all the functions from 'FPRepImpl' but operates on the 85*bb722a7dSDimitry Andric // native C++ floating point type instead of 'FPType'. An additional 'get_val' 86*bb722a7dSDimitry Andric // function allows getting the C++ floating point type value back. Builders 87*bb722a7dSDimitry Andric // called from 'FPBits' return 'FPBits' instances. 88*bb722a7dSDimitry Andric 89*bb722a7dSDimitry Andric namespace internal { 90*bb722a7dSDimitry Andric 91*bb722a7dSDimitry Andric // Defines the layout (sign, exponent, significand) of a floating point type in 92*bb722a7dSDimitry Andric // memory. It also defines its associated StorageType, i.e., the unsigned 93*bb722a7dSDimitry Andric // integer type used to manipulate its representation. 94*bb722a7dSDimitry Andric // Additionally we provide the fractional part length, i.e., the number of bits 95*bb722a7dSDimitry Andric // after the decimal dot when the number is in normal form. 96*bb722a7dSDimitry Andric template <FPType> struct FPLayout {}; 97*bb722a7dSDimitry Andric 98*bb722a7dSDimitry Andric template <> struct FPLayout<FPType::IEEE754_Binary16> { 99*bb722a7dSDimitry Andric using StorageType = uint16_t; 100*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 101*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int EXP_LEN = 5; 102*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIG_LEN = 10; 103*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 104*bb722a7dSDimitry Andric }; 105*bb722a7dSDimitry Andric 106*bb722a7dSDimitry Andric template <> struct FPLayout<FPType::IEEE754_Binary32> { 107*bb722a7dSDimitry Andric using StorageType = uint32_t; 108*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 109*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int EXP_LEN = 8; 110*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIG_LEN = 23; 111*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 112*bb722a7dSDimitry Andric }; 113*bb722a7dSDimitry Andric 114*bb722a7dSDimitry Andric template <> struct FPLayout<FPType::IEEE754_Binary64> { 115*bb722a7dSDimitry Andric using StorageType = uint64_t; 116*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 117*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int EXP_LEN = 11; 118*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIG_LEN = 52; 119*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 120*bb722a7dSDimitry Andric }; 121*bb722a7dSDimitry Andric 122*bb722a7dSDimitry Andric template <> struct FPLayout<FPType::IEEE754_Binary128> { 123*bb722a7dSDimitry Andric using StorageType = UInt128; 124*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 125*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int EXP_LEN = 15; 126*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIG_LEN = 112; 127*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 128*bb722a7dSDimitry Andric }; 129*bb722a7dSDimitry Andric 130*bb722a7dSDimitry Andric template <> struct FPLayout<FPType::X86_Binary80> { 131*bb722a7dSDimitry Andric #if __SIZEOF_LONG_DOUBLE__ == 12 132*bb722a7dSDimitry Andric using StorageType = UInt<__SIZEOF_LONG_DOUBLE__ * CHAR_BIT>; 133*bb722a7dSDimitry Andric #else 134*bb722a7dSDimitry Andric using StorageType = UInt128; 135*bb722a7dSDimitry Andric #endif 136*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 137*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int EXP_LEN = 15; 138*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIG_LEN = 64; 139*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - 1; 140*bb722a7dSDimitry Andric }; 141*bb722a7dSDimitry Andric 142*bb722a7dSDimitry Andric template <> struct FPLayout<FPType::BFloat16> { 143*bb722a7dSDimitry Andric using StorageType = uint16_t; 144*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 145*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int EXP_LEN = 8; 146*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int SIG_LEN = 7; 147*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 148*bb722a7dSDimitry Andric }; 149*bb722a7dSDimitry Andric 150*bb722a7dSDimitry Andric // FPStorage derives useful constants from the FPLayout above. 151*bb722a7dSDimitry Andric template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> { 152*bb722a7dSDimitry Andric using UP = FPLayout<fp_type>; 153*bb722a7dSDimitry Andric 154*bb722a7dSDimitry Andric using UP::EXP_LEN; // The number of bits for the *exponent* part 155*bb722a7dSDimitry Andric using UP::SIG_LEN; // The number of bits for the *significand* part 156*bb722a7dSDimitry Andric using UP::SIGN_LEN; // The number of bits for the *sign* part 157*bb722a7dSDimitry Andric // For convenience, the sum of `SIG_LEN`, `EXP_LEN`, and `SIGN_LEN`. 158*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int TOTAL_LEN = SIGN_LEN + EXP_LEN + SIG_LEN; 159*bb722a7dSDimitry Andric 160*bb722a7dSDimitry Andric // The number of bits after the decimal dot when the number is in normal form. 161*bb722a7dSDimitry Andric using UP::FRACTION_LEN; 162*bb722a7dSDimitry Andric 163*bb722a7dSDimitry Andric // An unsigned integer that is wide enough to contain all of the floating 164*bb722a7dSDimitry Andric // point bits. 165*bb722a7dSDimitry Andric using StorageType = typename UP::StorageType; 166*bb722a7dSDimitry Andric 167*bb722a7dSDimitry Andric // The number of bits in StorageType. 168*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int STORAGE_LEN = 169*bb722a7dSDimitry Andric sizeof(StorageType) * CHAR_BIT; 170*bb722a7dSDimitry Andric static_assert(STORAGE_LEN >= TOTAL_LEN); 171*bb722a7dSDimitry Andric 172*bb722a7dSDimitry Andric // The exponent bias. Always positive. 173*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS = 174*bb722a7dSDimitry Andric (1U << (EXP_LEN - 1U)) - 1U; 175*bb722a7dSDimitry Andric static_assert(EXP_BIAS > 0); 176*bb722a7dSDimitry Andric 177*bb722a7dSDimitry Andric // The bit pattern that keeps only the *significand* part. 178*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr StorageType SIG_MASK = 179*bb722a7dSDimitry Andric mask_trailing_ones<StorageType, SIG_LEN>(); 180*bb722a7dSDimitry Andric // The bit pattern that keeps only the *exponent* part. 181*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr StorageType EXP_MASK = 182*bb722a7dSDimitry Andric mask_trailing_ones<StorageType, EXP_LEN>() << SIG_LEN; 183*bb722a7dSDimitry Andric // The bit pattern that keeps only the *sign* part. 184*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr StorageType SIGN_MASK = 185*bb722a7dSDimitry Andric mask_trailing_ones<StorageType, SIGN_LEN>() << (EXP_LEN + SIG_LEN); 186*bb722a7dSDimitry Andric // The bit pattern that keeps only the *exponent + significand* part. 187*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr StorageType EXP_SIG_MASK = 188*bb722a7dSDimitry Andric mask_trailing_ones<StorageType, EXP_LEN + SIG_LEN>(); 189*bb722a7dSDimitry Andric // The bit pattern that keeps only the *sign + exponent + significand* part. 190*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr StorageType FP_MASK = 191*bb722a7dSDimitry Andric mask_trailing_ones<StorageType, TOTAL_LEN>(); 192*bb722a7dSDimitry Andric // The bit pattern that keeps only the *fraction* part. 193*bb722a7dSDimitry Andric // i.e., the *significand* without the leading one. 194*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr StorageType FRACTION_MASK = 195*bb722a7dSDimitry Andric mask_trailing_ones<StorageType, FRACTION_LEN>(); 196*bb722a7dSDimitry Andric 197*bb722a7dSDimitry Andric static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint"); 198*bb722a7dSDimitry Andric static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover"); 199*bb722a7dSDimitry Andric 200*bb722a7dSDimitry Andric protected: 201*bb722a7dSDimitry Andric // Merge bits from 'a' and 'b' values according to 'mask'. 202*bb722a7dSDimitry Andric // Use 'a' bits when corresponding 'mask' bits are zeroes and 'b' bits when 203*bb722a7dSDimitry Andric // corresponding bits are ones. 204*bb722a7dSDimitry Andric LIBC_INLINE static constexpr StorageType merge(StorageType a, StorageType b, 205*bb722a7dSDimitry Andric StorageType mask) { 206*bb722a7dSDimitry Andric // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 207*bb722a7dSDimitry Andric return a ^ ((a ^ b) & mask); 208*bb722a7dSDimitry Andric } 209*bb722a7dSDimitry Andric 210*bb722a7dSDimitry Andric // A stongly typed integer that prevents mixing and matching integers with 211*bb722a7dSDimitry Andric // different semantics. 212*bb722a7dSDimitry Andric template <typename T> struct TypedInt { 213*bb722a7dSDimitry Andric using value_type = T; 214*bb722a7dSDimitry Andric LIBC_INLINE constexpr explicit TypedInt(T value) : value(value) {} 215*bb722a7dSDimitry Andric LIBC_INLINE constexpr TypedInt(const TypedInt &value) = default; 216*bb722a7dSDimitry Andric LIBC_INLINE constexpr TypedInt &operator=(const TypedInt &value) = default; 217*bb722a7dSDimitry Andric 218*bb722a7dSDimitry Andric LIBC_INLINE constexpr explicit operator T() const { return value; } 219*bb722a7dSDimitry Andric 220*bb722a7dSDimitry Andric LIBC_INLINE constexpr StorageType to_storage_type() const { 221*bb722a7dSDimitry Andric return StorageType(value); 222*bb722a7dSDimitry Andric } 223*bb722a7dSDimitry Andric 224*bb722a7dSDimitry Andric LIBC_INLINE friend constexpr bool operator==(TypedInt a, TypedInt b) { 225*bb722a7dSDimitry Andric return a.value == b.value; 226*bb722a7dSDimitry Andric } 227*bb722a7dSDimitry Andric LIBC_INLINE friend constexpr bool operator!=(TypedInt a, TypedInt b) { 228*bb722a7dSDimitry Andric return a.value != b.value; 229*bb722a7dSDimitry Andric } 230*bb722a7dSDimitry Andric 231*bb722a7dSDimitry Andric protected: 232*bb722a7dSDimitry Andric T value; 233*bb722a7dSDimitry Andric }; 234*bb722a7dSDimitry Andric 235*bb722a7dSDimitry Andric // An opaque type to store a floating point exponent. 236*bb722a7dSDimitry Andric // We define special values but it is valid to create arbitrary values as long 237*bb722a7dSDimitry Andric // as they are in the range [min, max]. 238*bb722a7dSDimitry Andric struct Exponent : public TypedInt<int32_t> { 239*bb722a7dSDimitry Andric using UP = TypedInt<int32_t>; 240*bb722a7dSDimitry Andric using UP::UP; 241*bb722a7dSDimitry Andric LIBC_INLINE static constexpr auto subnormal() { 242*bb722a7dSDimitry Andric return Exponent(-EXP_BIAS); 243*bb722a7dSDimitry Andric } 244*bb722a7dSDimitry Andric LIBC_INLINE static constexpr auto min() { return Exponent(1 - EXP_BIAS); } 245*bb722a7dSDimitry Andric LIBC_INLINE static constexpr auto zero() { return Exponent(0); } 246*bb722a7dSDimitry Andric LIBC_INLINE static constexpr auto max() { return Exponent(EXP_BIAS); } 247*bb722a7dSDimitry Andric LIBC_INLINE static constexpr auto inf() { return Exponent(EXP_BIAS + 1); } 248*bb722a7dSDimitry Andric }; 249*bb722a7dSDimitry Andric 250*bb722a7dSDimitry Andric // An opaque type to store a floating point biased exponent. 251*bb722a7dSDimitry Andric // We define special values but it is valid to create arbitrary values as long 252*bb722a7dSDimitry Andric // as they are in the range [zero, bits_all_ones]. 253*bb722a7dSDimitry Andric // Values greater than bits_all_ones are truncated. 254*bb722a7dSDimitry Andric struct BiasedExponent : public TypedInt<uint32_t> { 255*bb722a7dSDimitry Andric using UP = TypedInt<uint32_t>; 256*bb722a7dSDimitry Andric using UP::UP; 257*bb722a7dSDimitry Andric 258*bb722a7dSDimitry Andric LIBC_INLINE constexpr BiasedExponent(Exponent exp) 259*bb722a7dSDimitry Andric : UP(static_cast<uint32_t>(static_cast<int32_t>(exp) + EXP_BIAS)) {} 260*bb722a7dSDimitry Andric 261*bb722a7dSDimitry Andric // Cast operator to get convert from BiasedExponent to Exponent. 262*bb722a7dSDimitry Andric LIBC_INLINE constexpr operator Exponent() const { 263*bb722a7dSDimitry Andric return Exponent(static_cast<int32_t>(UP::value - EXP_BIAS)); 264*bb722a7dSDimitry Andric } 265*bb722a7dSDimitry Andric 266*bb722a7dSDimitry Andric LIBC_INLINE constexpr BiasedExponent &operator++() { 267*bb722a7dSDimitry Andric LIBC_ASSERT(*this != BiasedExponent(Exponent::inf())); 268*bb722a7dSDimitry Andric ++UP::value; 269*bb722a7dSDimitry Andric return *this; 270*bb722a7dSDimitry Andric } 271*bb722a7dSDimitry Andric 272*bb722a7dSDimitry Andric LIBC_INLINE constexpr BiasedExponent &operator--() { 273*bb722a7dSDimitry Andric LIBC_ASSERT(*this != BiasedExponent(Exponent::subnormal())); 274*bb722a7dSDimitry Andric --UP::value; 275*bb722a7dSDimitry Andric return *this; 276*bb722a7dSDimitry Andric } 277*bb722a7dSDimitry Andric }; 278*bb722a7dSDimitry Andric 279*bb722a7dSDimitry Andric // An opaque type to store a floating point significand. 280*bb722a7dSDimitry Andric // We define special values but it is valid to create arbitrary values as long 281*bb722a7dSDimitry Andric // as they are in the range [zero, bits_all_ones]. 282*bb722a7dSDimitry Andric // Note that the semantics of the Significand are implementation dependent. 283*bb722a7dSDimitry Andric // Values greater than bits_all_ones are truncated. 284*bb722a7dSDimitry Andric struct Significand : public TypedInt<StorageType> { 285*bb722a7dSDimitry Andric using UP = TypedInt<StorageType>; 286*bb722a7dSDimitry Andric using UP::UP; 287*bb722a7dSDimitry Andric 288*bb722a7dSDimitry Andric LIBC_INLINE friend constexpr Significand operator|(const Significand a, 289*bb722a7dSDimitry Andric const Significand b) { 290*bb722a7dSDimitry Andric return Significand( 291*bb722a7dSDimitry Andric StorageType(a.to_storage_type() | b.to_storage_type())); 292*bb722a7dSDimitry Andric } 293*bb722a7dSDimitry Andric LIBC_INLINE friend constexpr Significand operator^(const Significand a, 294*bb722a7dSDimitry Andric const Significand b) { 295*bb722a7dSDimitry Andric return Significand( 296*bb722a7dSDimitry Andric StorageType(a.to_storage_type() ^ b.to_storage_type())); 297*bb722a7dSDimitry Andric } 298*bb722a7dSDimitry Andric LIBC_INLINE friend constexpr Significand operator>>(const Significand a, 299*bb722a7dSDimitry Andric int shift) { 300*bb722a7dSDimitry Andric return Significand(StorageType(a.to_storage_type() >> shift)); 301*bb722a7dSDimitry Andric } 302*bb722a7dSDimitry Andric 303*bb722a7dSDimitry Andric LIBC_INLINE static constexpr auto zero() { 304*bb722a7dSDimitry Andric return Significand(StorageType(0)); 305*bb722a7dSDimitry Andric } 306*bb722a7dSDimitry Andric LIBC_INLINE static constexpr auto lsb() { 307*bb722a7dSDimitry Andric return Significand(StorageType(1)); 308*bb722a7dSDimitry Andric } 309*bb722a7dSDimitry Andric LIBC_INLINE static constexpr auto msb() { 310*bb722a7dSDimitry Andric return Significand(StorageType(1) << (SIG_LEN - 1)); 311*bb722a7dSDimitry Andric } 312*bb722a7dSDimitry Andric LIBC_INLINE static constexpr auto bits_all_ones() { 313*bb722a7dSDimitry Andric return Significand(SIG_MASK); 314*bb722a7dSDimitry Andric } 315*bb722a7dSDimitry Andric }; 316*bb722a7dSDimitry Andric 317*bb722a7dSDimitry Andric LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp) { 318*bb722a7dSDimitry Andric return (exp.to_storage_type() << SIG_LEN) & EXP_MASK; 319*bb722a7dSDimitry Andric } 320*bb722a7dSDimitry Andric 321*bb722a7dSDimitry Andric LIBC_INLINE static constexpr StorageType encode(Significand value) { 322*bb722a7dSDimitry Andric return value.to_storage_type() & SIG_MASK; 323*bb722a7dSDimitry Andric } 324*bb722a7dSDimitry Andric 325*bb722a7dSDimitry Andric LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp, 326*bb722a7dSDimitry Andric Significand sig) { 327*bb722a7dSDimitry Andric return encode(exp) | encode(sig); 328*bb722a7dSDimitry Andric } 329*bb722a7dSDimitry Andric 330*bb722a7dSDimitry Andric LIBC_INLINE static constexpr StorageType encode(Sign sign, BiasedExponent exp, 331*bb722a7dSDimitry Andric Significand sig) { 332*bb722a7dSDimitry Andric if (sign.is_neg()) 333*bb722a7dSDimitry Andric return SIGN_MASK | encode(exp, sig); 334*bb722a7dSDimitry Andric return encode(exp, sig); 335*bb722a7dSDimitry Andric } 336*bb722a7dSDimitry Andric 337*bb722a7dSDimitry Andric // The floating point number representation as an unsigned integer. 338*bb722a7dSDimitry Andric StorageType bits{}; 339*bb722a7dSDimitry Andric 340*bb722a7dSDimitry Andric LIBC_INLINE constexpr FPStorage() : bits(0) {} 341*bb722a7dSDimitry Andric LIBC_INLINE constexpr FPStorage(StorageType value) : bits(value) {} 342*bb722a7dSDimitry Andric 343*bb722a7dSDimitry Andric // Observers 344*bb722a7dSDimitry Andric LIBC_INLINE constexpr StorageType exp_bits() const { return bits & EXP_MASK; } 345*bb722a7dSDimitry Andric LIBC_INLINE constexpr StorageType sig_bits() const { return bits & SIG_MASK; } 346*bb722a7dSDimitry Andric LIBC_INLINE constexpr StorageType exp_sig_bits() const { 347*bb722a7dSDimitry Andric return bits & EXP_SIG_MASK; 348*bb722a7dSDimitry Andric } 349*bb722a7dSDimitry Andric 350*bb722a7dSDimitry Andric // Parts 351*bb722a7dSDimitry Andric LIBC_INLINE constexpr BiasedExponent biased_exponent() const { 352*bb722a7dSDimitry Andric return BiasedExponent(static_cast<uint32_t>(exp_bits() >> SIG_LEN)); 353*bb722a7dSDimitry Andric } 354*bb722a7dSDimitry Andric LIBC_INLINE constexpr void set_biased_exponent(BiasedExponent biased) { 355*bb722a7dSDimitry Andric bits = merge(bits, encode(biased), EXP_MASK); 356*bb722a7dSDimitry Andric } 357*bb722a7dSDimitry Andric 358*bb722a7dSDimitry Andric public: 359*bb722a7dSDimitry Andric LIBC_INLINE constexpr Sign sign() const { 360*bb722a7dSDimitry Andric return (bits & SIGN_MASK) ? Sign::NEG : Sign::POS; 361*bb722a7dSDimitry Andric } 362*bb722a7dSDimitry Andric LIBC_INLINE constexpr void set_sign(Sign signVal) { 363*bb722a7dSDimitry Andric if (sign() != signVal) 364*bb722a7dSDimitry Andric bits ^= SIGN_MASK; 365*bb722a7dSDimitry Andric } 366*bb722a7dSDimitry Andric }; 367*bb722a7dSDimitry Andric 368*bb722a7dSDimitry Andric // This layer defines all functions that are specific to how the the floating 369*bb722a7dSDimitry Andric // point type is encoded. It enables constructions, modification and observation 370*bb722a7dSDimitry Andric // of values manipulated as 'StorageType'. 371*bb722a7dSDimitry Andric template <FPType fp_type, typename RetT> 372*bb722a7dSDimitry Andric struct FPRepSem : public FPStorage<fp_type> { 373*bb722a7dSDimitry Andric using UP = FPStorage<fp_type>; 374*bb722a7dSDimitry Andric using typename UP::StorageType; 375*bb722a7dSDimitry Andric using UP::FRACTION_LEN; 376*bb722a7dSDimitry Andric using UP::FRACTION_MASK; 377*bb722a7dSDimitry Andric 378*bb722a7dSDimitry Andric protected: 379*bb722a7dSDimitry Andric using typename UP::Exponent; 380*bb722a7dSDimitry Andric using typename UP::Significand; 381*bb722a7dSDimitry Andric using UP::bits; 382*bb722a7dSDimitry Andric using UP::encode; 383*bb722a7dSDimitry Andric using UP::exp_bits; 384*bb722a7dSDimitry Andric using UP::exp_sig_bits; 385*bb722a7dSDimitry Andric using UP::sig_bits; 386*bb722a7dSDimitry Andric using UP::UP; 387*bb722a7dSDimitry Andric 388*bb722a7dSDimitry Andric public: 389*bb722a7dSDimitry Andric // Builders 390*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) { 391*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::subnormal(), Significand::zero())); 392*bb722a7dSDimitry Andric } 393*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) { 394*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::zero(), Significand::zero())); 395*bb722a7dSDimitry Andric } 396*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) { 397*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::subnormal(), Significand::lsb())); 398*bb722a7dSDimitry Andric } 399*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) { 400*bb722a7dSDimitry Andric return RetT( 401*bb722a7dSDimitry Andric encode(sign, Exponent::subnormal(), Significand::bits_all_ones())); 402*bb722a7dSDimitry Andric } 403*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) { 404*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::min(), Significand::zero())); 405*bb722a7dSDimitry Andric } 406*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) { 407*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones())); 408*bb722a7dSDimitry Andric } 409*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) { 410*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::inf(), Significand::zero())); 411*bb722a7dSDimitry Andric } 412*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS, 413*bb722a7dSDimitry Andric StorageType v = 0) { 414*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::inf(), 415*bb722a7dSDimitry Andric (v ? Significand(v) : (Significand::msb() >> 1)))); 416*bb722a7dSDimitry Andric } 417*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS, 418*bb722a7dSDimitry Andric StorageType v = 0) { 419*bb722a7dSDimitry Andric return RetT( 420*bb722a7dSDimitry Andric encode(sign, Exponent::inf(), Significand::msb() | Significand(v))); 421*bb722a7dSDimitry Andric } 422*bb722a7dSDimitry Andric 423*bb722a7dSDimitry Andric // Observers 424*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; } 425*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_nan() const { 426*bb722a7dSDimitry Andric return exp_sig_bits() > encode(Exponent::inf(), Significand::zero()); 427*bb722a7dSDimitry Andric } 428*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_quiet_nan() const { 429*bb722a7dSDimitry Andric return exp_sig_bits() >= encode(Exponent::inf(), Significand::msb()); 430*bb722a7dSDimitry Andric } 431*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_signaling_nan() const { 432*bb722a7dSDimitry Andric return is_nan() && !is_quiet_nan(); 433*bb722a7dSDimitry Andric } 434*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_inf() const { 435*bb722a7dSDimitry Andric return exp_sig_bits() == encode(Exponent::inf(), Significand::zero()); 436*bb722a7dSDimitry Andric } 437*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_finite() const { 438*bb722a7dSDimitry Andric return exp_bits() != encode(Exponent::inf()); 439*bb722a7dSDimitry Andric } 440*bb722a7dSDimitry Andric LIBC_INLINE 441*bb722a7dSDimitry Andric constexpr bool is_subnormal() const { 442*bb722a7dSDimitry Andric return exp_bits() == encode(Exponent::subnormal()); 443*bb722a7dSDimitry Andric } 444*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_normal() const { 445*bb722a7dSDimitry Andric return is_finite() && !is_subnormal(); 446*bb722a7dSDimitry Andric } 447*bb722a7dSDimitry Andric LIBC_INLINE constexpr RetT next_toward_inf() const { 448*bb722a7dSDimitry Andric if (is_finite()) 449*bb722a7dSDimitry Andric return RetT(bits + StorageType(1)); 450*bb722a7dSDimitry Andric return RetT(bits); 451*bb722a7dSDimitry Andric } 452*bb722a7dSDimitry Andric 453*bb722a7dSDimitry Andric // Returns the mantissa with the implicit bit set iff the current 454*bb722a7dSDimitry Andric // value is a valid normal number. 455*bb722a7dSDimitry Andric LIBC_INLINE constexpr StorageType get_explicit_mantissa() const { 456*bb722a7dSDimitry Andric if (is_subnormal()) 457*bb722a7dSDimitry Andric return sig_bits(); 458*bb722a7dSDimitry Andric return (StorageType(1) << UP::SIG_LEN) | sig_bits(); 459*bb722a7dSDimitry Andric } 460*bb722a7dSDimitry Andric }; 461*bb722a7dSDimitry Andric 462*bb722a7dSDimitry Andric // Specialization for the X86 Extended Precision type. 463*bb722a7dSDimitry Andric template <typename RetT> 464*bb722a7dSDimitry Andric struct FPRepSem<FPType::X86_Binary80, RetT> 465*bb722a7dSDimitry Andric : public FPStorage<FPType::X86_Binary80> { 466*bb722a7dSDimitry Andric using UP = FPStorage<FPType::X86_Binary80>; 467*bb722a7dSDimitry Andric using typename UP::StorageType; 468*bb722a7dSDimitry Andric using UP::FRACTION_LEN; 469*bb722a7dSDimitry Andric using UP::FRACTION_MASK; 470*bb722a7dSDimitry Andric 471*bb722a7dSDimitry Andric // The x86 80 bit float represents the leading digit of the mantissa 472*bb722a7dSDimitry Andric // explicitly. This is the mask for that bit. 473*bb722a7dSDimitry Andric static constexpr StorageType EXPLICIT_BIT_MASK = StorageType(1) 474*bb722a7dSDimitry Andric << FRACTION_LEN; 475*bb722a7dSDimitry Andric // The X80 significand is made of an explicit bit and the fractional part. 476*bb722a7dSDimitry Andric static_assert((EXPLICIT_BIT_MASK & FRACTION_MASK) == 0, 477*bb722a7dSDimitry Andric "the explicit bit and the fractional part should not overlap"); 478*bb722a7dSDimitry Andric static_assert((EXPLICIT_BIT_MASK | FRACTION_MASK) == SIG_MASK, 479*bb722a7dSDimitry Andric "the explicit bit and the fractional part should cover the " 480*bb722a7dSDimitry Andric "whole significand"); 481*bb722a7dSDimitry Andric 482*bb722a7dSDimitry Andric protected: 483*bb722a7dSDimitry Andric using typename UP::Exponent; 484*bb722a7dSDimitry Andric using typename UP::Significand; 485*bb722a7dSDimitry Andric using UP::encode; 486*bb722a7dSDimitry Andric using UP::UP; 487*bb722a7dSDimitry Andric 488*bb722a7dSDimitry Andric public: 489*bb722a7dSDimitry Andric // Builders 490*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) { 491*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::subnormal(), Significand::zero())); 492*bb722a7dSDimitry Andric } 493*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) { 494*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::zero(), Significand::msb())); 495*bb722a7dSDimitry Andric } 496*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) { 497*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::subnormal(), Significand::lsb())); 498*bb722a7dSDimitry Andric } 499*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) { 500*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::subnormal(), 501*bb722a7dSDimitry Andric Significand::bits_all_ones() ^ Significand::msb())); 502*bb722a7dSDimitry Andric } 503*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) { 504*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::min(), Significand::msb())); 505*bb722a7dSDimitry Andric } 506*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) { 507*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones())); 508*bb722a7dSDimitry Andric } 509*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) { 510*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::inf(), Significand::msb())); 511*bb722a7dSDimitry Andric } 512*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS, 513*bb722a7dSDimitry Andric StorageType v = 0) { 514*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::inf(), 515*bb722a7dSDimitry Andric Significand::msb() | 516*bb722a7dSDimitry Andric (v ? Significand(v) : (Significand::msb() >> 2)))); 517*bb722a7dSDimitry Andric } 518*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS, 519*bb722a7dSDimitry Andric StorageType v = 0) { 520*bb722a7dSDimitry Andric return RetT(encode(sign, Exponent::inf(), 521*bb722a7dSDimitry Andric Significand::msb() | (Significand::msb() >> 1) | 522*bb722a7dSDimitry Andric Significand(v))); 523*bb722a7dSDimitry Andric } 524*bb722a7dSDimitry Andric 525*bb722a7dSDimitry Andric // Observers 526*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; } 527*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_nan() const { 528*bb722a7dSDimitry Andric // Most encoding forms from the table found in 529*bb722a7dSDimitry Andric // https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format 530*bb722a7dSDimitry Andric // are interpreted as NaN. 531*bb722a7dSDimitry Andric // More precisely : 532*bb722a7dSDimitry Andric // - Pseudo-Infinity 533*bb722a7dSDimitry Andric // - Pseudo Not a Number 534*bb722a7dSDimitry Andric // - Signalling Not a Number 535*bb722a7dSDimitry Andric // - Floating-point Indefinite 536*bb722a7dSDimitry Andric // - Quiet Not a Number 537*bb722a7dSDimitry Andric // - Unnormal 538*bb722a7dSDimitry Andric // This can be reduced to the following logic: 539*bb722a7dSDimitry Andric if (exp_bits() == encode(Exponent::inf())) 540*bb722a7dSDimitry Andric return !is_inf(); 541*bb722a7dSDimitry Andric if (exp_bits() != encode(Exponent::subnormal())) 542*bb722a7dSDimitry Andric return (sig_bits() & encode(Significand::msb())) == 0; 543*bb722a7dSDimitry Andric return false; 544*bb722a7dSDimitry Andric } 545*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_quiet_nan() const { 546*bb722a7dSDimitry Andric return exp_sig_bits() >= 547*bb722a7dSDimitry Andric encode(Exponent::inf(), 548*bb722a7dSDimitry Andric Significand::msb() | (Significand::msb() >> 1)); 549*bb722a7dSDimitry Andric } 550*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_signaling_nan() const { 551*bb722a7dSDimitry Andric return is_nan() && !is_quiet_nan(); 552*bb722a7dSDimitry Andric } 553*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_inf() const { 554*bb722a7dSDimitry Andric return exp_sig_bits() == encode(Exponent::inf(), Significand::msb()); 555*bb722a7dSDimitry Andric } 556*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_finite() const { 557*bb722a7dSDimitry Andric return !is_inf() && !is_nan(); 558*bb722a7dSDimitry Andric } 559*bb722a7dSDimitry Andric LIBC_INLINE 560*bb722a7dSDimitry Andric constexpr bool is_subnormal() const { 561*bb722a7dSDimitry Andric return exp_bits() == encode(Exponent::subnormal()); 562*bb722a7dSDimitry Andric } 563*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_normal() const { 564*bb722a7dSDimitry Andric const auto exp = exp_bits(); 565*bb722a7dSDimitry Andric if (exp == encode(Exponent::subnormal()) || exp == encode(Exponent::inf())) 566*bb722a7dSDimitry Andric return false; 567*bb722a7dSDimitry Andric return get_implicit_bit(); 568*bb722a7dSDimitry Andric } 569*bb722a7dSDimitry Andric LIBC_INLINE constexpr RetT next_toward_inf() const { 570*bb722a7dSDimitry Andric if (is_finite()) { 571*bb722a7dSDimitry Andric if (exp_sig_bits() == max_normal().uintval()) { 572*bb722a7dSDimitry Andric return inf(sign()); 573*bb722a7dSDimitry Andric } else if (exp_sig_bits() == max_subnormal().uintval()) { 574*bb722a7dSDimitry Andric return min_normal(sign()); 575*bb722a7dSDimitry Andric } else if (sig_bits() == SIG_MASK) { 576*bb722a7dSDimitry Andric return RetT(encode(sign(), ++biased_exponent(), Significand::zero())); 577*bb722a7dSDimitry Andric } else { 578*bb722a7dSDimitry Andric return RetT(bits + StorageType(1)); 579*bb722a7dSDimitry Andric } 580*bb722a7dSDimitry Andric } 581*bb722a7dSDimitry Andric return RetT(bits); 582*bb722a7dSDimitry Andric } 583*bb722a7dSDimitry Andric 584*bb722a7dSDimitry Andric LIBC_INLINE constexpr StorageType get_explicit_mantissa() const { 585*bb722a7dSDimitry Andric return sig_bits(); 586*bb722a7dSDimitry Andric } 587*bb722a7dSDimitry Andric 588*bb722a7dSDimitry Andric // This functions is specific to FPRepSem<FPType::X86_Binary80>. 589*bb722a7dSDimitry Andric // TODO: Remove if possible. 590*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool get_implicit_bit() const { 591*bb722a7dSDimitry Andric return static_cast<bool>(bits & EXPLICIT_BIT_MASK); 592*bb722a7dSDimitry Andric } 593*bb722a7dSDimitry Andric 594*bb722a7dSDimitry Andric // This functions is specific to FPRepSem<FPType::X86_Binary80>. 595*bb722a7dSDimitry Andric // TODO: Remove if possible. 596*bb722a7dSDimitry Andric LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) { 597*bb722a7dSDimitry Andric if (get_implicit_bit() != implicitVal) 598*bb722a7dSDimitry Andric bits ^= EXPLICIT_BIT_MASK; 599*bb722a7dSDimitry Andric } 600*bb722a7dSDimitry Andric }; 601*bb722a7dSDimitry Andric 602*bb722a7dSDimitry Andric // 'FPRepImpl' is the bottom of the class hierarchy that only deals with 603*bb722a7dSDimitry Andric // 'FPType'. The operations dealing with specific float semantics are 604*bb722a7dSDimitry Andric // implemented by 'FPRepSem' above and specialized when needed. 605*bb722a7dSDimitry Andric // 606*bb722a7dSDimitry Andric // The 'RetT' type is being propagated up to 'FPRepSem' so that the functions 607*bb722a7dSDimitry Andric // creating new values (Builders) can return the appropriate type. That is, when 608*bb722a7dSDimitry Andric // creating a value through 'FPBits' below the builder will return an 'FPBits' 609*bb722a7dSDimitry Andric // value. 610*bb722a7dSDimitry Andric // FPBits<float>::zero(); // returns an FPBits<> 611*bb722a7dSDimitry Andric // 612*bb722a7dSDimitry Andric // When we don't care about specific C++ floating point type we can use 613*bb722a7dSDimitry Andric // 'FPRep' and specify the 'FPType' directly. 614*bb722a7dSDimitry Andric // FPRep<FPType::IEEE754_Binary32:>::zero() // returns an FPRep<> 615*bb722a7dSDimitry Andric template <FPType fp_type, typename RetT> 616*bb722a7dSDimitry Andric struct FPRepImpl : public FPRepSem<fp_type, RetT> { 617*bb722a7dSDimitry Andric using UP = FPRepSem<fp_type, RetT>; 618*bb722a7dSDimitry Andric using StorageType = typename UP::StorageType; 619*bb722a7dSDimitry Andric 620*bb722a7dSDimitry Andric protected: 621*bb722a7dSDimitry Andric using UP::bits; 622*bb722a7dSDimitry Andric using UP::encode; 623*bb722a7dSDimitry Andric using UP::exp_bits; 624*bb722a7dSDimitry Andric using UP::exp_sig_bits; 625*bb722a7dSDimitry Andric 626*bb722a7dSDimitry Andric using typename UP::BiasedExponent; 627*bb722a7dSDimitry Andric using typename UP::Exponent; 628*bb722a7dSDimitry Andric using typename UP::Significand; 629*bb722a7dSDimitry Andric 630*bb722a7dSDimitry Andric using UP::FP_MASK; 631*bb722a7dSDimitry Andric 632*bb722a7dSDimitry Andric public: 633*bb722a7dSDimitry Andric // Constants. 634*bb722a7dSDimitry Andric using UP::EXP_BIAS; 635*bb722a7dSDimitry Andric using UP::EXP_MASK; 636*bb722a7dSDimitry Andric using UP::FRACTION_MASK; 637*bb722a7dSDimitry Andric using UP::SIG_LEN; 638*bb722a7dSDimitry Andric using UP::SIG_MASK; 639*bb722a7dSDimitry Andric using UP::SIGN_MASK; 640*bb722a7dSDimitry Andric LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT = 641*bb722a7dSDimitry Andric (1 << UP::EXP_LEN) - 1; 642*bb722a7dSDimitry Andric 643*bb722a7dSDimitry Andric // CTors 644*bb722a7dSDimitry Andric LIBC_INLINE constexpr FPRepImpl() = default; 645*bb722a7dSDimitry Andric LIBC_INLINE constexpr explicit FPRepImpl(StorageType x) : UP(x) {} 646*bb722a7dSDimitry Andric 647*bb722a7dSDimitry Andric // Comparison 648*bb722a7dSDimitry Andric LIBC_INLINE constexpr friend bool operator==(FPRepImpl a, FPRepImpl b) { 649*bb722a7dSDimitry Andric return a.uintval() == b.uintval(); 650*bb722a7dSDimitry Andric } 651*bb722a7dSDimitry Andric LIBC_INLINE constexpr friend bool operator!=(FPRepImpl a, FPRepImpl b) { 652*bb722a7dSDimitry Andric return a.uintval() != b.uintval(); 653*bb722a7dSDimitry Andric } 654*bb722a7dSDimitry Andric 655*bb722a7dSDimitry Andric // Representation 656*bb722a7dSDimitry Andric LIBC_INLINE constexpr StorageType uintval() const { return bits & FP_MASK; } 657*bb722a7dSDimitry Andric LIBC_INLINE constexpr void set_uintval(StorageType value) { 658*bb722a7dSDimitry Andric bits = (value & FP_MASK); 659*bb722a7dSDimitry Andric } 660*bb722a7dSDimitry Andric 661*bb722a7dSDimitry Andric // Builders 662*bb722a7dSDimitry Andric using UP::inf; 663*bb722a7dSDimitry Andric using UP::max_normal; 664*bb722a7dSDimitry Andric using UP::max_subnormal; 665*bb722a7dSDimitry Andric using UP::min_normal; 666*bb722a7dSDimitry Andric using UP::min_subnormal; 667*bb722a7dSDimitry Andric using UP::one; 668*bb722a7dSDimitry Andric using UP::quiet_nan; 669*bb722a7dSDimitry Andric using UP::signaling_nan; 670*bb722a7dSDimitry Andric using UP::zero; 671*bb722a7dSDimitry Andric 672*bb722a7dSDimitry Andric // Modifiers 673*bb722a7dSDimitry Andric LIBC_INLINE constexpr RetT abs() const { 674*bb722a7dSDimitry Andric return RetT(static_cast<StorageType>(bits & UP::EXP_SIG_MASK)); 675*bb722a7dSDimitry Andric } 676*bb722a7dSDimitry Andric 677*bb722a7dSDimitry Andric // Observers 678*bb722a7dSDimitry Andric using UP::get_explicit_mantissa; 679*bb722a7dSDimitry Andric using UP::is_finite; 680*bb722a7dSDimitry Andric using UP::is_inf; 681*bb722a7dSDimitry Andric using UP::is_nan; 682*bb722a7dSDimitry Andric using UP::is_normal; 683*bb722a7dSDimitry Andric using UP::is_quiet_nan; 684*bb722a7dSDimitry Andric using UP::is_signaling_nan; 685*bb722a7dSDimitry Andric using UP::is_subnormal; 686*bb722a7dSDimitry Andric using UP::is_zero; 687*bb722a7dSDimitry Andric using UP::next_toward_inf; 688*bb722a7dSDimitry Andric using UP::sign; 689*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_inf_or_nan() const { return !is_finite(); } 690*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_neg() const { return sign().is_neg(); } 691*bb722a7dSDimitry Andric LIBC_INLINE constexpr bool is_pos() const { return sign().is_pos(); } 692*bb722a7dSDimitry Andric 693*bb722a7dSDimitry Andric LIBC_INLINE constexpr uint16_t get_biased_exponent() const { 694*bb722a7dSDimitry Andric return static_cast<uint16_t>(static_cast<uint32_t>(UP::biased_exponent())); 695*bb722a7dSDimitry Andric } 696*bb722a7dSDimitry Andric 697*bb722a7dSDimitry Andric LIBC_INLINE constexpr void set_biased_exponent(StorageType biased) { 698*bb722a7dSDimitry Andric UP::set_biased_exponent(BiasedExponent(static_cast<uint32_t>(biased))); 699*bb722a7dSDimitry Andric } 700*bb722a7dSDimitry Andric 701*bb722a7dSDimitry Andric LIBC_INLINE constexpr int get_exponent() const { 702*bb722a7dSDimitry Andric return static_cast<int32_t>(Exponent(UP::biased_exponent())); 703*bb722a7dSDimitry Andric } 704*bb722a7dSDimitry Andric 705*bb722a7dSDimitry Andric // If the number is subnormal, the exponent is treated as if it were the 706*bb722a7dSDimitry Andric // minimum exponent for a normal number. This is to keep continuity between 707*bb722a7dSDimitry Andric // the normal and subnormal ranges, but it causes problems for functions where 708*bb722a7dSDimitry Andric // values are calculated from the exponent, since just subtracting the bias 709*bb722a7dSDimitry Andric // will give a slightly incorrect result. Additionally, zero has an exponent 710*bb722a7dSDimitry Andric // of zero, and that should actually be treated as zero. 711*bb722a7dSDimitry Andric LIBC_INLINE constexpr int get_explicit_exponent() const { 712*bb722a7dSDimitry Andric Exponent exponent(UP::biased_exponent()); 713*bb722a7dSDimitry Andric if (is_zero()) 714*bb722a7dSDimitry Andric exponent = Exponent::zero(); 715*bb722a7dSDimitry Andric if (exponent == Exponent::subnormal()) 716*bb722a7dSDimitry Andric exponent = Exponent::min(); 717*bb722a7dSDimitry Andric return static_cast<int32_t>(exponent); 718*bb722a7dSDimitry Andric } 719*bb722a7dSDimitry Andric 720*bb722a7dSDimitry Andric LIBC_INLINE constexpr StorageType get_mantissa() const { 721*bb722a7dSDimitry Andric return bits & FRACTION_MASK; 722*bb722a7dSDimitry Andric } 723*bb722a7dSDimitry Andric 724*bb722a7dSDimitry Andric LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) { 725*bb722a7dSDimitry Andric bits = UP::merge(bits, mantVal, FRACTION_MASK); 726*bb722a7dSDimitry Andric } 727*bb722a7dSDimitry Andric 728*bb722a7dSDimitry Andric LIBC_INLINE constexpr void set_significand(StorageType sigVal) { 729*bb722a7dSDimitry Andric bits = UP::merge(bits, sigVal, SIG_MASK); 730*bb722a7dSDimitry Andric } 731*bb722a7dSDimitry Andric // Unsafe function to create a floating point representation. 732*bb722a7dSDimitry Andric // It simply packs the sign, biased exponent and mantissa values without 733*bb722a7dSDimitry Andric // checking bound nor normalization. 734*bb722a7dSDimitry Andric // 735*bb722a7dSDimitry Andric // WARNING: For X86 Extended Precision, implicit bit needs to be set correctly 736*bb722a7dSDimitry Andric // in the 'mantissa' by the caller. This function will not check for its 737*bb722a7dSDimitry Andric // validity. 738*bb722a7dSDimitry Andric // 739*bb722a7dSDimitry Andric // FIXME: Use an uint32_t for 'biased_exp'. 740*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT 741*bb722a7dSDimitry Andric create_value(Sign sign, StorageType biased_exp, StorageType mantissa) { 742*bb722a7dSDimitry Andric return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)), 743*bb722a7dSDimitry Andric Significand(mantissa))); 744*bb722a7dSDimitry Andric } 745*bb722a7dSDimitry Andric 746*bb722a7dSDimitry Andric // The function converts integer number and unbiased exponent to proper 747*bb722a7dSDimitry Andric // float T type: 748*bb722a7dSDimitry Andric // Result = number * 2^(ep+1 - exponent_bias) 749*bb722a7dSDimitry Andric // Be careful! 750*bb722a7dSDimitry Andric // 1) "ep" is the raw exponent value. 751*bb722a7dSDimitry Andric // 2) The function adds +1 to ep for seamless normalized to denormalized 752*bb722a7dSDimitry Andric // transition. 753*bb722a7dSDimitry Andric // 3) The function does not check exponent high limit. 754*bb722a7dSDimitry Andric // 4) "number" zero value is not processed correctly. 755*bb722a7dSDimitry Andric // 5) Number is unsigned, so the result can be only positive. 756*bb722a7dSDimitry Andric LIBC_INLINE static constexpr RetT make_value(StorageType number, int ep) { 757*bb722a7dSDimitry Andric FPRepImpl result(0); 758*bb722a7dSDimitry Andric int lz = 759*bb722a7dSDimitry Andric UP::FRACTION_LEN + 1 - (UP::STORAGE_LEN - cpp::countl_zero(number)); 760*bb722a7dSDimitry Andric 761*bb722a7dSDimitry Andric number <<= lz; 762*bb722a7dSDimitry Andric ep -= lz; 763*bb722a7dSDimitry Andric 764*bb722a7dSDimitry Andric if (LIBC_LIKELY(ep >= 0)) { 765*bb722a7dSDimitry Andric // Implicit number bit will be removed by mask 766*bb722a7dSDimitry Andric result.set_significand(number); 767*bb722a7dSDimitry Andric result.set_biased_exponent(static_cast<StorageType>(ep + 1)); 768*bb722a7dSDimitry Andric } else { 769*bb722a7dSDimitry Andric result.set_significand(number >> static_cast<unsigned>(-ep)); 770*bb722a7dSDimitry Andric } 771*bb722a7dSDimitry Andric return RetT(result.uintval()); 772*bb722a7dSDimitry Andric } 773*bb722a7dSDimitry Andric }; 774*bb722a7dSDimitry Andric 775*bb722a7dSDimitry Andric // A generic class to manipulate floating point formats. 776*bb722a7dSDimitry Andric // It derives its functionality to FPRepImpl above. 777*bb722a7dSDimitry Andric template <FPType fp_type> 778*bb722a7dSDimitry Andric struct FPRep : public FPRepImpl<fp_type, FPRep<fp_type>> { 779*bb722a7dSDimitry Andric using UP = FPRepImpl<fp_type, FPRep<fp_type>>; 780*bb722a7dSDimitry Andric using StorageType = typename UP::StorageType; 781*bb722a7dSDimitry Andric using UP::UP; 782*bb722a7dSDimitry Andric 783*bb722a7dSDimitry Andric LIBC_INLINE constexpr explicit operator StorageType() const { 784*bb722a7dSDimitry Andric return UP::uintval(); 785*bb722a7dSDimitry Andric } 786*bb722a7dSDimitry Andric }; 787*bb722a7dSDimitry Andric 788*bb722a7dSDimitry Andric } // namespace internal 789*bb722a7dSDimitry Andric 790*bb722a7dSDimitry Andric // Returns the FPType corresponding to C++ type T on the host. 791*bb722a7dSDimitry Andric template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() { 792*bb722a7dSDimitry Andric using UnqualT = cpp::remove_cv_t<T>; 793*bb722a7dSDimitry Andric if constexpr (cpp::is_same_v<UnqualT, float> && __FLT_MANT_DIG__ == 24) 794*bb722a7dSDimitry Andric return FPType::IEEE754_Binary32; 795*bb722a7dSDimitry Andric else if constexpr (cpp::is_same_v<UnqualT, double> && __DBL_MANT_DIG__ == 53) 796*bb722a7dSDimitry Andric return FPType::IEEE754_Binary64; 797*bb722a7dSDimitry Andric else if constexpr (cpp::is_same_v<UnqualT, long double>) { 798*bb722a7dSDimitry Andric if constexpr (__LDBL_MANT_DIG__ == 53) 799*bb722a7dSDimitry Andric return FPType::IEEE754_Binary64; 800*bb722a7dSDimitry Andric else if constexpr (__LDBL_MANT_DIG__ == 64) 801*bb722a7dSDimitry Andric return FPType::X86_Binary80; 802*bb722a7dSDimitry Andric else if constexpr (__LDBL_MANT_DIG__ == 113) 803*bb722a7dSDimitry Andric return FPType::IEEE754_Binary128; 804*bb722a7dSDimitry Andric } 805*bb722a7dSDimitry Andric #if defined(LIBC_TYPES_HAS_FLOAT16) 806*bb722a7dSDimitry Andric else if constexpr (cpp::is_same_v<UnqualT, float16>) 807*bb722a7dSDimitry Andric return FPType::IEEE754_Binary16; 808*bb722a7dSDimitry Andric #endif 809*bb722a7dSDimitry Andric #if defined(LIBC_TYPES_HAS_FLOAT128) 810*bb722a7dSDimitry Andric else if constexpr (cpp::is_same_v<UnqualT, float128>) 811*bb722a7dSDimitry Andric return FPType::IEEE754_Binary128; 812*bb722a7dSDimitry Andric #endif 813*bb722a7dSDimitry Andric else if constexpr (cpp::is_same_v<UnqualT, bfloat16>) 814*bb722a7dSDimitry Andric return FPType::BFloat16; 815*bb722a7dSDimitry Andric else 816*bb722a7dSDimitry Andric static_assert(cpp::always_false<UnqualT>, "Unsupported type"); 817*bb722a7dSDimitry Andric } 818*bb722a7dSDimitry Andric 819*bb722a7dSDimitry Andric // ----------------------------------------------------------------------------- 820*bb722a7dSDimitry Andric // **** WARNING **** 821*bb722a7dSDimitry Andric // This interface is shared with libc++, if you change this interface you need 822*bb722a7dSDimitry Andric // to update it in both libc and libc++. You should also be careful when adding 823*bb722a7dSDimitry Andric // dependencies to this file, since it needs to build for all libc++ targets. 824*bb722a7dSDimitry Andric // ----------------------------------------------------------------------------- 825*bb722a7dSDimitry Andric // A generic class to manipulate C++ floating point formats. 826*bb722a7dSDimitry Andric // It derives its functionality to FPRepImpl above. 827*bb722a7dSDimitry Andric template <typename T> 828*bb722a7dSDimitry Andric struct FPBits final : public internal::FPRepImpl<get_fp_type<T>(), FPBits<T>> { 829*bb722a7dSDimitry Andric static_assert(cpp::is_floating_point_v<T>, 830*bb722a7dSDimitry Andric "FPBits instantiated with invalid type."); 831*bb722a7dSDimitry Andric using UP = internal::FPRepImpl<get_fp_type<T>(), FPBits<T>>; 832*bb722a7dSDimitry Andric using StorageType = typename UP::StorageType; 833*bb722a7dSDimitry Andric 834*bb722a7dSDimitry Andric // Constructors. 835*bb722a7dSDimitry Andric LIBC_INLINE constexpr FPBits() = default; 836*bb722a7dSDimitry Andric 837*bb722a7dSDimitry Andric template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) { 838*bb722a7dSDimitry Andric using Unqual = typename cpp::remove_cv_t<XType>; 839*bb722a7dSDimitry Andric if constexpr (cpp::is_same_v<Unqual, T>) { 840*bb722a7dSDimitry Andric UP::bits = cpp::bit_cast<StorageType>(x); 841*bb722a7dSDimitry Andric } else if constexpr (cpp::is_same_v<Unqual, StorageType>) { 842*bb722a7dSDimitry Andric UP::bits = x; 843*bb722a7dSDimitry Andric } else { 844*bb722a7dSDimitry Andric // We don't want accidental type promotions/conversions, so we require 845*bb722a7dSDimitry Andric // exact type match. 846*bb722a7dSDimitry Andric static_assert(cpp::always_false<XType>); 847*bb722a7dSDimitry Andric } 848*bb722a7dSDimitry Andric } 849*bb722a7dSDimitry Andric 850*bb722a7dSDimitry Andric // Floating-point conversions. 851*bb722a7dSDimitry Andric LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(UP::bits); } 852*bb722a7dSDimitry Andric }; 853*bb722a7dSDimitry Andric 854*bb722a7dSDimitry Andric } // namespace fputil 855*bb722a7dSDimitry Andric } // namespace LIBC_NAMESPACE_DECL 856*bb722a7dSDimitry Andric 857*bb722a7dSDimitry Andric #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 858