1 //===-- Abstract class for bit manipulation of float numbers. ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // ----------------------------------------------------------------------------- 10 // **** WARNING **** 11 // This file is shared with libc++. You should also be careful when adding 12 // dependencies to this file, since it needs to build for all libc++ targets. 13 // ----------------------------------------------------------------------------- 14 15 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 16 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 17 18 #include "src/__support/CPP/bit.h" 19 #include "src/__support/CPP/type_traits.h" 20 #include "src/__support/common.h" 21 #include "src/__support/libc_assert.h" // LIBC_ASSERT 22 #include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR 23 #include "src/__support/macros/config.h" 24 #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_FLOAT128 25 #include "src/__support/math_extras.h" // mask_trailing_ones 26 #include "src/__support/sign.h" // Sign 27 #include "src/__support/uint128.h" 28 29 #include <stdint.h> 30 31 namespace LIBC_NAMESPACE_DECL { 32 namespace fputil { 33 34 // The supported floating point types. 
enum class FPType {
  IEEE754_Binary16,  // 1 sign bit, 5 exponent bits, 10 significand bits.
  IEEE754_Binary32,  // 1 sign bit, 8 exponent bits, 23 significand bits.
  IEEE754_Binary64,  // 1 sign bit, 11 exponent bits, 52 significand bits.
  IEEE754_Binary128, // 1 sign bit, 15 exponent bits, 112 significand bits.
  X86_Binary80,      // 1 sign bit, 15 exponent bits, 64 significand bits.
  BFloat16,          // 1 sign bit, 8 exponent bits, 7 significand bits.
};

// The classes hierarchy is as follows:
//
//             ┌───────────────────┐
//             │ FPLayout<FPType>  │
//             └─────────▲─────────┘
//                       │
//             ┌─────────┴─────────┐
//             │ FPStorage<FPType> │
//             └─────────▲─────────┘
//                       │
//          ┌────────────┴─────────────┐
//          │                          │
// ┌────────┴─────────┐ ┌──────────────┴──────────────────┐
// │ FPRepSem<FPType> │ │ FPRepSem<FPType::X86_Binary80>  │
// └────────▲─────────┘ └──────────────▲──────────────────┘
//          │                          │
//          └────────────┬─────────────┘
//                       │
//               ┌───────┴───────┐
//               │  FPRepImpl<T> │
//               └───────▲───────┘
//                       │
//              ┌────────┴────────┐
//        ┌─────┴─────┐     ┌─────┴─────┐
//        │  FPRep<T> │     │ FPBits<T> │
//        └───────────┘     └───────────┘
//
// - 'FPLayout' defines only a few constants, namely the 'StorageType' and the
//   length of the sign, the exponent, fraction and significand parts.
// - 'FPStorage' builds more constants on top of those from 'FPLayout', like
//   the exponent bias and masks. It also holds the bit representation of the
//   floating point as a 'StorageType' value and defines tools to assemble or
//   test these parts.
// - 'FPRepSem' defines functions to interact semantically with the floating
//   point representation. The default implementation is the one for
//   'IEEE754'; a specialization is provided for X86 Extended Precision.
// - 'FPRepImpl' derives from 'FPRepSem' and adds functions that are common to
//   all implementations or build on the ones in 'FPRepSem'.
// - 'FPRep' exposes all functions from 'FPRepImpl' and returns 'FPRep'
//   instances when using Builders (static functions to create values).
// - 'FPBits' exposes all the functions from 'FPRepImpl' but operates on the
//   native C++ floating point type instead of 'FPType'.
An additional 'get_val' 86 // function allows getting the C++ floating point type value back. Builders 87 // called from 'FPBits' return 'FPBits' instances. 88 89 namespace internal { 90 91 // Defines the layout (sign, exponent, significand) of a floating point type in 92 // memory. It also defines its associated StorageType, i.e., the unsigned 93 // integer type used to manipulate its representation. 94 // Additionally we provide the fractional part length, i.e., the number of bits 95 // after the decimal dot when the number is in normal form. 96 template <FPType> struct FPLayout {}; 97 98 template <> struct FPLayout<FPType::IEEE754_Binary16> { 99 using StorageType = uint16_t; 100 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 101 LIBC_INLINE_VAR static constexpr int EXP_LEN = 5; 102 LIBC_INLINE_VAR static constexpr int SIG_LEN = 10; 103 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 104 }; 105 106 template <> struct FPLayout<FPType::IEEE754_Binary32> { 107 using StorageType = uint32_t; 108 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 109 LIBC_INLINE_VAR static constexpr int EXP_LEN = 8; 110 LIBC_INLINE_VAR static constexpr int SIG_LEN = 23; 111 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 112 }; 113 114 template <> struct FPLayout<FPType::IEEE754_Binary64> { 115 using StorageType = uint64_t; 116 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 117 LIBC_INLINE_VAR static constexpr int EXP_LEN = 11; 118 LIBC_INLINE_VAR static constexpr int SIG_LEN = 52; 119 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 120 }; 121 122 template <> struct FPLayout<FPType::IEEE754_Binary128> { 123 using StorageType = UInt128; 124 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 125 LIBC_INLINE_VAR static constexpr int EXP_LEN = 15; 126 LIBC_INLINE_VAR static constexpr int SIG_LEN = 112; 127 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 128 }; 129 130 template <> struct FPLayout<FPType::X86_Binary80> { 131 #if 
__SIZEOF_LONG_DOUBLE__ == 12 132 using StorageType = UInt<__SIZEOF_LONG_DOUBLE__ * CHAR_BIT>; 133 #else 134 using StorageType = UInt128; 135 #endif 136 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 137 LIBC_INLINE_VAR static constexpr int EXP_LEN = 15; 138 LIBC_INLINE_VAR static constexpr int SIG_LEN = 64; 139 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - 1; 140 }; 141 142 template <> struct FPLayout<FPType::BFloat16> { 143 using StorageType = uint16_t; 144 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 145 LIBC_INLINE_VAR static constexpr int EXP_LEN = 8; 146 LIBC_INLINE_VAR static constexpr int SIG_LEN = 7; 147 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 148 }; 149 150 // FPStorage derives useful constants from the FPLayout above. 151 template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> { 152 using UP = FPLayout<fp_type>; 153 154 using UP::EXP_LEN; // The number of bits for the *exponent* part 155 using UP::SIG_LEN; // The number of bits for the *significand* part 156 using UP::SIGN_LEN; // The number of bits for the *sign* part 157 // For convenience, the sum of `SIG_LEN`, `EXP_LEN`, and `SIGN_LEN`. 158 LIBC_INLINE_VAR static constexpr int TOTAL_LEN = SIGN_LEN + EXP_LEN + SIG_LEN; 159 160 // The number of bits after the decimal dot when the number is in normal form. 161 using UP::FRACTION_LEN; 162 163 // An unsigned integer that is wide enough to contain all of the floating 164 // point bits. 165 using StorageType = typename UP::StorageType; 166 167 // The number of bits in StorageType. 168 LIBC_INLINE_VAR static constexpr int STORAGE_LEN = 169 sizeof(StorageType) * CHAR_BIT; 170 static_assert(STORAGE_LEN >= TOTAL_LEN); 171 172 // The exponent bias. Always positive. 173 LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS = 174 (1U << (EXP_LEN - 1U)) - 1U; 175 static_assert(EXP_BIAS > 0); 176 177 // The bit pattern that keeps only the *significand* part. 
178 LIBC_INLINE_VAR static constexpr StorageType SIG_MASK = 179 mask_trailing_ones<StorageType, SIG_LEN>(); 180 // The bit pattern that keeps only the *exponent* part. 181 LIBC_INLINE_VAR static constexpr StorageType EXP_MASK = 182 mask_trailing_ones<StorageType, EXP_LEN>() << SIG_LEN; 183 // The bit pattern that keeps only the *sign* part. 184 LIBC_INLINE_VAR static constexpr StorageType SIGN_MASK = 185 mask_trailing_ones<StorageType, SIGN_LEN>() << (EXP_LEN + SIG_LEN); 186 // The bit pattern that keeps only the *exponent + significand* part. 187 LIBC_INLINE_VAR static constexpr StorageType EXP_SIG_MASK = 188 mask_trailing_ones<StorageType, EXP_LEN + SIG_LEN>(); 189 // The bit pattern that keeps only the *sign + exponent + significand* part. 190 LIBC_INLINE_VAR static constexpr StorageType FP_MASK = 191 mask_trailing_ones<StorageType, TOTAL_LEN>(); 192 // The bit pattern that keeps only the *fraction* part. 193 // i.e., the *significand* without the leading one. 194 LIBC_INLINE_VAR static constexpr StorageType FRACTION_MASK = 195 mask_trailing_ones<StorageType, FRACTION_LEN>(); 196 197 static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint"); 198 static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover"); 199 200 protected: 201 // Merge bits from 'a' and 'b' values according to 'mask'. 202 // Use 'a' bits when corresponding 'mask' bits are zeroes and 'b' bits when 203 // corresponding bits are ones. 204 LIBC_INLINE static constexpr StorageType merge(StorageType a, StorageType b, 205 StorageType mask) { 206 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 207 return a ^ ((a ^ b) & mask); 208 } 209 210 // A stongly typed integer that prevents mixing and matching integers with 211 // different semantics. 
212 template <typename T> struct TypedInt { 213 using value_type = T; 214 LIBC_INLINE constexpr explicit TypedInt(T value) : value(value) {} 215 LIBC_INLINE constexpr TypedInt(const TypedInt &value) = default; 216 LIBC_INLINE constexpr TypedInt &operator=(const TypedInt &value) = default; 217 218 LIBC_INLINE constexpr explicit operator T() const { return value; } 219 220 LIBC_INLINE constexpr StorageType to_storage_type() const { 221 return StorageType(value); 222 } 223 224 LIBC_INLINE friend constexpr bool operator==(TypedInt a, TypedInt b) { 225 return a.value == b.value; 226 } 227 LIBC_INLINE friend constexpr bool operator!=(TypedInt a, TypedInt b) { 228 return a.value != b.value; 229 } 230 231 protected: 232 T value; 233 }; 234 235 // An opaque type to store a floating point exponent. 236 // We define special values but it is valid to create arbitrary values as long 237 // as they are in the range [min, max]. 238 struct Exponent : public TypedInt<int32_t> { 239 using UP = TypedInt<int32_t>; 240 using UP::UP; 241 LIBC_INLINE static constexpr auto subnormal() { 242 return Exponent(-EXP_BIAS); 243 } 244 LIBC_INLINE static constexpr auto min() { return Exponent(1 - EXP_BIAS); } 245 LIBC_INLINE static constexpr auto zero() { return Exponent(0); } 246 LIBC_INLINE static constexpr auto max() { return Exponent(EXP_BIAS); } 247 LIBC_INLINE static constexpr auto inf() { return Exponent(EXP_BIAS + 1); } 248 }; 249 250 // An opaque type to store a floating point biased exponent. 251 // We define special values but it is valid to create arbitrary values as long 252 // as they are in the range [zero, bits_all_ones]. 253 // Values greater than bits_all_ones are truncated. 
254 struct BiasedExponent : public TypedInt<uint32_t> { 255 using UP = TypedInt<uint32_t>; 256 using UP::UP; 257 258 LIBC_INLINE constexpr BiasedExponent(Exponent exp) 259 : UP(static_cast<uint32_t>(static_cast<int32_t>(exp) + EXP_BIAS)) {} 260 261 // Cast operator to get convert from BiasedExponent to Exponent. 262 LIBC_INLINE constexpr operator Exponent() const { 263 return Exponent(static_cast<int32_t>(UP::value - EXP_BIAS)); 264 } 265 266 LIBC_INLINE constexpr BiasedExponent &operator++() { 267 LIBC_ASSERT(*this != BiasedExponent(Exponent::inf())); 268 ++UP::value; 269 return *this; 270 } 271 272 LIBC_INLINE constexpr BiasedExponent &operator--() { 273 LIBC_ASSERT(*this != BiasedExponent(Exponent::subnormal())); 274 --UP::value; 275 return *this; 276 } 277 }; 278 279 // An opaque type to store a floating point significand. 280 // We define special values but it is valid to create arbitrary values as long 281 // as they are in the range [zero, bits_all_ones]. 282 // Note that the semantics of the Significand are implementation dependent. 283 // Values greater than bits_all_ones are truncated. 
284 struct Significand : public TypedInt<StorageType> { 285 using UP = TypedInt<StorageType>; 286 using UP::UP; 287 288 LIBC_INLINE friend constexpr Significand operator|(const Significand a, 289 const Significand b) { 290 return Significand( 291 StorageType(a.to_storage_type() | b.to_storage_type())); 292 } 293 LIBC_INLINE friend constexpr Significand operator^(const Significand a, 294 const Significand b) { 295 return Significand( 296 StorageType(a.to_storage_type() ^ b.to_storage_type())); 297 } 298 LIBC_INLINE friend constexpr Significand operator>>(const Significand a, 299 int shift) { 300 return Significand(StorageType(a.to_storage_type() >> shift)); 301 } 302 303 LIBC_INLINE static constexpr auto zero() { 304 return Significand(StorageType(0)); 305 } 306 LIBC_INLINE static constexpr auto lsb() { 307 return Significand(StorageType(1)); 308 } 309 LIBC_INLINE static constexpr auto msb() { 310 return Significand(StorageType(1) << (SIG_LEN - 1)); 311 } 312 LIBC_INLINE static constexpr auto bits_all_ones() { 313 return Significand(SIG_MASK); 314 } 315 }; 316 317 LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp) { 318 return (exp.to_storage_type() << SIG_LEN) & EXP_MASK; 319 } 320 321 LIBC_INLINE static constexpr StorageType encode(Significand value) { 322 return value.to_storage_type() & SIG_MASK; 323 } 324 325 LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp, 326 Significand sig) { 327 return encode(exp) | encode(sig); 328 } 329 330 LIBC_INLINE static constexpr StorageType encode(Sign sign, BiasedExponent exp, 331 Significand sig) { 332 if (sign.is_neg()) 333 return SIGN_MASK | encode(exp, sig); 334 return encode(exp, sig); 335 } 336 337 // The floating point number representation as an unsigned integer. 
338 StorageType bits{}; 339 340 LIBC_INLINE constexpr FPStorage() : bits(0) {} 341 LIBC_INLINE constexpr FPStorage(StorageType value) : bits(value) {} 342 343 // Observers 344 LIBC_INLINE constexpr StorageType exp_bits() const { return bits & EXP_MASK; } 345 LIBC_INLINE constexpr StorageType sig_bits() const { return bits & SIG_MASK; } 346 LIBC_INLINE constexpr StorageType exp_sig_bits() const { 347 return bits & EXP_SIG_MASK; 348 } 349 350 // Parts 351 LIBC_INLINE constexpr BiasedExponent biased_exponent() const { 352 return BiasedExponent(static_cast<uint32_t>(exp_bits() >> SIG_LEN)); 353 } 354 LIBC_INLINE constexpr void set_biased_exponent(BiasedExponent biased) { 355 bits = merge(bits, encode(biased), EXP_MASK); 356 } 357 358 public: 359 LIBC_INLINE constexpr Sign sign() const { 360 return (bits & SIGN_MASK) ? Sign::NEG : Sign::POS; 361 } 362 LIBC_INLINE constexpr void set_sign(Sign signVal) { 363 if (sign() != signVal) 364 bits ^= SIGN_MASK; 365 } 366 }; 367 368 // This layer defines all functions that are specific to how the the floating 369 // point type is encoded. It enables constructions, modification and observation 370 // of values manipulated as 'StorageType'. 
371 template <FPType fp_type, typename RetT> 372 struct FPRepSem : public FPStorage<fp_type> { 373 using UP = FPStorage<fp_type>; 374 using typename UP::StorageType; 375 using UP::FRACTION_LEN; 376 using UP::FRACTION_MASK; 377 378 protected: 379 using typename UP::Exponent; 380 using typename UP::Significand; 381 using UP::bits; 382 using UP::encode; 383 using UP::exp_bits; 384 using UP::exp_sig_bits; 385 using UP::sig_bits; 386 using UP::UP; 387 388 public: 389 // Builders 390 LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) { 391 return RetT(encode(sign, Exponent::subnormal(), Significand::zero())); 392 } 393 LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) { 394 return RetT(encode(sign, Exponent::zero(), Significand::zero())); 395 } 396 LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) { 397 return RetT(encode(sign, Exponent::subnormal(), Significand::lsb())); 398 } 399 LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) { 400 return RetT( 401 encode(sign, Exponent::subnormal(), Significand::bits_all_ones())); 402 } 403 LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) { 404 return RetT(encode(sign, Exponent::min(), Significand::zero())); 405 } 406 LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) { 407 return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones())); 408 } 409 LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) { 410 return RetT(encode(sign, Exponent::inf(), Significand::zero())); 411 } 412 LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS, 413 StorageType v = 0) { 414 return RetT(encode(sign, Exponent::inf(), 415 (v ? 
Significand(v) : (Significand::msb() >> 1)))); 416 } 417 LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS, 418 StorageType v = 0) { 419 return RetT( 420 encode(sign, Exponent::inf(), Significand::msb() | Significand(v))); 421 } 422 423 // Observers 424 LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; } 425 LIBC_INLINE constexpr bool is_nan() const { 426 return exp_sig_bits() > encode(Exponent::inf(), Significand::zero()); 427 } 428 LIBC_INLINE constexpr bool is_quiet_nan() const { 429 return exp_sig_bits() >= encode(Exponent::inf(), Significand::msb()); 430 } 431 LIBC_INLINE constexpr bool is_signaling_nan() const { 432 return is_nan() && !is_quiet_nan(); 433 } 434 LIBC_INLINE constexpr bool is_inf() const { 435 return exp_sig_bits() == encode(Exponent::inf(), Significand::zero()); 436 } 437 LIBC_INLINE constexpr bool is_finite() const { 438 return exp_bits() != encode(Exponent::inf()); 439 } 440 LIBC_INLINE 441 constexpr bool is_subnormal() const { 442 return exp_bits() == encode(Exponent::subnormal()); 443 } 444 LIBC_INLINE constexpr bool is_normal() const { 445 return is_finite() && !is_subnormal(); 446 } 447 LIBC_INLINE constexpr RetT next_toward_inf() const { 448 if (is_finite()) 449 return RetT(bits + StorageType(1)); 450 return RetT(bits); 451 } 452 453 // Returns the mantissa with the implicit bit set iff the current 454 // value is a valid normal number. 455 LIBC_INLINE constexpr StorageType get_explicit_mantissa() const { 456 if (is_subnormal()) 457 return sig_bits(); 458 return (StorageType(1) << UP::SIG_LEN) | sig_bits(); 459 } 460 }; 461 462 // Specialization for the X86 Extended Precision type. 
463 template <typename RetT> 464 struct FPRepSem<FPType::X86_Binary80, RetT> 465 : public FPStorage<FPType::X86_Binary80> { 466 using UP = FPStorage<FPType::X86_Binary80>; 467 using typename UP::StorageType; 468 using UP::FRACTION_LEN; 469 using UP::FRACTION_MASK; 470 471 // The x86 80 bit float represents the leading digit of the mantissa 472 // explicitly. This is the mask for that bit. 473 static constexpr StorageType EXPLICIT_BIT_MASK = StorageType(1) 474 << FRACTION_LEN; 475 // The X80 significand is made of an explicit bit and the fractional part. 476 static_assert((EXPLICIT_BIT_MASK & FRACTION_MASK) == 0, 477 "the explicit bit and the fractional part should not overlap"); 478 static_assert((EXPLICIT_BIT_MASK | FRACTION_MASK) == SIG_MASK, 479 "the explicit bit and the fractional part should cover the " 480 "whole significand"); 481 482 protected: 483 using typename UP::Exponent; 484 using typename UP::Significand; 485 using UP::encode; 486 using UP::UP; 487 488 public: 489 // Builders 490 LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) { 491 return RetT(encode(sign, Exponent::subnormal(), Significand::zero())); 492 } 493 LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) { 494 return RetT(encode(sign, Exponent::zero(), Significand::msb())); 495 } 496 LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) { 497 return RetT(encode(sign, Exponent::subnormal(), Significand::lsb())); 498 } 499 LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) { 500 return RetT(encode(sign, Exponent::subnormal(), 501 Significand::bits_all_ones() ^ Significand::msb())); 502 } 503 LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) { 504 return RetT(encode(sign, Exponent::min(), Significand::msb())); 505 } 506 LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) { 507 return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones())); 508 } 509 LIBC_INLINE static constexpr RetT inf(Sign 
sign = Sign::POS) { 510 return RetT(encode(sign, Exponent::inf(), Significand::msb())); 511 } 512 LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS, 513 StorageType v = 0) { 514 return RetT(encode(sign, Exponent::inf(), 515 Significand::msb() | 516 (v ? Significand(v) : (Significand::msb() >> 2)))); 517 } 518 LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS, 519 StorageType v = 0) { 520 return RetT(encode(sign, Exponent::inf(), 521 Significand::msb() | (Significand::msb() >> 1) | 522 Significand(v))); 523 } 524 525 // Observers 526 LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; } 527 LIBC_INLINE constexpr bool is_nan() const { 528 // Most encoding forms from the table found in 529 // https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format 530 // are interpreted as NaN. 531 // More precisely : 532 // - Pseudo-Infinity 533 // - Pseudo Not a Number 534 // - Signalling Not a Number 535 // - Floating-point Indefinite 536 // - Quiet Not a Number 537 // - Unnormal 538 // This can be reduced to the following logic: 539 if (exp_bits() == encode(Exponent::inf())) 540 return !is_inf(); 541 if (exp_bits() != encode(Exponent::subnormal())) 542 return (sig_bits() & encode(Significand::msb())) == 0; 543 return false; 544 } 545 LIBC_INLINE constexpr bool is_quiet_nan() const { 546 return exp_sig_bits() >= 547 encode(Exponent::inf(), 548 Significand::msb() | (Significand::msb() >> 1)); 549 } 550 LIBC_INLINE constexpr bool is_signaling_nan() const { 551 return is_nan() && !is_quiet_nan(); 552 } 553 LIBC_INLINE constexpr bool is_inf() const { 554 return exp_sig_bits() == encode(Exponent::inf(), Significand::msb()); 555 } 556 LIBC_INLINE constexpr bool is_finite() const { 557 return !is_inf() && !is_nan(); 558 } 559 LIBC_INLINE 560 constexpr bool is_subnormal() const { 561 return exp_bits() == encode(Exponent::subnormal()); 562 } 563 LIBC_INLINE constexpr bool is_normal() const { 564 const auto exp = 
exp_bits(); 565 if (exp == encode(Exponent::subnormal()) || exp == encode(Exponent::inf())) 566 return false; 567 return get_implicit_bit(); 568 } 569 LIBC_INLINE constexpr RetT next_toward_inf() const { 570 if (is_finite()) { 571 if (exp_sig_bits() == max_normal().uintval()) { 572 return inf(sign()); 573 } else if (exp_sig_bits() == max_subnormal().uintval()) { 574 return min_normal(sign()); 575 } else if (sig_bits() == SIG_MASK) { 576 return RetT(encode(sign(), ++biased_exponent(), Significand::zero())); 577 } else { 578 return RetT(bits + StorageType(1)); 579 } 580 } 581 return RetT(bits); 582 } 583 584 LIBC_INLINE constexpr StorageType get_explicit_mantissa() const { 585 return sig_bits(); 586 } 587 588 // This functions is specific to FPRepSem<FPType::X86_Binary80>. 589 // TODO: Remove if possible. 590 LIBC_INLINE constexpr bool get_implicit_bit() const { 591 return static_cast<bool>(bits & EXPLICIT_BIT_MASK); 592 } 593 594 // This functions is specific to FPRepSem<FPType::X86_Binary80>. 595 // TODO: Remove if possible. 596 LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) { 597 if (get_implicit_bit() != implicitVal) 598 bits ^= EXPLICIT_BIT_MASK; 599 } 600 }; 601 602 // 'FPRepImpl' is the bottom of the class hierarchy that only deals with 603 // 'FPType'. The operations dealing with specific float semantics are 604 // implemented by 'FPRepSem' above and specialized when needed. 605 // 606 // The 'RetT' type is being propagated up to 'FPRepSem' so that the functions 607 // creating new values (Builders) can return the appropriate type. That is, when 608 // creating a value through 'FPBits' below the builder will return an 'FPBits' 609 // value. 610 // FPBits<float>::zero(); // returns an FPBits<> 611 // 612 // When we don't care about specific C++ floating point type we can use 613 // 'FPRep' and specify the 'FPType' directly. 
614 // FPRep<FPType::IEEE754_Binary32:>::zero() // returns an FPRep<> 615 template <FPType fp_type, typename RetT> 616 struct FPRepImpl : public FPRepSem<fp_type, RetT> { 617 using UP = FPRepSem<fp_type, RetT>; 618 using StorageType = typename UP::StorageType; 619 620 protected: 621 using UP::bits; 622 using UP::encode; 623 using UP::exp_bits; 624 using UP::exp_sig_bits; 625 626 using typename UP::BiasedExponent; 627 using typename UP::Exponent; 628 using typename UP::Significand; 629 630 using UP::FP_MASK; 631 632 public: 633 // Constants. 634 using UP::EXP_BIAS; 635 using UP::EXP_MASK; 636 using UP::FRACTION_MASK; 637 using UP::SIG_LEN; 638 using UP::SIG_MASK; 639 using UP::SIGN_MASK; 640 LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT = 641 (1 << UP::EXP_LEN) - 1; 642 643 // CTors 644 LIBC_INLINE constexpr FPRepImpl() = default; 645 LIBC_INLINE constexpr explicit FPRepImpl(StorageType x) : UP(x) {} 646 647 // Comparison 648 LIBC_INLINE constexpr friend bool operator==(FPRepImpl a, FPRepImpl b) { 649 return a.uintval() == b.uintval(); 650 } 651 LIBC_INLINE constexpr friend bool operator!=(FPRepImpl a, FPRepImpl b) { 652 return a.uintval() != b.uintval(); 653 } 654 655 // Representation 656 LIBC_INLINE constexpr StorageType uintval() const { return bits & FP_MASK; } 657 LIBC_INLINE constexpr void set_uintval(StorageType value) { 658 bits = (value & FP_MASK); 659 } 660 661 // Builders 662 using UP::inf; 663 using UP::max_normal; 664 using UP::max_subnormal; 665 using UP::min_normal; 666 using UP::min_subnormal; 667 using UP::one; 668 using UP::quiet_nan; 669 using UP::signaling_nan; 670 using UP::zero; 671 672 // Modifiers 673 LIBC_INLINE constexpr RetT abs() const { 674 return RetT(static_cast<StorageType>(bits & UP::EXP_SIG_MASK)); 675 } 676 677 // Observers 678 using UP::get_explicit_mantissa; 679 using UP::is_finite; 680 using UP::is_inf; 681 using UP::is_nan; 682 using UP::is_normal; 683 using UP::is_quiet_nan; 684 using UP::is_signaling_nan; 685 
using UP::is_subnormal; 686 using UP::is_zero; 687 using UP::next_toward_inf; 688 using UP::sign; 689 LIBC_INLINE constexpr bool is_inf_or_nan() const { return !is_finite(); } 690 LIBC_INLINE constexpr bool is_neg() const { return sign().is_neg(); } 691 LIBC_INLINE constexpr bool is_pos() const { return sign().is_pos(); } 692 693 LIBC_INLINE constexpr uint16_t get_biased_exponent() const { 694 return static_cast<uint16_t>(static_cast<uint32_t>(UP::biased_exponent())); 695 } 696 697 LIBC_INLINE constexpr void set_biased_exponent(StorageType biased) { 698 UP::set_biased_exponent(BiasedExponent(static_cast<uint32_t>(biased))); 699 } 700 701 LIBC_INLINE constexpr int get_exponent() const { 702 return static_cast<int32_t>(Exponent(UP::biased_exponent())); 703 } 704 705 // If the number is subnormal, the exponent is treated as if it were the 706 // minimum exponent for a normal number. This is to keep continuity between 707 // the normal and subnormal ranges, but it causes problems for functions where 708 // values are calculated from the exponent, since just subtracting the bias 709 // will give a slightly incorrect result. Additionally, zero has an exponent 710 // of zero, and that should actually be treated as zero. 711 LIBC_INLINE constexpr int get_explicit_exponent() const { 712 Exponent exponent(UP::biased_exponent()); 713 if (is_zero()) 714 exponent = Exponent::zero(); 715 if (exponent == Exponent::subnormal()) 716 exponent = Exponent::min(); 717 return static_cast<int32_t>(exponent); 718 } 719 720 LIBC_INLINE constexpr StorageType get_mantissa() const { 721 return bits & FRACTION_MASK; 722 } 723 724 LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) { 725 bits = UP::merge(bits, mantVal, FRACTION_MASK); 726 } 727 728 LIBC_INLINE constexpr void set_significand(StorageType sigVal) { 729 bits = UP::merge(bits, sigVal, SIG_MASK); 730 } 731 // Unsafe function to create a floating point representation. 
732 // It simply packs the sign, biased exponent and mantissa values without 733 // checking bound nor normalization. 734 // 735 // WARNING: For X86 Extended Precision, implicit bit needs to be set correctly 736 // in the 'mantissa' by the caller. This function will not check for its 737 // validity. 738 // 739 // FIXME: Use an uint32_t for 'biased_exp'. 740 LIBC_INLINE static constexpr RetT 741 create_value(Sign sign, StorageType biased_exp, StorageType mantissa) { 742 return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)), 743 Significand(mantissa))); 744 } 745 746 // The function converts integer number and unbiased exponent to proper 747 // float T type: 748 // Result = number * 2^(ep+1 - exponent_bias) 749 // Be careful! 750 // 1) "ep" is the raw exponent value. 751 // 2) The function adds +1 to ep for seamless normalized to denormalized 752 // transition. 753 // 3) The function does not check exponent high limit. 754 // 4) "number" zero value is not processed correctly. 755 // 5) Number is unsigned, so the result can be only positive. 756 LIBC_INLINE static constexpr RetT make_value(StorageType number, int ep) { 757 FPRepImpl result(0); 758 int lz = 759 UP::FRACTION_LEN + 1 - (UP::STORAGE_LEN - cpp::countl_zero(number)); 760 761 number <<= lz; 762 ep -= lz; 763 764 if (LIBC_LIKELY(ep >= 0)) { 765 // Implicit number bit will be removed by mask 766 result.set_significand(number); 767 result.set_biased_exponent(static_cast<StorageType>(ep + 1)); 768 } else { 769 result.set_significand(number >> static_cast<unsigned>(-ep)); 770 } 771 return RetT(result.uintval()); 772 } 773 }; 774 775 // A generic class to manipulate floating point formats. 776 // It derives its functionality to FPRepImpl above. 
777 template <FPType fp_type> 778 struct FPRep : public FPRepImpl<fp_type, FPRep<fp_type>> { 779 using UP = FPRepImpl<fp_type, FPRep<fp_type>>; 780 using StorageType = typename UP::StorageType; 781 using UP::UP; 782 783 LIBC_INLINE constexpr explicit operator StorageType() const { 784 return UP::uintval(); 785 } 786 }; 787 788 } // namespace internal 789 790 // Returns the FPType corresponding to C++ type T on the host. 791 template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() { 792 using UnqualT = cpp::remove_cv_t<T>; 793 if constexpr (cpp::is_same_v<UnqualT, float> && __FLT_MANT_DIG__ == 24) 794 return FPType::IEEE754_Binary32; 795 else if constexpr (cpp::is_same_v<UnqualT, double> && __DBL_MANT_DIG__ == 53) 796 return FPType::IEEE754_Binary64; 797 else if constexpr (cpp::is_same_v<UnqualT, long double>) { 798 if constexpr (__LDBL_MANT_DIG__ == 53) 799 return FPType::IEEE754_Binary64; 800 else if constexpr (__LDBL_MANT_DIG__ == 64) 801 return FPType::X86_Binary80; 802 else if constexpr (__LDBL_MANT_DIG__ == 113) 803 return FPType::IEEE754_Binary128; 804 } 805 #if defined(LIBC_TYPES_HAS_FLOAT16) 806 else if constexpr (cpp::is_same_v<UnqualT, float16>) 807 return FPType::IEEE754_Binary16; 808 #endif 809 #if defined(LIBC_TYPES_HAS_FLOAT128) 810 else if constexpr (cpp::is_same_v<UnqualT, float128>) 811 return FPType::IEEE754_Binary128; 812 #endif 813 else if constexpr (cpp::is_same_v<UnqualT, bfloat16>) 814 return FPType::BFloat16; 815 else 816 static_assert(cpp::always_false<UnqualT>, "Unsupported type"); 817 } 818 819 // ----------------------------------------------------------------------------- 820 // **** WARNING **** 821 // This interface is shared with libc++, if you change this interface you need 822 // to update it in both libc and libc++. You should also be careful when adding 823 // dependencies to this file, since it needs to build for all libc++ targets. 
824 // ----------------------------------------------------------------------------- 825 // A generic class to manipulate C++ floating point formats. 826 // It derives its functionality to FPRepImpl above. 827 template <typename T> 828 struct FPBits final : public internal::FPRepImpl<get_fp_type<T>(), FPBits<T>> { 829 static_assert(cpp::is_floating_point_v<T>, 830 "FPBits instantiated with invalid type."); 831 using UP = internal::FPRepImpl<get_fp_type<T>(), FPBits<T>>; 832 using StorageType = typename UP::StorageType; 833 834 // Constructors. 835 LIBC_INLINE constexpr FPBits() = default; 836 837 template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) { 838 using Unqual = typename cpp::remove_cv_t<XType>; 839 if constexpr (cpp::is_same_v<Unqual, T>) { 840 UP::bits = cpp::bit_cast<StorageType>(x); 841 } else if constexpr (cpp::is_same_v<Unqual, StorageType>) { 842 UP::bits = x; 843 } else { 844 // We don't want accidental type promotions/conversions, so we require 845 // exact type match. 846 static_assert(cpp::always_false<XType>); 847 } 848 } 849 850 // Floating-point conversions. 851 LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(UP::bits); } 852 }; 853 854 } // namespace fputil 855 } // namespace LIBC_NAMESPACE_DECL 856 857 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 858