xref: /freebsd/contrib/llvm-project/libc/src/__support/FPUtil/bfloat16.h (revision bb722a7d0f1642bff6487f943ad0427799a6e5bf)
1 //===-- Definition of bfloat16 data type. -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
11 
12 #include "src/__support/CPP/bit.h"
13 #include "src/__support/CPP/type_traits.h"
14 #include "src/__support/FPUtil/cast.h"
15 #include "src/__support/FPUtil/dyadic_float.h"
16 #include "src/__support/macros/config.h"
17 #include "src/__support/macros/properties/types.h"
18 
19 #include <stdint.h>
20 
21 namespace LIBC_NAMESPACE_DECL {
22 namespace fputil {
23 
24 struct BFloat16 {
25   uint16_t bits;
26 
27   LIBC_INLINE BFloat16() = default;
28 
BFloat16BFloat1629   LIBC_INLINE constexpr explicit BFloat16(uint16_t bits) : bits(bits) {}
30 
BFloat16BFloat1631   template <typename T> LIBC_INLINE constexpr explicit BFloat16(T value) {
32     if constexpr (cpp::is_floating_point_v<T>) {
33       bits = fputil::cast<bfloat16>(value).bits;
34     } else if constexpr (cpp::is_integral_v<T>) {
35       Sign sign = Sign::POS;
36 
37       if constexpr (cpp::is_signed_v<T>) {
38         if (value < 0) {
39           sign = Sign::NEG;
40           value = -value;
41         }
42       }
43 
44       fputil::DyadicFloat<cpp::numeric_limits<cpp::make_unsigned_t<T>>::digits>
45           xd(sign, 0, value);
46       bits = xd.template as<bfloat16, /*ShouldSignalExceptions=*/true>().bits;
47 
48     } else {
49       bits = fputil::cast<bfloat16>(static_cast<float>(value)).bits;
50     }
51   }
52 
53   template <cpp::enable_if_t<fputil::get_fp_type<float>() ==
54                                  fputil::FPType::IEEE754_Binary32,
55                              int> = 0>
56   LIBC_INLINE constexpr operator float() const {
57     uint32_t x_bits = static_cast<uint32_t>(bits) << 16U;
58     return cpp::bit_cast<float>(x_bits);
59   }
60 }; // struct BFloat16
61 
62 } // namespace fputil
63 } // namespace LIBC_NAMESPACE_DECL
64 
65 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_BFLOAT16_H
66