1*bb722a7dSDimitry Andric //===-- Implementation header for expf16 ------------------------*- C++ -*-===//
2*bb722a7dSDimitry Andric //
3*bb722a7dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*bb722a7dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*bb722a7dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*bb722a7dSDimitry Andric //
7*bb722a7dSDimitry Andric //===----------------------------------------------------------------------===//
8*bb722a7dSDimitry Andric
9*bb722a7dSDimitry Andric #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPF16_H
10*bb722a7dSDimitry Andric #define LLVM_LIBC_SRC___SUPPORT_MATH_EXPF16_H
11*bb722a7dSDimitry Andric
12*bb722a7dSDimitry Andric #include "include/llvm-libc-macros/float16-macros.h"
13*bb722a7dSDimitry Andric
14*bb722a7dSDimitry Andric #ifdef LIBC_TYPES_HAS_FLOAT16
15*bb722a7dSDimitry Andric
16*bb722a7dSDimitry Andric #include "hdr/errno_macros.h"
17*bb722a7dSDimitry Andric #include "hdr/fenv_macros.h"
18*bb722a7dSDimitry Andric #include "src/__support/FPUtil/FEnvImpl.h"
19*bb722a7dSDimitry Andric #include "src/__support/FPUtil/FPBits.h"
20*bb722a7dSDimitry Andric #include "src/__support/FPUtil/PolyEval.h"
21*bb722a7dSDimitry Andric #include "src/__support/FPUtil/cast.h"
22*bb722a7dSDimitry Andric #include "src/__support/FPUtil/except_value_utils.h"
23*bb722a7dSDimitry Andric #include "src/__support/FPUtil/rounding_mode.h"
24*bb722a7dSDimitry Andric #include "src/__support/common.h"
25*bb722a7dSDimitry Andric #include "src/__support/macros/config.h"
26*bb722a7dSDimitry Andric #include "src/__support/macros/optimization.h"
27*bb722a7dSDimitry Andric
28*bb722a7dSDimitry Andric #include "expf16_utils.h"
29*bb722a7dSDimitry Andric
30*bb722a7dSDimitry Andric namespace LIBC_NAMESPACE_DECL {
31*bb722a7dSDimitry Andric
32*bb722a7dSDimitry Andric namespace math {
33*bb722a7dSDimitry Andric
expf16(float16 x)34*bb722a7dSDimitry Andric static constexpr float16 expf16(float16 x) {
35*bb722a7dSDimitry Andric #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
36*bb722a7dSDimitry Andric constexpr fputil::ExceptValues<float16, 2> EXPF16_EXCEPTS_LO = {{
37*bb722a7dSDimitry Andric // (input, RZ output, RU offset, RD offset, RN offset)
38*bb722a7dSDimitry Andric // x = 0x1.de4p-8, expf16(x) = 0x1.01cp+0 (RZ)
39*bb722a7dSDimitry Andric {0x1f79U, 0x3c07U, 1U, 0U, 0U},
40*bb722a7dSDimitry Andric // x = 0x1.73cp-6, expf16(x) = 0x1.05cp+0 (RZ)
41*bb722a7dSDimitry Andric {0x25cfU, 0x3c17U, 1U, 0U, 0U},
42*bb722a7dSDimitry Andric }};
43*bb722a7dSDimitry Andric
44*bb722a7dSDimitry Andric constexpr fputil::ExceptValues<float16, 3> EXPF16_EXCEPTS_HI = {{
45*bb722a7dSDimitry Andric // (input, RZ output, RU offset, RD offset, RN offset)
46*bb722a7dSDimitry Andric // x = 0x1.c34p+0, expf16(x) = 0x1.74cp+2 (RZ)
47*bb722a7dSDimitry Andric {0x3f0dU, 0x45d3U, 1U, 0U, 1U},
48*bb722a7dSDimitry Andric // x = -0x1.488p-5, expf16(x) = 0x1.ebcp-1 (RZ)
49*bb722a7dSDimitry Andric {0xa922U, 0x3bafU, 1U, 0U, 0U},
50*bb722a7dSDimitry Andric // x = -0x1.55p-5, expf16(x) = 0x1.ebp-1 (RZ)
51*bb722a7dSDimitry Andric {0xa954U, 0x3bacU, 1U, 0U, 0U},
52*bb722a7dSDimitry Andric }};
53*bb722a7dSDimitry Andric #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
54*bb722a7dSDimitry Andric
55*bb722a7dSDimitry Andric using FPBits = fputil::FPBits<float16>;
56*bb722a7dSDimitry Andric FPBits x_bits(x);
57*bb722a7dSDimitry Andric
58*bb722a7dSDimitry Andric uint16_t x_u = x_bits.uintval();
59*bb722a7dSDimitry Andric uint16_t x_abs = x_u & 0x7fffU;
60*bb722a7dSDimitry Andric
61*bb722a7dSDimitry Andric // When 0 < |x| <= 2^(-5), or |x| >= 12, or x is NaN.
62*bb722a7dSDimitry Andric if (LIBC_UNLIKELY(x_abs <= 0x2800U || x_abs >= 0x4a00U)) {
63*bb722a7dSDimitry Andric // exp(NaN) = NaN
64*bb722a7dSDimitry Andric if (x_bits.is_nan()) {
65*bb722a7dSDimitry Andric if (x_bits.is_signaling_nan()) {
66*bb722a7dSDimitry Andric fputil::raise_except_if_required(FE_INVALID);
67*bb722a7dSDimitry Andric return FPBits::quiet_nan().get_val();
68*bb722a7dSDimitry Andric }
69*bb722a7dSDimitry Andric
70*bb722a7dSDimitry Andric return x;
71*bb722a7dSDimitry Andric }
72*bb722a7dSDimitry Andric
73*bb722a7dSDimitry Andric // When x >= 12.
74*bb722a7dSDimitry Andric if (x_bits.is_pos() && x_abs >= 0x4a00U) {
75*bb722a7dSDimitry Andric // exp(+inf) = +inf
76*bb722a7dSDimitry Andric if (x_bits.is_inf())
77*bb722a7dSDimitry Andric return FPBits::inf().get_val();
78*bb722a7dSDimitry Andric
79*bb722a7dSDimitry Andric switch (fputil::quick_get_round()) {
80*bb722a7dSDimitry Andric case FE_TONEAREST:
81*bb722a7dSDimitry Andric case FE_UPWARD:
82*bb722a7dSDimitry Andric fputil::set_errno_if_required(ERANGE);
83*bb722a7dSDimitry Andric fputil::raise_except_if_required(FE_OVERFLOW);
84*bb722a7dSDimitry Andric return FPBits::inf().get_val();
85*bb722a7dSDimitry Andric default:
86*bb722a7dSDimitry Andric return FPBits::max_normal().get_val();
87*bb722a7dSDimitry Andric }
88*bb722a7dSDimitry Andric }
89*bb722a7dSDimitry Andric
90*bb722a7dSDimitry Andric // When x <= -18.
91*bb722a7dSDimitry Andric if (x_u >= 0xcc80U) {
92*bb722a7dSDimitry Andric // exp(-inf) = +0
93*bb722a7dSDimitry Andric if (x_bits.is_inf())
94*bb722a7dSDimitry Andric return FPBits::zero().get_val();
95*bb722a7dSDimitry Andric
96*bb722a7dSDimitry Andric fputil::set_errno_if_required(ERANGE);
97*bb722a7dSDimitry Andric fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
98*bb722a7dSDimitry Andric
99*bb722a7dSDimitry Andric switch (fputil::quick_get_round()) {
100*bb722a7dSDimitry Andric case FE_UPWARD:
101*bb722a7dSDimitry Andric return FPBits::min_subnormal().get_val();
102*bb722a7dSDimitry Andric default:
103*bb722a7dSDimitry Andric return FPBits::zero().get_val();
104*bb722a7dSDimitry Andric }
105*bb722a7dSDimitry Andric }
106*bb722a7dSDimitry Andric
107*bb722a7dSDimitry Andric // When 0 < |x| <= 2^(-5).
108*bb722a7dSDimitry Andric if (x_abs <= 0x2800U && !x_bits.is_zero()) {
109*bb722a7dSDimitry Andric #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
110*bb722a7dSDimitry Andric if (auto r = EXPF16_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
111*bb722a7dSDimitry Andric return r.value();
112*bb722a7dSDimitry Andric #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
113*bb722a7dSDimitry Andric
114*bb722a7dSDimitry Andric float xf = x;
115*bb722a7dSDimitry Andric // Degree-3 minimax polynomial generated by Sollya with the following
116*bb722a7dSDimitry Andric // commands:
117*bb722a7dSDimitry Andric // > display = hexadecimal;
118*bb722a7dSDimitry Andric // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
119*bb722a7dSDimitry Andric // > 1 + x * P;
120*bb722a7dSDimitry Andric return fputil::cast<float16>(
121*bb722a7dSDimitry Andric fputil::polyeval(xf, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f));
122*bb722a7dSDimitry Andric }
123*bb722a7dSDimitry Andric }
124*bb722a7dSDimitry Andric
125*bb722a7dSDimitry Andric #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
126*bb722a7dSDimitry Andric if (auto r = EXPF16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
127*bb722a7dSDimitry Andric return r.value();
128*bb722a7dSDimitry Andric #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
129*bb722a7dSDimitry Andric
130*bb722a7dSDimitry Andric // exp(x) = exp(hi + mid) * exp(lo)
131*bb722a7dSDimitry Andric auto [exp_hi_mid, exp_lo] = exp_range_reduction(x);
132*bb722a7dSDimitry Andric return fputil::cast<float16>(exp_hi_mid * exp_lo);
133*bb722a7dSDimitry Andric }
134*bb722a7dSDimitry Andric
135*bb722a7dSDimitry Andric } // namespace math
136*bb722a7dSDimitry Andric
137*bb722a7dSDimitry Andric } // namespace LIBC_NAMESPACE_DECL
138*bb722a7dSDimitry Andric
139*bb722a7dSDimitry Andric #endif // LIBC_TYPES_HAS_FLOAT16
140*bb722a7dSDimitry Andric
141*bb722a7dSDimitry Andric #endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPF16_H
142