xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/experimental/asinhf_3u5.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner  * Single-precision asinh(x) function.
3*f3087befSAndrew Turner  *
4*f3087befSAndrew Turner  * Copyright (c) 2022-2024, Arm Limited.
5*f3087befSAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner  */
7*f3087befSAndrew Turner 
8*f3087befSAndrew Turner #include "poly_scalar_f32.h"
9*f3087befSAndrew Turner #include "math_config.h"
10*f3087befSAndrew Turner #include "test_sig.h"
11*f3087befSAndrew Turner #include "test_defs.h"
12*f3087befSAndrew Turner 
13*f3087befSAndrew Turner #define AbsMask (0x7fffffff)
14*f3087befSAndrew Turner #define SqrtFltMax (0x1.749e96p+10f)
15*f3087befSAndrew Turner #define Ln2 (0x1.62e4p-1f)
16*f3087befSAndrew Turner #define One (0x3f8)
17*f3087befSAndrew Turner #define ExpM12 (0x398)
18*f3087befSAndrew Turner 
19*f3087befSAndrew Turner /* asinhf approximation using a variety of approaches on different intervals:
20*f3087befSAndrew Turner 
21*f3087befSAndrew Turner    |x| < 2^-12: Return x. Function is exactly rounded in this region.
22*f3087befSAndrew Turner 
23*f3087befSAndrew Turner    |x| < 1.0: Use custom order-8 polynomial. The largest observed
24*f3087befSAndrew Turner      error in this region is 1.3ulps:
25*f3087befSAndrew Turner      asinhf(0x1.f0f74cp-1) got 0x1.b88de4p-1 want 0x1.b88de2p-1.
26*f3087befSAndrew Turner 
27*f3087befSAndrew Turner    |x| <= SqrtFltMax: Calculate the result directly using the
28*f3087befSAndrew Turner      definition of asinh(x) = ln(x + sqrt(x*x + 1)). The largest
29*f3087befSAndrew Turner      observed error in this region is 1.99ulps.
30*f3087befSAndrew Turner      asinhf(0x1.00e358p+0) got 0x1.c4849ep-1 want 0x1.c484a2p-1.
31*f3087befSAndrew Turner 
32*f3087befSAndrew Turner    |x| > SqrtFltMax: We cannot square x without overflow at a low
33*f3087befSAndrew Turner      cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot
34*f3087befSAndrew Turner      even double x without overflow, so calculate this as ln(x) +
35*f3087befSAndrew Turner      ln(2). This largest observed error in this region is 3.39ulps.
36*f3087befSAndrew Turner      asinhf(0x1.749e9ep+10) got 0x1.fffff8p+2 want 0x1.fffffep+2.  */
37*f3087befSAndrew Turner float
asinhf(float x)38*f3087befSAndrew Turner asinhf (float x)
39*f3087befSAndrew Turner {
40*f3087befSAndrew Turner   uint32_t ix = asuint (x);
41*f3087befSAndrew Turner   uint32_t ia = ix & AbsMask;
42*f3087befSAndrew Turner   uint32_t ia12 = ia >> 20;
43*f3087befSAndrew Turner   float ax = asfloat (ia);
44*f3087befSAndrew Turner   uint32_t sign = ix & ~AbsMask;
45*f3087befSAndrew Turner 
46*f3087befSAndrew Turner   if (unlikely (ia12 < ExpM12 || ia == 0x7f800000))
47*f3087befSAndrew Turner     return x;
48*f3087befSAndrew Turner 
49*f3087befSAndrew Turner   if (unlikely (ia12 >= 0x7f8))
50*f3087befSAndrew Turner     return __math_invalidf (x);
51*f3087befSAndrew Turner 
52*f3087befSAndrew Turner   if (ia12 < One)
53*f3087befSAndrew Turner     {
54*f3087befSAndrew Turner       float x2 = ax * ax;
55*f3087befSAndrew Turner       float p = estrin_7_f32 (ax, x2, x2 * x2, __asinhf_data.coeffs);
56*f3087befSAndrew Turner       float y = fmaf (x2, p, ax);
57*f3087befSAndrew Turner       return asfloat (asuint (y) | sign);
58*f3087befSAndrew Turner     }
59*f3087befSAndrew Turner 
60*f3087befSAndrew Turner   if (unlikely (ax > SqrtFltMax))
61*f3087befSAndrew Turner     {
62*f3087befSAndrew Turner       return asfloat (asuint (logf (ax) + Ln2) | sign);
63*f3087befSAndrew Turner     }
64*f3087befSAndrew Turner 
65*f3087befSAndrew Turner   return asfloat (asuint (logf (ax + sqrtf (ax * ax + 1))) | sign);
66*f3087befSAndrew Turner }
67*f3087befSAndrew Turner 
68*f3087befSAndrew Turner TEST_SIG (S, F, 1, asinh, -10.0, 10.0)
69*f3087befSAndrew Turner TEST_ULP (asinhf, 2.9)
70*f3087befSAndrew Turner TEST_INTERVAL (asinhf, 0, 0x1p-12, 5000)
71*f3087befSAndrew Turner TEST_INTERVAL (asinhf, 0x1p-12, 1.0, 50000)
72*f3087befSAndrew Turner TEST_INTERVAL (asinhf, 1.0, 0x1p11, 50000)
73*f3087befSAndrew Turner TEST_INTERVAL (asinhf, 0x1p11, 0x1p127, 20000)
74