xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/experimental/asinh_2u5.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner  * Double-precision asinh(x) function
3*f3087befSAndrew Turner  *
4*f3087befSAndrew Turner  * Copyright (c) 2022-2024, Arm Limited.
5*f3087befSAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner  */
7*f3087befSAndrew Turner #include "mathlib.h"
8*f3087befSAndrew Turner #include "poly_scalar_f64.h"
9*f3087befSAndrew Turner #include "math_config.h"
10*f3087befSAndrew Turner #include "test_sig.h"
11*f3087befSAndrew Turner #include "test_defs.h"
12*f3087befSAndrew Turner 
/* The integer constants below are compared against, or masked with, the raw
   64-bit representation of the argument obtained via asuint64.  */
#define AbsMask 0x7fffffffffffffff /* Every bit except the sign bit.  */
#define ExpM26 0x3e50000000000000 /* asuint64(0x1.0p-26).  */
#define One 0x3ff0000000000000	  /* asuint64(1.0).  */
#define Exp511 0x5fe0000000000000 /* asuint64(0x1.0p511).  */
/* Added to log(|x|) for huge |x|, where asinh(x) ~= ln(2x) = ln(x) + ln(2).  */
#define Ln2 0x1.62e42fefa39efp-1
18*f3087befSAndrew Turner 
19*f3087befSAndrew Turner /* Scalar double-precision asinh implementation. This routine uses different
20*f3087befSAndrew Turner    approaches on different intervals:
21*f3087befSAndrew Turner 
22*f3087befSAndrew Turner    |x| < 2^-26: Return x. Function is exact in this region.
23*f3087befSAndrew Turner 
24*f3087befSAndrew Turner    |x| < 1: Use custom order-17 polynomial. This is least accurate close to 1.
25*f3087befSAndrew Turner      The largest observed error in this region is 1.47 ULPs:
26*f3087befSAndrew Turner      asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
27*f3087befSAndrew Turner 				want 0x1.c1d6bf874019cp-1.
28*f3087befSAndrew Turner 
29*f3087befSAndrew Turner    |x| < 2^511: Upper bound of this region is close to sqrt(DBL_MAX). Calculate
30*f3087befSAndrew Turner      the result directly using the definition asinh(x) = ln(x + sqrt(x*x + 1)).
31*f3087befSAndrew Turner      The largest observed error in this region is 2.03 ULPs:
32*f3087befSAndrew Turner      asinh(-0x1.00094e0f39574p+0) got -0x1.c3508eb6a681ep-1
33*f3087befSAndrew Turner 				 want -0x1.c3508eb6a682p-1.
34*f3087befSAndrew Turner 
35*f3087befSAndrew Turner    |x| >= 2^511: We cannot square x without overflow at a low
36*f3087befSAndrew Turner      cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot
37*f3087befSAndrew Turner      even double x without overflow, so calculate this as ln(x) +
38*f3087befSAndrew Turner      ln(2). The largest observed error in this region is 0.98 ULPs at many
39*f3087befSAndrew Turner      values, for instance:
40*f3087befSAndrew Turner      asinh(0x1.5255a4cf10319p+975) got 0x1.52652f4cb26cbp+9
41*f3087befSAndrew Turner 				  want 0x1.52652f4cb26ccp+9.  */
42*f3087befSAndrew Turner double
asinh(double x)43*f3087befSAndrew Turner asinh (double x)
44*f3087befSAndrew Turner {
45*f3087befSAndrew Turner   uint64_t ix = asuint64 (x);
46*f3087befSAndrew Turner   uint64_t ia = ix & AbsMask;
47*f3087befSAndrew Turner   double ax = asdouble (ia);
48*f3087befSAndrew Turner   uint64_t sign = ix & ~AbsMask;
49*f3087befSAndrew Turner 
50*f3087befSAndrew Turner   if (ia < ExpM26)
51*f3087befSAndrew Turner     {
52*f3087befSAndrew Turner       return x;
53*f3087befSAndrew Turner     }
54*f3087befSAndrew Turner 
55*f3087befSAndrew Turner   if (ia < One)
56*f3087befSAndrew Turner     {
57*f3087befSAndrew Turner       double x2 = x * x;
58*f3087befSAndrew Turner       double z2 = x2 * x2;
59*f3087befSAndrew Turner       double z4 = z2 * z2;
60*f3087befSAndrew Turner       double z8 = z4 * z4;
61*f3087befSAndrew Turner       double p = estrin_17_f64 (x2, z2, z4, z8, z8 * z8, __asinh_data.poly);
62*f3087befSAndrew Turner       double y = fma (p, x2 * ax, ax);
63*f3087befSAndrew Turner       return asdouble (asuint64 (y) | sign);
64*f3087befSAndrew Turner     }
65*f3087befSAndrew Turner 
66*f3087befSAndrew Turner   if (unlikely (ia >= Exp511))
67*f3087befSAndrew Turner     {
68*f3087befSAndrew Turner       return asdouble (asuint64 (log (ax) + Ln2) | sign);
69*f3087befSAndrew Turner     }
70*f3087befSAndrew Turner 
71*f3087befSAndrew Turner   return asdouble (asuint64 (log (ax + sqrt (ax * ax + 1))) | sign);
72*f3087befSAndrew Turner }
73*f3087befSAndrew Turner 
/* Test-harness metadata for asinh. TEST_SIG, TEST_ULP and TEST_INTERVAL are
   not defined in this file; presumably they come from test_sig.h and
   test_defs.h, which are included at the top.  */
TEST_SIG (S, D, 1, asinh, -10.0, 10.0)
/* NOTE(review): the comment on asinh reports up to 2.03 ULPs of observed
   error, so 1.54 is presumably that bound expressed with an offset - confirm
   against the TEST_ULP definition.  */
TEST_ULP (asinh, 1.54)
/* Intervals are split at 2^-26, 1.0 and 100.0 for both signs; the first two
   match the branch thresholds in asinh. The last argument is presumably a
   sample count - TODO confirm.  */
TEST_INTERVAL (asinh, -0x1p-26, 0x1p-26, 50000)
TEST_INTERVAL (asinh, 0x1p-26, 1.0, 40000)
TEST_INTERVAL (asinh, -0x1p-26, -1.0, 10000)
TEST_INTERVAL (asinh, 1.0, 100.0, 40000)
TEST_INTERVAL (asinh, -1.0, -100.0, 10000)
TEST_INTERVAL (asinh, 100.0, inf, 50000)
TEST_INTERVAL (asinh, -100.0, -inf, 10000)
83