xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/advsimd/sinh.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner  * Double-precision vector sinh(x) function.
3*f3087befSAndrew Turner  *
4*f3087befSAndrew Turner  * Copyright (c) 2022-2024, Arm Limited.
5*f3087befSAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner  */
7*f3087befSAndrew Turner 
8*f3087befSAndrew Turner #include "v_math.h"
9*f3087befSAndrew Turner #include "test_sig.h"
10*f3087befSAndrew Turner #include "test_defs.h"
11*f3087befSAndrew Turner #include "v_expm1_inline.h"
12*f3087befSAndrew Turner 
13*f3087befSAndrew Turner static const struct data
14*f3087befSAndrew Turner {
15*f3087befSAndrew Turner   struct v_expm1_data d;
16*f3087befSAndrew Turner   uint64x2_t halff;
17*f3087befSAndrew Turner #if WANT_SIMD_EXCEPT
18*f3087befSAndrew Turner   uint64x2_t tiny_bound, thresh;
19*f3087befSAndrew Turner #else
20*f3087befSAndrew Turner   float64x2_t large_bound;
21*f3087befSAndrew Turner #endif
22*f3087befSAndrew Turner } data = {
23*f3087befSAndrew Turner   .d = V_EXPM1_DATA,
24*f3087befSAndrew Turner   .halff = V2 (0x3fe0000000000000),
25*f3087befSAndrew Turner #if WANT_SIMD_EXCEPT
26*f3087befSAndrew Turner   /* 2^-26, below which sinh(x) rounds to x.  */
27*f3087befSAndrew Turner   .tiny_bound = V2 (0x3e50000000000000),
28*f3087befSAndrew Turner   /* asuint(large_bound) - asuint(tiny_bound).  */
29*f3087befSAndrew Turner   .thresh = V2 (0x0230000000000000),
30*f3087befSAndrew Turner #else
31*f3087befSAndrew Turner   /* 2^9. expm1 helper overflows for large input.  */
32*f3087befSAndrew Turner   .large_bound = V2 (0x1p+9),
33*f3087befSAndrew Turner #endif
34*f3087befSAndrew Turner };
35*f3087befSAndrew Turner 
36*f3087befSAndrew Turner static float64x2_t NOINLINE VPCS_ATTR
special_case(float64x2_t x)37*f3087befSAndrew Turner special_case (float64x2_t x)
38*f3087befSAndrew Turner {
39*f3087befSAndrew Turner   return v_call_f64 (sinh, x, x, v_u64 (-1));
40*f3087befSAndrew Turner }
41*f3087befSAndrew Turner 
42*f3087befSAndrew Turner /* Approximation for vector double-precision sinh(x) using expm1.
43*f3087befSAndrew Turner    sinh(x) = (exp(x) - exp(-x)) / 2.
44*f3087befSAndrew Turner    The greatest observed error is 2.52 ULP:
45*f3087befSAndrew Turner    _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2
46*f3087befSAndrew Turner 				       want -0x1.ac2f05bb66fc9p-2.  */
V_NAME_D1(sinh)47*f3087befSAndrew Turner float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
48*f3087befSAndrew Turner {
49*f3087befSAndrew Turner   const struct data *d = ptr_barrier (&data);
50*f3087befSAndrew Turner 
51*f3087befSAndrew Turner   float64x2_t ax = vabsq_f64 (x);
52*f3087befSAndrew Turner   uint64x2_t ix = vreinterpretq_u64_f64 (x);
53*f3087befSAndrew Turner   float64x2_t halfsign = vreinterpretq_f64_u64 (
54*f3087befSAndrew Turner       vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff));
55*f3087befSAndrew Turner 
56*f3087befSAndrew Turner #if WANT_SIMD_EXCEPT
57*f3087befSAndrew Turner   uint64x2_t special = vcgeq_u64 (
58*f3087befSAndrew Turner       vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
59*f3087befSAndrew Turner #else
60*f3087befSAndrew Turner   uint64x2_t special = vcageq_f64 (x, d->large_bound);
61*f3087befSAndrew Turner #endif
62*f3087befSAndrew Turner 
63*f3087befSAndrew Turner   /* Fall back to scalar variant for all lanes if any of them are special.  */
64*f3087befSAndrew Turner   if (unlikely (v_any_u64 (special)))
65*f3087befSAndrew Turner     return special_case (x);
66*f3087befSAndrew Turner 
67*f3087befSAndrew Turner   /* Up to the point that expm1 overflows, we can use it to calculate sinh
68*f3087befSAndrew Turner      using a slight rearrangement of the definition of sinh. This allows us to
69*f3087befSAndrew Turner      retain acceptable accuracy for very small inputs.  */
70*f3087befSAndrew Turner   float64x2_t t = expm1_inline (ax, &d->d);
71*f3087befSAndrew Turner   t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
72*f3087befSAndrew Turner   return vmulq_f64 (t, halfsign);
73*f3087befSAndrew Turner }
74*f3087befSAndrew Turner 
75*f3087befSAndrew Turner TEST_SIG (V, D, 1, sinh, -10.0, 10.0)
76*f3087befSAndrew Turner TEST_ULP (V_NAME_D1 (sinh), 2.02)
77*f3087befSAndrew Turner TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (sinh), WANT_SIMD_EXCEPT)
78*f3087befSAndrew Turner TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0, 0x1p-26, 1000)
79*f3087befSAndrew Turner TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000)
80*f3087befSAndrew Turner TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p9, inf, 1000)
81