xref: /freebsd/contrib/arm-optimized-routines/pl/math/v_acoshf_3u1.c (revision 072a4ba82a01476eaee33781ccd241033eefcf0b)
1*072a4ba8SAndrew Turner /*
2*072a4ba8SAndrew Turner  * Single-precision vector acosh(x) function.
3*072a4ba8SAndrew Turner  * Copyright (c) 2023, Arm Limited.
4*072a4ba8SAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
5*072a4ba8SAndrew Turner  */
6*072a4ba8SAndrew Turner 
7*072a4ba8SAndrew Turner #include "v_math.h"
8*072a4ba8SAndrew Turner #include "pl_sig.h"
9*072a4ba8SAndrew Turner #include "pl_test.h"
10*072a4ba8SAndrew Turner 
11*072a4ba8SAndrew Turner #define SignMask 0x80000000
12*072a4ba8SAndrew Turner #define One 0x3f800000
13*072a4ba8SAndrew Turner #define SquareLim 0x5f800000 /* asuint(0x1p64).  */
14*072a4ba8SAndrew Turner 
15*072a4ba8SAndrew Turner #if V_SUPPORTED
16*072a4ba8SAndrew Turner 
17*072a4ba8SAndrew Turner #include "v_log1pf_inline.h"
18*072a4ba8SAndrew Turner 
19*072a4ba8SAndrew Turner static NOINLINE VPCS_ATTR v_f32_t
20*072a4ba8SAndrew Turner special_case (v_f32_t x, v_f32_t y, v_u32_t special)
21*072a4ba8SAndrew Turner {
22*072a4ba8SAndrew Turner   return v_call_f32 (acoshf, x, y, special);
23*072a4ba8SAndrew Turner }
24*072a4ba8SAndrew Turner 
25*072a4ba8SAndrew Turner /* Vector approximation for single-precision acosh, based on log1p. Maximum
26*072a4ba8SAndrew Turner    error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it
27*072a4ba8SAndrew Turner    is 2.78 ULP:
28*072a4ba8SAndrew Turner    __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3
29*072a4ba8SAndrew Turner 			   want 0x1.ef9ea2p-3.
30*072a4ba8SAndrew Turner    With exceptions disabled, we can compute u with a shorter dependency chain,
31*072a4ba8SAndrew Turner    which gives maximum error of 3.07 ULP:
32*072a4ba8SAndrew Turner   __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4
33*072a4ba8SAndrew Turner 			   want 0x1.fbc7f4p-4.  */
34*072a4ba8SAndrew Turner 
35*072a4ba8SAndrew Turner VPCS_ATTR v_f32_t V_NAME (acoshf) (v_f32_t x)
36*072a4ba8SAndrew Turner {
37*072a4ba8SAndrew Turner   v_u32_t ix = v_as_u32_f32 (x);
38*072a4ba8SAndrew Turner   v_u32_t special = v_cond_u32 ((ix - One) >= (SquareLim - One));
39*072a4ba8SAndrew Turner 
40*072a4ba8SAndrew Turner #if WANT_SIMD_EXCEPT
41*072a4ba8SAndrew Turner   /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use
42*072a4ba8SAndrew Turner      only xm1 to calculate u, as operating on x will trigger invalid for NaN. */
43*072a4ba8SAndrew Turner   v_f32_t xm1 = v_sel_f32 (special, v_f32 (1), x - 1);
44*072a4ba8SAndrew Turner   v_f32_t u = v_fma_f32 (xm1, xm1, 2 * xm1);
45*072a4ba8SAndrew Turner #else
46*072a4ba8SAndrew Turner   v_f32_t xm1 = x - 1;
47*072a4ba8SAndrew Turner   v_f32_t u = xm1 * (x + 1.0f);
48*072a4ba8SAndrew Turner #endif
49*072a4ba8SAndrew Turner   v_f32_t y = log1pf_inline (xm1 + v_sqrt_f32 (u));
50*072a4ba8SAndrew Turner 
51*072a4ba8SAndrew Turner   if (unlikely (v_any_u32 (special)))
52*072a4ba8SAndrew Turner     return special_case (x, y, special);
53*072a4ba8SAndrew Turner   return y;
54*072a4ba8SAndrew Turner }
55*072a4ba8SAndrew Turner VPCS_ALIAS
56*072a4ba8SAndrew Turner 
57*072a4ba8SAndrew Turner PL_SIG (V, F, 1, acosh, 1.0, 10.0)
58*072a4ba8SAndrew Turner #if WANT_SIMD_EXCEPT
59*072a4ba8SAndrew Turner PL_TEST_ULP (V_NAME (acoshf), 2.29)
60*072a4ba8SAndrew Turner #else
61*072a4ba8SAndrew Turner PL_TEST_ULP (V_NAME (acoshf), 2.58)
62*072a4ba8SAndrew Turner #endif
63*072a4ba8SAndrew Turner PL_TEST_EXPECT_FENV (V_NAME (acoshf), WANT_SIMD_EXCEPT)
64*072a4ba8SAndrew Turner PL_TEST_INTERVAL (V_NAME (acoshf), 0, 1, 500)
65*072a4ba8SAndrew Turner PL_TEST_INTERVAL (V_NAME (acoshf), 1, SquareLim, 100000)
66*072a4ba8SAndrew Turner PL_TEST_INTERVAL (V_NAME (acoshf), SquareLim, inf, 1000)
67*072a4ba8SAndrew Turner PL_TEST_INTERVAL (V_NAME (acoshf), -0, -inf, 1000)
68*072a4ba8SAndrew Turner #endif
69