xref: /freebsd/contrib/arm-optimized-routines/pl/math/v_atanh_3u5.c (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 /*
2  * Double-precision vector atanh(x) function.
3  *
4  * Copyright (c) 2022-2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #include "v_math.h"
9 #include "pairwise_horner.h"
10 #include "pl_sig.h"
11 #include "pl_test.h"
12 
13 #if V_SUPPORTED
14 
15 #define WANT_V_LOG1P_K0_SHORTCUT 0
16 #include "v_log1p_inline.h"
17 
/* Bit patterns applied to the 64-bit integer view of the input
   (IEEE-754 binary64 encoding): AbsMask keeps every bit except the sign
   bit, Half is the encoding of 0.5 and One is the encoding of 1.0.  */
18 #define AbsMask 0x7fffffffffffffff
19 #define Half 0x3fe0000000000000
20 #define One 0x3ff0000000000000
21 
22 VPCS_ATTR
23 NOINLINE static v_f64_t
24 specialcase (v_f64_t x, v_f64_t y, v_u64_t special)
25 {
26   return v_call_f64 (atanh, x, y, special);
27 }
28 
29 /* Approximation for vector double-precision atanh(x) using modified log1p.
30    The greatest observed error is 3.31 ULP:
31    __v_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
32 				 want 0x1.ffd8ff31b501cp-6.  */
33 VPCS_ATTR
34 v_f64_t V_NAME (atanh) (v_f64_t x)
35 {
36   v_u64_t ix = v_as_u64_f64 (x);
37   v_u64_t sign = ix & ~AbsMask;
38   v_u64_t ia = ix & AbsMask;
39   v_u64_t special = v_cond_u64 (ia >= One);
40   v_f64_t halfsign = v_as_f64_u64 (sign | Half);
41 
42   /* Mask special lanes with 0 to prevent spurious underflow.  */
43   v_f64_t ax = v_sel_f64 (special, v_f64 (0), v_as_f64_u64 (ia));
44   v_f64_t y = halfsign * log1p_inline ((2 * ax) / (1 - ax));
45 
46   if (unlikely (v_any_u64 (special)))
47     return specialcase (x, y, special);
48   return y;
49 }
50 VPCS_ALIAS
51 
/* Signature and test metadata for this routine.
   NOTE(review): the macro definitions are not visible in this file
   (presumably pl_sig.h and pl_test.h) - the reading below is an assumption
   to confirm against those headers: PL_TEST_ULP looks like the accepted
   error bound (cf. the 3.31 ULP maximum quoted above the routine), and each
   PL_TEST_INTERVAL_C appears to name an input range followed by a sample
   count.  The ranges cover tiny inputs, the rest of (-1, 1), and
   out-of-domain inputs from +/-1 to +/-inf.  */
52 PL_SIG (V, D, 1, atanh, -1.0, 1.0)
53 PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (atanh))
54 PL_TEST_ULP (V_NAME (atanh), 3.32)
55 PL_TEST_INTERVAL_C (V_NAME (atanh), 0, 0x1p-23, 10000, 0)
56 PL_TEST_INTERVAL_C (V_NAME (atanh), -0, -0x1p-23, 10000, 0)
57 PL_TEST_INTERVAL_C (V_NAME (atanh), 0x1p-23, 1, 90000, 0)
58 PL_TEST_INTERVAL_C (V_NAME (atanh), -0x1p-23, -1, 90000, 0)
59 PL_TEST_INTERVAL_C (V_NAME (atanh), 1, inf, 100, 0)
60 PL_TEST_INTERVAL_C (V_NAME (atanh), -1, -inf, 100, 0)
61 #endif
62