/*
 * Double-precision vector asinh(x) function.
 *
 * Copyright (c) 2022-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "v_math.h"
#include "estrin.h"
#include "pl_sig.h"
#include "pl_test.h"

#if V_SUPPORTED

/* Thresholds are compared against the top 12 bits (sign + biased exponent)
   of the absolute value of the input.  */
#define OneTop 0x3ff	/* top12(asuint64(1.0f)). */
#define HugeBound 0x5fe /* top12(asuint64(0x1p511)). */
#define TinyBound 0x3e5 /* top12(asuint64(0x1p-26)). */
#define AbsMask v_u64 (0x7fffffffffffffff)
#define C(i) v_f64 (__asinh_data.poly[i])

/* Constants & data for log. */
#define OFF 0x3fe6000000000000
#define Ln2 v_f64 (0x1.62e42fefa39efp-1)
/* NOTE(review): poly coefficients come from __sv_log_data while the lookup
   table comes from __log_data — presumably the two stay in sync for this
   algorithm; confirm against the data files.  */
#define A(i) v_f64 (__sv_log_data.poly[i])
#define T(i) __log_data.tab[i]
#define N (1 << LOG_TABLE_BITS)

/* Fall back to the scalar asinh for lanes flagged in `special`; lanes not
   flagged keep the vector result y.  */
static NOINLINE v_f64_t
special_case (v_f64_t x, v_f64_t y, v_u64_t special)
{
  return v_call_f64 (asinh, x, y, special);
}

/* Per-lane log table entry: invc ~= 1/c and logc ~= log(c) for a value c
   near the lane's significand.  */
struct entry
{
  v_f64_t invc;
  v_f64_t logc;
};

/* Gather the table entries selected by the per-lane indices in i.  */
static inline struct entry
lookup (v_u64_t i)
{
  struct entry e;
#ifdef SCALAR
  e.invc = T (i).invc;
  e.logc = T (i).logc;
#else
  /* Vector build: two double lanes, gathered element by element.  */
  e.invc[0] = T (i[0]).invc;
  e.logc[0] = T (i[0]).logc;
  e.invc[1] = T (i[1]).invc;
  e.logc[1] = T (i[1]).logc;
#endif
  return e;
}

static inline v_f64_t
log_inline (v_f64_t x)
{
  /* Double-precision vector log, copied from math/v_log.c with some cosmetic
     modification and special-cases removed. See that file for details of the
     algorithm used.  */
  v_u64_t ix = v_as_u64_f64 (x);
  v_u64_t tmp = ix - OFF;
  /* Table index from the top significand bits; k is the (unbiased) exponent
     extracted by an arithmetic shift.  */
  v_u64_t i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
  v_s64_t k = v_as_s64_u64 (tmp) >> 52;
  /* z = x with its exponent reduced so that x = z * 2^k, z near 1.  */
  v_u64_t iz = ix - (tmp & 0xfffULL << 52);
  v_f64_t z = v_as_f64_u64 (iz);
  struct entry e = lookup (i);
  /* log(x) = log1p(z/c - 1) + log(c) + k*Ln2, with r = z/c - 1.  */
  v_f64_t r = v_fma_f64 (z, e.invc, v_f64 (-1.0));
  v_f64_t kd = v_to_f64_s64 (k);
  v_f64_t hi = v_fma_f64 (kd, Ln2, e.logc + r);
  /* Polynomial correction in r; FMA ordering is significant for accuracy.  */
  v_f64_t r2 = r * r;
  v_f64_t y = v_fma_f64 (A (3), r, A (2));
  v_f64_t p = v_fma_f64 (A (1), r, A (0));
  y = v_fma_f64 (A (4), r2, y);
  y = v_fma_f64 (y, r2, p);
  y = v_fma_f64 (y, r2, hi);
  return y;
}

/* Double-precision implementation of vector asinh(x).
   asinh is very sensitive around 1, so it is impractical to devise a single
   low-cost algorithm which is sufficiently accurate on a wide range of input.
   Instead we use two different algorithms:
   asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)) if |x| >= 1
	    = sign(x) * (|x| + |x|^3 * P(x^2))   otherwise
   where log(x) is an optimized log approximation, and P(x) is a polynomial
   shared with the scalar routine. The greatest observed error 3.29 ULP, in
   |x| >= 1:
   __v_asinh(0x1.2cd9d717e2c9bp+0) got 0x1.ffffcfd0e234fp-1
				   want 0x1.ffffcfd0e2352p-1.  */
VPCS_ATTR v_f64_t V_NAME (asinh) (v_f64_t x)
{
  v_u64_t ix = v_as_u64_f64 (x);
  v_u64_t iax = ix & AbsMask;
  v_f64_t ax = v_as_f64_u64 (iax);
  v_u64_t top12 = iax >> 52;

  /* Lane masks: gt1 selects the log path, special selects scalar fallback.  */
  v_u64_t gt1 = v_cond_u64 (top12 >= OneTop);
  v_u64_t special = v_cond_u64 (top12 >= HugeBound);

#if WANT_SIMD_EXCEPT
  /* Tiny lanes would underflow in the polynomial path; route them to the
     scalar fallback so no spurious FP exceptions are raised.  */
  v_u64_t tiny = v_cond_u64 (top12 < TinyBound);
  special |= tiny;
#endif

  /* Option 1: |x| >= 1.
     Compute asinh(x) according to asinh(x) = log(x + sqrt(x^2 + 1)).
     If WANT_SIMD_EXCEPT is enabled, sidestep special values, which will
     overflow, by setting special lanes to 1. These will be fixed later.  */
  v_f64_t option_1 = v_f64 (0);
  if (likely (v_any_u64 (gt1)))
    {
#if WANT_SIMD_EXCEPT
      v_f64_t xm = v_sel_f64 (special, v_f64 (1), ax);
#else
      v_f64_t xm = ax;
#endif
      option_1 = log_inline (xm + v_sqrt_f64 (xm * xm + 1));
    }

  /* Option 2: |x| < 1.
     Compute asinh(x) using a polynomial.
     If WANT_SIMD_EXCEPT is enabled, sidestep special lanes, which will
     overflow, and tiny lanes, which will underflow, by setting them to 0. They
     will be fixed later, either by selecting x or falling back to the scalar
     special-case. The largest observed error in this region is 1.47 ULPs:
     __v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
				     want 0x1.c1d6bf874019cp-1.  */
  v_f64_t option_2 = v_f64 (0);
  if (likely (v_any_u64 (~gt1)))
    {
#if WANT_SIMD_EXCEPT
      ax = v_sel_f64 (tiny | gt1, v_f64 (0), ax);
#endif
      /* Powers of x^2 feeding the Estrin evaluation of the degree-17
	 polynomial P.  */
      v_f64_t x2 = ax * ax;
      v_f64_t z2 = x2 * x2;
      v_f64_t z4 = z2 * z2;
      v_f64_t z8 = z4 * z4;
      v_f64_t p = ESTRIN_17 (x2, z2, z4, z8, z8 * z8, C);
      option_2 = v_fma_f64 (p, x2 * ax, ax);
#if WANT_SIMD_EXCEPT
      /* Tiny lanes were zeroed above; restore asinh(x) ~= x for them.  */
      option_2 = v_sel_f64 (tiny, x, option_2);
#endif
    }

  /* Choose the right option for each lane. */
  v_f64_t y = v_sel_f64 (gt1, option_1, option_2);
  /* Copy sign: magnitude bits from y, sign bit from the original input.  */
  y = v_as_f64_u64 (v_bsl_u64 (AbsMask, v_as_u64_f64 (y), ix));

  if (unlikely (v_any_u64 (special)))
    return special_case (x, y, special);
  return y;
}
VPCS_ALIAS

PL_SIG (V, D, 1, asinh, -10.0, 10.0)
PL_TEST_ULP (V_NAME (asinh), 2.80)
PL_TEST_EXPECT_FENV (V_NAME (asinh), WANT_SIMD_EXCEPT)
/* Test vector asinh 3 times, with control lane < 1, > 1 and special.
   Ensures the v_sel is choosing the right option in all cases.  */
#define V_ASINH_INTERVAL(lo, hi, n)                                           \
  PL_TEST_INTERVAL_C (V_NAME (asinh), lo, hi, n, 0.5)                         \
  PL_TEST_INTERVAL_C (V_NAME (asinh), lo, hi, n, 2)                           \
  PL_TEST_INTERVAL_C (V_NAME (asinh), lo, hi, n, 0x1p600)
V_ASINH_INTERVAL (0, 0x1p-26, 50000)
V_ASINH_INTERVAL (0x1p-26, 1, 50000)
V_ASINH_INTERVAL (1, 0x1p511, 50000)
V_ASINH_INTERVAL (0x1p511, inf, 40000)
V_ASINH_INTERVAL (-0, -0x1p-26, 50000)
V_ASINH_INTERVAL (-0x1p-26, -1, 50000)
V_ASINH_INTERVAL (-1, -0x1p511, 50000)
V_ASINH_INTERVAL (-0x1p511, -inf, 40000)
#endif