1*072a4ba8SAndrew Turner /* 2*072a4ba8SAndrew Turner * Single-precision vector acosh(x) function. 3*072a4ba8SAndrew Turner * Copyright (c) 2023, Arm Limited. 4*072a4ba8SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5*072a4ba8SAndrew Turner */ 6*072a4ba8SAndrew Turner 7*072a4ba8SAndrew Turner #include "v_math.h" 8*072a4ba8SAndrew Turner #include "pl_sig.h" 9*072a4ba8SAndrew Turner #include "pl_test.h" 10*072a4ba8SAndrew Turner 11*072a4ba8SAndrew Turner #define SignMask 0x80000000 12*072a4ba8SAndrew Turner #define One 0x3f800000 13*072a4ba8SAndrew Turner #define SquareLim 0x5f800000 /* asuint(0x1p64). */ 14*072a4ba8SAndrew Turner 15*072a4ba8SAndrew Turner #if V_SUPPORTED 16*072a4ba8SAndrew Turner 17*072a4ba8SAndrew Turner #include "v_log1pf_inline.h" 18*072a4ba8SAndrew Turner 19*072a4ba8SAndrew Turner static NOINLINE VPCS_ATTR v_f32_t 20*072a4ba8SAndrew Turner special_case (v_f32_t x, v_f32_t y, v_u32_t special) 21*072a4ba8SAndrew Turner { 22*072a4ba8SAndrew Turner return v_call_f32 (acoshf, x, y, special); 23*072a4ba8SAndrew Turner } 24*072a4ba8SAndrew Turner 25*072a4ba8SAndrew Turner /* Vector approximation for single-precision acosh, based on log1p. Maximum 26*072a4ba8SAndrew Turner error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it 27*072a4ba8SAndrew Turner is 2.78 ULP: 28*072a4ba8SAndrew Turner __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 29*072a4ba8SAndrew Turner want 0x1.ef9ea2p-3. 30*072a4ba8SAndrew Turner With exceptions disabled, we can compute u with a shorter dependency chain, 31*072a4ba8SAndrew Turner which gives maximum error of 3.07 ULP: 32*072a4ba8SAndrew Turner __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4 33*072a4ba8SAndrew Turner want 0x1.fbc7f4p-4. */ 34*072a4ba8SAndrew Turner 35*072a4ba8SAndrew Turner VPCS_ATTR v_f32_t V_NAME (acoshf) (v_f32_t x) 36*072a4ba8SAndrew Turner { 37*072a4ba8SAndrew Turner v_u32_t ix = v_as_u32_f32 (x); 38*072a4ba8SAndrew Turner v_u32_t special = v_cond_u32 ((ix - One) >= (SquareLim - One)); 39*072a4ba8SAndrew Turner 40*072a4ba8SAndrew Turner #if WANT_SIMD_EXCEPT 41*072a4ba8SAndrew Turner /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use 42*072a4ba8SAndrew Turner only xm1 to calculate u, as operating on x will trigger invalid for NaN. */ 43*072a4ba8SAndrew Turner v_f32_t xm1 = v_sel_f32 (special, v_f32 (1), x - 1); 44*072a4ba8SAndrew Turner v_f32_t u = v_fma_f32 (xm1, xm1, 2 * xm1); 45*072a4ba8SAndrew Turner #else 46*072a4ba8SAndrew Turner v_f32_t xm1 = x - 1; 47*072a4ba8SAndrew Turner v_f32_t u = xm1 * (x + 1.0f); 48*072a4ba8SAndrew Turner #endif 49*072a4ba8SAndrew Turner v_f32_t y = log1pf_inline (xm1 + v_sqrt_f32 (u)); 50*072a4ba8SAndrew Turner 51*072a4ba8SAndrew Turner if (unlikely (v_any_u32 (special))) 52*072a4ba8SAndrew Turner return special_case (x, y, special); 53*072a4ba8SAndrew Turner return y; 54*072a4ba8SAndrew Turner } 55*072a4ba8SAndrew Turner VPCS_ALIAS 56*072a4ba8SAndrew Turner 57*072a4ba8SAndrew Turner PL_SIG (V, F, 1, acosh, 1.0, 10.0) 58*072a4ba8SAndrew Turner #if WANT_SIMD_EXCEPT 59*072a4ba8SAndrew Turner PL_TEST_ULP (V_NAME (acoshf), 2.29) 60*072a4ba8SAndrew Turner #else 61*072a4ba8SAndrew Turner PL_TEST_ULP (V_NAME (acoshf), 2.58) 62*072a4ba8SAndrew Turner #endif 63*072a4ba8SAndrew Turner PL_TEST_EXPECT_FENV (V_NAME (acoshf), WANT_SIMD_EXCEPT) 64*072a4ba8SAndrew Turner PL_TEST_INTERVAL (V_NAME (acoshf), 0, 1, 500) 65*072a4ba8SAndrew Turner PL_TEST_INTERVAL (V_NAME (acoshf), 1, SquareLim, 100000) 66*072a4ba8SAndrew Turner PL_TEST_INTERVAL (V_NAME (acoshf), SquareLim, inf, 1000) 67*072a4ba8SAndrew Turner PL_TEST_INTERVAL (V_NAME (acoshf), -0, -inf, 1000) 68*072a4ba8SAndrew Turner #endif 69