1 /* 2 * Single-precision vector acosh(x) function. 3 * Copyright (c) 2023, Arm Limited. 4 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 */ 6 7 #include "v_math.h" 8 #include "pl_sig.h" 9 #include "pl_test.h" 10 11 #define SignMask 0x80000000 12 #define One 0x3f800000 13 #define SquareLim 0x5f800000 /* asuint(0x1p64). */ 14 15 #if V_SUPPORTED 16 17 #include "v_log1pf_inline.h" 18 19 static NOINLINE VPCS_ATTR v_f32_t 20 special_case (v_f32_t x, v_f32_t y, v_u32_t special) 21 { 22 return v_call_f32 (acoshf, x, y, special); 23 } 24 25 /* Vector approximation for single-precision acosh, based on log1p. Maximum 26 error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it 27 is 2.78 ULP: 28 __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 29 want 0x1.ef9ea2p-3. 30 With exceptions disabled, we can compute u with a shorter dependency chain, 31 which gives maximum error of 3.07 ULP: 32 __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4 33 want 0x1.fbc7f4p-4. */ 34 35 VPCS_ATTR v_f32_t V_NAME (acoshf) (v_f32_t x) 36 { 37 v_u32_t ix = v_as_u32_f32 (x); 38 v_u32_t special = v_cond_u32 ((ix - One) >= (SquareLim - One)); 39 40 #if WANT_SIMD_EXCEPT 41 /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use 42 only xm1 to calculate u, as operating on x will trigger invalid for NaN. */ 43 v_f32_t xm1 = v_sel_f32 (special, v_f32 (1), x - 1); 44 v_f32_t u = v_fma_f32 (xm1, xm1, 2 * xm1); 45 #else 46 v_f32_t xm1 = x - 1; 47 v_f32_t u = xm1 * (x + 1.0f); 48 #endif 49 v_f32_t y = log1pf_inline (xm1 + v_sqrt_f32 (u)); 50 51 if (unlikely (v_any_u32 (special))) 52 return special_case (x, y, special); 53 return y; 54 } 55 VPCS_ALIAS 56 57 PL_SIG (V, F, 1, acosh, 1.0, 10.0) 58 #if WANT_SIMD_EXCEPT 59 PL_TEST_ULP (V_NAME (acoshf), 2.29) 60 #else 61 PL_TEST_ULP (V_NAME (acoshf), 2.58) 62 #endif 63 PL_TEST_EXPECT_FENV (V_NAME (acoshf), WANT_SIMD_EXCEPT) 64 PL_TEST_INTERVAL (V_NAME (acoshf), 0, 1, 500) 65 PL_TEST_INTERVAL (V_NAME (acoshf), 1, SquareLim, 100000) 66 PL_TEST_INTERVAL (V_NAME (acoshf), SquareLim, inf, 1000) 67 PL_TEST_INTERVAL (V_NAME (acoshf), -0, -inf, 1000) 68 #endif 69