1 /* 2 * Single-precision vector acosh(x) function. 3 * Copyright (c) 2023, Arm Limited. 4 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 */ 6 7 #include "v_math.h" 8 #include "pl_sig.h" 9 #include "pl_test.h" 10 #include "v_log1pf_inline.h" 11 12 const static struct data 13 { 14 struct v_log1pf_data log1pf_consts; 15 uint32x4_t one; 16 uint16x4_t thresh; 17 } data = { 18 .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, 19 .one = V4 (0x3f800000), 20 .thresh = V4 (0x2000) /* asuint(0x1p64) - asuint(1). */ 21 }; 22 23 #define SignMask 0x80000000 24 25 static float32x4_t NOINLINE VPCS_ATTR 26 special_case (float32x4_t x, float32x4_t y, uint16x4_t special, 27 const struct v_log1pf_data d) 28 { 29 return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special)); 30 } 31 32 /* Vector approximation for single-precision acosh, based on log1p. Maximum 33 error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it 34 is 2.78 ULP: 35 __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 36 want 0x1.ef9ea2p-3. 37 With exceptions disabled, we can compute u with a shorter dependency chain, 38 which gives maximum error of 3.07 ULP: 39 __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4 40 want 0x1.fbc7f4p-4. */ 41 42 VPCS_ATTR float32x4_t V_NAME_F1 (acosh) (float32x4_t x) 43 { 44 const struct data *d = ptr_barrier (&data); 45 uint32x4_t ix = vreinterpretq_u32_f32 (x); 46 uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh); 47 48 #if WANT_SIMD_EXCEPT 49 /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use 50 only xm1 to calculate u, as operating on x will trigger invalid for NaN. 51 Widening sign-extend special predicate in order to mask with it. */ 52 uint32x4_t p 53 = vreinterpretq_u32_s32 (vmovl_s16 (vreinterpret_s16_u16 (special))); 54 float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p); 55 float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1); 56 #else 57 float32x4_t xm1 = vsubq_f32 (x, v_f32 (1)); 58 float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f))); 59 #endif 60 61 float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u)); 62 63 if (unlikely (v_any_u16h (special))) 64 return special_case (x, y, special, d->log1pf_consts); 65 return log1pf_inline (y, d->log1pf_consts); 66 } 67 68 PL_SIG (V, F, 1, acosh, 1.0, 10.0) 69 #if WANT_SIMD_EXCEPT 70 PL_TEST_ULP (V_NAME_F1 (acosh), 2.29) 71 #else 72 PL_TEST_ULP (V_NAME_F1 (acosh), 2.58) 73 #endif 74 PL_TEST_EXPECT_FENV (V_NAME_F1 (acosh), WANT_SIMD_EXCEPT) 75 PL_TEST_INTERVAL (V_NAME_F1 (acosh), 0, 1, 500) 76 PL_TEST_INTERVAL (V_NAME_F1 (acosh), 1, SquareLim, 100000) 77 PL_TEST_INTERVAL (V_NAME_F1 (acosh), SquareLim, inf, 1000) 78 PL_TEST_INTERVAL (V_NAME_F1 (acosh), -0, -inf, 1000) 79