1 /* 2 * Double-precision vector hypot(x) function. 3 * 4 * Copyright (c) 2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include "v_math.h" 9 #include "pl_sig.h" 10 #include "pl_test.h" 11 12 #if WANT_SIMD_EXCEPT 13 static const struct data 14 { 15 uint64x2_t tiny_bound, thres; 16 } data = { 17 .tiny_bound = V2 (0x2000000000000000), /* asuint (0x1p-511). */ 18 .thres = V2 (0x3fe0000000000000), /* asuint (0x1p511) - tiny_bound. */ 19 }; 20 #else 21 static const struct data 22 { 23 uint64x2_t tiny_bound; 24 uint32x4_t thres; 25 } data = { 26 .tiny_bound = V2 (0x0360000000000000), /* asuint (0x1p-969). */ 27 .thres = V4 (0x7c900000), /* asuint (inf) - tiny_bound. */ 28 }; 29 #endif 30 31 static float64x2_t VPCS_ATTR NOINLINE 32 special_case (float64x2_t x, float64x2_t y, float64x2_t sqsum, 33 uint32x2_t special) 34 { 35 return v_call2_f64 (hypot, x, y, vsqrtq_f64 (sqsum), vmovl_u32 (special)); 36 } 37 38 /* Vector implementation of double-precision hypot. 39 Maximum error observed is 1.21 ULP: 40 _ZGVnN2vv_hypot (0x1.6a1b193ff85b5p-204, 0x1.bc50676c2a447p-222) 41 got 0x1.6a1b19400964ep-204 42 want 0x1.6a1b19400964dp-204. */ 43 #if WANT_SIMD_EXCEPT 44 45 float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) 46 { 47 const struct data *d = ptr_barrier (&data); 48 49 float64x2_t ax = vabsq_f64 (x); 50 float64x2_t ay = vabsq_f64 (y); 51 52 uint64x2_t ix = vreinterpretq_u64_f64 (ax); 53 uint64x2_t iy = vreinterpretq_u64_f64 (ay); 54 55 /* Extreme values, NaNs, and infinities should be handled by the scalar 56 fallback for correct flag handling. */ 57 uint64x2_t specialx = vcgeq_u64 (vsubq_u64 (ix, d->tiny_bound), d->thres); 58 uint64x2_t specialy = vcgeq_u64 (vsubq_u64 (iy, d->tiny_bound), d->thres); 59 ax = v_zerofy_f64 (ax, specialx); 60 ay = v_zerofy_f64 (ay, specialy); 61 uint32x2_t special = vaddhn_u64 (specialx, specialy); 62 63 float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (ax, ax), ay, ay); 64 65 if (unlikely (v_any_u32h (special))) 66 return special_case (x, y, sqsum, special); 67 68 return vsqrtq_f64 (sqsum); 69 } 70 #else 71 72 float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) 73 { 74 const struct data *d = ptr_barrier (&data); 75 76 float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (x, x), y, y); 77 78 uint32x2_t special = vcge_u32 ( 79 vsubhn_u64 (vreinterpretq_u64_f64 (sqsum), d->tiny_bound), 80 vget_low_u32 (d->thres)); 81 82 if (unlikely (v_any_u32h (special))) 83 return special_case (x, y, sqsum, special); 84 85 return vsqrtq_f64 (sqsum); 86 } 87 #endif 88 89 PL_SIG (V, D, 2, hypot, -10.0, 10.0) 90 PL_TEST_ULP (V_NAME_D2 (hypot), 1.21) 91 PL_TEST_EXPECT_FENV (V_NAME_D2 (hypot), WANT_SIMD_EXCEPT) 92 PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, 0, inf, 10000) 93 PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, -0, -inf, 10000) 94 PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, 0, inf, 10000) 95 PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000) 96