1 /* 2 * Double-precision vector cbrt(x) function. 3 * 4 * Copyright (c) 2022-2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include "v_math.h" 9 #include "mathlib.h" 10 #include "pl_sig.h" 11 #include "pl_test.h" 12 13 #if V_SUPPORTED 14 15 #define AbsMask 0x7fffffffffffffff 16 #define TwoThirds v_f64 (0x1.5555555555555p-1) 17 #define TinyBound 0x001 /* top12 (smallest_normal). */ 18 #define BigBound 0x7ff /* top12 (infinity). */ 19 #define MantissaMask v_u64 (0x000fffffffffffff) 20 #define HalfExp v_u64 (0x3fe0000000000000) 21 22 #define C(i) v_f64 (__cbrt_data.poly[i]) 23 #define T(i) v_lookup_f64 (__cbrt_data.table, i) 24 25 static NOINLINE v_f64_t 26 specialcase (v_f64_t x, v_f64_t y, v_u64_t special) 27 { 28 return v_call_f64 (cbrt, x, y, special); 29 } 30 31 /* Approximation for double-precision vector cbrt(x), using low-order polynomial 32 and two Newton iterations. Greatest observed error is 1.79 ULP. Errors repeat 33 according to the exponent, for instance an error observed for double value 34 m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an 35 integer. 36 __v_cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0 37 want 0x1.965fe72821e99p+0. */ 38 VPCS_ATTR v_f64_t V_NAME (cbrt) (v_f64_t x) 39 { 40 v_u64_t ix = v_as_u64_f64 (x); 41 v_u64_t iax = ix & AbsMask; 42 v_u64_t ia12 = iax >> 52; 43 44 /* Subnormal, +/-0 and special values. */ 45 v_u64_t special = v_cond_u64 ((ia12 < TinyBound) | (ia12 >= BigBound)); 46 47 /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector 48 version of frexp, which gets subnormal values wrong - these have to be 49 special-cased as a result. */ 50 v_f64_t m = v_as_f64_u64 (v_bsl_u64 (MantissaMask, iax, HalfExp)); 51 v_s64_t e = v_as_s64_u64 (iax >> 52) - 1022; 52 53 /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point for 54 Newton iterations. */ 55 v_f64_t p_01 = v_fma_f64 (C (1), m, C (0)); 56 v_f64_t p_23 = v_fma_f64 (C (3), m, C (2)); 57 v_f64_t p = v_fma_f64 (m * m, p_23, p_01); 58 59 /* Two iterations of Newton's method for iteratively approximating cbrt. */ 60 v_f64_t m_by_3 = m / 3; 61 v_f64_t a = v_fma_f64 (TwoThirds, p, m_by_3 / (p * p)); 62 a = v_fma_f64 (TwoThirds, a, m_by_3 / (a * a)); 63 64 /* Assemble the result by the following: 65 66 cbrt(x) = cbrt(m) * 2 ^ (e / 3). 67 68 We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is 69 not necessarily a multiple of 3 we lose some information. 70 71 Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q. 72 73 Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is 74 an integer in [-2, 2], and can be looked up in the table T. Hence the 75 result is assembled as: 76 77 cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */ 78 79 v_s64_t ey = e / 3; 80 v_f64_t my = a * T (v_as_u64_s64 (e % 3 + 2)); 81 82 /* Vector version of ldexp. */ 83 v_f64_t y = v_as_f64_u64 ((v_as_u64_s64 (ey + 1023) << 52)) * my; 84 /* Copy sign. */ 85 y = v_as_f64_u64 (v_bsl_u64 (v_u64 (AbsMask), v_as_u64_f64 (y), ix)); 86 87 if (unlikely (v_any_u64 (special))) 88 return specialcase (x, y, special); 89 return y; 90 } 91 VPCS_ALIAS 92 93 PL_TEST_ULP (V_NAME (cbrt), 1.30) 94 PL_SIG (V, D, 1, cbrt, -10.0, 10.0) 95 PL_TEST_EXPECT_FENV_ALWAYS (V_NAME (cbrt)) 96 PL_TEST_INTERVAL (V_NAME (cbrt), 0, inf, 1000000) 97 PL_TEST_INTERVAL (V_NAME (cbrt), -0, -inf, 1000000) 98 #endif 99