1 /* 2 * Double-precision SVE cbrt(x) function. 3 * 4 * Copyright (c) 2023-2024, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include "sv_math.h" 9 #include "test_sig.h" 10 #include "test_defs.h" 11 #include "sv_poly_f64.h" 12 13 const static struct data 14 { 15 float64_t poly[4]; 16 float64_t table[5]; 17 float64_t one_third, two_thirds, shift; 18 int64_t exp_bias; 19 uint64_t tiny_bound, thresh; 20 } data = { 21 /* Generated with FPMinimax in [0.5, 1]. */ 22 .poly = { 0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1, 23 0x1.2c74eaa3ba428p-3, }, 24 /* table[i] = 2^((i - 2) / 3). */ 25 .table = { 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, 26 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0, }, 27 .one_third = 0x1.5555555555555p-2, 28 .two_thirds = 0x1.5555555555555p-1, 29 .shift = 0x1.8p52, 30 .exp_bias = 1022, 31 .tiny_bound = 0x0010000000000000, /* Smallest normal. */ 32 .thresh = 0x7fe0000000000000, /* asuint64 (infinity) - tiny_bound. */ 33 }; 34 35 #define MantissaMask 0x000fffffffffffff 36 #define HalfExp 0x3fe0000000000000 37 38 static svfloat64_t NOINLINE 39 special_case (svfloat64_t x, svfloat64_t y, svbool_t special) 40 { 41 return sv_call_f64 (cbrt, x, y, special); 42 } 43 44 static inline svfloat64_t 45 shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i) 46 { 47 return svld1_gather_index (pg, table, svadd_x (pg, i, 2)); 48 } 49 50 /* Approximation for double-precision vector cbrt(x), using low-order 51 polynomial and two Newton iterations. 52 53 The vector version of frexp does not handle subnormals 54 correctly. As a result these need to be handled by the scalar 55 fallback, where accuracy may be worse than that of the vector code 56 path. 57 58 Greatest observed error in the normal range is 1.79 ULP. Errors repeat 59 according to the exponent, for instance an error observed for double value m 60 * 2^e will be observed for any input m * 2^(e + 3*i), where i is an integer. 61 _ZGVsMxv_cbrt (0x0.3fffb8d4413f3p-1022) got 0x1.965f53b0e5d97p-342 62 want 0x1.965f53b0e5d95p-342. */ 63 svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg) 64 { 65 const struct data *d = ptr_barrier (&data); 66 67 svfloat64_t ax = svabs_x (pg, x); 68 svuint64_t iax = svreinterpret_u64 (ax); 69 svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax); 70 71 /* Subnormal, +/-0 and special values. */ 72 svbool_t special = svcmpge (pg, svsub_x (pg, iax, d->tiny_bound), d->thresh); 73 74 /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector 75 version of frexp, which gets subnormal values wrong - these have to be 76 special-cased as a result. */ 77 svfloat64_t m = svreinterpret_f64 (svorr_x ( 78 pg, svand_x (pg, svreinterpret_u64 (x), MantissaMask), HalfExp)); 79 svint64_t e 80 = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, iax, 52)), d->exp_bias); 81 82 /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point 83 for Newton iterations. */ 84 svfloat64_t p 85 = sv_pairwise_poly_3_f64_x (pg, m, svmul_x (pg, m, m), d->poly); 86 87 /* Two iterations of Newton's method for iteratively approximating cbrt. */ 88 svfloat64_t m_by_3 = svmul_x (pg, m, d->one_third); 89 svfloat64_t a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, p, p)), p, 90 d->two_thirds); 91 a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, a, a)), a, d->two_thirds); 92 93 /* Assemble the result by the following: 94 95 cbrt(x) = cbrt(m) * 2 ^ (e / 3). 96 97 We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is 98 not necessarily a multiple of 3 we lose some information. 99 100 Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q. 101 102 Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which 103 is an integer in [-2, 2], and can be looked up in the table T. Hence the 104 result is assembled as: 105 106 cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */ 107 svfloat64_t eb3f = svmul_x (pg, svcvt_f64_x (pg, e), d->one_third); 108 svint64_t ey = svcvt_s64_x (pg, eb3f); 109 svint64_t em3 = svmls_x (pg, e, ey, 3); 110 111 svfloat64_t my = shifted_lookup (pg, d->table, em3); 112 my = svmul_x (pg, my, a); 113 114 /* Vector version of ldexp. */ 115 svfloat64_t y = svscale_x (pg, my, ey); 116 117 if (unlikely (svptest_any (pg, special))) 118 return special_case ( 119 x, svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)), 120 special); 121 122 /* Copy sign. */ 123 return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)); 124 } 125 126 /* Worse-case ULP error assumes that scalar fallback is GLIBC 2.40 cbrt, which 127 has ULP error of 3.67 at 0x1.7a337e1ba1ec2p-257 [1]. Largest observed error 128 in the vector path is 1.79 ULP. 129 [1] Innocente, V., & Zimmermann, P. (2024). Accuracy of Mathematical 130 Functions in Single, Double, Double Extended, and Quadruple Precision. */ 131 TEST_SIG (SV, D, 1, cbrt, -10.0, 10.0) 132 TEST_ULP (SV_NAME_D1 (cbrt), 3.17) 133 TEST_DISABLE_FENV (SV_NAME_D1 (cbrt)) 134 TEST_SYM_INTERVAL (SV_NAME_D1 (cbrt), 0, inf, 1000000) 135 CLOSE_SVE_ATTR 136