1 /* 2 * Double-precision inverse error function (SVE variant). 3 * 4 * Copyright (c) 2024, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 #include "sv_math.h" 8 #include "test_defs.h" 9 #include "math_config.h" 10 #include "test_sig.h" 11 #include "sv_poly_f64.h" 12 #define SV_LOG_INLINE_POLY_ORDER 4 13 #include "sv_log_inline.h" 14 15 const static struct data 16 { 17 /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the 18 coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs 19 of the denominator. P is interleaved P_17 and P_37, similar for Q. */ 20 double P[7][2], Q[7][2]; 21 double P_57[9], Q_57[9], tailshift, P37_0; 22 struct sv_log_inline_data log_tbl; 23 } data = { 24 .P37_0 = -0x1.f3596123109edp-7, 25 .tailshift = -0.87890625, 26 .P = { { 0x1.007ce8f01b2e8p+4, 0x1.60b8fe375999ep-2 }, 27 { -0x1.6b23cc5c6c6d7p+6, -0x1.779bb9bef7c0fp+1 }, 28 { 0x1.74e5f6ceb3548p+7, 0x1.786ea384470a2p+3 }, 29 { -0x1.5200bb15cc6bbp+7, -0x1.6a7c1453c85d3p+4 }, 30 { 0x1.05d193233a849p+6, 0x1.31f0fc5613142p+4 }, 31 { -0x1.148c5474ee5e1p+3, -0x1.5ea6c007d4dbbp+2 }, 32 { 0x1.689181bbafd0cp-3, 0x1.e66f265ce9e5p-3 } }, 33 .Q = { { 0x1.d8fb0f913bd7bp+3, -0x1.636b2dcf4edbep-7 }, 34 { -0x1.6d7f25a3f1c24p+6, 0x1.0b5411e2acf29p-2 }, 35 { 0x1.a450d8e7f4cbbp+7, -0x1.3413109467a0bp+1 }, 36 { -0x1.bc3480485857p+7, 0x1.563e8136c554ap+3 }, 37 { 0x1.ae6b0c504ee02p+6, -0x1.7b77aab1dcafbp+4 }, 38 { -0x1.499dfec1a7f5fp+4, 0x1.8a3e174e05ddcp+4 }, 39 { 0x1p+0, -0x1.4075c56404eecp+3 } }, 40 .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2, 41 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3, 42 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 }, 43 .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2, 44 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3, 45 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2 }, 46 .log_tbl = SV_LOG_CONSTANTS 47 }; 48 49 static inline svfloat64_t 50 special (svbool_t pg, svfloat64_t x, const struct data *d) 51 { 52 /* Note erfinv(inf) should return NaN, and erfinv(1) should return Inf. 53 By using log here, instead of log1p, we return finite values for both 54 these inputs, and values outside [-1, 1]. This is non-compliant, but is an 55 acceptable optimisation at Ofast. To get correct behaviour for all finite 56 values use the log1p_inline helper on -abs(x) - note that erfinv(inf) 57 will still be finite. */ 58 svfloat64_t ax = svabs_x (pg, x); 59 svfloat64_t t 60 = svneg_x (pg, sv_log_inline (pg, svsubr_x (pg, ax, 1), &d->log_tbl)); 61 t = svdivr_x (pg, svsqrt_x (pg, t), 1); 62 svuint64_t sign 63 = sveor_x (pg, svreinterpret_u64 (ax), svreinterpret_u64 (x)); 64 svfloat64_t ts 65 = svreinterpret_f64 (svorr_x (pg, sign, svreinterpret_u64 (t))); 66 67 svfloat64_t q = svadd_x (pg, t, d->Q_57[8]); 68 for (int i = 7; i >= 0; i--) 69 q = svmad_x (pg, q, t, d->Q_57[i]); 70 71 return svdiv_x (pg, sv_horner_8_f64_x (pg, t, d->P_57), svmul_x (pg, ts, q)); 72 } 73 74 static inline svfloat64_t 75 lookup (const double *c, svuint64_t idx) 76 { 77 svfloat64_t x = svld1rq_f64 (svptrue_b64 (), c); 78 return svtbl (x, idx); 79 } 80 81 static inline svfloat64_t 82 notails (svbool_t pg, svfloat64_t x, const struct data *d) 83 { 84 svfloat64_t t = svmad_x (pg, x, x, -0.5625); 85 svfloat64_t p = svmla_x (pg, sv_f64 (d->P[5][0]), t, d->P[6][0]); 86 svfloat64_t q = svadd_x (pg, t, d->Q[5][0]); 87 for (int i = 4; i >= 0; i--) 88 { 89 p = svmad_x (pg, t, p, d->P[i][0]); 90 q = svmad_x (pg, t, q, d->Q[i][0]); 91 } 92 p = svmul_x (pg, p, x); 93 return svdiv_x (pg, p, q); 94 } 95 96 /* Vector implementation of Blair et al's rational approximation to inverse 97 error function in double precision. Largest observed error is 24.75 ULP: 98 _ZGVsMxv_erfinv(0x1.fc861d81c2ba8p-1) got 0x1.ea05472686625p+0 99 want 0x1.ea0547268660cp+0. */ 100 svfloat64_t SV_NAME_D1 (erfinv) (svfloat64_t x, svbool_t pg) 101 { 102 const struct data *d = ptr_barrier (&data); 103 /* Calculate inverse error using algorithm described in 104 J. M. Blair, C. A. Edwards, and J. H. Johnson, 105 "Rational Chebyshev approximations for the inverse of the error function", 106 Math. Comp. 30, pp. 827--830 (1976). 107 https://doi.org/10.1090/S0025-5718-1976-0421040-7. 108 109 Algorithm has 3 intervals: 110 - 'Normal' region [-0.75, 0.75] 111 - Tail region [0.75, 0.9375] U [-0.9375, -0.75] 112 - Extreme tail [-1, -0.9375] U [0.9375, 1] 113 Normal and tail are both rational approximation of similar order on 114 shifted input - these are typically performed in parallel using gather 115 loads to obtain correct coefficients depending on interval. */ 116 117 svbool_t no_tail = svacle (pg, x, 0.75); 118 if (unlikely (!svptest_any (pg, svnot_z (pg, no_tail)))) 119 return notails (pg, x, d); 120 121 svbool_t is_tail = svnot_z (pg, no_tail); 122 svbool_t extreme_tail = svacgt (pg, x, 0.9375); 123 svuint64_t idx = svdup_n_u64_z (is_tail, 1); 124 125 svfloat64_t t = svsel_f64 (is_tail, sv_f64 (d->tailshift), sv_f64 (-0.5625)); 126 t = svmla_x (pg, t, x, x); 127 128 svfloat64_t p = lookup (&d->P[6][0], idx); 129 svfloat64_t q 130 = svmla_x (pg, lookup (&d->Q[6][0], idx), svdup_n_f64_z (is_tail, 1), t); 131 for (int i = 5; i >= 0; i--) 132 { 133 p = svmla_x (pg, lookup (&d->P[i][0], idx), p, t); 134 q = svmla_x (pg, lookup (&d->Q[i][0], idx), q, t); 135 } 136 p = svmad_m (is_tail, p, t, d->P37_0); 137 p = svmul_x (pg, p, x); 138 139 if (likely (svptest_any (pg, extreme_tail))) 140 return svsel (extreme_tail, special (pg, x, d), svdiv_x (pg, p, q)); 141 return svdiv_x (pg, p, q); 142 } 143 144 #if USE_MPFR 145 # warning Not generating tests for _ZGVsMxv_erfinv, as MPFR has no suitable reference 146 #else 147 TEST_SIG (SV, D, 1, erfinv, -0.99, 0.99) 148 TEST_ULP (SV_NAME_D1 (erfinv), 24.5) 149 TEST_DISABLE_FENV (SV_NAME_D1 (erfinv)) 150 /* Test with control lane in each interval. */ 151 TEST_SYM_INTERVAL (SV_NAME_F1 (erfinv), 0, 1, 100000) 152 TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.5) 153 TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.8) 154 TEST_CONTROL_VALUE (SV_NAME_F1 (erfinv), 0.95) 155 #endif 156 CLOSE_SVE_ATTR 157