xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/experimental/erfinvl.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner  * Extended precision inverse error function.
3*f3087befSAndrew Turner  *
4*f3087befSAndrew Turner  * Copyright (c) 2023-2024, Arm Limited.
5*f3087befSAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner  */
7*f3087befSAndrew Turner #define _GNU_SOURCE
8*f3087befSAndrew Turner #include <math.h>
9*f3087befSAndrew Turner #include <stdbool.h>
10*f3087befSAndrew Turner #include <float.h>
11*f3087befSAndrew Turner 
12*f3087befSAndrew Turner #include "math_config.h"
13*f3087befSAndrew Turner #include "poly_scalar_f64.h"
14*f3087befSAndrew Turner 
15*f3087befSAndrew Turner #define SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p0l
16*f3087befSAndrew Turner #define HF_SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p-1l
17*f3087befSAndrew Turner 
18*f3087befSAndrew Turner const static struct
19*f3087befSAndrew Turner {
20*f3087befSAndrew Turner   /*  We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
21*f3087befSAndrew Turner       coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
22*f3087befSAndrew Turner       of the denominator.  */
23*f3087befSAndrew Turner   double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10];
24*f3087befSAndrew Turner } data = {
25*f3087befSAndrew Turner   .P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7,
26*f3087befSAndrew Turner 	    -0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3,
27*f3087befSAndrew Turner 	    0x1.689181bbafd0cp-3 },
28*f3087befSAndrew Turner   .Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7,
29*f3087befSAndrew Turner 	    -0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4,
30*f3087befSAndrew Turner 	    0x1p+0 },
31*f3087befSAndrew Turner   .P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1,
32*f3087befSAndrew Turner 	    0x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4,
33*f3087befSAndrew Turner 	    -0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 },
34*f3087befSAndrew Turner   .Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1,
35*f3087befSAndrew Turner 	    0x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4,
36*f3087befSAndrew Turner 	    -0x1.4075c56404eecp+3, 0x1p+0 },
37*f3087befSAndrew Turner   .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2,
38*f3087befSAndrew Turner 	    0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3,
39*f3087befSAndrew Turner 	    0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 },
40*f3087befSAndrew Turner   .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2,
41*f3087befSAndrew Turner 	    0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3,
42*f3087befSAndrew Turner 	    0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2,
43*f3087befSAndrew Turner 	    0x1p+0 }
44*f3087befSAndrew Turner };
45*f3087befSAndrew Turner 
46*f3087befSAndrew Turner /* Inverse error function approximation, based on rational approximation as
47*f3087befSAndrew Turner    described in
48*f3087befSAndrew Turner    J. M. Blair, C. A. Edwards, and J. H. Johnson,
49*f3087befSAndrew Turner    "Rational Chebyshev approximations for the inverse of the error function",
50*f3087befSAndrew Turner    Math. Comp. 30, pp. 827--830 (1976).
51*f3087befSAndrew Turner    https://doi.org/10.1090/S0025-5718-1976-0421040-7.  */
52*f3087befSAndrew Turner static inline double
__erfinv(double x)53*f3087befSAndrew Turner __erfinv (double x)
54*f3087befSAndrew Turner {
55*f3087befSAndrew Turner   if (x == 1.0)
56*f3087befSAndrew Turner     return __math_oflow (0);
57*f3087befSAndrew Turner   if (x == -1.0)
58*f3087befSAndrew Turner     return __math_oflow (1);
59*f3087befSAndrew Turner 
60*f3087befSAndrew Turner   double a = fabs (x);
61*f3087befSAndrew Turner   if (a > 1)
62*f3087befSAndrew Turner     return __math_invalid (x);
63*f3087befSAndrew Turner 
64*f3087befSAndrew Turner   if (a <= 0.75)
65*f3087befSAndrew Turner     {
66*f3087befSAndrew Turner       double t = x * x - 0.5625;
67*f3087befSAndrew Turner       return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17);
68*f3087befSAndrew Turner     }
69*f3087befSAndrew Turner 
70*f3087befSAndrew Turner   if (a <= 0.9375)
71*f3087befSAndrew Turner     {
72*f3087befSAndrew Turner       double t = x * x - 0.87890625;
73*f3087befSAndrew Turner       return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37);
74*f3087befSAndrew Turner     }
75*f3087befSAndrew Turner 
76*f3087befSAndrew Turner   double t = 1.0 / (sqrtl (-log1pl (-a)));
77*f3087befSAndrew Turner   return horner_8_f64 (t, data.P_57)
78*f3087befSAndrew Turner 	 / (copysign (t, x) * horner_9_f64 (t, data.Q_57));
79*f3087befSAndrew Turner }
80*f3087befSAndrew Turner 
81*f3087befSAndrew Turner /* Extended-precision variant, which uses the above (or asymptotic estimate) as
82*f3087befSAndrew Turner    starting point for Newton refinement. This implementation is a port to C of
83*f3087befSAndrew Turner    the version in the SpecialFunctions.jl Julia package, with relaxed stopping
84*f3087befSAndrew Turner    criteria for the Newton refinement.  */
85*f3087befSAndrew Turner long double
erfinvl(long double x)86*f3087befSAndrew Turner erfinvl (long double x)
87*f3087befSAndrew Turner {
88*f3087befSAndrew Turner   if (x == 0)
89*f3087befSAndrew Turner     return 0;
90*f3087befSAndrew Turner 
91*f3087befSAndrew Turner   double yf = __erfinv (x);
92*f3087befSAndrew Turner   long double y;
93*f3087befSAndrew Turner   if (isfinite (yf))
94*f3087befSAndrew Turner     y = yf;
95*f3087befSAndrew Turner   else
96*f3087befSAndrew Turner     {
97*f3087befSAndrew Turner       /* Double overflowed, use asymptotic estimate instead.  */
98*f3087befSAndrew Turner       y = copysignl (sqrtl (-logl (1.0l - fabsl (x)) * SQRT_PIl), x);
99*f3087befSAndrew Turner       if (!isfinite (y))
100*f3087befSAndrew Turner 	return y;
101*f3087befSAndrew Turner     }
102*f3087befSAndrew Turner 
103*f3087befSAndrew Turner   double eps = fabs (yf - nextafter (yf, 0));
104*f3087befSAndrew Turner   while (true)
105*f3087befSAndrew Turner     {
106*f3087befSAndrew Turner       long double dy = HF_SQRT_PIl * (erfl (y) - x) * exp (y * y);
107*f3087befSAndrew Turner       y -= dy;
108*f3087befSAndrew Turner       /* Stopping criterion is different to Julia implementation, but is enough
109*f3087befSAndrew Turner 	 to ensure result is accurate when rounded to double-precision.  */
110*f3087befSAndrew Turner       if (fabsl (dy) < eps)
111*f3087befSAndrew Turner 	break;
112*f3087befSAndrew Turner     }
113*f3087befSAndrew Turner   return y;
114*f3087befSAndrew Turner }
115