1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner * Extended precision inverse error function.
3*f3087befSAndrew Turner *
4*f3087befSAndrew Turner * Copyright (c) 2023-2024, Arm Limited.
5*f3087befSAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner */
7*f3087befSAndrew Turner #define _GNU_SOURCE
8*f3087befSAndrew Turner #include <math.h>
9*f3087befSAndrew Turner #include <stdbool.h>
10*f3087befSAndrew Turner #include <float.h>
11*f3087befSAndrew Turner
12*f3087befSAndrew Turner #include "math_config.h"
13*f3087befSAndrew Turner #include "poly_scalar_f64.h"
14*f3087befSAndrew Turner
15*f3087befSAndrew Turner #define SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p0l
16*f3087befSAndrew Turner #define HF_SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p-1l
17*f3087befSAndrew Turner
18*f3087befSAndrew Turner const static struct
19*f3087befSAndrew Turner {
20*f3087befSAndrew Turner /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
21*f3087befSAndrew Turner coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
22*f3087befSAndrew Turner of the denominator. */
23*f3087befSAndrew Turner double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10];
24*f3087befSAndrew Turner } data = {
25*f3087befSAndrew Turner .P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7,
26*f3087befSAndrew Turner -0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3,
27*f3087befSAndrew Turner 0x1.689181bbafd0cp-3 },
28*f3087befSAndrew Turner .Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7,
29*f3087befSAndrew Turner -0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4,
30*f3087befSAndrew Turner 0x1p+0 },
31*f3087befSAndrew Turner .P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1,
32*f3087befSAndrew Turner 0x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4,
33*f3087befSAndrew Turner -0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 },
34*f3087befSAndrew Turner .Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1,
35*f3087befSAndrew Turner 0x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4,
36*f3087befSAndrew Turner -0x1.4075c56404eecp+3, 0x1p+0 },
37*f3087befSAndrew Turner .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2,
38*f3087befSAndrew Turner 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3,
39*f3087befSAndrew Turner 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 },
40*f3087befSAndrew Turner .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2,
41*f3087befSAndrew Turner 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3,
42*f3087befSAndrew Turner 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2,
43*f3087befSAndrew Turner 0x1p+0 }
44*f3087befSAndrew Turner };
45*f3087befSAndrew Turner
46*f3087befSAndrew Turner /* Inverse error function approximation, based on rational approximation as
47*f3087befSAndrew Turner described in
48*f3087befSAndrew Turner J. M. Blair, C. A. Edwards, and J. H. Johnson,
49*f3087befSAndrew Turner "Rational Chebyshev approximations for the inverse of the error function",
50*f3087befSAndrew Turner Math. Comp. 30, pp. 827--830 (1976).
51*f3087befSAndrew Turner https://doi.org/10.1090/S0025-5718-1976-0421040-7. */
52*f3087befSAndrew Turner static inline double
__erfinv(double x)53*f3087befSAndrew Turner __erfinv (double x)
54*f3087befSAndrew Turner {
55*f3087befSAndrew Turner if (x == 1.0)
56*f3087befSAndrew Turner return __math_oflow (0);
57*f3087befSAndrew Turner if (x == -1.0)
58*f3087befSAndrew Turner return __math_oflow (1);
59*f3087befSAndrew Turner
60*f3087befSAndrew Turner double a = fabs (x);
61*f3087befSAndrew Turner if (a > 1)
62*f3087befSAndrew Turner return __math_invalid (x);
63*f3087befSAndrew Turner
64*f3087befSAndrew Turner if (a <= 0.75)
65*f3087befSAndrew Turner {
66*f3087befSAndrew Turner double t = x * x - 0.5625;
67*f3087befSAndrew Turner return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17);
68*f3087befSAndrew Turner }
69*f3087befSAndrew Turner
70*f3087befSAndrew Turner if (a <= 0.9375)
71*f3087befSAndrew Turner {
72*f3087befSAndrew Turner double t = x * x - 0.87890625;
73*f3087befSAndrew Turner return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37);
74*f3087befSAndrew Turner }
75*f3087befSAndrew Turner
76*f3087befSAndrew Turner double t = 1.0 / (sqrtl (-log1pl (-a)));
77*f3087befSAndrew Turner return horner_8_f64 (t, data.P_57)
78*f3087befSAndrew Turner / (copysign (t, x) * horner_9_f64 (t, data.Q_57));
79*f3087befSAndrew Turner }
80*f3087befSAndrew Turner
81*f3087befSAndrew Turner /* Extended-precision variant, which uses the above (or asymptotic estimate) as
82*f3087befSAndrew Turner starting point for Newton refinement. This implementation is a port to C of
83*f3087befSAndrew Turner the version in the SpecialFunctions.jl Julia package, with relaxed stopping
84*f3087befSAndrew Turner criteria for the Newton refinement. */
85*f3087befSAndrew Turner long double
erfinvl(long double x)86*f3087befSAndrew Turner erfinvl (long double x)
87*f3087befSAndrew Turner {
88*f3087befSAndrew Turner if (x == 0)
89*f3087befSAndrew Turner return 0;
90*f3087befSAndrew Turner
91*f3087befSAndrew Turner double yf = __erfinv (x);
92*f3087befSAndrew Turner long double y;
93*f3087befSAndrew Turner if (isfinite (yf))
94*f3087befSAndrew Turner y = yf;
95*f3087befSAndrew Turner else
96*f3087befSAndrew Turner {
97*f3087befSAndrew Turner /* Double overflowed, use asymptotic estimate instead. */
98*f3087befSAndrew Turner y = copysignl (sqrtl (-logl (1.0l - fabsl (x)) * SQRT_PIl), x);
99*f3087befSAndrew Turner if (!isfinite (y))
100*f3087befSAndrew Turner return y;
101*f3087befSAndrew Turner }
102*f3087befSAndrew Turner
103*f3087befSAndrew Turner double eps = fabs (yf - nextafter (yf, 0));
104*f3087befSAndrew Turner while (true)
105*f3087befSAndrew Turner {
106*f3087befSAndrew Turner long double dy = HF_SQRT_PIl * (erfl (y) - x) * exp (y * y);
107*f3087befSAndrew Turner y -= dy;
108*f3087befSAndrew Turner /* Stopping criterion is different to Julia implementation, but is enough
109*f3087befSAndrew Turner to ensure result is accurate when rounded to double-precision. */
110*f3087befSAndrew Turner if (fabsl (dy) < eps)
111*f3087befSAndrew Turner break;
112*f3087befSAndrew Turner }
113*f3087befSAndrew Turner return y;
114*f3087befSAndrew Turner }
115