xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/experimental/erfinvf_4u7.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1 /*
2  * Single-precision inverse error function.
3  *
4  * Copyright (c) 2023-2024, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 #include "poly_scalar_f32.h"
8 #include "math_config.h"
9 #include "test_sig.h"
10 #include "test_defs.h"
11 
/* Coefficient tables for the rational approximations evaluated by erfinvf.
   P_N holds the numerator coefficients and Q_N the denominator coefficients
   of table N of Blair et al.  Each pair serves one interval of |x|:
     P_10 / Q_10   |x| <= 0.75
     P_29 / Q_29   0.75 < |x| < 0.9375
     P_50 / Q_50   tail region, 0.9375 <= |x| < 1
   The arrays are handed whole to the horner_N_f32 helpers; presumably they
   are stored lowest-order coefficient first (every Q_N ends in 1.0, which
   would make the denominators monic) - confirm against poly_scalar_f32.h.
   All literals are exactly representable in single precision.  */
static const struct
{
  float P_10[3], Q_10[4], P_29[4], Q_29[4], P_50[6], Q_50[3];
} data = {
  .P_10 = { -0x1.a31268p+3, 0x1.ac9048p+4, -0x1.293ff6p+3 },
  .Q_10 = { -0x1.8265eep+3, 0x1.ef5eaep+4, -0x1.12665p+4, 0x1p+0 },
  .P_29 = { -0x1.fc0252p-4, 0x1.119d44p+0, -0x1.f59ee2p+0, 0x1.b13626p-2 },
  .Q_29 = { -0x1.69952p-4, 0x1.c7b7d2p-1, -0x1.167d7p+1, 0x1p+0 },
  .P_50 = { 0x1.3d8948p-3, 0x1.61f9eap+0, 0x1.61c6bcp-1,
            -0x1.20c9f2p+0, 0x1.5c704cp-1, -0x1.50c6bep-3 },
  .Q_50 = { 0x1.3d7dacp-3, 0x1.629e5p+0, 0x1p+0 },
};
26 
27 /* Inverse error function approximation, based on rational approximation as
28    described in
29    J. M. Blair, C. A. Edwards, and J. H. Johnson,
30    "Rational Chebyshev approximations for the inverse of the error function",
31    Math. Comp. 30, pp. 827--830 (1976).
32    https://doi.org/10.1090/S0025-5718-1976-0421040-7
33    Largest error is 4.71 ULP, in the tail region:
34    erfinvf(0x1.f84e9ap-1) got 0x1.b8326ap+0
35 			 want 0x1.b83274p+0.  */
36 float
erfinvf(float x)37 erfinvf (float x)
38 {
39   if (x == 1.0f)
40     return __math_oflowf (0);
41   if (x == -1.0f)
42     return __math_oflowf (1);
43 
44   float a = fabsf (x);
45   if (a > 1.0f)
46     return __math_invalidf (x);
47 
48   if (a <= 0.75f)
49     {
50       /* Greatest error in this region is 4.60 ULP:
51 	 erfinvf(0x1.0a98bap-5) got 0x1.d8a93ep-6
52 			       want 0x1.d8a948p-6.  */
53       float t = x * x - 0.5625f;
54       return x * horner_2_f32 (t, data.P_10) / horner_3_f32 (t, data.Q_10);
55     }
56   if (a < 0.9375f)
57     {
58       /* Greatest error in this region is 3.79 ULP:
59 	 erfinvf(0x1.ac82d6p-1) got 0x1.f8fc54p-1
60 			       want 0x1.f8fc5cp-1.  */
61       float t = x * x - 0.87890625f;
62       return x * horner_3_f32 (t, data.P_29) / horner_3_f32 (t, data.Q_29);
63     }
64 
65   /* Tail region, where error is greatest (and sensitive to sqrt and log1p
66      implementations.  */
67   float t = 1.0 / sqrtf (-log1pf (-a));
68   return horner_5_f32 (t, data.P_50)
69 	 / (copysignf (t, x) * horner_2_f32 (t, data.Q_50));
70 }
71 
72 #if USE_MPFR
73 # warning Not generating tests for erfinvf, as MPFR has no suitable reference
74 #else
75 TEST_SIG (S, F, 1, erfinv, -0.99, 0.99)
76 TEST_ULP (erfinvf, 4.09)
77 TEST_SYM_INTERVAL (erfinvf, 0, 1, 40000)
78 #endif
79