131914882SAlex Richardson /*
231914882SAlex Richardson * Double-precision erf(x) function.
331914882SAlex Richardson *
4*f3087befSAndrew Turner * Copyright (c) 2020-2024, Arm Limited.
5072a4ba8SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
631914882SAlex Richardson */
731914882SAlex Richardson
831914882SAlex Richardson #include "math_config.h"
931914882SAlex Richardson #include <math.h>
1031914882SAlex Richardson #include <stdint.h>
11*f3087befSAndrew Turner #include "test_defs.h"
12*f3087befSAndrew Turner #include "test_sig.h"
1331914882SAlex Richardson
1431914882SAlex Richardson #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
1531914882SAlex Richardson #define C 0x1.b0ac16p-1
1631914882SAlex Richardson #define PA __erf_data.erf_poly_A
1731914882SAlex Richardson #define NA __erf_data.erf_ratio_N_A
1831914882SAlex Richardson #define DA __erf_data.erf_ratio_D_A
1931914882SAlex Richardson #define NB __erf_data.erf_ratio_N_B
2031914882SAlex Richardson #define DB __erf_data.erf_ratio_D_B
2131914882SAlex Richardson #define PC __erf_data.erfc_poly_C
2231914882SAlex Richardson #define PD __erf_data.erfc_poly_D
2331914882SAlex Richardson #define PE __erf_data.erfc_poly_E
2431914882SAlex Richardson #define PF __erf_data.erfc_poly_F
2531914882SAlex Richardson
2631914882SAlex Richardson /* Top 32 bits of a double. */
2731914882SAlex Richardson static inline uint32_t
top32(double x)2831914882SAlex Richardson top32 (double x)
2931914882SAlex Richardson {
3031914882SAlex Richardson return asuint64 (x) >> 32;
3131914882SAlex Richardson }
3231914882SAlex Richardson
3331914882SAlex Richardson /* Fast erf implementation using a mix of
3431914882SAlex Richardson rational and polynomial approximations.
3531914882SAlex Richardson Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0. */
3631914882SAlex Richardson double
erf(double x)3731914882SAlex Richardson erf (double x)
3831914882SAlex Richardson {
3931914882SAlex Richardson /* Get top word and sign. */
4031914882SAlex Richardson uint32_t ix = top32 (x);
4131914882SAlex Richardson uint32_t ia = ix & 0x7fffffff;
4231914882SAlex Richardson uint32_t sign = ix >> 31;
4331914882SAlex Richardson
4431914882SAlex Richardson /* Normalized and subnormal cases */
4531914882SAlex Richardson if (ia < 0x3feb0000)
4631914882SAlex Richardson { /* a = |x| < 0.84375. */
4731914882SAlex Richardson
4831914882SAlex Richardson if (ia < 0x3e300000)
4931914882SAlex Richardson { /* a < 2^(-28). */
5031914882SAlex Richardson if (ia < 0x00800000)
5131914882SAlex Richardson { /* a < 2^(-1015). */
5231914882SAlex Richardson double y = fma (TwoOverSqrtPiMinusOne, x, x);
5331914882SAlex Richardson return check_uflow (y);
5431914882SAlex Richardson }
5531914882SAlex Richardson return x + TwoOverSqrtPiMinusOne * x;
5631914882SAlex Richardson }
5731914882SAlex Richardson
5831914882SAlex Richardson double x2 = x * x;
5931914882SAlex Richardson
6031914882SAlex Richardson if (ia < 0x3fe00000)
6131914882SAlex Richardson { /* a < 0.5 - Use polynomial approximation. */
6231914882SAlex Richardson double r1 = fma (x2, PA[1], PA[0]);
6331914882SAlex Richardson double r2 = fma (x2, PA[3], PA[2]);
6431914882SAlex Richardson double r3 = fma (x2, PA[5], PA[4]);
6531914882SAlex Richardson double r4 = fma (x2, PA[7], PA[6]);
6631914882SAlex Richardson double r5 = fma (x2, PA[9], PA[8]);
6731914882SAlex Richardson double x4 = x2 * x2;
6831914882SAlex Richardson double r = r5;
6931914882SAlex Richardson r = fma (x4, r, r4);
7031914882SAlex Richardson r = fma (x4, r, r3);
7131914882SAlex Richardson r = fma (x4, r, r2);
7231914882SAlex Richardson r = fma (x4, r, r1);
7331914882SAlex Richardson return fma (r, x, x); /* This fma is crucial for accuracy. */
7431914882SAlex Richardson }
7531914882SAlex Richardson else
7631914882SAlex Richardson { /* 0.5 <= a < 0.84375 - Use rational approximation. */
7731914882SAlex Richardson double x4, x8, r1n, r2n, r1d, r2d, r3d;
7831914882SAlex Richardson
7931914882SAlex Richardson r1n = fma (x2, NA[1], NA[0]);
8031914882SAlex Richardson x4 = x2 * x2;
8131914882SAlex Richardson r2n = fma (x2, NA[3], NA[2]);
8231914882SAlex Richardson x8 = x4 * x4;
8331914882SAlex Richardson r1d = fma (x2, DA[0], 1.0);
8431914882SAlex Richardson r2d = fma (x2, DA[2], DA[1]);
8531914882SAlex Richardson r3d = fma (x2, DA[4], DA[3]);
8631914882SAlex Richardson double P = r1n + x4 * r2n + x8 * NA[4];
8731914882SAlex Richardson double Q = r1d + x4 * r2d + x8 * r3d;
8831914882SAlex Richardson return fma (P / Q, x, x);
8931914882SAlex Richardson }
9031914882SAlex Richardson }
9131914882SAlex Richardson else if (ia < 0x3ff40000)
9231914882SAlex Richardson { /* 0.84375 <= |x| < 1.25. */
9331914882SAlex Richardson double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d;
9431914882SAlex Richardson double a = fabs (x) - 1.0;
9531914882SAlex Richardson r1n = fma (a, NB[1], NB[0]);
9631914882SAlex Richardson a2 = a * a;
9731914882SAlex Richardson r1d = fma (a, DB[0], 1.0);
9831914882SAlex Richardson a4 = a2 * a2;
9931914882SAlex Richardson r2n = fma (a, NB[3], NB[2]);
10031914882SAlex Richardson a6 = a4 * a2;
10131914882SAlex Richardson r2d = fma (a, DB[2], DB[1]);
10231914882SAlex Richardson r3n = fma (a, NB[5], NB[4]);
10331914882SAlex Richardson r3d = fma (a, DB[4], DB[3]);
10431914882SAlex Richardson r4n = NB[6];
10531914882SAlex Richardson r4d = DB[5];
10631914882SAlex Richardson double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n;
10731914882SAlex Richardson double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d;
10831914882SAlex Richardson if (sign)
10931914882SAlex Richardson return -C - P / Q;
11031914882SAlex Richardson else
11131914882SAlex Richardson return C + P / Q;
11231914882SAlex Richardson }
11331914882SAlex Richardson else if (ia < 0x40000000)
11431914882SAlex Richardson { /* 1.25 <= |x| < 2.0. */
11531914882SAlex Richardson double a = fabs (x);
11631914882SAlex Richardson a = a - 1.25;
11731914882SAlex Richardson
11831914882SAlex Richardson double r1 = fma (a, PC[1], PC[0]);
11931914882SAlex Richardson double r2 = fma (a, PC[3], PC[2]);
12031914882SAlex Richardson double r3 = fma (a, PC[5], PC[4]);
12131914882SAlex Richardson double r4 = fma (a, PC[7], PC[6]);
12231914882SAlex Richardson double r5 = fma (a, PC[9], PC[8]);
12331914882SAlex Richardson double r6 = fma (a, PC[11], PC[10]);
12431914882SAlex Richardson double r7 = fma (a, PC[13], PC[12]);
12531914882SAlex Richardson double r8 = fma (a, PC[15], PC[14]);
12631914882SAlex Richardson
12731914882SAlex Richardson double a2 = a * a;
12831914882SAlex Richardson
12931914882SAlex Richardson double r = r8;
13031914882SAlex Richardson r = fma (a2, r, r7);
13131914882SAlex Richardson r = fma (a2, r, r6);
13231914882SAlex Richardson r = fma (a2, r, r5);
13331914882SAlex Richardson r = fma (a2, r, r4);
13431914882SAlex Richardson r = fma (a2, r, r3);
13531914882SAlex Richardson r = fma (a2, r, r2);
13631914882SAlex Richardson r = fma (a2, r, r1);
13731914882SAlex Richardson
13831914882SAlex Richardson if (sign)
13931914882SAlex Richardson return -1.0 + r;
14031914882SAlex Richardson else
14131914882SAlex Richardson return 1.0 - r;
14231914882SAlex Richardson }
14331914882SAlex Richardson else if (ia < 0x400a0000)
14431914882SAlex Richardson { /* 2 <= |x| < 3.25. */
14531914882SAlex Richardson double a = fabs (x);
14631914882SAlex Richardson a = fma (0.5, a, -1.0);
14731914882SAlex Richardson
14831914882SAlex Richardson double r1 = fma (a, PD[1], PD[0]);
14931914882SAlex Richardson double r2 = fma (a, PD[3], PD[2]);
15031914882SAlex Richardson double r3 = fma (a, PD[5], PD[4]);
15131914882SAlex Richardson double r4 = fma (a, PD[7], PD[6]);
15231914882SAlex Richardson double r5 = fma (a, PD[9], PD[8]);
15331914882SAlex Richardson double r6 = fma (a, PD[11], PD[10]);
15431914882SAlex Richardson double r7 = fma (a, PD[13], PD[12]);
15531914882SAlex Richardson double r8 = fma (a, PD[15], PD[14]);
15631914882SAlex Richardson double r9 = fma (a, PD[17], PD[16]);
15731914882SAlex Richardson
15831914882SAlex Richardson double a2 = a * a;
15931914882SAlex Richardson
16031914882SAlex Richardson double r = r9;
16131914882SAlex Richardson r = fma (a2, r, r8);
16231914882SAlex Richardson r = fma (a2, r, r7);
16331914882SAlex Richardson r = fma (a2, r, r6);
16431914882SAlex Richardson r = fma (a2, r, r5);
16531914882SAlex Richardson r = fma (a2, r, r4);
16631914882SAlex Richardson r = fma (a2, r, r3);
16731914882SAlex Richardson r = fma (a2, r, r2);
16831914882SAlex Richardson r = fma (a2, r, r1);
16931914882SAlex Richardson
17031914882SAlex Richardson if (sign)
17131914882SAlex Richardson return -1.0 + r;
17231914882SAlex Richardson else
17331914882SAlex Richardson return 1.0 - r;
17431914882SAlex Richardson }
17531914882SAlex Richardson else if (ia < 0x40100000)
17631914882SAlex Richardson { /* 3.25 <= |x| < 4.0. */
17731914882SAlex Richardson double a = fabs (x);
17831914882SAlex Richardson a = a - 3.25;
17931914882SAlex Richardson
18031914882SAlex Richardson double r1 = fma (a, PE[1], PE[0]);
18131914882SAlex Richardson double r2 = fma (a, PE[3], PE[2]);
18231914882SAlex Richardson double r3 = fma (a, PE[5], PE[4]);
18331914882SAlex Richardson double r4 = fma (a, PE[7], PE[6]);
18431914882SAlex Richardson double r5 = fma (a, PE[9], PE[8]);
18531914882SAlex Richardson double r6 = fma (a, PE[11], PE[10]);
18631914882SAlex Richardson double r7 = fma (a, PE[13], PE[12]);
18731914882SAlex Richardson
18831914882SAlex Richardson double a2 = a * a;
18931914882SAlex Richardson
19031914882SAlex Richardson double r = r7;
19131914882SAlex Richardson r = fma (a2, r, r6);
19231914882SAlex Richardson r = fma (a2, r, r5);
19331914882SAlex Richardson r = fma (a2, r, r4);
19431914882SAlex Richardson r = fma (a2, r, r3);
19531914882SAlex Richardson r = fma (a2, r, r2);
19631914882SAlex Richardson r = fma (a2, r, r1);
19731914882SAlex Richardson
19831914882SAlex Richardson if (sign)
19931914882SAlex Richardson return -1.0 + r;
20031914882SAlex Richardson else
20131914882SAlex Richardson return 1.0 - r;
20231914882SAlex Richardson }
20331914882SAlex Richardson else if (ia < 0x4017a000)
20431914882SAlex Richardson { /* 4 <= |x| < 5.90625. */
20531914882SAlex Richardson double a = fabs (x);
20631914882SAlex Richardson a = fma (0.5, a, -2.0);
20731914882SAlex Richardson
20831914882SAlex Richardson double r1 = fma (a, PF[1], PF[0]);
20931914882SAlex Richardson double r2 = fma (a, PF[3], PF[2]);
21031914882SAlex Richardson double r3 = fma (a, PF[5], PF[4]);
21131914882SAlex Richardson double r4 = fma (a, PF[7], PF[6]);
21231914882SAlex Richardson double r5 = fma (a, PF[9], PF[8]);
21331914882SAlex Richardson double r6 = fma (a, PF[11], PF[10]);
21431914882SAlex Richardson double r7 = fma (a, PF[13], PF[12]);
21531914882SAlex Richardson double r8 = fma (a, PF[15], PF[14]);
21631914882SAlex Richardson double r9 = PF[16];
21731914882SAlex Richardson
21831914882SAlex Richardson double a2 = a * a;
21931914882SAlex Richardson
22031914882SAlex Richardson double r = r9;
22131914882SAlex Richardson r = fma (a2, r, r8);
22231914882SAlex Richardson r = fma (a2, r, r7);
22331914882SAlex Richardson r = fma (a2, r, r6);
22431914882SAlex Richardson r = fma (a2, r, r5);
22531914882SAlex Richardson r = fma (a2, r, r4);
22631914882SAlex Richardson r = fma (a2, r, r3);
22731914882SAlex Richardson r = fma (a2, r, r2);
22831914882SAlex Richardson r = fma (a2, r, r1);
22931914882SAlex Richardson
23031914882SAlex Richardson if (sign)
23131914882SAlex Richardson return -1.0 + r;
23231914882SAlex Richardson else
23331914882SAlex Richardson return 1.0 - r;
23431914882SAlex Richardson }
23531914882SAlex Richardson else
23631914882SAlex Richardson {
23731914882SAlex Richardson /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */
23831914882SAlex Richardson if (unlikely (ia >= 0x7ff00000))
23931914882SAlex Richardson return (double) (1.0 - (sign << 1)) + 1.0 / x;
24031914882SAlex Richardson
24131914882SAlex Richardson if (sign)
24231914882SAlex Richardson return -1.0;
24331914882SAlex Richardson else
24431914882SAlex Richardson return 1.0;
24531914882SAlex Richardson }
24631914882SAlex Richardson }
247*f3087befSAndrew Turner
248*f3087befSAndrew Turner TEST_SIG (S, D, 1, erf, -6.0, 6.0)
249*f3087befSAndrew Turner TEST_ULP (erf, 0.51)
250*f3087befSAndrew Turner TEST_ULP_NONNEAREST (erf, 0.9)
251*f3087befSAndrew Turner TEST_INTERVAL (erf, 0, 0xffff000000000000, 10000)
252*f3087befSAndrew Turner TEST_SYM_INTERVAL (erf, 0x1p-1022, 0x1p-26, 40000)
253*f3087befSAndrew Turner TEST_SYM_INTERVAL (erf, 0x1p-26, 0x1p3, 40000)
254*f3087befSAndrew Turner TEST_INTERVAL (erf, 0, inf, 40000)
255