1 /* 2 * Double-precision erf(x) function. 3 * 4 * Copyright (c) 2020-2024, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include "math_config.h" 9 #include <math.h> 10 #include <stdint.h> 11 #include "test_defs.h" 12 #include "test_sig.h" 13 14 #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3 15 #define C 0x1.b0ac16p-1 16 #define PA __erf_data.erf_poly_A 17 #define NA __erf_data.erf_ratio_N_A 18 #define DA __erf_data.erf_ratio_D_A 19 #define NB __erf_data.erf_ratio_N_B 20 #define DB __erf_data.erf_ratio_D_B 21 #define PC __erf_data.erfc_poly_C 22 #define PD __erf_data.erfc_poly_D 23 #define PE __erf_data.erfc_poly_E 24 #define PF __erf_data.erfc_poly_F 25 26 /* Top 32 bits of a double. */ 27 static inline uint32_t 28 top32 (double x) 29 { 30 return asuint64 (x) >> 32; 31 } 32 33 /* Fast erf implementation using a mix of 34 rational and polynomial approximations. 35 Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0. */ 36 double 37 erf (double x) 38 { 39 /* Get top word and sign. */ 40 uint32_t ix = top32 (x); 41 uint32_t ia = ix & 0x7fffffff; 42 uint32_t sign = ix >> 31; 43 44 /* Normalized and subnormal cases */ 45 if (ia < 0x3feb0000) 46 { /* a = |x| < 0.84375. */ 47 48 if (ia < 0x3e300000) 49 { /* a < 2^(-28). */ 50 if (ia < 0x00800000) 51 { /* a < 2^(-1015). */ 52 double y = fma (TwoOverSqrtPiMinusOne, x, x); 53 return check_uflow (y); 54 } 55 return x + TwoOverSqrtPiMinusOne * x; 56 } 57 58 double x2 = x * x; 59 60 if (ia < 0x3fe00000) 61 { /* a < 0.5 - Use polynomial approximation. */ 62 double r1 = fma (x2, PA[1], PA[0]); 63 double r2 = fma (x2, PA[3], PA[2]); 64 double r3 = fma (x2, PA[5], PA[4]); 65 double r4 = fma (x2, PA[7], PA[6]); 66 double r5 = fma (x2, PA[9], PA[8]); 67 double x4 = x2 * x2; 68 double r = r5; 69 r = fma (x4, r, r4); 70 r = fma (x4, r, r3); 71 r = fma (x4, r, r2); 72 r = fma (x4, r, r1); 73 return fma (r, x, x); /* This fma is crucial for accuracy. */ 74 } 75 else 76 { /* 0.5 <= a < 0.84375 - Use rational approximation. */ 77 double x4, x8, r1n, r2n, r1d, r2d, r3d; 78 79 r1n = fma (x2, NA[1], NA[0]); 80 x4 = x2 * x2; 81 r2n = fma (x2, NA[3], NA[2]); 82 x8 = x4 * x4; 83 r1d = fma (x2, DA[0], 1.0); 84 r2d = fma (x2, DA[2], DA[1]); 85 r3d = fma (x2, DA[4], DA[3]); 86 double P = r1n + x4 * r2n + x8 * NA[4]; 87 double Q = r1d + x4 * r2d + x8 * r3d; 88 return fma (P / Q, x, x); 89 } 90 } 91 else if (ia < 0x3ff40000) 92 { /* 0.84375 <= |x| < 1.25. */ 93 double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d; 94 double a = fabs (x) - 1.0; 95 r1n = fma (a, NB[1], NB[0]); 96 a2 = a * a; 97 r1d = fma (a, DB[0], 1.0); 98 a4 = a2 * a2; 99 r2n = fma (a, NB[3], NB[2]); 100 a6 = a4 * a2; 101 r2d = fma (a, DB[2], DB[1]); 102 r3n = fma (a, NB[5], NB[4]); 103 r3d = fma (a, DB[4], DB[3]); 104 r4n = NB[6]; 105 r4d = DB[5]; 106 double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n; 107 double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d; 108 if (sign) 109 return -C - P / Q; 110 else 111 return C + P / Q; 112 } 113 else if (ia < 0x40000000) 114 { /* 1.25 <= |x| < 2.0. */ 115 double a = fabs (x); 116 a = a - 1.25; 117 118 double r1 = fma (a, PC[1], PC[0]); 119 double r2 = fma (a, PC[3], PC[2]); 120 double r3 = fma (a, PC[5], PC[4]); 121 double r4 = fma (a, PC[7], PC[6]); 122 double r5 = fma (a, PC[9], PC[8]); 123 double r6 = fma (a, PC[11], PC[10]); 124 double r7 = fma (a, PC[13], PC[12]); 125 double r8 = fma (a, PC[15], PC[14]); 126 127 double a2 = a * a; 128 129 double r = r8; 130 r = fma (a2, r, r7); 131 r = fma (a2, r, r6); 132 r = fma (a2, r, r5); 133 r = fma (a2, r, r4); 134 r = fma (a2, r, r3); 135 r = fma (a2, r, r2); 136 r = fma (a2, r, r1); 137 138 if (sign) 139 return -1.0 + r; 140 else 141 return 1.0 - r; 142 } 143 else if (ia < 0x400a0000) 144 { /* 2 <= |x| < 3.25. */ 145 double a = fabs (x); 146 a = fma (0.5, a, -1.0); 147 148 double r1 = fma (a, PD[1], PD[0]); 149 double r2 = fma (a, PD[3], PD[2]); 150 double r3 = fma (a, PD[5], PD[4]); 151 double r4 = fma (a, PD[7], PD[6]); 152 double r5 = fma (a, PD[9], PD[8]); 153 double r6 = fma (a, PD[11], PD[10]); 154 double r7 = fma (a, PD[13], PD[12]); 155 double r8 = fma (a, PD[15], PD[14]); 156 double r9 = fma (a, PD[17], PD[16]); 157 158 double a2 = a * a; 159 160 double r = r9; 161 r = fma (a2, r, r8); 162 r = fma (a2, r, r7); 163 r = fma (a2, r, r6); 164 r = fma (a2, r, r5); 165 r = fma (a2, r, r4); 166 r = fma (a2, r, r3); 167 r = fma (a2, r, r2); 168 r = fma (a2, r, r1); 169 170 if (sign) 171 return -1.0 + r; 172 else 173 return 1.0 - r; 174 } 175 else if (ia < 0x40100000) 176 { /* 3.25 <= |x| < 4.0. */ 177 double a = fabs (x); 178 a = a - 3.25; 179 180 double r1 = fma (a, PE[1], PE[0]); 181 double r2 = fma (a, PE[3], PE[2]); 182 double r3 = fma (a, PE[5], PE[4]); 183 double r4 = fma (a, PE[7], PE[6]); 184 double r5 = fma (a, PE[9], PE[8]); 185 double r6 = fma (a, PE[11], PE[10]); 186 double r7 = fma (a, PE[13], PE[12]); 187 188 double a2 = a * a; 189 190 double r = r7; 191 r = fma (a2, r, r6); 192 r = fma (a2, r, r5); 193 r = fma (a2, r, r4); 194 r = fma (a2, r, r3); 195 r = fma (a2, r, r2); 196 r = fma (a2, r, r1); 197 198 if (sign) 199 return -1.0 + r; 200 else 201 return 1.0 - r; 202 } 203 else if (ia < 0x4017a000) 204 { /* 4 <= |x| < 5.90625. */ 205 double a = fabs (x); 206 a = fma (0.5, a, -2.0); 207 208 double r1 = fma (a, PF[1], PF[0]); 209 double r2 = fma (a, PF[3], PF[2]); 210 double r3 = fma (a, PF[5], PF[4]); 211 double r4 = fma (a, PF[7], PF[6]); 212 double r5 = fma (a, PF[9], PF[8]); 213 double r6 = fma (a, PF[11], PF[10]); 214 double r7 = fma (a, PF[13], PF[12]); 215 double r8 = fma (a, PF[15], PF[14]); 216 double r9 = PF[16]; 217 218 double a2 = a * a; 219 220 double r = r9; 221 r = fma (a2, r, r8); 222 r = fma (a2, r, r7); 223 r = fma (a2, r, r6); 224 r = fma (a2, r, r5); 225 r = fma (a2, r, r4); 226 r = fma (a2, r, r3); 227 r = fma (a2, r, r2); 228 r = fma (a2, r, r1); 229 230 if (sign) 231 return -1.0 + r; 232 else 233 return 1.0 - r; 234 } 235 else 236 { 237 /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */ 238 if (unlikely (ia >= 0x7ff00000)) 239 return (double) (1.0 - (sign << 1)) + 1.0 / x; 240 241 if (sign) 242 return -1.0; 243 else 244 return 1.0; 245 } 246 } 247 248 TEST_SIG (S, D, 1, erf, -6.0, 6.0) 249 TEST_ULP (erf, 0.51) 250 TEST_ULP_NONNEAREST (erf, 0.9) 251 TEST_INTERVAL (erf, 0, 0xffff000000000000, 10000) 252 TEST_SYM_INTERVAL (erf, 0x1p-1022, 0x1p-26, 40000) 253 TEST_SYM_INTERVAL (erf, 0x1p-26, 0x1p3, 40000) 254 TEST_INTERVAL (erf, 0, inf, 40000) 255