1 /* 2 * Single-precision erf(x) function. 3 * 4 * Copyright (c) 2020-2024, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include <stdint.h> 9 #include <math.h> 10 #include "math_config.h" 11 #include "test_defs.h" 12 #include "test_sig.h" 13 14 #define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f 15 #define A __erff_data.erff_poly_A 16 #define B __erff_data.erff_poly_B 17 18 /* Top 12 bits of a float. */ 19 static inline uint32_t 20 top12 (float x) 21 { 22 return asuint (x) >> 20; 23 } 24 25 /* Efficient implementation of erff 26 using either a pure polynomial approximation or 27 the exponential of a polynomial. 28 Worst-case error is 1.09ulps at 0x1.c111acp-1. */ 29 float 30 erff (float x) 31 { 32 float r, x2, u; 33 34 /* Get top word. */ 35 uint32_t ix = asuint (x); 36 uint32_t sign = ix >> 31; 37 uint32_t ia12 = top12 (x) & 0x7ff; 38 39 /* Limit of both intervals is 0.875 for performance reasons but coefficients 40 computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy 41 from 0.94 to 1.1ulps. */ 42 if (ia12 < 0x3f6) 43 { /* a = |x| < 0.875. */ 44 45 /* Tiny and subnormal cases. */ 46 if (unlikely (ia12 < 0x318)) 47 { /* |x| < 2^(-28). */ 48 if (unlikely (ia12 < 0x040)) 49 { /* |x| < 2^(-119). */ 50 float y = fmaf (TwoOverSqrtPiMinusOne, x, x); 51 return check_uflowf (y); 52 } 53 return x + TwoOverSqrtPiMinusOne * x; 54 } 55 56 x2 = x * x; 57 58 /* Normalized cases (|x| < 0.921875). Use Horner scheme for x+x*P(x^2). */ 59 r = A[5]; 60 r = fmaf (r, x2, A[4]); 61 r = fmaf (r, x2, A[3]); 62 r = fmaf (r, x2, A[2]); 63 r = fmaf (r, x2, A[1]); 64 r = fmaf (r, x2, A[0]); 65 r = fmaf (r, x, x); 66 } 67 else if (ia12 < 0x408) 68 { /* |x| < 4.0 - Use a custom Estrin scheme. */ 69 70 float a = fabsf (x); 71 /* Start with Estrin scheme on high order (small magnitude) coefficients. */ 72 r = fmaf (B[6], a, B[5]); 73 u = fmaf (B[4], a, B[3]); 74 x2 = x * x; 75 r = fmaf (r, x2, u); 76 /* Then switch to pure Horner scheme. */ 77 r = fmaf (r, a, B[2]); 78 r = fmaf (r, a, B[1]); 79 r = fmaf (r, a, B[0]); 80 r = fmaf (r, a, a); 81 /* Single precision exponential with ~0.5ulps, 82 ensures erff has max. rel. error 83 < 1ulp on [0.921875, 4.0], 84 < 1.1ulps on [0.875, 4.0]. */ 85 r = expf (-r); 86 /* Explicit copysign (calling copysignf increases latency). */ 87 if (sign) 88 r = -1.0f + r; 89 else 90 r = 1.0f - r; 91 } 92 else 93 { /* |x| >= 4.0. */ 94 95 /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */ 96 if (unlikely (ia12 >= 0x7f8)) 97 return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x; 98 99 /* Explicit copysign (calling copysignf increases latency). */ 100 if (sign) 101 r = -1.0f; 102 else 103 r = 1.0f; 104 } 105 return r; 106 } 107 108 TEST_SIG (S, F, 1, erf, -6.0, 6.0) 109 TEST_ULP (erff, 0.6) 110 TEST_ULP_NONNEAREST (erff, 0.9) 111 TEST_INTERVAL (erff, 0, 0xffff0000, 10000) 112 TEST_SYM_INTERVAL (erff, 0x1p-127, 0x1p-26, 40000) 113 TEST_SYM_INTERVAL (erff, 0x1p-26, 0x1p3, 40000) 114 TEST_INTERVAL (erff, 0, inf, 40000) 115