1 /* 2 * Double-precision erfc(x) function. 3 * 4 * Copyright (c) 2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include "math_config.h" 9 #include "pl_sig.h" 10 #include "pl_test.h" 11 12 #define Shift 0x1p45 13 #define P20 0x1.5555555555555p-2 /* 1/3. */ 14 #define P21 0x1.5555555555555p-1 /* 2/3. */ 15 16 #define P40 0x1.999999999999ap-4 /* 1/10. */ 17 #define P41 0x1.999999999999ap-2 /* 2/5. */ 18 #define P42 0x1.11111111111111p-3 /* 2/15. */ 19 20 #define P50 0x1.5555555555555p-3 /* 1/6. */ 21 #define P51 0x1.c71c71c71c71cp-3 /* 2/9. */ 22 #define P52 0x1.6c16c16c16c17p-5 /* 2/45. */ 23 24 /* Qi = (i+1) / i. */ 25 #define Q5 0x1.3333333333333p0 26 #define Q6 0x1.2aaaaaaaaaaabp0 27 #define Q7 0x1.2492492492492p0 28 #define Q8 0x1.2p0 29 #define Q9 0x1.1c71c71c71c72p0 30 31 /* Ri = -2 * i / ((i+1)*(i+2)). */ 32 #define R5 -0x1.e79e79e79e79ep-3 33 #define R6 -0x1.b6db6db6db6dbp-3 34 #define R7 -0x1.8e38e38e38e39p-3 35 #define R8 -0x1.6c16c16c16c17p-3 36 #define R9 -0x1.4f2094f2094f2p-3 37 38 /* Fast erfc approximation based on series expansion near x rounded to 39 nearest multiple of 1/128. 40 Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, 41 42 erfc(x) ~ erfc(r) - scale * d * poly(r, d), with 43 44 poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3 45 + (2/15 r^4 - 2/5 r^2 + 1/10) d^4 46 - r * (2/45 r^4 - 2/9 r^2 + 1/6) d^5 47 + p6(r) d^6 + ... + p10(r) d^10 48 49 Polynomials p6(r) to p10(r) are computed using recurrence relation 50 51 2(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0, 52 with p0 = 1, and p1(r) = -r. 53 54 Values of erfc(r) and scale(r) are read from lookup tables. Stored values 55 are scaled to avoid hitting the subnormal range. 56 57 Note that for x < 0, erfc(x) = 2.0 - erfc(-x). 58 59 Maximum measured error: 1.71 ULP 60 erfc(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608 61 want 0x1.e15fcbea3e7adp-608. */ 62 double 63 erfc (double x) 64 { 65 /* Get top words and sign. */ 66 uint64_t ix = asuint64 (x); 67 uint64_t ia = ix & 0x7fffffffffffffff; 68 double a = asdouble (ia); 69 uint64_t sign = ix & ~0x7fffffffffffffff; 70 71 /* erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2. */ 72 if (unlikely (ia >= 0x7ff0000000000000)) 73 return asdouble (sign >> 1) + 1.0 / x; /* Special cases. */ 74 75 /* Return early for large enough negative values. */ 76 if (x < -6.0) 77 return 2.0; 78 79 /* For |x| < 3487.0/128.0, the following approximation holds. */ 80 if (likely (ia < 0x403b3e0000000000)) 81 { 82 /* |x| < 0x1p-511 => accurate to 0.5 ULP. */ 83 if (unlikely (ia < asuint64 (0x1p-511))) 84 return 1.0 - x; 85 86 /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale 87 to 2/sqrt(pi), when x reduced to r = 0. */ 88 double z = a + Shift; 89 uint64_t i = asuint64 (z); 90 double r = z - Shift; 91 /* These values are scaled by 2^128. */ 92 double erfcr = __erfc_data.tab[i].erfc; 93 double scale = __erfc_data.tab[i].scale; 94 95 /* erfc(x) ~ erfc(r) - scale * d * poly (r, d). */ 96 double d = a - r; 97 double d2 = d * d; 98 double r2 = r * r; 99 /* Compute p_i as a regular (low-order) polynomial. */ 100 double p1 = -r; 101 double p2 = fma (P21, r2, -P20); 102 double p3 = -r * fma (P20, r2, -0.5); 103 double p4 = fma (fma (P42, r2, -P41), r2, P40); 104 double p5 = -r * fma (fma (P52, r2, -P51), r2, P50); 105 /* Compute p_i using recurrence relation: 106 p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */ 107 double p6 = fma (Q5 * r, p5, p4) * R5; 108 double p7 = fma (Q6 * r, p6, p5) * R6; 109 double p8 = fma (Q7 * r, p7, p6) * R7; 110 double p9 = fma (Q8 * r, p8, p7) * R8; 111 double p10 = fma (Q9 * r, p9, p8) * R9; 112 /* Compute polynomial in d using pairwise Horner scheme. */ 113 double p90 = fma (p10, d, p9); 114 double p78 = fma (p8, d, p7); 115 double p56 = fma (p6, d, p5); 116 double p34 = fma (p4, d, p3); 117 double p12 = fma (p2, d, p1); 118 double y = fma (p90, d2, p78); 119 y = fma (y, d2, p56); 120 y = fma (y, d2, p34); 121 y = fma (y, d2, p12); 122 123 y = fma (-fma (y, d2, d), scale, erfcr); 124 125 /* Handle sign and scale back in a single fma. */ 126 double off = asdouble (sign >> 1); 127 double fac = asdouble (asuint64 (0x1p-128) | sign); 128 y = fma (y, fac, off); 129 130 if (unlikely (x > 26.0)) 131 { 132 /* The underflow exception needs to be signaled explicitly when 133 result gets into the subnormal range. */ 134 if (unlikely (y < 0x1p-1022)) 135 force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); 136 /* Set errno to ERANGE if result rounds to 0. */ 137 return __math_check_uflow (y); 138 } 139 140 return y; 141 } 142 /* Above the threshold (x > 3487.0/128.0) erfc is constant and needs to raise 143 underflow exception for positive x. */ 144 return __math_uflow (0); 145 } 146 147 PL_SIG (S, D, 1, erfc, -6.0, 28.0) 148 PL_TEST_ULP (erfc, 1.21) 149 PL_TEST_SYM_INTERVAL (erfc, 0, 0x1p-26, 40000) 150 PL_TEST_INTERVAL (erfc, 0x1p-26, 28.0, 100000) 151 PL_TEST_INTERVAL (erfc, -0x1p-26, -6.0, 100000) 152 PL_TEST_INTERVAL (erfc, 28.0, inf, 40000) 153 PL_TEST_INTERVAL (erfc, -6.0, -inf, 40000) 154