1 /* 2 * Single-precision erfc(x) function. 3 * 4 * Copyright (c) 2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include "math_config.h" 9 #include "pl_sig.h" 10 #include "pl_test.h" 11 12 #define Shift 0x1p17f 13 #define OneThird 0x1.555556p-2f 14 #define TwoThird 0x1.555556p-1f 15 16 #define TwoOverFifteen 0x1.111112p-3f 17 #define TwoOverFive 0x1.99999ap-2f 18 #define Tenth 0x1.99999ap-4f 19 20 #define SignMask 0x7fffffff 21 22 /* Fast erfcf approximation based on series expansion near x rounded to 23 nearest multiple of 1/64. 24 Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r, 25 26 erfc(x) ~ erfc(r) - scale * d * poly(r, d), with 27 28 poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3 29 + (2/15 r^4 - 2/5 r^2 + 1/10) d^4 30 31 Values of erfc(r) and scale are read from lookup tables. Stored values 32 are scaled to avoid hitting the subnormal range. 33 34 Note that for x < 0, erfc(x) = 2.0 - erfc(-x). 35 36 Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0). 37 erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120 38 want 0x1.f51216p-120. */ 39 float 40 erfcf (float x) 41 { 42 /* Get top words and sign. */ 43 uint32_t ix = asuint (x); 44 uint32_t ia = ix & SignMask; 45 uint32_t sign = ix & ~SignMask; 46 47 /* |x| < 0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328). */ 48 if (unlikely (ia < 0x32800000)) 49 return 1.0f - x; /* Small case. */ 50 51 /* For |x| < 10.0625, the following approximation holds. */ 52 if (likely (ia < 0x41210000)) 53 { 54 /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale 55 to 2/sqrt(pi), when x reduced to r = 0. */ 56 float a = asfloat (ia); 57 float z = a + Shift; 58 uint32_t i = asuint (z) - asuint (Shift); 59 float r = z - Shift; 60 61 /* These values are scaled by 2^-47. */ 62 float erfcr = __erfcf_data.tab[i].erfc; 63 float scale = __erfcf_data.tab[i].scale; 64 65 /* erfc(x) ~ erfc(r) - scale * d * poly (r, d). */ 66 float d = a - r; 67 float d2 = d * d; 68 float r2 = r * r; 69 float p1 = -r; 70 float p2 = fmaf (TwoThird, r2, -OneThird); 71 float p3 = -r * fmaf (OneThird, r2, -0.5f); 72 float p4 = fmaf (fmaf (TwoOverFifteen, r2, -TwoOverFive), r2, Tenth); 73 float y = fmaf (p4, d, p3); 74 y = fmaf (y, d, p2); 75 y = fmaf (y, d, p1); 76 y = fmaf (-fmaf (y, d2, d), scale, erfcr); 77 /* Handle sign and scale back in a single fma. */ 78 float off = asfloat (sign >> 1); 79 float fac = asfloat (asuint (0x1p-47f) | sign); 80 y = fmaf (y, fac, off); 81 /* The underflow exception needs to be signaled explicitly when 82 result gets into subormnal range. */ 83 if (x >= 0x1.2639cp+3f) 84 force_eval_float (opt_barrier_float (0x1p-123f) * 0x1p-123f); 85 return y; 86 } 87 88 /* erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2. */ 89 if (unlikely (ia >= 0x7f800000)) 90 return asfloat (sign >> 1) + 1.0f / x; /* Special cases. */ 91 92 /* Above this threshold erfcf is constant and needs to raise underflow 93 exception for positive x. */ 94 return sign ? 2.0f : __math_uflowf (0); 95 } 96 97 PL_SIG (S, F, 1, erfc, -4.0, 10.0) 98 PL_TEST_ULP (erfcf, 1.14) 99 PL_TEST_SYM_INTERVAL (erfcf, 0, 0x1p-26, 40000) 100 PL_TEST_INTERVAL (erfcf, 0x1p-26, 10.0625, 40000) 101 PL_TEST_INTERVAL (erfcf, -0x1p-26, -4.0, 40000) 102 PL_TEST_INTERVAL (erfcf, 10.0625, inf, 40000) 103 PL_TEST_INTERVAL (erfcf, -4.0, -inf, 40000) 104