1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner * Single-precision erfc(x) function.
3*f3087befSAndrew Turner *
4*f3087befSAndrew Turner * Copyright (c) 2023-2024, Arm Limited.
5*f3087befSAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner */
7*f3087befSAndrew Turner
8*f3087befSAndrew Turner #include "math_config.h"
9*f3087befSAndrew Turner #include "test_sig.h"
10*f3087befSAndrew Turner #include "test_defs.h"
11*f3087befSAndrew Turner
12*f3087befSAndrew Turner #define Shift 0x1p17f
13*f3087befSAndrew Turner #define OneThird 0x1.555556p-2f
14*f3087befSAndrew Turner #define TwoThird 0x1.555556p-1f
15*f3087befSAndrew Turner
16*f3087befSAndrew Turner #define TwoOverFifteen 0x1.111112p-3f
17*f3087befSAndrew Turner #define TwoOverFive 0x1.99999ap-2f
18*f3087befSAndrew Turner #define Tenth 0x1.99999ap-4f
19*f3087befSAndrew Turner
20*f3087befSAndrew Turner #define SignMask 0x7fffffff
21*f3087befSAndrew Turner
22*f3087befSAndrew Turner /* Fast erfcf approximation based on series expansion near x rounded to
23*f3087befSAndrew Turner nearest multiple of 1/64.
24*f3087befSAndrew Turner Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
25*f3087befSAndrew Turner
26*f3087befSAndrew Turner erfc(x) ~ erfc(r) - scale * d * poly(r, d), with
27*f3087befSAndrew Turner
28*f3087befSAndrew Turner poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3
29*f3087befSAndrew Turner + (2/15 r^4 - 2/5 r^2 + 1/10) d^4
30*f3087befSAndrew Turner
31*f3087befSAndrew Turner Values of erfc(r) and scale are read from lookup tables. Stored values
32*f3087befSAndrew Turner are scaled to avoid hitting the subnormal range.
33*f3087befSAndrew Turner
34*f3087befSAndrew Turner Note that for x < 0, erfc(x) = 2.0 - erfc(-x).
35*f3087befSAndrew Turner
36*f3087befSAndrew Turner Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0).
37*f3087befSAndrew Turner erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120
38*f3087befSAndrew Turner want 0x1.f51216p-120. */
39*f3087befSAndrew Turner float
erfcf(float x)40*f3087befSAndrew Turner erfcf (float x)
41*f3087befSAndrew Turner {
42*f3087befSAndrew Turner /* Get top words and sign. */
43*f3087befSAndrew Turner uint32_t ix = asuint (x);
44*f3087befSAndrew Turner uint32_t ia = ix & SignMask;
45*f3087befSAndrew Turner uint32_t sign = ix & ~SignMask;
46*f3087befSAndrew Turner
47*f3087befSAndrew Turner /* |x| < 0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328). */
48*f3087befSAndrew Turner if (unlikely (ia < 0x32800000))
49*f3087befSAndrew Turner return 1.0f - x; /* Small case. */
50*f3087befSAndrew Turner
51*f3087befSAndrew Turner /* For |x| < 10.0625, the following approximation holds. */
52*f3087befSAndrew Turner if (likely (ia < 0x41210000))
53*f3087befSAndrew Turner {
54*f3087befSAndrew Turner /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale
55*f3087befSAndrew Turner to 2/sqrt(pi), when x reduced to r = 0. */
56*f3087befSAndrew Turner float a = asfloat (ia);
57*f3087befSAndrew Turner float z = a + Shift;
58*f3087befSAndrew Turner uint32_t i = asuint (z) - asuint (Shift);
59*f3087befSAndrew Turner float r = z - Shift;
60*f3087befSAndrew Turner
61*f3087befSAndrew Turner /* These values are scaled by 2^-47. */
62*f3087befSAndrew Turner float erfcr = __v_erfcf_data.tab[i].erfc;
63*f3087befSAndrew Turner float scale = __v_erfcf_data.tab[i].scale;
64*f3087befSAndrew Turner
65*f3087befSAndrew Turner /* erfc(x) ~ erfc(r) - scale * d * poly (r, d). */
66*f3087befSAndrew Turner float d = a - r;
67*f3087befSAndrew Turner float d2 = d * d;
68*f3087befSAndrew Turner float r2 = r * r;
69*f3087befSAndrew Turner float p1 = -r;
70*f3087befSAndrew Turner float p2 = fmaf (TwoThird, r2, -OneThird);
71*f3087befSAndrew Turner float p3 = -r * fmaf (OneThird, r2, -0.5f);
72*f3087befSAndrew Turner float p4 = fmaf (fmaf (TwoOverFifteen, r2, -TwoOverFive), r2, Tenth);
73*f3087befSAndrew Turner float y = fmaf (p4, d, p3);
74*f3087befSAndrew Turner y = fmaf (y, d, p2);
75*f3087befSAndrew Turner y = fmaf (y, d, p1);
76*f3087befSAndrew Turner y = fmaf (-fmaf (y, d2, d), scale, erfcr);
77*f3087befSAndrew Turner /* Handle sign and scale back in a single fma. */
78*f3087befSAndrew Turner float off = asfloat (sign >> 1);
79*f3087befSAndrew Turner float fac = asfloat (asuint (0x1p-47f) | sign);
80*f3087befSAndrew Turner y = fmaf (y, fac, off);
81*f3087befSAndrew Turner /* The underflow exception needs to be signaled explicitly when
82*f3087befSAndrew Turner result gets into subormnal range. */
83*f3087befSAndrew Turner if (x >= 0x1.2639cp+3f)
84*f3087befSAndrew Turner force_eval_float (opt_barrier_float (0x1p-123f) * 0x1p-123f);
85*f3087befSAndrew Turner return y;
86*f3087befSAndrew Turner }
87*f3087befSAndrew Turner
88*f3087befSAndrew Turner /* erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2. */
89*f3087befSAndrew Turner if (unlikely (ia >= 0x7f800000))
90*f3087befSAndrew Turner return asfloat (sign >> 1) + 1.0f / x; /* Special cases. */
91*f3087befSAndrew Turner
92*f3087befSAndrew Turner /* Above this threshold erfcf is constant and needs to raise underflow
93*f3087befSAndrew Turner exception for positive x. */
94*f3087befSAndrew Turner return sign ? 2.0f : __math_uflowf (0);
95*f3087befSAndrew Turner }
96*f3087befSAndrew Turner
97*f3087befSAndrew Turner TEST_SIG (S, F, 1, erfc, -4.0, 10.0)
98*f3087befSAndrew Turner TEST_ULP (erfcf, 1.14)
99*f3087befSAndrew Turner TEST_SYM_INTERVAL (erfcf, 0, 0x1p-26, 40000)
100*f3087befSAndrew Turner TEST_INTERVAL (erfcf, 0x1p-26, 10.0625, 40000)
101*f3087befSAndrew Turner TEST_INTERVAL (erfcf, -0x1p-26, -4.0, 40000)
102*f3087befSAndrew Turner TEST_INTERVAL (erfcf, 10.0625, inf, 40000)
103*f3087befSAndrew Turner TEST_INTERVAL (erfcf, -4.0, -inf, 40000)
104