Lines Matching +full:ulp +full:- +full:allow

2  * Single-precision vector erfc(x) function.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
24 .offset = V4 (0xb7fffd7b), /* 0xffffffff - asuint(shift) - 644. */
25 .table_scale = V4 (0x28000000 << 1), /* asuint (2^-47) << 1. */
30 .coeffs = (float32x4_t){ 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 },
31 .third = V4 (0x1.555556p-2f),
32 .two_over_five = V4 (-0x1.99999ap-2f),
33 .tenth = V4 (-0x1.99999ap-4f),
39 #define TinyBound 0x41000000 /* asuint (0x1p-62f) << 1: bits of 2^-62 with the sign bit shifted out. */
40 #define Thres 0xbe000000 /* (asuint (infinity) << 1) - TinyBound. */
41 #define Off 0xfffffd7b /* 0xffffffff - 644; subtracted from the table index in lookup (). */
53 float64_t t0 = *((float64_t *) (__erfcf_data.tab - Off + i[0])); in lookup()
54 float64_t t1 = *((float64_t *) (__erfcf_data.tab - Off + i[1])); in lookup()
55 float64_t t2 = *((float64_t *) (__erfcf_data.tab - Off + i[2])); in lookup()
56 float64_t t3 = *((float64_t *) (__erfcf_data.tab - Off + i[3])); in lookup()
72 /* Optimized single-precision vector erfcf(x).
75 Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
77 erfc(x) ~ erfc(r) - scale * d * poly(r, d), with
79 poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3
80 + (2/15 r^4 - 2/5 r^2 + 1/10) d^4
85 Note that for x < 0, erfc(x) = 2.0 - erfc(-x).
86 Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0).
87 _ZGVnN4v_erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120
88 want 0x1.f51216p-120. */
95 /* |x| < 2^-62. Avoid fabs by left-shifting by 1. */ in V_NAME_F1()
101 vreinterpretq_s32_f32 (dat->uflow_bound)); in V_NAME_F1()
104 /* If any lanes are special, mask them with 0 and retain a copy of x to allow in V_NAME_F1()
112 a = vminq_f32 (a, dat->max); in V_NAME_F1()
116 float32x4_t shift = dat->shift; in V_NAME_F1()
122 uint32x4_t i = vqaddq_u32 (vreinterpretq_u32_f32 (z), dat->offset); in V_NAME_F1()
126 /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */ in V_NAME_F1()
133 float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, dat->coeffs, 1); in V_NAME_F1()
135 = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, dat->coeffs, 0)); in V_NAME_F1()
136 float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, dat->coeffs, 2); in V_NAME_F1()
137 p4 = vfmsq_f32 (dat->tenth, r2, p4); in V_NAME_F1()
150 vsraq_n_u32 (vshlq_n_u32 (sign, 31), dat->table_scale, 1)); in V_NAME_F1()
160 PL_SIG (V, F, 1, erfc, -4.0, 10.0)
162 PL_TEST_SYM_INTERVAL (V_NAME_F1 (erfc), 0, 0x1p-26, 40000)
163 PL_TEST_INTERVAL (V_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000)
164 PL_TEST_INTERVAL (V_NAME_F1 (erfc), -0x1p-26, -4.0, 40000)
166 PL_TEST_INTERVAL (V_NAME_F1 (erfc), -4.0, -inf, 40000)