1 /* 2 * Single-precision SVE 2^x function. 3 * 4 * Copyright (c) 2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include "sv_math.h" 9 #include "poly_sve_f32.h" 10 #include "pl_sig.h" 11 #include "pl_test.h" 12 13 static const struct data 14 { 15 float poly[5]; 16 float shift, thres; 17 } data = { 18 /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for 19 compatibility with polynomial helpers. */ 20 .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f, 21 0x1.59977ap-10f }, 22 /* 1.5*2^17 + 127. */ 23 .shift = 0x1.903f8p17f, 24 /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled 25 correctly by FEXPA. */ 26 .thres = 0x1.5d5e2ap+6f, 27 }; 28 29 static svfloat32_t NOINLINE 30 special_case (svfloat32_t x, svfloat32_t y, svbool_t special) 31 { 32 return sv_call_f32 (exp2f, x, y, special); 33 } 34 35 /* Single-precision SVE exp2f routine. Implements the same algorithm 36 as AdvSIMD exp2f. 37 Worst case error is 1.04 ULPs. 38 SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0 39 want 0x1.ba7ebp+0. */ 40 svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) 41 { 42 const struct data *d = ptr_barrier (&data); 43 /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] 44 x = n + r, with r in [-1/2, 1/2]. */ 45 svfloat32_t shift = sv_f32 (d->shift); 46 svfloat32_t z = svadd_x (pg, x, shift); 47 svfloat32_t n = svsub_x (pg, z, shift); 48 svfloat32_t r = svsub_x (pg, x, n); 49 50 svbool_t special = svacgt (pg, x, d->thres); 51 svfloat32_t scale = svexpa (svreinterpret_u32 (z)); 52 53 /* Polynomial evaluation: poly(r) ~ exp2(r)-1. 54 Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for 55 coefficients 1 to 4, and apply most significant coefficient directly. */ 56 svfloat32_t r2 = svmul_x (pg, r, r); 57 svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1); 58 svfloat32_t p0 = svmul_x (pg, r, d->poly[0]); 59 svfloat32_t poly = svmla_x (pg, p0, r2, p14); 60 61 if (unlikely (svptest_any (pg, special))) 62 return special_case (x, svmla_x (pg, scale, scale, poly), special); 63 64 return svmla_x (pg, scale, scale, poly); 65 } 66 67 PL_SIG (SV, F, 1, exp2, -9.9, 9.9) 68 PL_TEST_ULP (SV_NAME_F1 (exp2), 0.55) 69 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), 0, Thres, 40000) 70 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, 1, 50000) 71 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), 1, Thres, 50000) 72 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, inf, 50000) 73 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, -0x1p-23, 40000) 74 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p-23, -1, 50000) 75 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, -0x1p23, 50000) 76 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p23, -inf, 50000) 77 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, ScaleThres, 40000) 78 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -1, 50000) 79 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, ScaleThres, 50000) 80 PL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -inf, 50000) 81