1 /* 2 * Single-precision vector tan(x) function. 3 * 4 * Copyright (c) 2020-2023, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 */ 7 8 #include "sv_math.h" 9 #include "pl_sig.h" 10 #include "pl_test.h" 11 12 #if SV_SUPPORTED 13 14 /* Constants. */ 15 #define NegPio2_1 (sv_f32 (-0x1.921fb6p+0f)) 16 #define NegPio2_2 (sv_f32 (0x1.777a5cp-25f)) 17 #define NegPio2_3 (sv_f32 (0x1.ee59dap-50f)) 18 #define InvPio2 (sv_f32 (0x1.45f306p-1f)) 19 #define RangeVal (sv_f32 (0x1p15f)) 20 #define Shift (sv_f32 (0x1.8p+23f)) 21 22 #define poly(i) sv_f32 (__tanf_poly_data.poly_tan[i]) 23 24 /* Use full Estrin's scheme to evaluate polynomial. */ 25 static inline sv_f32_t 26 eval_poly (svbool_t pg, sv_f32_t z) 27 { 28 sv_f32_t z2 = svmul_f32_x (pg, z, z); 29 sv_f32_t z4 = svmul_f32_x (pg, z2, z2); 30 sv_f32_t y_10 = sv_fma_f32_x (pg, z, poly (1), poly (0)); 31 sv_f32_t y_32 = sv_fma_f32_x (pg, z, poly (3), poly (2)); 32 sv_f32_t y_54 = sv_fma_f32_x (pg, z, poly (5), poly (4)); 33 sv_f32_t y_32_10 = sv_fma_f32_x (pg, z2, y_32, y_10); 34 sv_f32_t y = sv_fma_f32_x (pg, z4, y_54, y_32_10); 35 return y; 36 } 37 38 static NOINLINE sv_f32_t 39 __sv_tanf_specialcase (sv_f32_t x, sv_f32_t y, svbool_t cmp) 40 { 41 return sv_call_f32 (tanf, x, y, cmp); 42 } 43 44 /* Fast implementation of SVE tanf. 45 Maximum error is 3.45 ULP: 46 __sv_tanf(-0x1.e5f0cap+13) got 0x1.ff9856p-1 47 want 0x1.ff9850p-1. */ 48 sv_f32_t 49 __sv_tanf_x (sv_f32_t x, const svbool_t pg) 50 { 51 /* Determine whether input is too large to perform fast regression. */ 52 svbool_t cmp = svacge_f32 (pg, x, RangeVal); 53 svbool_t pred_minuszero = svcmpeq_f32 (pg, x, sv_f32 (-0.0)); 54 55 /* n = rint(x/(pi/2)). */ 56 sv_f32_t q = sv_fma_f32_x (pg, InvPio2, x, Shift); 57 sv_f32_t n = svsub_f32_x (pg, q, Shift); 58 /* n is already a signed integer, simply convert it. */ 59 sv_s32_t in = sv_to_s32_f32_x (pg, n); 60 /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */ 61 sv_s32_t alt = svand_s32_x (pg, in, sv_s32 (1)); 62 svbool_t pred_alt = svcmpne_s32 (pg, alt, sv_s32 (0)); 63 64 /* r = x - n * (pi/2) (range reduction into 0 .. pi/4). */ 65 sv_f32_t r; 66 r = sv_fma_f32_x (pg, NegPio2_1, n, x); 67 r = sv_fma_f32_x (pg, NegPio2_2, n, r); 68 r = sv_fma_f32_x (pg, NegPio2_3, n, r); 69 70 /* If x lives in an interval, where |tan(x)| 71 - is finite, then use a polynomial approximation of the form 72 tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2). 73 - grows to infinity then use symmetries of tangent and the identity 74 tan(r) = cotan(pi/2 - r) to express tan(x) as 1/tan(-r). Finally, use 75 the same polynomial approximation of tan as above. */ 76 77 /* Perform additional reduction if required. */ 78 sv_f32_t z = svneg_f32_m (r, pred_alt, r); 79 80 /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4]. */ 81 sv_f32_t z2 = svmul_f32_x (pg, z, z); 82 sv_f32_t p = eval_poly (pg, z2); 83 sv_f32_t y = sv_fma_f32_x (pg, svmul_f32_x (pg, z, z2), p, z); 84 85 /* Transform result back, if necessary. */ 86 sv_f32_t inv_y = svdiv_f32_x (pg, sv_f32 (1.0f), y); 87 y = svsel_f32 (pred_alt, inv_y, y); 88 89 /* Fast reduction does not handle the x = -0.0 case well, 90 therefore it is fixed here. */ 91 y = svsel_f32 (pred_minuszero, x, y); 92 93 /* No need to pass pg to specialcase here since cmp is a strict subset, 94 guaranteed by the cmpge above. */ 95 if (unlikely (svptest_any (pg, cmp))) 96 return __sv_tanf_specialcase (x, y, cmp); 97 return y; 98 } 99 100 PL_ALIAS (__sv_tanf_x, _ZGVsMxv_tanf) 101 102 PL_SIG (SV, F, 1, tan, -3.1, 3.1) 103 PL_TEST_ULP (__sv_tanf, 2.96) 104 PL_TEST_INTERVAL (__sv_tanf, -0.0, -0x1p126, 100) 105 PL_TEST_INTERVAL (__sv_tanf, 0x1p-149, 0x1p-126, 4000) 106 PL_TEST_INTERVAL (__sv_tanf, 0x1p-126, 0x1p-23, 50000) 107 PL_TEST_INTERVAL (__sv_tanf, 0x1p-23, 0.7, 50000) 108 PL_TEST_INTERVAL (__sv_tanf, 0.7, 1.5, 50000) 109 PL_TEST_INTERVAL (__sv_tanf, 1.5, 100, 50000) 110 PL_TEST_INTERVAL (__sv_tanf, 100, 0x1p17, 50000) 111 PL_TEST_INTERVAL (__sv_tanf, 0x1p17, inf, 50000) 112 #endif 113