xref: /freebsd/contrib/arm-optimized-routines/pl/math/sv_tanf_3u5.c (revision 7ef62cebc2f965b0f640263e179276928885e33d)
1 /*
2  * Single-precision vector tan(x) function.
3  *
4  * Copyright (c) 2020-2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #include "sv_math.h"
9 #include "pl_sig.h"
10 #include "pl_test.h"
11 
12 #if SV_SUPPORTED
13 
14 /* Constants.  */
15 #define NegPio2_1 (sv_f32 (-0x1.921fb6p+0f))
16 #define NegPio2_2 (sv_f32 (0x1.777a5cp-25f))
17 #define NegPio2_3 (sv_f32 (0x1.ee59dap-50f))
18 #define InvPio2 (sv_f32 (0x1.45f306p-1f))
19 #define RangeVal (sv_f32 (0x1p15f))
20 #define Shift (sv_f32 (0x1.8p+23f))
21 
22 #define poly(i) sv_f32 (__tanf_poly_data.poly_tan[i])
23 
24 /* Use full Estrin's scheme to evaluate polynomial.  */
25 static inline sv_f32_t
26 eval_poly (svbool_t pg, sv_f32_t z)
27 {
28   sv_f32_t z2 = svmul_f32_x (pg, z, z);
29   sv_f32_t z4 = svmul_f32_x (pg, z2, z2);
30   sv_f32_t y_10 = sv_fma_f32_x (pg, z, poly (1), poly (0));
31   sv_f32_t y_32 = sv_fma_f32_x (pg, z, poly (3), poly (2));
32   sv_f32_t y_54 = sv_fma_f32_x (pg, z, poly (5), poly (4));
33   sv_f32_t y_32_10 = sv_fma_f32_x (pg, z2, y_32, y_10);
34   sv_f32_t y = sv_fma_f32_x (pg, z4, y_54, y_32_10);
35   return y;
36 }
37 
38 static NOINLINE sv_f32_t
39 __sv_tanf_specialcase (sv_f32_t x, sv_f32_t y, svbool_t cmp)
40 {
41   return sv_call_f32 (tanf, x, y, cmp);
42 }
43 
44 /* Fast implementation of SVE tanf.
45    Maximum error is 3.45 ULP:
46    __sv_tanf(-0x1.e5f0cap+13) got 0x1.ff9856p-1
47 			     want 0x1.ff9850p-1.  */
48 sv_f32_t
49 __sv_tanf_x (sv_f32_t x, const svbool_t pg)
50 {
51   /* Determine whether input is too large to perform fast regression.  */
52   svbool_t cmp = svacge_f32 (pg, x, RangeVal);
53   svbool_t pred_minuszero = svcmpeq_f32 (pg, x, sv_f32 (-0.0));
54 
55   /* n = rint(x/(pi/2)).  */
56   sv_f32_t q = sv_fma_f32_x (pg, InvPio2, x, Shift);
57   sv_f32_t n = svsub_f32_x (pg, q, Shift);
58   /* n is already a signed integer, simply convert it.  */
59   sv_s32_t in = sv_to_s32_f32_x (pg, n);
60   /* Determine if x lives in an interval, where |tan(x)| grows to infinity.  */
61   sv_s32_t alt = svand_s32_x (pg, in, sv_s32 (1));
62   svbool_t pred_alt = svcmpne_s32 (pg, alt, sv_s32 (0));
63 
64   /* r = x - n * (pi/2)  (range reduction into 0 .. pi/4).  */
65   sv_f32_t r;
66   r = sv_fma_f32_x (pg, NegPio2_1, n, x);
67   r = sv_fma_f32_x (pg, NegPio2_2, n, r);
68   r = sv_fma_f32_x (pg, NegPio2_3, n, r);
69 
70   /* If x lives in an interval, where |tan(x)|
71      - is finite, then use a polynomial approximation of the form
72        tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2).
73      - grows to infinity then use symmetries of tangent and the identity
74        tan(r) = cotan(pi/2 - r) to express tan(x) as 1/tan(-r). Finally, use
75        the same polynomial approximation of tan as above.  */
76 
77   /* Perform additional reduction if required.  */
78   sv_f32_t z = svneg_f32_m (r, pred_alt, r);
79 
80   /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4].  */
81   sv_f32_t z2 = svmul_f32_x (pg, z, z);
82   sv_f32_t p = eval_poly (pg, z2);
83   sv_f32_t y = sv_fma_f32_x (pg, svmul_f32_x (pg, z, z2), p, z);
84 
85   /* Transform result back, if necessary.  */
86   sv_f32_t inv_y = svdiv_f32_x (pg, sv_f32 (1.0f), y);
87   y = svsel_f32 (pred_alt, inv_y, y);
88 
89   /* Fast reduction does not handle the x = -0.0 case well,
90      therefore it is fixed here.  */
91   y = svsel_f32 (pred_minuszero, x, y);
92 
93   /* No need to pass pg to specialcase here since cmp is a strict subset,
94      guaranteed by the cmpge above.  */
95   if (unlikely (svptest_any (pg, cmp)))
96     return __sv_tanf_specialcase (x, y, cmp);
97   return y;
98 }
99 
100 PL_ALIAS (__sv_tanf_x, _ZGVsMxv_tanf)
101 
102 PL_SIG (SV, F, 1, tan, -3.1, 3.1)
103 PL_TEST_ULP (__sv_tanf, 2.96)
104 PL_TEST_INTERVAL (__sv_tanf, -0.0, -0x1p126, 100)
105 PL_TEST_INTERVAL (__sv_tanf, 0x1p-149, 0x1p-126, 4000)
106 PL_TEST_INTERVAL (__sv_tanf, 0x1p-126, 0x1p-23, 50000)
107 PL_TEST_INTERVAL (__sv_tanf, 0x1p-23, 0.7, 50000)
108 PL_TEST_INTERVAL (__sv_tanf, 0.7, 1.5, 50000)
109 PL_TEST_INTERVAL (__sv_tanf, 1.5, 100, 50000)
110 PL_TEST_INTERVAL (__sv_tanf, 100, 0x1p17, 50000)
111 PL_TEST_INTERVAL (__sv_tanf, 0x1p17, inf, 50000)
112 #endif
113