xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/tanpif_3u1.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner  * Single-precision scalar tanpi(x) function.
3*f3087befSAndrew Turner  *
4*f3087befSAndrew Turner  * Copyright (c) 2024, Arm Limited.
5*f3087befSAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner  */
7*f3087befSAndrew Turner #include "mathlib.h"
8*f3087befSAndrew Turner #include "math_config.h"
9*f3087befSAndrew Turner #include "test_sig.h"
10*f3087befSAndrew Turner #include "test_defs.h"
11*f3087befSAndrew Turner #include "poly_scalar_f32.h"
12*f3087befSAndrew Turner 
13*f3087befSAndrew Turner const static struct tanpif_data
14*f3087befSAndrew Turner {
15*f3087befSAndrew Turner   float tan_poly[6], cot_poly[4], pi, invpi;
16*f3087befSAndrew Turner } tanpif_data = {
17*f3087befSAndrew Turner   /* Coefficents for tan(pi * x).  */
18*f3087befSAndrew Turner   .tan_poly = {
19*f3087befSAndrew Turner     0x1.4abbc8p3,
20*f3087befSAndrew Turner     0x1.467284p5,
21*f3087befSAndrew Turner     0x1.44cf12p7,
22*f3087befSAndrew Turner     0x1.596b5p9,
23*f3087befSAndrew Turner     0x1.753858p10,
24*f3087befSAndrew Turner     0x1.76ff52p14,
25*f3087befSAndrew Turner   },
26*f3087befSAndrew Turner   /* Coefficents for cot(pi * x).  */
27*f3087befSAndrew Turner   .cot_poly = {
28*f3087befSAndrew Turner     -0x1.0c1522p0,
29*f3087befSAndrew Turner     -0x1.60ce32p-1,
30*f3087befSAndrew Turner     -0x1.49cd42p-1,
31*f3087befSAndrew Turner     -0x1.73f786p-1,
32*f3087befSAndrew Turner   },
33*f3087befSAndrew Turner   .pi = 0x1.921fb6p1f,
34*f3087befSAndrew Turner   .invpi = 0x1.45f308p-2f,
35*f3087befSAndrew Turner };
36*f3087befSAndrew Turner 
37*f3087befSAndrew Turner /* Single-precision scalar tanpi(x) implementation.
38*f3087befSAndrew Turner    Maximum error 2.56 ULP:
39*f3087befSAndrew Turner    tanpif(0x1.4bf948p-1) got -0x1.fcc9ep+0
40*f3087befSAndrew Turner 			want -0x1.fcc9e6p+0.  */
41*f3087befSAndrew Turner float
arm_math_tanpif(float x)42*f3087befSAndrew Turner arm_math_tanpif (float x)
43*f3087befSAndrew Turner {
44*f3087befSAndrew Turner   uint32_t xabs_12 = asuint (x) >> 20 & 0x7f8;
45*f3087befSAndrew Turner 
46*f3087befSAndrew Turner   /* x >= 0x1p24f.  */
47*f3087befSAndrew Turner   if (unlikely (xabs_12 >= 0x4b1))
48*f3087befSAndrew Turner     {
49*f3087befSAndrew Turner       /* tanpif(+/-inf) and tanpif(+/-nan) = nan.  */
50*f3087befSAndrew Turner       if (unlikely (xabs_12 == 0x7f8))
51*f3087befSAndrew Turner 	{
52*f3087befSAndrew Turner 	  return __math_invalidf (x);
53*f3087befSAndrew Turner 	}
54*f3087befSAndrew Turner 
55*f3087befSAndrew Turner       uint32_t x_sign = asuint (x) & 0x80000000;
56*f3087befSAndrew Turner       return asfloat (x_sign);
57*f3087befSAndrew Turner     }
58*f3087befSAndrew Turner 
59*f3087befSAndrew Turner   const struct tanpif_data *d = ptr_barrier (&tanpif_data);
60*f3087befSAndrew Turner 
61*f3087befSAndrew Turner   /* Prevent underflow exceptions. x <= 0x1p-31.  */
62*f3087befSAndrew Turner   if (unlikely (xabs_12 < 0x300))
63*f3087befSAndrew Turner     {
64*f3087befSAndrew Turner       return d->pi * x;
65*f3087befSAndrew Turner     }
66*f3087befSAndrew Turner 
67*f3087befSAndrew Turner   float rounded = roundf (x);
68*f3087befSAndrew Turner   if (unlikely (rounded == x))
69*f3087befSAndrew Turner     {
70*f3087befSAndrew Turner       /* If x == 0, return with sign.  */
71*f3087befSAndrew Turner       if (x == 0)
72*f3087befSAndrew Turner 	{
73*f3087befSAndrew Turner 	  return x;
74*f3087befSAndrew Turner 	}
75*f3087befSAndrew Turner       /* Otherwise, return zero with alternating sign.  */
76*f3087befSAndrew Turner       int32_t m = (int32_t) rounded;
77*f3087befSAndrew Turner       if (x < 0)
78*f3087befSAndrew Turner 	{
79*f3087befSAndrew Turner 	  return m & 1 ? 0.0f : -0.0f;
80*f3087befSAndrew Turner 	}
81*f3087befSAndrew Turner       else
82*f3087befSAndrew Turner 	{
83*f3087befSAndrew Turner 	  return m & 1 ? -0.0f : 0.0f;
84*f3087befSAndrew Turner 	}
85*f3087befSAndrew Turner     }
86*f3087befSAndrew Turner 
87*f3087befSAndrew Turner   float x_reduced = x - rounded;
88*f3087befSAndrew Turner   float abs_x_reduced = 0.5f - asfloat (asuint (x_reduced) & 0x7fffffff);
89*f3087befSAndrew Turner 
90*f3087befSAndrew Turner   float result, offset, scale;
91*f3087befSAndrew Turner 
92*f3087befSAndrew Turner   /* Test  0.25 < abs_x < 0.5 independent from abs_x_reduced.  */
93*f3087befSAndrew Turner   float x2 = x + x;
94*f3087befSAndrew Turner   int32_t rounded_x2 = (int32_t) roundf (x2);
95*f3087befSAndrew Turner   if (rounded_x2 & 1)
96*f3087befSAndrew Turner     {
97*f3087befSAndrew Turner       float r_x = abs_x_reduced;
98*f3087befSAndrew Turner 
99*f3087befSAndrew Turner       float r_x2 = r_x * r_x;
100*f3087befSAndrew Turner       float r_x4 = r_x2 * r_x2;
101*f3087befSAndrew Turner 
102*f3087befSAndrew Turner       uint32_t sign = asuint (x_reduced) & 0x80000000;
103*f3087befSAndrew Turner       r_x = asfloat (asuint (r_x) ^ sign);
104*f3087befSAndrew Turner 
105*f3087befSAndrew Turner       // calculate sign for half-fractional inf values
106*f3087befSAndrew Turner       uint32_t is_finite = asuint (abs_x_reduced);
107*f3087befSAndrew Turner       uint32_t is_odd = (rounded_x2 & 2) << 30;
108*f3087befSAndrew Turner       uint32_t is_neg = rounded_x2 & 0x80000000;
109*f3087befSAndrew Turner       uint32_t keep_sign = is_finite | (is_odd ^ is_neg);
110*f3087befSAndrew Turner       offset = d->invpi / (keep_sign ? r_x : -r_x);
111*f3087befSAndrew Turner       scale = r_x;
112*f3087befSAndrew Turner 
113*f3087befSAndrew Turner       result = pairwise_poly_3_f32 (r_x2, r_x4, d->cot_poly);
114*f3087befSAndrew Turner     }
115*f3087befSAndrew Turner   else
116*f3087befSAndrew Turner     {
117*f3087befSAndrew Turner       float r_x = x_reduced;
118*f3087befSAndrew Turner 
119*f3087befSAndrew Turner       float r_x2 = r_x * r_x;
120*f3087befSAndrew Turner       float r_x4 = r_x2 * r_x2;
121*f3087befSAndrew Turner 
122*f3087befSAndrew Turner       offset = d->pi * r_x;
123*f3087befSAndrew Turner       scale = r_x * r_x2;
124*f3087befSAndrew Turner 
125*f3087befSAndrew Turner       result = pw_horner_5_f32 (r_x2, r_x4, d->tan_poly);
126*f3087befSAndrew Turner     }
127*f3087befSAndrew Turner 
128*f3087befSAndrew Turner   return fmaf (scale, result, offset);
129*f3087befSAndrew Turner }
130*f3087befSAndrew Turner 
131*f3087befSAndrew Turner #if WANT_EXPERIMENTAL_MATH
132*f3087befSAndrew Turner float
tanpif(float x)133*f3087befSAndrew Turner tanpif (float x)
134*f3087befSAndrew Turner {
135*f3087befSAndrew Turner   return arm_math_tanpif (x);
136*f3087befSAndrew Turner }
137*f3087befSAndrew Turner #endif
138*f3087befSAndrew Turner 
139*f3087befSAndrew Turner #if WANT_TRIGPI_TESTS
140*f3087befSAndrew Turner TEST_ULP (arm_math_tanpif, 2.57)
141*f3087befSAndrew Turner TEST_SYM_INTERVAL (arm_math_tanpif, 0, 0x1p-31f, 50000)
142*f3087befSAndrew Turner TEST_SYM_INTERVAL (arm_math_tanpif, 0x1p-31f, 0.5, 100000)
143*f3087befSAndrew Turner TEST_SYM_INTERVAL (arm_math_tanpif, 0.5, 0x1p23f, 100000)
144*f3087befSAndrew Turner TEST_SYM_INTERVAL (arm_math_tanpif, 0x1p23f, inf, 100000)
145*f3087befSAndrew Turner #endif
146