xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/sincospif_3u2.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner  * Single-precision scalar sincospi function.
3*f3087befSAndrew Turner  *
4*f3087befSAndrew Turner  * Copyright (c) 2024, Arm Limited.
5*f3087befSAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner  */
7*f3087befSAndrew Turner 
8*f3087befSAndrew Turner #include "math_config.h"
9*f3087befSAndrew Turner #include "test_sig.h"
10*f3087befSAndrew Turner #include "test_defs.h"
11*f3087befSAndrew Turner #include "poly_scalar_f32.h"
12*f3087befSAndrew Turner 
13*f3087befSAndrew Turner /* Taylor series coefficents for sin(pi * x).  */
14*f3087befSAndrew Turner const static struct sincospif_data
15*f3087befSAndrew Turner {
16*f3087befSAndrew Turner   float poly[6];
17*f3087befSAndrew Turner } sincospif_data = {
18*f3087befSAndrew Turner   /* Taylor series coefficents for sin(pi * x).  */
19*f3087befSAndrew Turner   .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f,
20*f3087befSAndrew Turner 	    0x1.50783p-4f, -0x1.e30750p-8f },
21*f3087befSAndrew Turner };
22*f3087befSAndrew Turner 
23*f3087befSAndrew Turner /* Top 12 bits of the float representation with the sign bit cleared.  */
24*f3087befSAndrew Turner static inline uint32_t
abstop12(float x)25*f3087befSAndrew Turner abstop12 (float x)
26*f3087befSAndrew Turner {
27*f3087befSAndrew Turner   return (asuint (x) >> 20) & 0x7ff;
28*f3087befSAndrew Turner }
29*f3087befSAndrew Turner 
30*f3087befSAndrew Turner /* Triages special cases into 4 categories:
31*f3087befSAndrew Turner      -1 or +1 if iy represents half an integer
32*f3087befSAndrew Turner        -1 if round(y) is odd.
33*f3087befSAndrew Turner        +1 if round(y) is even.
34*f3087befSAndrew Turner      -2 or +2 if iy represents and integer.
35*f3087befSAndrew Turner        -2 if iy is odd.
36*f3087befSAndrew Turner        +2 if iy is even.
37*f3087befSAndrew Turner    The argument is the bit representation of a positive non-zero
38*f3087befSAndrew Turner    finite floating-point value which is either a half or an integer.  */
39*f3087befSAndrew Turner static inline int
checkint(uint32_t iy)40*f3087befSAndrew Turner checkint (uint32_t iy)
41*f3087befSAndrew Turner {
42*f3087befSAndrew Turner   int e = iy >> 23;
43*f3087befSAndrew Turner   if (e > 0x7f + 23)
44*f3087befSAndrew Turner     return 2;
45*f3087befSAndrew Turner   if (iy & ((1 << (0x7f + 23 - e)) - 1))
46*f3087befSAndrew Turner     {
47*f3087befSAndrew Turner       if ((iy - 1) & 2)
48*f3087befSAndrew Turner 	return -1;
49*f3087befSAndrew Turner       else
50*f3087befSAndrew Turner 	return 1;
51*f3087befSAndrew Turner     }
52*f3087befSAndrew Turner   if (iy & (1 << (0x7f + 23 - e)))
53*f3087befSAndrew Turner     return -2;
54*f3087befSAndrew Turner   return 2;
55*f3087befSAndrew Turner }
56*f3087befSAndrew Turner 
57*f3087befSAndrew Turner /* Approximation for scalar single-precision sincospif(x).
58*f3087befSAndrew Turner    Maximum error for sin: 3.04 ULP:
59*f3087befSAndrew Turner       sincospif_sin(0x1.c597ccp-2) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.
60*f3087befSAndrew Turner    Maximum error for cos: 3.18 ULP:
61*f3087befSAndrew Turner       sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.  */
62*f3087befSAndrew Turner void
arm_math_sincospif(float x,float * out_sin,float * out_cos)63*f3087befSAndrew Turner arm_math_sincospif (float x, float *out_sin, float *out_cos)
64*f3087befSAndrew Turner {
65*f3087befSAndrew Turner 
66*f3087befSAndrew Turner   const struct sincospif_data *d = ptr_barrier (&sincospif_data);
67*f3087befSAndrew Turner   uint32_t sign = asuint (x) & 0x80000000;
68*f3087befSAndrew Turner 
69*f3087befSAndrew Turner   /* abs(x) in [0, 0x1p22].  */
70*f3087befSAndrew Turner   if (likely (abstop12 (x) < abstop12 (0x1p22)))
71*f3087befSAndrew Turner     {
72*f3087befSAndrew Turner       /* ar_s = x - n (range reduction into -1/2 .. 1/2).  */
73*f3087befSAndrew Turner       float ar_s = x - rintf (x);
74*f3087befSAndrew Turner       /* We know that cospi(x) = sinpi(0.5 - x)
75*f3087befSAndrew Turner       range reduction and offset into sinpi range -1/2 .. 1/2
76*f3087befSAndrew Turner       ar_c = 0.5 - |x - n|.  */
77*f3087befSAndrew Turner       float ar_c = 0.5f - fabsf (ar_s);
78*f3087befSAndrew Turner 
79*f3087befSAndrew Turner       float ar2_s = ar_s * ar_s;
80*f3087befSAndrew Turner       float ar2_c = ar_c * ar_c;
81*f3087befSAndrew Turner       float ar4_s = ar2_s * ar2_s;
82*f3087befSAndrew Turner       float ar4_c = ar2_c * ar2_c;
83*f3087befSAndrew Turner 
84*f3087befSAndrew Turner       uint32_t cc_sign = lrintf (x) << 31;
85*f3087befSAndrew Turner       uint32_t ss_sign = cc_sign;
86*f3087befSAndrew Turner       if (ar_s == 0)
87*f3087befSAndrew Turner 	ss_sign = sign;
88*f3087befSAndrew Turner 
89*f3087befSAndrew Turner       /* As all values are reduced to -1/2 .. 1/2, the result of cos(x)
90*f3087befSAndrew Turner       always be positive, therefore, the sign must be introduced
91*f3087befSAndrew Turner       based upon if x rounds to odd or even. For sin(x) the sign is
92*f3087befSAndrew Turner       copied from x.  */
93*f3087befSAndrew Turner       *out_sin = pw_horner_5_f32 (ar2_s, ar4_s, d->poly)
94*f3087befSAndrew Turner 		 * asfloat (asuint (ar_s) ^ ss_sign);
95*f3087befSAndrew Turner       *out_cos = pw_horner_5_f32 (ar2_c, ar4_c, d->poly)
96*f3087befSAndrew Turner 		 * asfloat (asuint (ar_c) ^ cc_sign);
97*f3087befSAndrew Turner       return;
98*f3087befSAndrew Turner     }
99*f3087befSAndrew Turner   else
100*f3087befSAndrew Turner     {
101*f3087befSAndrew Turner       /* When abs(x) > 0x1p22, the x will be either
102*f3087befSAndrew Turner 	    - Half integer (relevant if abs(x) in [0x1p22, 0x1p23])
103*f3087befSAndrew Turner 	    - Odd integer  (relevant if abs(x) in [0x1p22, 0x1p24])
104*f3087befSAndrew Turner 	    - Even integer (relevant if abs(x) in [0x1p22, inf])
105*f3087befSAndrew Turner 	    - Inf or NaN.  */
106*f3087befSAndrew Turner       if (abstop12 (x) >= 0x7f8)
107*f3087befSAndrew Turner 	{
108*f3087befSAndrew Turner 	  float inv_result = __math_invalidf (x);
109*f3087befSAndrew Turner 	  *out_sin = inv_result;
110*f3087befSAndrew Turner 	  *out_cos = inv_result;
111*f3087befSAndrew Turner 	  return;
112*f3087befSAndrew Turner 	}
113*f3087befSAndrew Turner       else
114*f3087befSAndrew Turner 	{
115*f3087befSAndrew Turner 	  uint32_t ax = asuint (x) & 0x7fffffff;
116*f3087befSAndrew Turner 	  int m = checkint (ax);
117*f3087befSAndrew Turner 	  if (m & 1)
118*f3087befSAndrew Turner 	    {
119*f3087befSAndrew Turner 	      *out_sin = sign ? -m : m;
120*f3087befSAndrew Turner 	      *out_cos = 0;
121*f3087befSAndrew Turner 	      return;
122*f3087befSAndrew Turner 	    }
123*f3087befSAndrew Turner 	  else
124*f3087befSAndrew Turner 	    {
125*f3087befSAndrew Turner 	      *out_sin = asfloat (sign);
126*f3087befSAndrew Turner 	      *out_cos = m >> 1;
127*f3087befSAndrew Turner 	      return;
128*f3087befSAndrew Turner 	    }
129*f3087befSAndrew Turner 	}
130*f3087befSAndrew Turner     }
131*f3087befSAndrew Turner }
132*f3087befSAndrew Turner 
133*f3087befSAndrew Turner #if WANT_TRIGPI_TESTS
134*f3087befSAndrew Turner TEST_DISABLE_FENV (arm_math_sincospif_sin)
135*f3087befSAndrew Turner TEST_DISABLE_FENV (arm_math_sincospif_cos)
136*f3087befSAndrew Turner TEST_ULP (arm_math_sincospif_sin, 2.54)
137*f3087befSAndrew Turner TEST_ULP (arm_math_sincospif_cos, 2.68)
138*f3087befSAndrew Turner #  define SINCOSPIF_INTERVAL(lo, hi, n)                                       \
139*f3087befSAndrew Turner     TEST_SYM_INTERVAL (arm_math_sincospif_sin, lo, hi, n)                     \
140*f3087befSAndrew Turner     TEST_SYM_INTERVAL (arm_math_sincospif_cos, lo, hi, n)
141*f3087befSAndrew Turner SINCOSPIF_INTERVAL (0, 0x1p-31, 10000)
142*f3087befSAndrew Turner SINCOSPIF_INTERVAL (0x1p-31, 1, 50000)
143*f3087befSAndrew Turner SINCOSPIF_INTERVAL (1, 0x1p22f, 50000)
144*f3087befSAndrew Turner SINCOSPIF_INTERVAL (0x1p22f, inf, 10000)
145*f3087befSAndrew Turner #endif
146