xref: /titanic_41/usr/src/lib/libmvec/common/__vatanf.c (revision 5b2ba9d3d1e04dde804c89e9879f7dfc9bcebf61)
1*5b2ba9d3SPiotr Jasiukajtis /*
2*5b2ba9d3SPiotr Jasiukajtis  * CDDL HEADER START
3*5b2ba9d3SPiotr Jasiukajtis  *
4*5b2ba9d3SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*5b2ba9d3SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*5b2ba9d3SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*5b2ba9d3SPiotr Jasiukajtis  *
8*5b2ba9d3SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5b2ba9d3SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*5b2ba9d3SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*5b2ba9d3SPiotr Jasiukajtis  * and limitations under the License.
12*5b2ba9d3SPiotr Jasiukajtis  *
13*5b2ba9d3SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*5b2ba9d3SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5b2ba9d3SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*5b2ba9d3SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*5b2ba9d3SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*5b2ba9d3SPiotr Jasiukajtis  *
19*5b2ba9d3SPiotr Jasiukajtis  * CDDL HEADER END
20*5b2ba9d3SPiotr Jasiukajtis  */
21*5b2ba9d3SPiotr Jasiukajtis 
22*5b2ba9d3SPiotr Jasiukajtis /*
23*5b2ba9d3SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24*5b2ba9d3SPiotr Jasiukajtis  */
25*5b2ba9d3SPiotr Jasiukajtis /*
26*5b2ba9d3SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27*5b2ba9d3SPiotr Jasiukajtis  * Use is subject to license terms.
28*5b2ba9d3SPiotr Jasiukajtis  */
29*5b2ba9d3SPiotr Jasiukajtis 
30*5b2ba9d3SPiotr Jasiukajtis #ifdef __RESTRICT
31*5b2ba9d3SPiotr Jasiukajtis #define restrict _Restrict
32*5b2ba9d3SPiotr Jasiukajtis #else
33*5b2ba9d3SPiotr Jasiukajtis #define restrict
34*5b2ba9d3SPiotr Jasiukajtis #endif
35*5b2ba9d3SPiotr Jasiukajtis 
36*5b2ba9d3SPiotr Jasiukajtis void
__vatanf(int n,float * restrict x,int stridex,float * restrict y,int stridey)37*5b2ba9d3SPiotr Jasiukajtis __vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey)
38*5b2ba9d3SPiotr Jasiukajtis {
39*5b2ba9d3SPiotr Jasiukajtis   extern const double __vlibm_TBL_atan1[];
40*5b2ba9d3SPiotr Jasiukajtis   double  conup0, conup1, conup2;
41*5b2ba9d3SPiotr Jasiukajtis   float dummy, ansf = 0.0;
42*5b2ba9d3SPiotr Jasiukajtis   float f0, f1, f2;
43*5b2ba9d3SPiotr Jasiukajtis   float ans0, ans1, ans2;
44*5b2ba9d3SPiotr Jasiukajtis   float poly0, poly1, poly2;
45*5b2ba9d3SPiotr Jasiukajtis   float sign0, sign1, sign2;
46*5b2ba9d3SPiotr Jasiukajtis   int intf, intz, argcount;
47*5b2ba9d3SPiotr Jasiukajtis   int index0, index1, index2;
48*5b2ba9d3SPiotr Jasiukajtis   float z,*yaddr0,*yaddr1,*yaddr2;
49*5b2ba9d3SPiotr Jasiukajtis   int *pz = (int *) &z;
50*5b2ba9d3SPiotr Jasiukajtis #ifdef UNROLL4
51*5b2ba9d3SPiotr Jasiukajtis   double conup3;
52*5b2ba9d3SPiotr Jasiukajtis   int index3;
53*5b2ba9d3SPiotr Jasiukajtis   float f3, ans3, poly3, sign3, *yaddr3;
54*5b2ba9d3SPiotr Jasiukajtis #endif
55*5b2ba9d3SPiotr Jasiukajtis 
56*5b2ba9d3SPiotr Jasiukajtis /*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
57*5b2ba9d3SPiotr Jasiukajtis  *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
58*5b2ba9d3SPiotr Jasiukajtis 
59*5b2ba9d3SPiotr Jasiukajtis   static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
60*5b2ba9d3SPiotr Jasiukajtis   static const float pone = 1.0f;
61*5b2ba9d3SPiotr Jasiukajtis 
62*5b2ba9d3SPiotr Jasiukajtis   if (n <= 0) return;		/* if no. of elements is 0 or neg, do nothing */
63*5b2ba9d3SPiotr Jasiukajtis   do
64*5b2ba9d3SPiotr Jasiukajtis   {
65*5b2ba9d3SPiotr Jasiukajtis   LOOP0:
66*5b2ba9d3SPiotr Jasiukajtis 
67*5b2ba9d3SPiotr Jasiukajtis 	intf     = *(int *) x;		/* upper half of x, as integer */
68*5b2ba9d3SPiotr Jasiukajtis 	f0 = *x;
69*5b2ba9d3SPiotr Jasiukajtis 	sign0 = pone;
70*5b2ba9d3SPiotr Jasiukajtis     	if (intf < 0) {
71*5b2ba9d3SPiotr Jasiukajtis     		intf = intf & ~0x80000000; /* abs(upper argument) */
72*5b2ba9d3SPiotr Jasiukajtis 		f0 = -f0;
73*5b2ba9d3SPiotr Jasiukajtis 		sign0 = -sign0;
74*5b2ba9d3SPiotr Jasiukajtis 	}
75*5b2ba9d3SPiotr Jasiukajtis 
76*5b2ba9d3SPiotr Jasiukajtis     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
77*5b2ba9d3SPiotr Jasiukajtis     {
78*5b2ba9d3SPiotr Jasiukajtis       if (intf > 0x7f800000)
79*5b2ba9d3SPiotr Jasiukajtis       {
80*5b2ba9d3SPiotr Jasiukajtis 	ansf  = f0- f0; 				/* return NaN if x=NaN*/
81*5b2ba9d3SPiotr Jasiukajtis       }
82*5b2ba9d3SPiotr Jasiukajtis       else if (intf < 0x31800000) 		/* avoid underflow for small arg */
83*5b2ba9d3SPiotr Jasiukajtis       {
84*5b2ba9d3SPiotr Jasiukajtis         dummy = 1.0e37 + f0;
85*5b2ba9d3SPiotr Jasiukajtis         dummy = dummy;
86*5b2ba9d3SPiotr Jasiukajtis 	ansf  = f0;
87*5b2ba9d3SPiotr Jasiukajtis       }
88*5b2ba9d3SPiotr Jasiukajtis       else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
89*5b2ba9d3SPiotr Jasiukajtis       {
90*5b2ba9d3SPiotr Jasiukajtis         index0= 2;
91*5b2ba9d3SPiotr Jasiukajtis         ansf  = __vlibm_TBL_atan1[index0];/* pi/2 up */
92*5b2ba9d3SPiotr Jasiukajtis       }
93*5b2ba9d3SPiotr Jasiukajtis       *y      = sign0*ansf;		/* store answer, with sign bit 	*/
94*5b2ba9d3SPiotr Jasiukajtis       x      += stridex;
95*5b2ba9d3SPiotr Jasiukajtis       y      += stridey;
96*5b2ba9d3SPiotr Jasiukajtis       argcount = 0;				/* initialize argcount		*/
97*5b2ba9d3SPiotr Jasiukajtis       if (--n <=0) break;			/* we are done 			*/
98*5b2ba9d3SPiotr Jasiukajtis       goto LOOP0;				/* otherwise, examine next arg  */
99*5b2ba9d3SPiotr Jasiukajtis     }
100*5b2ba9d3SPiotr Jasiukajtis 
101*5b2ba9d3SPiotr Jasiukajtis     if (intf > 0x42800000)			/* if (|x| > 64               	*/
102*5b2ba9d3SPiotr Jasiukajtis     {
103*5b2ba9d3SPiotr Jasiukajtis     f0 = -pone/f0;
104*5b2ba9d3SPiotr Jasiukajtis 	index0 = 2; 				/* point to pi/2 upper, lower	*/
105*5b2ba9d3SPiotr Jasiukajtis     }
106*5b2ba9d3SPiotr Jasiukajtis     else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
107*5b2ba9d3SPiotr Jasiukajtis     {
108*5b2ba9d3SPiotr Jasiukajtis       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
109*5b2ba9d3SPiotr Jasiukajtis       pz[0]  = intz;				/* store as a float (z)		*/
110*5b2ba9d3SPiotr Jasiukajtis     f0 = (f0 - z)/(pone + f0*z);
111*5b2ba9d3SPiotr Jasiukajtis 	index0 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
112*5b2ba9d3SPiotr Jasiukajtis 	index0 = index0+ 4;			/* skip over 0,0,pi/2,pi/2	*/
113*5b2ba9d3SPiotr Jasiukajtis     }
114*5b2ba9d3SPiotr Jasiukajtis     else					/* |x| < 1/64 */
115*5b2ba9d3SPiotr Jasiukajtis     {
116*5b2ba9d3SPiotr Jasiukajtis 	index0   = 0;				/* points to 0,0 in table	*/
117*5b2ba9d3SPiotr Jasiukajtis     }
118*5b2ba9d3SPiotr Jasiukajtis     yaddr0   = y;				/* address to store this answer */
119*5b2ba9d3SPiotr Jasiukajtis     x       += stridex;				/* point to next arg		*/
120*5b2ba9d3SPiotr Jasiukajtis     y       += stridey;				/* point to next result		*/
121*5b2ba9d3SPiotr Jasiukajtis     argcount = 1;				/* we now have 1 good argument  */
122*5b2ba9d3SPiotr Jasiukajtis     if (--n <=0)
123*5b2ba9d3SPiotr Jasiukajtis     {
124*5b2ba9d3SPiotr Jasiukajtis       goto UNROLL;				/* finish up with 1 good arg 	*/
125*5b2ba9d3SPiotr Jasiukajtis     }
126*5b2ba9d3SPiotr Jasiukajtis 
127*5b2ba9d3SPiotr Jasiukajtis     /*--------------------------------------------------------------------------*/
128*5b2ba9d3SPiotr Jasiukajtis     /*--------------------------------------------------------------------------*/
129*5b2ba9d3SPiotr Jasiukajtis     /*--------------------------------------------------------------------------*/
130*5b2ba9d3SPiotr Jasiukajtis 
131*5b2ba9d3SPiotr Jasiukajtis   LOOP1:
132*5b2ba9d3SPiotr Jasiukajtis 
133*5b2ba9d3SPiotr Jasiukajtis 	intf     = *(int *) x;		/* upper half of x, as integer */
134*5b2ba9d3SPiotr Jasiukajtis 	f1 = *x;
135*5b2ba9d3SPiotr Jasiukajtis 	sign1 = pone;
136*5b2ba9d3SPiotr Jasiukajtis     	if (intf < 0) {
137*5b2ba9d3SPiotr Jasiukajtis     		intf = intf & ~0x80000000; /* abs(upper argument) */
138*5b2ba9d3SPiotr Jasiukajtis 		f1 = -f1;
139*5b2ba9d3SPiotr Jasiukajtis 		sign1 = -sign1;
140*5b2ba9d3SPiotr Jasiukajtis 	}
141*5b2ba9d3SPiotr Jasiukajtis 
142*5b2ba9d3SPiotr Jasiukajtis     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
143*5b2ba9d3SPiotr Jasiukajtis     {
144*5b2ba9d3SPiotr Jasiukajtis       if (intf > 0x7f800000)
145*5b2ba9d3SPiotr Jasiukajtis       {
146*5b2ba9d3SPiotr Jasiukajtis 	ansf   = f1 - f1;			/* return NaN if x=NaN*/
147*5b2ba9d3SPiotr Jasiukajtis       }
148*5b2ba9d3SPiotr Jasiukajtis       else if (intf < 0x31800000) 		/* avoid underflow for small arg */
149*5b2ba9d3SPiotr Jasiukajtis       {
150*5b2ba9d3SPiotr Jasiukajtis         dummy = 1.0e37 + f1;
151*5b2ba9d3SPiotr Jasiukajtis         dummy = dummy;
152*5b2ba9d3SPiotr Jasiukajtis 	ansf   = f1;
153*5b2ba9d3SPiotr Jasiukajtis       }
154*5b2ba9d3SPiotr Jasiukajtis       else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
155*5b2ba9d3SPiotr Jasiukajtis       {
156*5b2ba9d3SPiotr Jasiukajtis         index1 = 2;
157*5b2ba9d3SPiotr Jasiukajtis         ansf   = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
158*5b2ba9d3SPiotr Jasiukajtis       }
159*5b2ba9d3SPiotr Jasiukajtis       *y      = sign1 * ansf;		/* store answer, with sign bit 	*/
160*5b2ba9d3SPiotr Jasiukajtis       x      += stridex;
161*5b2ba9d3SPiotr Jasiukajtis       y      += stridey;
162*5b2ba9d3SPiotr Jasiukajtis       argcount = 1;				/* we still have 1 good arg 	*/
163*5b2ba9d3SPiotr Jasiukajtis       if (--n <=0)
164*5b2ba9d3SPiotr Jasiukajtis       {
165*5b2ba9d3SPiotr Jasiukajtis         goto UNROLL;				/* finish up with 1 good arg 	*/
166*5b2ba9d3SPiotr Jasiukajtis       }
167*5b2ba9d3SPiotr Jasiukajtis       goto LOOP1;				/* otherwise, examine next arg  */
168*5b2ba9d3SPiotr Jasiukajtis     }
169*5b2ba9d3SPiotr Jasiukajtis 
170*5b2ba9d3SPiotr Jasiukajtis     if (intf > 0x42800000)			/* if (|x| > 64               	*/
171*5b2ba9d3SPiotr Jasiukajtis     {
172*5b2ba9d3SPiotr Jasiukajtis     f1 = -pone/f1;
173*5b2ba9d3SPiotr Jasiukajtis       index1 = 2; 				/* point to pi/2 upper, lower	*/
174*5b2ba9d3SPiotr Jasiukajtis     }
175*5b2ba9d3SPiotr Jasiukajtis     else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
176*5b2ba9d3SPiotr Jasiukajtis     {
177*5b2ba9d3SPiotr Jasiukajtis       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
178*5b2ba9d3SPiotr Jasiukajtis       pz[0]  = intz;				/* store as a float (z)		*/
179*5b2ba9d3SPiotr Jasiukajtis     f1 = (f1 - z)/(pone + f1*z);
180*5b2ba9d3SPiotr Jasiukajtis       index1 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
181*5b2ba9d3SPiotr Jasiukajtis       index1 = index1 + 4;			/* skip over 0,0,pi/2,pi/2	*/
182*5b2ba9d3SPiotr Jasiukajtis     }
183*5b2ba9d3SPiotr Jasiukajtis     else
184*5b2ba9d3SPiotr Jasiukajtis     {
185*5b2ba9d3SPiotr Jasiukajtis 	index1   = 0;				/* points to 0,0 in table	*/
186*5b2ba9d3SPiotr Jasiukajtis     }
187*5b2ba9d3SPiotr Jasiukajtis 
188*5b2ba9d3SPiotr Jasiukajtis     yaddr1   = y;				/* address to store this answer */
189*5b2ba9d3SPiotr Jasiukajtis     x       += stridex;				/* point to next arg		*/
190*5b2ba9d3SPiotr Jasiukajtis     y       += stridey;				/* point to next result		*/
191*5b2ba9d3SPiotr Jasiukajtis     argcount = 2;				/* we now have 2 good arguments */
192*5b2ba9d3SPiotr Jasiukajtis     if (--n <=0)
193*5b2ba9d3SPiotr Jasiukajtis     {
194*5b2ba9d3SPiotr Jasiukajtis       goto UNROLL;				/* finish up with 2 good args 	*/
195*5b2ba9d3SPiotr Jasiukajtis     }
196*5b2ba9d3SPiotr Jasiukajtis 
197*5b2ba9d3SPiotr Jasiukajtis     /*--------------------------------------------------------------------------*/
198*5b2ba9d3SPiotr Jasiukajtis     /*--------------------------------------------------------------------------*/
199*5b2ba9d3SPiotr Jasiukajtis     /*--------------------------------------------------------------------------*/
200*5b2ba9d3SPiotr Jasiukajtis 
201*5b2ba9d3SPiotr Jasiukajtis   LOOP2:
202*5b2ba9d3SPiotr Jasiukajtis 
203*5b2ba9d3SPiotr Jasiukajtis 	intf     = *(int *) x;		/* upper half of x, as integer */
204*5b2ba9d3SPiotr Jasiukajtis 	f2 = *x;
205*5b2ba9d3SPiotr Jasiukajtis 	sign2 = pone;
206*5b2ba9d3SPiotr Jasiukajtis     	if (intf < 0) {
207*5b2ba9d3SPiotr Jasiukajtis     		intf = intf & ~0x80000000; /* abs(upper argument) */
208*5b2ba9d3SPiotr Jasiukajtis 		f2 = -f2;
209*5b2ba9d3SPiotr Jasiukajtis 		sign2 = -sign2;
210*5b2ba9d3SPiotr Jasiukajtis 	}
211*5b2ba9d3SPiotr Jasiukajtis 
212*5b2ba9d3SPiotr Jasiukajtis     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
213*5b2ba9d3SPiotr Jasiukajtis     {
214*5b2ba9d3SPiotr Jasiukajtis       if (intf > 0x7f800000)
215*5b2ba9d3SPiotr Jasiukajtis       {
216*5b2ba9d3SPiotr Jasiukajtis 	ansf   = f2 - f2;			/* return NaN if x=NaN*/
217*5b2ba9d3SPiotr Jasiukajtis       }
218*5b2ba9d3SPiotr Jasiukajtis       else if (intf < 0x31800000) 		/* avoid underflow for small arg */
219*5b2ba9d3SPiotr Jasiukajtis       {
220*5b2ba9d3SPiotr Jasiukajtis         dummy = 1.0e37 + f2;
221*5b2ba9d3SPiotr Jasiukajtis         dummy = dummy;
222*5b2ba9d3SPiotr Jasiukajtis 	ansf   = f2;
223*5b2ba9d3SPiotr Jasiukajtis       }
224*5b2ba9d3SPiotr Jasiukajtis       else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
225*5b2ba9d3SPiotr Jasiukajtis       {
226*5b2ba9d3SPiotr Jasiukajtis         index2 = 2;
227*5b2ba9d3SPiotr Jasiukajtis         ansf   = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
228*5b2ba9d3SPiotr Jasiukajtis       }
229*5b2ba9d3SPiotr Jasiukajtis       *y      = sign2 * ansf;		/* store answer, with sign bit 	*/
230*5b2ba9d3SPiotr Jasiukajtis       x      += stridex;
231*5b2ba9d3SPiotr Jasiukajtis       y      += stridey;
232*5b2ba9d3SPiotr Jasiukajtis       argcount = 2;				/* we still have 2 good args 	*/
233*5b2ba9d3SPiotr Jasiukajtis       if (--n <=0)
234*5b2ba9d3SPiotr Jasiukajtis       {
235*5b2ba9d3SPiotr Jasiukajtis         goto UNROLL;				/* finish up with 2 good args 	*/
236*5b2ba9d3SPiotr Jasiukajtis       }
237*5b2ba9d3SPiotr Jasiukajtis       goto LOOP2;				/* otherwise, examine next arg  */
238*5b2ba9d3SPiotr Jasiukajtis     }
239*5b2ba9d3SPiotr Jasiukajtis 
240*5b2ba9d3SPiotr Jasiukajtis     if (intf > 0x42800000)			/* if (|x| > 64               	*/
241*5b2ba9d3SPiotr Jasiukajtis     {
242*5b2ba9d3SPiotr Jasiukajtis     f2 = -pone/f2;
243*5b2ba9d3SPiotr Jasiukajtis       index2 = 2; 				/* point to pi/2 upper, lower	*/
244*5b2ba9d3SPiotr Jasiukajtis     }
245*5b2ba9d3SPiotr Jasiukajtis     else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
246*5b2ba9d3SPiotr Jasiukajtis     {
247*5b2ba9d3SPiotr Jasiukajtis       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
248*5b2ba9d3SPiotr Jasiukajtis       pz[0]  = intz;				/* store as a float (z)		*/
249*5b2ba9d3SPiotr Jasiukajtis     f2 = (f2 - z)/(pone + f2*z);
250*5b2ba9d3SPiotr Jasiukajtis       index2 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
251*5b2ba9d3SPiotr Jasiukajtis       index2 = index2 + 4;			/* skip over 0,0,pi/2,pi/2	*/
252*5b2ba9d3SPiotr Jasiukajtis     }
253*5b2ba9d3SPiotr Jasiukajtis     else
254*5b2ba9d3SPiotr Jasiukajtis     {
255*5b2ba9d3SPiotr Jasiukajtis 	index2   = 0;				/* points to 0,0 in table	*/
256*5b2ba9d3SPiotr Jasiukajtis     }
257*5b2ba9d3SPiotr Jasiukajtis     yaddr2   = y;				/* address to store this answer */
258*5b2ba9d3SPiotr Jasiukajtis     x       += stridex;				/* point to next arg		*/
259*5b2ba9d3SPiotr Jasiukajtis     y       += stridey;				/* point to next result		*/
260*5b2ba9d3SPiotr Jasiukajtis     argcount = 3;				/* we now have 3 good arguments */
261*5b2ba9d3SPiotr Jasiukajtis     if (--n <=0)
262*5b2ba9d3SPiotr Jasiukajtis     {
263*5b2ba9d3SPiotr Jasiukajtis       goto UNROLL;				/* finish up with 2 good args 	*/
264*5b2ba9d3SPiotr Jasiukajtis     }
265*5b2ba9d3SPiotr Jasiukajtis 
266*5b2ba9d3SPiotr Jasiukajtis 
267*5b2ba9d3SPiotr Jasiukajtis     /*--------------------------------------------------------------------------*/
268*5b2ba9d3SPiotr Jasiukajtis     /*--------------------------------------------------------------------------*/
269*5b2ba9d3SPiotr Jasiukajtis     /*--------------------------------------------------------------------------*/
270*5b2ba9d3SPiotr Jasiukajtis 
271*5b2ba9d3SPiotr Jasiukajtis #ifdef UNROLL4
272*5b2ba9d3SPiotr Jasiukajtis   LOOP3:
273*5b2ba9d3SPiotr Jasiukajtis 
274*5b2ba9d3SPiotr Jasiukajtis 	intf     = *(int *) x;		/* upper half of x, as integer */
275*5b2ba9d3SPiotr Jasiukajtis 	f3 = *x;
276*5b2ba9d3SPiotr Jasiukajtis 	sign3 = pone;
277*5b2ba9d3SPiotr Jasiukajtis     	if (intf < 0) {
278*5b2ba9d3SPiotr Jasiukajtis     		intf = intf & ~0x80000000; /* abs(upper argument) */
279*5b2ba9d3SPiotr Jasiukajtis 		f3 = -f3;
280*5b2ba9d3SPiotr Jasiukajtis 		sign3 = -sign3;
281*5b2ba9d3SPiotr Jasiukajtis 	}
282*5b2ba9d3SPiotr Jasiukajtis 
283*5b2ba9d3SPiotr Jasiukajtis     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
284*5b2ba9d3SPiotr Jasiukajtis     {
285*5b2ba9d3SPiotr Jasiukajtis       if (intf > 0x7f800000)
286*5b2ba9d3SPiotr Jasiukajtis       {
287*5b2ba9d3SPiotr Jasiukajtis 	ansf   = f3 - f3;			/* return NaN if x=NaN*/
288*5b2ba9d3SPiotr Jasiukajtis       }
289*5b2ba9d3SPiotr Jasiukajtis       else if (intf < 0x31800000) 		/* avoid underflow for small arg */
290*5b2ba9d3SPiotr Jasiukajtis       {
291*5b2ba9d3SPiotr Jasiukajtis         dummy = 1.0e37 + f3;
292*5b2ba9d3SPiotr Jasiukajtis         dummy = dummy;
293*5b2ba9d3SPiotr Jasiukajtis 	ansf   = f3;
294*5b2ba9d3SPiotr Jasiukajtis       }
295*5b2ba9d3SPiotr Jasiukajtis       else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
296*5b2ba9d3SPiotr Jasiukajtis       {
297*5b2ba9d3SPiotr Jasiukajtis         index3 = 2;
298*5b2ba9d3SPiotr Jasiukajtis         ansf   = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
299*5b2ba9d3SPiotr Jasiukajtis       }
300*5b2ba9d3SPiotr Jasiukajtis       *y      = sign3 * ansf;		/* store answer, with sign bit 	*/
301*5b2ba9d3SPiotr Jasiukajtis       x      += stridex;
302*5b2ba9d3SPiotr Jasiukajtis       y      += stridey;
303*5b2ba9d3SPiotr Jasiukajtis       argcount = 3;				/* we still have 3 good args 	*/
304*5b2ba9d3SPiotr Jasiukajtis       if (--n <=0)
305*5b2ba9d3SPiotr Jasiukajtis       {
306*5b2ba9d3SPiotr Jasiukajtis         goto UNROLL;				/* finish up with 3 good args 	*/
307*5b2ba9d3SPiotr Jasiukajtis       }
308*5b2ba9d3SPiotr Jasiukajtis       goto LOOP3;				/* otherwise, examine next arg  */
309*5b2ba9d3SPiotr Jasiukajtis     }
310*5b2ba9d3SPiotr Jasiukajtis 
311*5b2ba9d3SPiotr Jasiukajtis     if (intf > 0x42800000)			/* if (|x| > 64               	*/
312*5b2ba9d3SPiotr Jasiukajtis     {
313*5b2ba9d3SPiotr Jasiukajtis 	n3 = -pone;
314*5b2ba9d3SPiotr Jasiukajtis         d3 = f3;
315*5b2ba9d3SPiotr Jasiukajtis     f3 = n3/d3;
316*5b2ba9d3SPiotr Jasiukajtis       index3 = 2; 				/* point to pi/2 upper, lower	*/
317*5b2ba9d3SPiotr Jasiukajtis     }
318*5b2ba9d3SPiotr Jasiukajtis     else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
319*5b2ba9d3SPiotr Jasiukajtis     {
320*5b2ba9d3SPiotr Jasiukajtis       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
321*5b2ba9d3SPiotr Jasiukajtis       pz[0]  = intz;				/* store as a float (z)		*/
322*5b2ba9d3SPiotr Jasiukajtis 	n3     = (f3 - z);
323*5b2ba9d3SPiotr Jasiukajtis 	d3     = (pone + f3*z); 		/* get reduced argument		*/
324*5b2ba9d3SPiotr Jasiukajtis     f3 = n3/d3;
325*5b2ba9d3SPiotr Jasiukajtis       index3 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
326*5b2ba9d3SPiotr Jasiukajtis       index3 = index3 + 4;			/* skip over 0,0,pi/2,pi/2	*/
327*5b2ba9d3SPiotr Jasiukajtis     }
328*5b2ba9d3SPiotr Jasiukajtis     else
329*5b2ba9d3SPiotr Jasiukajtis     {
330*5b2ba9d3SPiotr Jasiukajtis 	n3 = f3;
331*5b2ba9d3SPiotr Jasiukajtis 	d3 = pone;
332*5b2ba9d3SPiotr Jasiukajtis 	index3   = 0;				/* points to 0,0 in table	*/
333*5b2ba9d3SPiotr Jasiukajtis     }
334*5b2ba9d3SPiotr Jasiukajtis     yaddr3   = y;				/* address to store this answer */
335*5b2ba9d3SPiotr Jasiukajtis     x       += stridex;				/* point to next arg		*/
336*5b2ba9d3SPiotr Jasiukajtis     y       += stridey;				/* point to next result		*/
337*5b2ba9d3SPiotr Jasiukajtis     argcount = 4;				/* we now have 4 good arguments */
338*5b2ba9d3SPiotr Jasiukajtis     if (--n <=0)
339*5b2ba9d3SPiotr Jasiukajtis     {
340*5b2ba9d3SPiotr Jasiukajtis       goto UNROLL;				/* finish up with 3 good args 	*/
341*5b2ba9d3SPiotr Jasiukajtis     }
342*5b2ba9d3SPiotr Jasiukajtis #endif /* UNROLL4 */
343*5b2ba9d3SPiotr Jasiukajtis 
344*5b2ba9d3SPiotr Jasiukajtis /* here is the n-way unrolled section,
345*5b2ba9d3SPiotr Jasiukajtis    but we may actually have less than n
346*5b2ba9d3SPiotr Jasiukajtis    arguments at this point
347*5b2ba9d3SPiotr Jasiukajtis */
348*5b2ba9d3SPiotr Jasiukajtis 
349*5b2ba9d3SPiotr Jasiukajtis UNROLL:
350*5b2ba9d3SPiotr Jasiukajtis 
351*5b2ba9d3SPiotr Jasiukajtis #ifdef UNROLL4
352*5b2ba9d3SPiotr Jasiukajtis     if (argcount == 4)
353*5b2ba9d3SPiotr Jasiukajtis     {
354*5b2ba9d3SPiotr Jasiukajtis     conup0   = __vlibm_TBL_atan1[index0];
355*5b2ba9d3SPiotr Jasiukajtis     conup1   = __vlibm_TBL_atan1[index1];
356*5b2ba9d3SPiotr Jasiukajtis     conup2   = __vlibm_TBL_atan1[index2];
357*5b2ba9d3SPiotr Jasiukajtis     conup3   = __vlibm_TBL_atan1[index3];
358*5b2ba9d3SPiotr Jasiukajtis     poly0    = p1*f0*f0*f0 + f0;
359*5b2ba9d3SPiotr Jasiukajtis     ans0     = sign0 * (float)(conup0 + poly0);
360*5b2ba9d3SPiotr Jasiukajtis     poly1    = p1*f1*f1*f1 + f1;
361*5b2ba9d3SPiotr Jasiukajtis     ans1     = sign1 * (float)(conup1 + poly1);
362*5b2ba9d3SPiotr Jasiukajtis     poly2    = p1*f2*f2*f2 + f2;
363*5b2ba9d3SPiotr Jasiukajtis     ans2     = sign2 * (float)(conup2 + poly2);
364*5b2ba9d3SPiotr Jasiukajtis     poly3    = p1*f3*f3*f3 + f3;
365*5b2ba9d3SPiotr Jasiukajtis     ans3     = sign3 * (float)(conup3 + poly3);
366*5b2ba9d3SPiotr Jasiukajtis     *yaddr0  = ans0;
367*5b2ba9d3SPiotr Jasiukajtis     *yaddr1  = ans1;
368*5b2ba9d3SPiotr Jasiukajtis     *yaddr2  = ans2;
369*5b2ba9d3SPiotr Jasiukajtis     *yaddr3  = ans3;
370*5b2ba9d3SPiotr Jasiukajtis     }
371*5b2ba9d3SPiotr Jasiukajtis     else
372*5b2ba9d3SPiotr Jasiukajtis #endif
373*5b2ba9d3SPiotr Jasiukajtis     if (argcount == 3)
374*5b2ba9d3SPiotr Jasiukajtis     {
375*5b2ba9d3SPiotr Jasiukajtis     conup0   = __vlibm_TBL_atan1[index0];
376*5b2ba9d3SPiotr Jasiukajtis     conup1   = __vlibm_TBL_atan1[index1];
377*5b2ba9d3SPiotr Jasiukajtis     conup2   = __vlibm_TBL_atan1[index2];
378*5b2ba9d3SPiotr Jasiukajtis     poly0    = p1*f0*f0*f0 + f0;
379*5b2ba9d3SPiotr Jasiukajtis     poly1    = p1*f1*f1*f1 + f1;
380*5b2ba9d3SPiotr Jasiukajtis     poly2    = p1*f2*f2*f2 + f2;
381*5b2ba9d3SPiotr Jasiukajtis     ans0     = sign0 * (float)(conup0 + poly0);
382*5b2ba9d3SPiotr Jasiukajtis     ans1     = sign1 * (float)(conup1 + poly1);
383*5b2ba9d3SPiotr Jasiukajtis     ans2     = sign2 * (float)(conup2 + poly2);
384*5b2ba9d3SPiotr Jasiukajtis     *yaddr0  = ans0;
385*5b2ba9d3SPiotr Jasiukajtis     *yaddr1  = ans1;
386*5b2ba9d3SPiotr Jasiukajtis     *yaddr2  = ans2;
387*5b2ba9d3SPiotr Jasiukajtis     }
388*5b2ba9d3SPiotr Jasiukajtis     else
389*5b2ba9d3SPiotr Jasiukajtis     if (argcount == 2)
390*5b2ba9d3SPiotr Jasiukajtis     {
391*5b2ba9d3SPiotr Jasiukajtis     conup0   = __vlibm_TBL_atan1[index0];
392*5b2ba9d3SPiotr Jasiukajtis     conup1   = __vlibm_TBL_atan1[index1];
393*5b2ba9d3SPiotr Jasiukajtis     poly0    = p1*f0*f0*f0 + f0;
394*5b2ba9d3SPiotr Jasiukajtis     poly1    = p1*f1*f1*f1 + f1;
395*5b2ba9d3SPiotr Jasiukajtis     ans0     = sign0 * (float)(conup0 + poly0);
396*5b2ba9d3SPiotr Jasiukajtis     ans1     = sign1 * (float)(conup1 + poly1);
397*5b2ba9d3SPiotr Jasiukajtis     *yaddr0  = ans0;
398*5b2ba9d3SPiotr Jasiukajtis     *yaddr1  = ans1;
399*5b2ba9d3SPiotr Jasiukajtis     }
400*5b2ba9d3SPiotr Jasiukajtis     else
401*5b2ba9d3SPiotr Jasiukajtis     if (argcount == 1)
402*5b2ba9d3SPiotr Jasiukajtis     {
403*5b2ba9d3SPiotr Jasiukajtis     conup0   = __vlibm_TBL_atan1[index0];
404*5b2ba9d3SPiotr Jasiukajtis     poly0    = p1*f0*f0*f0 + f0;
405*5b2ba9d3SPiotr Jasiukajtis     ans0     = sign0 * (float)(conup0 + poly0);
406*5b2ba9d3SPiotr Jasiukajtis     *yaddr0  = ans0;
407*5b2ba9d3SPiotr Jasiukajtis      }
408*5b2ba9d3SPiotr Jasiukajtis 
409*5b2ba9d3SPiotr Jasiukajtis   }  while (n > 0);
410*5b2ba9d3SPiotr Jasiukajtis 
411*5b2ba9d3SPiotr Jasiukajtis }
412