xref: /illumos-gate/usr/src/lib/libmvec/common/__vatanf.c (revision 843c398e8904ed9d833d2af3103894f909fb4b52)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #ifdef __RESTRICT
31 #define restrict _Restrict
32 #else
33 #define restrict
34 #endif
35 
36 void
37 __vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey)
38 {
39   extern const double __vlibm_TBL_atan1[];
40   double  conup0, conup1, conup2;
41   float dummy, ansf = 0.0;
42   float f0, f1, f2;
43   float ans0, ans1, ans2;
44   float poly0, poly1, poly2;
45   float sign0, sign1, sign2;
46   int intf, intz, argcount;
47   int index0, index1, index2;
48   float z,*yaddr0,*yaddr1,*yaddr2;
49   int *pz = (int *) &z;
50 #ifdef UNROLL4
51   double conup3;
52   int index3;
53   float f3, ans3, poly3, sign3, *yaddr3;
54 #endif
55 
56 /*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
57  *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
58 
59   static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
60   static const float pone = 1.0f;
61 
62   if (n <= 0) return;		/* if no. of elements is 0 or neg, do nothing */
63   do
64   {
65   LOOP0:
66 
67 	intf     = *(int *) x;		/* upper half of x, as integer */
68 	f0 = *x;
69 	sign0 = pone;
70     	if (intf < 0) {
71     		intf = intf & ~0x80000000; /* abs(upper argument) */
72 		f0 = -f0;
73 		sign0 = -sign0;
74 	}
75 
76     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
77     {
78       if (intf > 0x7f800000)
79       {
80 	ansf  = f0- f0; 				/* return NaN if x=NaN*/
81       }
82       else if (intf < 0x31800000) 		/* avoid underflow for small arg */
83       {
84         dummy = 1.0e37 + f0;
85         dummy = dummy;
86 	ansf  = f0;
87       }
88       else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
89       {
90         index0= 2;
91         ansf  = __vlibm_TBL_atan1[index0];/* pi/2 up */
92       }
93       *y      = sign0*ansf;		/* store answer, with sign bit 	*/
94       x      += stridex;
95       y      += stridey;
96       argcount = 0;				/* initialize argcount		*/
97       if (--n <=0) break;			/* we are done 			*/
98       goto LOOP0;				/* otherwise, examine next arg  */
99     }
100 
101     if (intf > 0x42800000)			/* if (|x| > 64               	*/
102     {
103     f0 = -pone/f0;
104 	index0 = 2; 				/* point to pi/2 upper, lower	*/
105     }
106     else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
107     {
108       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
109       pz[0]  = intz;				/* store as a float (z)		*/
110     f0 = (f0 - z)/(pone + f0*z);
111 	index0 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
112 	index0 = index0+ 4;			/* skip over 0,0,pi/2,pi/2	*/
113     }
114     else					/* |x| < 1/64 */
115     {
116 	index0   = 0;				/* points to 0,0 in table	*/
117     }
118     yaddr0   = y;				/* address to store this answer */
119     x       += stridex;				/* point to next arg		*/
120     y       += stridey;				/* point to next result		*/
121     argcount = 1;				/* we now have 1 good argument  */
122     if (--n <=0)
123     {
124       goto UNROLL;				/* finish up with 1 good arg 	*/
125     }
126 
127     /*--------------------------------------------------------------------------*/
128     /*--------------------------------------------------------------------------*/
129     /*--------------------------------------------------------------------------*/
130 
131   LOOP1:
132 
133 	intf     = *(int *) x;		/* upper half of x, as integer */
134 	f1 = *x;
135 	sign1 = pone;
136     	if (intf < 0) {
137     		intf = intf & ~0x80000000; /* abs(upper argument) */
138 		f1 = -f1;
139 		sign1 = -sign1;
140 	}
141 
142     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
143     {
144       if (intf > 0x7f800000)
145       {
146 	ansf   = f1 - f1;			/* return NaN if x=NaN*/
147       }
148       else if (intf < 0x31800000) 		/* avoid underflow for small arg */
149       {
150         dummy = 1.0e37 + f1;
151         dummy = dummy;
152 	ansf   = f1;
153       }
154       else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
155       {
156         index1 = 2;
157         ansf   = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
158       }
159       *y      = sign1 * ansf;		/* store answer, with sign bit 	*/
160       x      += stridex;
161       y      += stridey;
162       argcount = 1;				/* we still have 1 good arg 	*/
163       if (--n <=0)
164       {
165         goto UNROLL;				/* finish up with 1 good arg 	*/
166       }
167       goto LOOP1;				/* otherwise, examine next arg  */
168     }
169 
170     if (intf > 0x42800000)			/* if (|x| > 64               	*/
171     {
172     f1 = -pone/f1;
173       index1 = 2; 				/* point to pi/2 upper, lower	*/
174     }
175     else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
176     {
177       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
178       pz[0]  = intz;				/* store as a float (z)		*/
179     f1 = (f1 - z)/(pone + f1*z);
180       index1 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
181       index1 = index1 + 4;			/* skip over 0,0,pi/2,pi/2	*/
182     }
183     else
184     {
185 	index1   = 0;				/* points to 0,0 in table	*/
186     }
187 
188     yaddr1   = y;				/* address to store this answer */
189     x       += stridex;				/* point to next arg		*/
190     y       += stridey;				/* point to next result		*/
191     argcount = 2;				/* we now have 2 good arguments */
192     if (--n <=0)
193     {
194       goto UNROLL;				/* finish up with 2 good args 	*/
195     }
196 
197     /*--------------------------------------------------------------------------*/
198     /*--------------------------------------------------------------------------*/
199     /*--------------------------------------------------------------------------*/
200 
201   LOOP2:
202 
203 	intf     = *(int *) x;		/* upper half of x, as integer */
204 	f2 = *x;
205 	sign2 = pone;
206     	if (intf < 0) {
207     		intf = intf & ~0x80000000; /* abs(upper argument) */
208 		f2 = -f2;
209 		sign2 = -sign2;
210 	}
211 
212     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
213     {
214       if (intf > 0x7f800000)
215       {
216 	ansf   = f2 - f2;			/* return NaN if x=NaN*/
217       }
218       else if (intf < 0x31800000) 		/* avoid underflow for small arg */
219       {
220         dummy = 1.0e37 + f2;
221         dummy = dummy;
222 	ansf   = f2;
223       }
224       else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
225       {
226         index2 = 2;
227         ansf   = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
228       }
229       *y      = sign2 * ansf;		/* store answer, with sign bit 	*/
230       x      += stridex;
231       y      += stridey;
232       argcount = 2;				/* we still have 2 good args 	*/
233       if (--n <=0)
234       {
235         goto UNROLL;				/* finish up with 2 good args 	*/
236       }
237       goto LOOP2;				/* otherwise, examine next arg  */
238     }
239 
240     if (intf > 0x42800000)			/* if (|x| > 64               	*/
241     {
242     f2 = -pone/f2;
243       index2 = 2; 				/* point to pi/2 upper, lower	*/
244     }
245     else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
246     {
247       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
248       pz[0]  = intz;				/* store as a float (z)		*/
249     f2 = (f2 - z)/(pone + f2*z);
250       index2 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
251       index2 = index2 + 4;			/* skip over 0,0,pi/2,pi/2	*/
252     }
253     else
254     {
255 	index2   = 0;				/* points to 0,0 in table	*/
256     }
257     yaddr2   = y;				/* address to store this answer */
258     x       += stridex;				/* point to next arg		*/
259     y       += stridey;				/* point to next result		*/
260     argcount = 3;				/* we now have 3 good arguments */
261     if (--n <=0)
262     {
263       goto UNROLL;				/* finish up with 2 good args 	*/
264     }
265 
266 
267     /*--------------------------------------------------------------------------*/
268     /*--------------------------------------------------------------------------*/
269     /*--------------------------------------------------------------------------*/
270 
271 #ifdef UNROLL4
272   LOOP3:
273 
274 	intf     = *(int *) x;		/* upper half of x, as integer */
275 	f3 = *x;
276 	sign3 = pone;
277     	if (intf < 0) {
278     		intf = intf & ~0x80000000; /* abs(upper argument) */
279 		f3 = -f3;
280 		sign3 = -sign3;
281 	}
282 
283     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
284     {
285       if (intf > 0x7f800000)
286       {
287 	ansf   = f3 - f3;			/* return NaN if x=NaN*/
288       }
289       else if (intf < 0x31800000) 		/* avoid underflow for small arg */
290       {
291         dummy = 1.0e37 + f3;
292         dummy = dummy;
293 	ansf   = f3;
294       }
295       else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
296       {
297         index3 = 2;
298         ansf   = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
299       }
300       *y      = sign3 * ansf;		/* store answer, with sign bit 	*/
301       x      += stridex;
302       y      += stridey;
303       argcount = 3;				/* we still have 3 good args 	*/
304       if (--n <=0)
305       {
306         goto UNROLL;				/* finish up with 3 good args 	*/
307       }
308       goto LOOP3;				/* otherwise, examine next arg  */
309     }
310 
311     if (intf > 0x42800000)			/* if (|x| > 64               	*/
312     {
313 	n3 = -pone;
314         d3 = f3;
315     f3 = n3/d3;
316       index3 = 2; 				/* point to pi/2 upper, lower	*/
317     }
318     else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
319     {
320       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
321       pz[0]  = intz;				/* store as a float (z)		*/
322 	n3     = (f3 - z);
323 	d3     = (pone + f3*z); 		/* get reduced argument		*/
324     f3 = n3/d3;
325       index3 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
326       index3 = index3 + 4;			/* skip over 0,0,pi/2,pi/2	*/
327     }
328     else
329     {
330 	n3 = f3;
331 	d3 = pone;
332 	index3   = 0;				/* points to 0,0 in table	*/
333     }
334     yaddr3   = y;				/* address to store this answer */
335     x       += stridex;				/* point to next arg		*/
336     y       += stridey;				/* point to next result		*/
337     argcount = 4;				/* we now have 4 good arguments */
338     if (--n <=0)
339     {
340       goto UNROLL;				/* finish up with 3 good args 	*/
341     }
342 #endif /* UNROLL4 */
343 
344 /* here is the n-way unrolled section,
345    but we may actually have less than n
346    arguments at this point
347 */
348 
349 UNROLL:
350 
351 #ifdef UNROLL4
352     if (argcount == 4)
353     {
354     conup0   = __vlibm_TBL_atan1[index0];
355     conup1   = __vlibm_TBL_atan1[index1];
356     conup2   = __vlibm_TBL_atan1[index2];
357     conup3   = __vlibm_TBL_atan1[index3];
358     poly0    = p1*f0*f0*f0 + f0;
359     ans0     = sign0 * (float)(conup0 + poly0);
360     poly1    = p1*f1*f1*f1 + f1;
361     ans1     = sign1 * (float)(conup1 + poly1);
362     poly2    = p1*f2*f2*f2 + f2;
363     ans2     = sign2 * (float)(conup2 + poly2);
364     poly3    = p1*f3*f3*f3 + f3;
365     ans3     = sign3 * (float)(conup3 + poly3);
366     *yaddr0  = ans0;
367     *yaddr1  = ans1;
368     *yaddr2  = ans2;
369     *yaddr3  = ans3;
370     }
371     else
372 #endif
373     if (argcount == 3)
374     {
375     conup0   = __vlibm_TBL_atan1[index0];
376     conup1   = __vlibm_TBL_atan1[index1];
377     conup2   = __vlibm_TBL_atan1[index2];
378     poly0    = p1*f0*f0*f0 + f0;
379     poly1    = p1*f1*f1*f1 + f1;
380     poly2    = p1*f2*f2*f2 + f2;
381     ans0     = sign0 * (float)(conup0 + poly0);
382     ans1     = sign1 * (float)(conup1 + poly1);
383     ans2     = sign2 * (float)(conup2 + poly2);
384     *yaddr0  = ans0;
385     *yaddr1  = ans1;
386     *yaddr2  = ans2;
387     }
388     else
389     if (argcount == 2)
390     {
391     conup0   = __vlibm_TBL_atan1[index0];
392     conup1   = __vlibm_TBL_atan1[index1];
393     poly0    = p1*f0*f0*f0 + f0;
394     poly1    = p1*f1*f1*f1 + f1;
395     ans0     = sign0 * (float)(conup0 + poly0);
396     ans1     = sign1 * (float)(conup1 + poly1);
397     *yaddr0  = ans0;
398     *yaddr1  = ans1;
399     }
400     else
401     if (argcount == 1)
402     {
403     conup0   = __vlibm_TBL_atan1[index0];
404     poly0    = p1*f0*f0*f0 + f0;
405     ans0     = sign0 * (float)(conup0 + poly0);
406     *yaddr0  = ans0;
407      }
408 
409   }  while (n > 0);
410 
411 }
412