xref: /illumos-gate/usr/src/lib/libmvec/common/__vatan.c (revision ca0cc3918a1789fa839194af2a9245f801a06b1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
32 
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x)	*(1+(int*)x)
35 #define LO(x)	*(unsigned*)x
36 #else
37 #define HI(x)	*(int*)x
38 #define LO(x)	*(1+(unsigned*)x)
39 #endif
40 
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46 
47 void
48 __vatan(int n, double * restrict x, int stridex, double * restrict y, int stridey)
49 {
50   double  f, z, ans = 0.0L, ansu, ansl, tmp, poly, conup, conlo, dummy;
51   double  f1,   ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1;
52   double  f2,   ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2;
53   int index, sign, intf, intflo, intz, argcount;
54   int index1, sign1 = 0;
55   int index2, sign2 = 0;
56   double *yaddr,*yaddr1 = 0,*yaddr2 = 0;
57   extern const double __vlibm_TBL_atan1[];
58   extern double fabs(double);
59 
60 /*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
61  *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
62 
63 /* define dummy names for readability.  Use parray to help compiler optimize loads */
64 #define p3    parray[0]
65 #define p2    parray[1]
66 #define p1    parray[2]
67 
68   static const double parray[] = {
69    -1.428029046844299722E-01,		/* p[3]		*/
70     1.999999917247000615E-01, 		/* p[2]		*/
71    -3.333333333329292858E-01, 		/* p[1]		*/
72     1.0, 				/* not used for p[0], though		*/
73    -1.0,				/* used to flip sign of answer 		*/
74   };
75 
76   if (n <= 0) return;		/* if no. of elements is 0 or neg, do nothing */
77   do
78   {
79   LOOP0:
80 
81     f        = fabs(*x);			/* fetch argument		*/
82     intf     = HI(x);			/* upper half of x, as integer	*/
83     intflo   = LO(x);			/* lower half of x, as integer	*/
84     sign     = intf &  0x80000000;		/* sign of argument		*/
85     intf     = intf & ~0x80000000;		/* abs(upper argument)		*/
86 
87     if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
88     {
89       if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
90       {
91 	ans   = f - f; 				/* return NaN if x=NaN*/
92       }
93       else if (intf < 0x3e300000) 		/* avoid underflow for small arg */
94       {
95         dummy = 1.0e37 + f;
96         dummy = dummy;
97 	ans   = f;
98       }
99       else if (intf > 0x43600000)		/* avoid underflow for big arg  */
100       {
101         index = 2;
102         ans   = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low   */
103       }
104       *y      = (sign) ? -ans: ans;		/* store answer, with sign bit 	*/
105       x      += stridex;
106       y      += stridey;
107       argcount = 0;				/* initialize argcount		*/
108       if (--n <=0) break;			/* we are done 			*/
109       goto LOOP0;				/* otherwise, examine next arg  */
110     }
111 
112     index    = 0;				/* points to 0,0 in table	*/
113     if (intf > 0x40500000)			/* if (|x| > 64               	*/
114     { f = -1.0/f;
115       index  = 2; 				/* point to pi/2 upper, lower	*/
116     }
117     else if (intf >= 0x3f900000)		/* if |x| >= (1/64)... 		*/
118     {
119       intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper	*/
120       HI(&z)  = intz;				/* store as a double (z)	*/
121       LO(&z)  = 0;				/* ...lower			*/
122       f      = (f - z)/(1.0 + f*z); 		/* get reduced argument		*/
123       index  = (intz - 0x3f900000) >> 15;	/* (index >> 16) << 1)		*/
124       index  = index + 4;			/* skip over 0,0,pi/2,pi/2	*/
125     }
126     yaddr    = y;				/* address to store this answer */
127     x       += stridex;				/* point to next arg		*/
128     y       += stridey;				/* point to next result		*/
129     argcount = 1;				/* we now have 1 good argument  */
130     if (--n <=0)
131     {
132       f1      = 0.0;				/* put dummy values in args 1,2 */
133       f2      = 0.0;
134       index1  = 0;
135       index2  = 0;
136       goto UNROLL3;				/* finish up with 1 good arg 	*/
137     }
138 
139     /*--------------------------------------------------------------------------*/
140     /*--------------------------------------------------------------------------*/
141     /*--------------------------------------------------------------------------*/
142 
143   LOOP1:
144 
145     f1       = fabs(*x);			/* fetch argument		*/
146     intf     = HI(x);			/* upper half of x, as integer	*/
147     intflo   = LO(x);			/* lower half of x, as integer	*/
148     sign1    = intf &  0x80000000;		/* sign of argument		*/
149     intf     = intf & ~0x80000000;		/* abs(upper argument)		*/
150 
151     if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
152     {
153       if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
154       {
155 	ans   = f1 - f1;			/* return NaN if x=NaN*/
156       }
157       else if (intf < 0x3e300000) 		/* avoid underflow for small arg */
158       {
159         dummy = 1.0e37 + f1;
160         dummy = dummy;
161 	ans   = f1;
162       }
163       else if (intf > 0x43600000)		/* avoid underflow for big arg  */
164       {
165         index1 = 2;
166         ans   = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low   */
167       }
168       *y      = (sign1) ? -ans: ans;		/* store answer, with sign bit 	*/
169       x      += stridex;
170       y      += stridey;
171       argcount = 1;				/* we still have 1 good arg 	*/
172       if (--n <=0)
173       {
174         f1      = 0.0;				/* put dummy values in args 1,2 */
175         f2      = 0.0;
176         index1  = 0;
177         index2  = 0;
178         goto UNROLL3;				/* finish up with 1 good arg 	*/
179       }
180       goto LOOP1;				/* otherwise, examine next arg  */
181     }
182 
183     index1   = 0;				/* points to 0,0 in table	*/
184     if (intf > 0x40500000)			/* if (|x| > 64               	*/
185     { f1 = -1.0/f1;
186       index1 = 2; 				/* point to pi/2 upper, lower	*/
187     }
188     else if (intf >= 0x3f900000)		/* if |x| >= (1/64)... 		*/
189     {
190       intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper	*/
191       HI(&z) = intz;				/* store as a double (z)	*/
192       LO(&z) = 0;				/* ...lower			*/
193       f1     = (f1 - z)/(1.0 + f1*z); 		/* get reduced argument		*/
194       index1 = (intz - 0x3f900000) >> 15;	/* (index >> 16) << 1)		*/
195       index1 = index1 + 4;			/* skip over 0,0,pi/2,pi/2	*/
196     }
197     yaddr1   = y;				/* address to store this answer */
198     x       += stridex;				/* point to next arg		*/
199     y       += stridey;				/* point to next result		*/
200     argcount = 2;				/* we now have 2 good arguments */
201     if (--n <=0)
202     {
203       f2      = 0.0;				/* put dummy value in arg 2 */
204       index2  = 0;
205       goto UNROLL3;				/* finish up with 2 good args 	*/
206     }
207 
208     /*--------------------------------------------------------------------------*/
209     /*--------------------------------------------------------------------------*/
210     /*--------------------------------------------------------------------------*/
211 
212   LOOP2:
213 
214     f2       = fabs(*x);			/* fetch argument		*/
215     intf     = HI(x);			/* upper half of x, as integer	*/
216     intflo   = LO(x);			/* lower half of x, as integer	*/
217     sign2    = intf &  0x80000000;		/* sign of argument		*/
218     intf     = intf & ~0x80000000;		/* abs(upper argument)		*/
219 
220     if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
221     {
222       if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
223       {
224 	ans   = f2 - f2;			/* return NaN if x=NaN*/
225       }
226       else if (intf < 0x3e300000) 		/* avoid underflow for small arg */
227       {
228         dummy = 1.0e37 + f2;
229         dummy = dummy;
230 	ans   = f2;
231       }
232       else if (intf > 0x43600000)		/* avoid underflow for big arg  */
233       {
234         index2 = 2;
235         ans   = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low   */
236       }
237       *y      = (sign2) ? -ans: ans;		/* store answer, with sign bit 	*/
238       x      += stridex;
239       y      += stridey;
240       argcount = 2;				/* we still have 2 good args 	*/
241       if (--n <=0)
242       {
243         f2      = 0.0;				/* put dummy value in arg 2 */
244         index2  = 0;
245         goto UNROLL3;				/* finish up with 2 good args 	*/
246       }
247       goto LOOP2;				/* otherwise, examine next arg  */
248     }
249 
250     index2   = 0;				/* points to 0,0 in table	*/
251     if (intf > 0x40500000)			/* if (|x| > 64               	*/
252     { f2 = -1.0/f2;
253       index2 = 2; 				/* point to pi/2 upper, lower	*/
254     }
255     else if (intf >= 0x3f900000)		/* if |x| >= (1/64)... 		*/
256     {
257       intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper	*/
258       HI(&z) = intz;				/* store as a double (z)	*/
259       LO(&z) = 0;				/* ...lower			*/
260       f2     = (f2 - z)/(1.0 + f2*z); 		/* get reduced argument		*/
261       index2 = (intz - 0x3f900000) >> 15;	/* (index >> 16) << 1)		*/
262       index2 = index2 + 4;			/* skip over 0,0,pi/2,pi/2	*/
263     }
264     yaddr2   = y;				/* address to store this answer */
265     x       += stridex;				/* point to next arg		*/
266     y       += stridey;				/* point to next result		*/
267     argcount = 3;				/* we now have 3 good arguments */
268 
269 
270 /* here is the 3 way unrolled section,
271    note, we may actually only have
272    1,2, or 3 'real' arguments at this point
273 */
274 
275 UNROLL3:
276 
277     conup    = __vlibm_TBL_atan1[index ];	/* upper table 			*/
278     conup1   = __vlibm_TBL_atan1[index1];	/* upper table 			*/
279     conup2   = __vlibm_TBL_atan1[index2];	/* upper table 			*/
280 
281     conlo    = __vlibm_TBL_atan1[index +1];	/* lower table 			*/
282     conlo1   = __vlibm_TBL_atan1[index1+1];	/* lower table 			*/
283     conlo2   = __vlibm_TBL_atan1[index2+1];	/* lower table 			*/
284 
285     tmp      = f *f ;
286     tmp1     = f1*f1;
287     tmp2     = f2*f2;
288 
289     poly     = f *((p3*tmp  + p2)*tmp  + p1)*tmp ;
290     poly1    = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1;
291     poly2    = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2;
292 
293     ansu     = conup  + f ;    			/* compute atan(f)  upper 	*/
294     ansu1    = conup1 + f1;    			/* compute atan(f)  upper 	*/
295     ansu2    = conup2 + f2;    			/* compute atan(f)  upper 	*/
296 
297     ansl     = (((conup  - ansu) + f) + poly) + conlo ;
298     ansl1    = (((conup1 - ansu1) + f1) + poly1) + conlo1;
299     ansl2    = (((conup2 - ansu2) + f2) + poly2) + conlo2;
300 
301     ans      = ansu  + ansl ;
302     ans1     = ansu1 + ansl1;
303     ans2     = ansu2 + ansl2;
304 
305 /* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */
306 
307    *yaddr    = sign ? -ans: ans;		/* this one is always good	*/
308    if (argcount < 3) break;			/* end loop and finish up 	*/
309      *yaddr1   = sign1 ? -ans1: ans1;
310      *yaddr2   = sign2 ? -ans2: ans2;
311 
312   }  while (--n > 0);
313 
314  if (argcount == 2)
315    {  *yaddr1  = sign1 ? -ans1: ans1;
316    }
317 }
318