xref: /illumos-gate/usr/src/lib/libmvec/common/__vatan2.c (revision 20a7641f9918de8574b8b3b47dbe35c4bfc78df1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
32 
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x)	*(1+(int*)x)
35 #define LO(x)	*(unsigned*)x
36 #else
37 #define HI(x)	*(int*)x
38 #define LO(x)	*(1+(unsigned*)x)
39 #endif
40 
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46 
47 extern const double __vlibm_TBL_atan2[];
48 
49 static const double
50 zero	=  0.0,
51 twom3	=  0.125,
52 one		=  1.0,
53 two110	=  1.2980742146337069071e+33,
54 pio4	=  7.8539816339744827900e-01,
55 pio2	=  1.5707963267948965580e+00,
56 pio2_lo	=  6.1232339957367658860e-17,
57 pi		=  3.1415926535897931160e+00,
58 pi_lo	=  1.2246467991473531772e-16,
59 p1		= -3.33333333333327571893331786354179101074860633009e-0001,
60 p2		=  1.99999999942671624230086497610394721817438631379e-0001,
61 p3		= -1.42856965565428636896183013324727205980484158356e-0001,
62 p4		=  1.10894981496317081405107718475040168084164825641e-0001;
63 
64 /* Don't __ the following; acomp will handle it */
65 extern double fabs(double);
66 
67 void
68 __vatan2(int n, double * restrict y, int stridey, double * restrict x,
69 	int stridex, double * restrict z, int stridez)
70 {
71 	double		x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2;
72 	double		ah0, ah1, ah2, al0, al1, al2, t0, t1, t2;
73 	double		z0, z1, z2, sign0, sign1, sign2, xh;
74 	int			i, k, hx, hy, sx, sy;
75 
76 	do
77 	{
78 loop0:
79 		hy = HI(y);
80 		sy = hy & 0x80000000;
81 		hy &= ~0x80000000;
82 		sign0 = (sy)? -one : one;
83 
84 		hx = HI(x);
85 		sx = hx & 0x80000000;
86 		hx &= ~0x80000000;
87 
88 		if (hy > hx || (hy == hx && LO(y) > LO(x)))
89 		{
90 			i = hx;
91 			hx = hy;
92 			hy = i;
93 			x0 = fabs(*y);
94 			y0 = fabs(*x);
95 			if (sx)
96 			{
97 				ah0 = pio2;
98 				al0 = pio2_lo;
99 			}
100 			else
101 			{
102 				ah0 = -pio2;
103 				al0 = -pio2_lo;
104 				sign0 = -sign0;
105 			}
106 		}
107 		else
108 		{
109 			x0 = fabs(*x);
110 			y0 = fabs(*y);
111 			if (sx)
112 			{
113 				ah0 = -pi;
114 				al0 = -pi_lo;
115 				sign0 = -sign0;
116 			}
117 			else
118 				ah0 = al0 = zero;
119 		}
120 
121 		if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
122 		{
123 			if (hx >= 0x7ff00000)
124 			{
125 				if ((hx ^ 0x7ff00000) | LO(&x0)) /* nan */
126 					ah0 =  x0 + y0;
127 				else if (hy >= 0x7ff00000)
128 					ah0 += pio4;
129 				*z = sign0 * ah0;
130 				x += stridex;
131 				y += stridey;
132 				z += stridez;
133 				i = 0;
134 				if (--n <= 0)
135 					break;
136 				goto loop0;
137 			}
138 			if (hx - hy >= 0x03600000)
139 			{
140 				if ((int) ah0 == 0)
141 					ah0 = y0 / x0;
142 				*z = sign0 * ah0;
143 				x += stridex;
144 				y += stridey;
145 				z += stridez;
146 				i = 0;
147 				if (--n <= 0)
148 					break;
149 				goto loop0;
150 			}
151 			y0 *= twom3;
152 			x0 *= twom3;
153 			hy -= 0x00300000;
154 			hx -= 0x00300000;
155 		}
156 		else if (hy < 0x00100000)
157 		{
158 			if ((hy | LO(&y0)) == 0)
159 			{
160 				*z = sign0 * ah0;
161 				x += stridex;
162 				y += stridey;
163 				z += stridez;
164 				i = 0;
165 				if (--n <= 0)
166 					break;
167 				goto loop0;
168 			}
169 			y0 *= two110;
170 			x0 *= two110;
171 			hy = HI(&y0);
172 			hx = HI(&x0);
173 		}
174 
175 		k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
176 		if (k > 644)
177 			k = 644;
178 		ah0 += __vlibm_TBL_atan2[k];
179 		al0 += __vlibm_TBL_atan2[k+1];
180 		t0 = __vlibm_TBL_atan2[k+2];
181 
182 		xh = x0;
183 		LO(&xh) = 0;
184 		z0 = ((y0 - t0 * xh) - t0 * (x0 - xh)) / (x0 + y0 * t0);
185 		pz0 = z;
186 		x += stridex;
187 		y += stridey;
188 		z += stridez;
189 		i = 1;
190 		if (--n <= 0)
191 			break;
192 
193 loop1:
194 		hy = HI(y);
195 		sy = hy & 0x80000000;
196 		hy &= ~0x80000000;
197 		sign1 = (sy)? -one : one;
198 
199 		hx = HI(x);
200 		sx = hx & 0x80000000;
201 		hx &= ~0x80000000;
202 
203 		if (hy > hx || (hy == hx && LO(y) > LO(x)))
204 		{
205 			i = hx;
206 			hx = hy;
207 			hy = i;
208 			x1 = fabs(*y);
209 			y1 = fabs(*x);
210 			if (sx)
211 			{
212 				ah1 = pio2;
213 				al1 = pio2_lo;
214 			}
215 			else
216 			{
217 				ah1 = -pio2;
218 				al1 = -pio2_lo;
219 				sign1 = -sign1;
220 			}
221 		}
222 		else
223 		{
224 			x1 = fabs(*x);
225 			y1 = fabs(*y);
226 			if (sx)
227 			{
228 				ah1 = -pi;
229 				al1 = -pi_lo;
230 				sign1 = -sign1;
231 			}
232 			else
233 				ah1 = al1 = zero;
234 		}
235 
236 		if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
237 		{
238 			if (hx >= 0x7ff00000)
239 			{
240 				if ((hx ^ 0x7ff00000) | LO(&x1)) /* nan */
241 					ah1 =  x1 + y1;
242 				else if (hy >= 0x7ff00000)
243 					ah1 += pio4;
244 				*z = sign1 * ah1;
245 				x += stridex;
246 				y += stridey;
247 				z += stridez;
248 				i = 1;
249 				if (--n <= 0)
250 					break;
251 				goto loop1;
252 			}
253 			if (hx - hy >= 0x03600000)
254 			{
255 				if ((int) ah1 == 0)
256 					ah1 = y1 / x1;
257 				*z = sign1 * ah1;
258 				x += stridex;
259 				y += stridey;
260 				z += stridez;
261 				i = 1;
262 				if (--n <= 0)
263 					break;
264 				goto loop1;
265 			}
266 			y1 *= twom3;
267 			x1 *= twom3;
268 			hy -= 0x00300000;
269 			hx -= 0x00300000;
270 		}
271 		else if (hy < 0x00100000)
272 		{
273 			if ((hy | LO(&y1)) == 0)
274 			{
275 				*z = sign1 * ah1;
276 				x += stridex;
277 				y += stridey;
278 				z += stridez;
279 				i = 1;
280 				if (--n <= 0)
281 					break;
282 				goto loop1;
283 			}
284 			y1 *= two110;
285 			x1 *= two110;
286 			hy = HI(&y1);
287 			hx = HI(&x1);
288 		}
289 
290 		k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
291 		if (k > 644)
292 			k = 644;
293 		ah1 += __vlibm_TBL_atan2[k];
294 		al1 += __vlibm_TBL_atan2[k+1];
295 		t1 = __vlibm_TBL_atan2[k+2];
296 
297 		xh = x1;
298 		LO(&xh) = 0;
299 		z1 = ((y1 - t1 * xh) - t1 * (x1 - xh)) / (x1 + y1 * t1);
300 		pz1 = z;
301 		x += stridex;
302 		y += stridey;
303 		z += stridez;
304 		i = 2;
305 		if (--n <= 0)
306 			break;
307 
308 loop2:
309 		hy = HI(y);
310 		sy = hy & 0x80000000;
311 		hy &= ~0x80000000;
312 		sign2 = (sy)? -one : one;
313 
314 		hx = HI(x);
315 		sx = hx & 0x80000000;
316 		hx &= ~0x80000000;
317 
318 		if (hy > hx || (hy == hx && LO(y) > LO(x)))
319 		{
320 			i = hx;
321 			hx = hy;
322 			hy = i;
323 			x2 = fabs(*y);
324 			y2 = fabs(*x);
325 			if (sx)
326 			{
327 				ah2 = pio2;
328 				al2 = pio2_lo;
329 			}
330 			else
331 			{
332 				ah2 = -pio2;
333 				al2 = -pio2_lo;
334 				sign2 = -sign2;
335 			}
336 		}
337 		else
338 		{
339 			x2 = fabs(*x);
340 			y2 = fabs(*y);
341 			if (sx)
342 			{
343 				ah2 = -pi;
344 				al2 = -pi_lo;
345 				sign2 = -sign2;
346 			}
347 			else
348 				ah2 = al2 = zero;
349 		}
350 
351 		if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
352 		{
353 			if (hx >= 0x7ff00000)
354 			{
355 				if ((hx ^ 0x7ff00000) | LO(&x2)) /* nan */
356 					ah2 =  x2 + y2;
357 				else if (hy >= 0x7ff00000)
358 					ah2 += pio4;
359 				*z = sign2 * ah2;
360 				x += stridex;
361 				y += stridey;
362 				z += stridez;
363 				i = 2;
364 				if (--n <= 0)
365 					break;
366 				goto loop2;
367 			}
368 			if (hx - hy >= 0x03600000)
369 			{
370 				if ((int) ah2 == 0)
371 					ah2 = y2 / x2;
372 				*z = sign2 * ah2;
373 				x += stridex;
374 				y += stridey;
375 				z += stridez;
376 				i = 2;
377 				if (--n <= 0)
378 					break;
379 				goto loop2;
380 			}
381 			y2 *= twom3;
382 			x2 *= twom3;
383 			hy -= 0x00300000;
384 			hx -= 0x00300000;
385 		}
386 		else if (hy < 0x00100000)
387 		{
388 			if ((hy | LO(&y2)) == 0)
389 			{
390 				*z = sign2 * ah2;
391 				x += stridex;
392 				y += stridey;
393 				z += stridez;
394 				i = 2;
395 				if (--n <= 0)
396 					break;
397 				goto loop2;
398 			}
399 			y2 *= two110;
400 			x2 *= two110;
401 			hy = HI(&y2);
402 			hx = HI(&x2);
403 		}
404 
405 		k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
406 		if (k > 644)
407 			k = 644;
408 		ah2 += __vlibm_TBL_atan2[k];
409 		al2 += __vlibm_TBL_atan2[k+1];
410 		t2 = __vlibm_TBL_atan2[k+2];
411 
412 		xh = x2;
413 		LO(&xh) = 0;
414 		z2 = ((y2 - t2 * xh) - t2 * (x2 - xh)) / (x2 + y2 * t2);
415 		pz2 = z;
416 
417 		x0 = z0 * z0;
418 		x1 = z1 * z1;
419 		x2 = z2 * z2;
420 
421 		t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 *
422 			(p2 + x0 * (p3 + x0 * p4)))));
423 		t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 *
424 			(p2 + x1 * (p3 + x1 * p4)))));
425 		t2 = ah2 + (z2 + (al2 + (z2 * x2) * (p1 + x2 *
426 			(p2 + x2 * (p3 + x2 * p4)))));
427 
428 		*pz0 = sign0 * t0;
429 		*pz1 = sign1 * t1;
430 		*pz2 = sign2 * t2;
431 
432 		x += stridex;
433 		y += stridey;
434 		z += stridez;
435 		i = 0;
436 	} while (--n > 0);
437 
438 	if (i > 0)
439 	{
440 		if (i > 1)
441 		{
442 			x1 = z1 * z1;
443 			t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 *
444 				(p2 + x1 * (p3 + x1 * p4)))));
445 			*pz1 = sign1 * t1;
446 		}
447 
448 		x0 = z0 * z0;
449 		t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 *
450 			(p2 + x0 * (p3 + x0 * p4)))));
451 		*pz0 = sign0 * t0;
452 	}
453 }
454