xref: /illumos-gate/usr/src/lib/libmvec/common/__vcosbig_ultra3.c (revision cffcfaee1e6b29ef9ceb7d80e4e053ffd029906b)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  
22  /*
23   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24   */
25  /*
26   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27   * Use is subject to license terms.
28   */
29  
30  #include <sys/isa_defs.h>
31  
32  #ifdef _LITTLE_ENDIAN
33  #define HI(x)	*(1+(int*)x)
34  #define LO(x)	*(unsigned*)x
35  #else
36  #define HI(x)	*(int*)x
37  #define LO(x)	*(1+(unsigned*)x)
38  #endif
39  
40  #ifdef __RESTRICT
41  #define restrict _Restrict
42  #else
43  #define restrict
44  #endif
45  
46  extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
47  
48  static const double
49  	half[2]	= { 0.5, -0.5 },
50  	one		= 1.0,
51  	invpio2	= 0.636619772367581343075535,
52  	pio2_1	= 1.570796326734125614166,
53  	pio2_2	= 6.077100506303965976596e-11,
54  	pio2_3	= 2.022266248711166455796e-21,
55  	pio2_3t	= 8.478427660368899643959e-32,
56  	pp1		= -1.666666666605760465276263943134982554676e-0001,
57  	pp2		=  8.333261209690963126718376566146180944442e-0003,
58  	qq1		= -4.999999999977710986407023955908711557870e-0001,
59  	qq2		=  4.166654863857219350645055881018842089580e-0002,
60  	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
61  				-4.999999999999931701464060878888294524481e-0001 },
62  	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
63  				 4.166666666394861917535640593963708222319e-0002 },
64  	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
65  				-1.388888552656142867832756687736851681462e-0003 },
66  	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
67  				 2.478519423681460796618128289454530524759e-0005 };
68  
69  static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
70  
71  extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
72  
73  void
74  __vlibm_vcos_big_ultra3(int n, double * restrict x, int stridex, double * restrict y,
75  	int stridey, int pthresh)
76  {
77  	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
78  	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
79  	double		x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
80  	unsigned	xsb0, xsb1, xsb2;
81  	int			i, biguns, nsave, sxsave, sysave;
82  
83  	nsave = n;
84  	xsave = x;
85  	sxsave = stridex;
86  	ysave = y;
87  	sysave = stridey;
88  	biguns = 0;
89  
90  	x0_or_one[1] = 1.0;
91  	x1_or_one[1] = 1.0;
92  	x2_or_one[1] = 1.0;
93  	x0_or_one[3] = -1.0;
94  	x1_or_one[3] = -1.0;
95  	x2_or_one[3] = -1.0;
96  	y0_or_zero[1] = 0.0;
97  	y1_or_zero[1] = 0.0;
98  	y2_or_zero[1] = 0.0;
99  	y0_or_zero[3] = 0.0;
100  	y1_or_zero[3] = 0.0;
101  	y2_or_zero[3] = 0.0;
102  
103  	do
104  	{
105  		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
106  		unsigned	hx;
107  		int			n0, n1, n2;
108  
109  loop0:
110  		hx = HI(x);
111  		xsb0 = hx >> 31;
112  		hx &= ~0x80000000;
113  		if (hx <= pthresh || hx > 0x413921fb)
114  		{
115  			if (hx > 0x413921fb && hx < 0x7ff00000)
116  				biguns = 1;
117  			x += stridex;
118  			y += stridey;
119  			i = 0;
120  			if (--n <= 0)
121  				break;
122  			goto loop0;
123  		}
124  		x0 = *x;
125  		py0 = y;
126  		x += stridex;
127  		y += stridey;
128  		i = 1;
129  		if (--n <= 0)
130  			break;
131  
132  loop1:
133  		hx = HI(x);
134  		xsb1 = hx >> 31;
135  		hx &= ~0x80000000;
136  		if (hx <= pthresh || hx > 0x413921fb)
137  		{
138  			if (hx > 0x413921fb && hx < 0x7ff00000)
139  				biguns = 1;
140  			x += stridex;
141  			y += stridey;
142  			i = 1;
143  			if (--n <= 0)
144  				break;
145  			goto loop1;
146  		}
147  		x1 = *x;
148  		py1 = y;
149  		x += stridex;
150  		y += stridey;
151  		i = 2;
152  		if (--n <= 0)
153  			break;
154  
155  loop2:
156  		hx = HI(x);
157  		xsb2 = hx >> 31;
158  		hx &= ~0x80000000;
159  		if (hx <= pthresh || hx > 0x413921fb)
160  		{
161  			if (hx > 0x413921fb && hx < 0x7ff00000)
162  				biguns = 1;
163  			x += stridex;
164  			y += stridey;
165  			i = 2;
166  			if (--n <= 0)
167  				break;
168  			goto loop2;
169  		}
170  		x2 = *x;
171  		py2 = y;
172  
173  		n0 = (int) (x0 * invpio2 + half[xsb0]);
174  		n1 = (int) (x1 * invpio2 + half[xsb1]);
175  		n2 = (int) (x2 * invpio2 + half[xsb2]);
176  		fn0 = (double) n0;
177  		fn1 = (double) n1;
178  		fn2 = (double) n2;
179  		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
180  		n1 = (n1 + 1) & 3;
181  		n2 = (n2 + 1) & 3;
182  		a0 = x0 - fn0 * pio2_1;
183  		a1 = x1 - fn1 * pio2_1;
184  		a2 = x2 - fn2 * pio2_1;
185  		w0 = fn0 * pio2_2;
186  		w1 = fn1 * pio2_2;
187  		w2 = fn2 * pio2_2;
188  		x0 = a0 - w0;
189  		x1 = a1 - w1;
190  		x2 = a2 - w2;
191  		y0 = (a0 - x0) - w0;
192  		y1 = (a1 - x1) - w1;
193  		y2 = (a2 - x2) - w2;
194  		a0 = x0;
195  		a1 = x1;
196  		a2 = x2;
197  		w0 = fn0 * pio2_3 - y0;
198  		w1 = fn1 * pio2_3 - y1;
199  		w2 = fn2 * pio2_3 - y2;
200  		x0 = a0 - w0;
201  		x1 = a1 - w1;
202  		x2 = a2 - w2;
203  		y0 = (a0 - x0) - w0;
204  		y1 = (a1 - x1) - w1;
205  		y2 = (a2 - x2) - w2;
206  		a0 = x0;
207  		a1 = x1;
208  		a2 = x2;
209  		w0 = fn0 * pio2_3t - y0;
210  		w1 = fn1 * pio2_3t - y1;
211  		w2 = fn2 * pio2_3t - y2;
212  		x0 = a0 - w0;
213  		x1 = a1 - w1;
214  		x2 = a2 - w2;
215  		y0 = (a0 - x0) - w0;
216  		y1 = (a1 - x1) - w1;
217  		y2 = (a2 - x2) - w2;
218  		xsb0 = HI(&x0);
219  		i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
220  		xsb1 = HI(&x1);
221  		i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
222  		xsb2 = HI(&x2);
223  		i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
224  		switch (i)
225  		{
226  			double		t0, t1, t2, z0, z1, z2;
227  			unsigned	j0, j1, j2;
228  
229  		case 0:
230  			j0 = (xsb0 + 0x4000) & 0xffff8000;
231  			j1 = (xsb1 + 0x4000) & 0xffff8000;
232  			j2 = (xsb2 + 0x4000) & 0xffff8000;
233  			HI(&t0) = j0;
234  			HI(&t1) = j1;
235  			HI(&t2) = j2;
236  			LO(&t0) = 0;
237  			LO(&t1) = 0;
238  			LO(&t2) = 0;
239  			x0 = (x0 - t0) + y0;
240  			x1 = (x1 - t1) + y1;
241  			x2 = (x2 - t2) + y2;
242  			z0 = x0 * x0;
243  			z1 = x1 * x1;
244  			z2 = x2 * x2;
245  			t0 = z0 * (qq1 + z0 * qq2);
246  			t1 = z1 * (qq1 + z1 * qq2);
247  			t2 = z2 * (qq1 + z2 * qq2);
248  			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
249  			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
250  			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
251  			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
252  			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
253  			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
254  			xsb0 = (xsb0 >> 30) & 2;
255  			xsb1 = (xsb1 >> 30) & 2;
256  			xsb2 = (xsb2 >> 30) & 2;
257  			n0 ^= (xsb0 & ~(n0 << 1));
258  			n1 ^= (xsb1 & ~(n1 << 1));
259  			n2 ^= (xsb2 & ~(n2 << 1));
260  			xsb0 |= 1;
261  			xsb1 |= 1;
262  			xsb2 |= 1;
263  			a0 = __vlibm_TBL_sincos_hi[j0+n0];
264  			a1 = __vlibm_TBL_sincos_hi[j1+n1];
265  			a2 = __vlibm_TBL_sincos_hi[j2+n2];
266  			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
267  			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
268  			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
269  			*py0 = ( a0 + t0 );
270  			*py1 = ( a1 + t1 );
271  			*py2 = ( a2 + t2 );
272  			break;
273  
274  		case 1:
275  			j0 = n0 & 1;
276  			j1 = (xsb1 + 0x4000) & 0xffff8000;
277  			j2 = (xsb2 + 0x4000) & 0xffff8000;
278  			HI(&t1) = j1;
279  			HI(&t2) = j2;
280  			LO(&t1) = 0;
281  			LO(&t2) = 0;
282  			x0_or_one[0] = x0;
283  			x0_or_one[2] = -x0;
284  			y0_or_zero[0] = y0;
285  			y0_or_zero[2] = -y0;
286  			x1 = (x1 - t1) + y1;
287  			x2 = (x2 - t2) + y2;
288  			z0 = x0 * x0;
289  			z1 = x1 * x1;
290  			z2 = x2 * x2;
291  			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
292  			t1 = z1 * (qq1 + z1 * qq2);
293  			t2 = z2 * (qq1 + z2 * qq2);
294  			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
295  			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
296  			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
297  			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298  			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299  			xsb1 = (xsb1 >> 30) & 2;
300  			xsb2 = (xsb2 >> 30) & 2;
301  			n1 ^= (xsb1 & ~(n1 << 1));
302  			n2 ^= (xsb2 & ~(n2 << 1));
303  			xsb1 |= 1;
304  			xsb2 |= 1;
305  			a1 = __vlibm_TBL_sincos_hi[j1+n1];
306  			a2 = __vlibm_TBL_sincos_hi[j2+n2];
307  			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
308  			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
309  			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
310  			*py0 = t0;
311  			*py1 = ( a1 + t1 );
312  			*py2 = ( a2 + t2 );
313  			break;
314  
315  		case 2:
316  			j0 = (xsb0 + 0x4000) & 0xffff8000;
317  			j1 = n1 & 1;
318  			j2 = (xsb2 + 0x4000) & 0xffff8000;
319  			HI(&t0) = j0;
320  			HI(&t2) = j2;
321  			LO(&t0) = 0;
322  			LO(&t2) = 0;
323  			x1_or_one[0] = x1;
324  			x1_or_one[2] = -x1;
325  			x0 = (x0 - t0) + y0;
326  			y1_or_zero[0] = y1;
327  			y1_or_zero[2] = -y1;
328  			x2 = (x2 - t2) + y2;
329  			z0 = x0 * x0;
330  			z1 = x1 * x1;
331  			z2 = x2 * x2;
332  			t0 = z0 * (qq1 + z0 * qq2);
333  			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
334  			t2 = z2 * (qq1 + z2 * qq2);
335  			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
336  			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
337  			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
338  			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
339  			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
340  			xsb0 = (xsb0 >> 30) & 2;
341  			xsb2 = (xsb2 >> 30) & 2;
342  			n0 ^= (xsb0 & ~(n0 << 1));
343  			n2 ^= (xsb2 & ~(n2 << 1));
344  			xsb0 |= 1;
345  			xsb2 |= 1;
346  			a0 = __vlibm_TBL_sincos_hi[j0+n0];
347  			a2 = __vlibm_TBL_sincos_hi[j2+n2];
348  			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
349  			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
350  			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
351  			*py0 = ( a0 + t0 );
352  			*py1 = t1;
353  			*py2 = ( a2 + t2 );
354  			break;
355  
356  		case 3:
357  			j0 = n0 & 1;
358  			j1 = n1 & 1;
359  			j2 = (xsb2 + 0x4000) & 0xffff8000;
360  			HI(&t2) = j2;
361  			LO(&t2) = 0;
362  			x0_or_one[0] = x0;
363  			x0_or_one[2] = -x0;
364  			x1_or_one[0] = x1;
365  			x1_or_one[2] = -x1;
366  			y0_or_zero[0] = y0;
367  			y0_or_zero[2] = -y0;
368  			y1_or_zero[0] = y1;
369  			y1_or_zero[2] = -y1;
370  			x2 = (x2 - t2) + y2;
371  			z0 = x0 * x0;
372  			z1 = x1 * x1;
373  			z2 = x2 * x2;
374  			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
375  			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
376  			t2 = z2 * (qq1 + z2 * qq2);
377  			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
378  			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
379  			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
380  			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
381  			xsb2 = (xsb2 >> 30) & 2;
382  			n2 ^= (xsb2 & ~(n2 << 1));
383  			xsb2 |= 1;
384  			a2 = __vlibm_TBL_sincos_hi[j2+n2];
385  			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
386  			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
387  			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
388  			*py0 = t0;
389  			*py1 = t1;
390  			*py2 = ( a2 + t2 );
391  			break;
392  
393  		case 4:
394  			j0 = (xsb0 + 0x4000) & 0xffff8000;
395  			j1 = (xsb1 + 0x4000) & 0xffff8000;
396  			j2 = n2 & 1;
397  			HI(&t0) = j0;
398  			HI(&t1) = j1;
399  			LO(&t0) = 0;
400  			LO(&t1) = 0;
401  			x2_or_one[0] = x2;
402  			x2_or_one[2] = -x2;
403  			x0 = (x0 - t0) + y0;
404  			x1 = (x1 - t1) + y1;
405  			y2_or_zero[0] = y2;
406  			y2_or_zero[2] = -y2;
407  			z0 = x0 * x0;
408  			z1 = x1 * x1;
409  			z2 = x2 * x2;
410  			t0 = z0 * (qq1 + z0 * qq2);
411  			t1 = z1 * (qq1 + z1 * qq2);
412  			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
413  			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
414  			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
415  			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
416  			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
417  			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
418  			xsb0 = (xsb0 >> 30) & 2;
419  			xsb1 = (xsb1 >> 30) & 2;
420  			n0 ^= (xsb0 & ~(n0 << 1));
421  			n1 ^= (xsb1 & ~(n1 << 1));
422  			xsb0 |= 1;
423  			xsb1 |= 1;
424  			a0 = __vlibm_TBL_sincos_hi[j0+n0];
425  			a1 = __vlibm_TBL_sincos_hi[j1+n1];
426  			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
427  			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
428  			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
429  			*py0 = ( a0 + t0 );
430  			*py1 = ( a1 + t1 );
431  			*py2 = t2;
432  			break;
433  
434  		case 5:
435  			j0 = n0 & 1;
436  			j1 = (xsb1 + 0x4000) & 0xffff8000;
437  			j2 = n2 & 1;
438  			HI(&t1) = j1;
439  			LO(&t1) = 0;
440  			x0_or_one[0] = x0;
441  			x0_or_one[2] = -x0;
442  			x2_or_one[0] = x2;
443  			x2_or_one[2] = -x2;
444  			y0_or_zero[0] = y0;
445  			y0_or_zero[2] = -y0;
446  			x1 = (x1 - t1) + y1;
447  			y2_or_zero[0] = y2;
448  			y2_or_zero[2] = -y2;
449  			z0 = x0 * x0;
450  			z1 = x1 * x1;
451  			z2 = x2 * x2;
452  			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
453  			t1 = z1 * (qq1 + z1 * qq2);
454  			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
455  			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
456  			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
457  			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
458  			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
459  			xsb1 = (xsb1 >> 30) & 2;
460  			n1 ^= (xsb1 & ~(n1 << 1));
461  			xsb1 |= 1;
462  			a1 = __vlibm_TBL_sincos_hi[j1+n1];
463  			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
464  			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
465  			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
466  			*py0 = t0;
467  			*py1 = ( a1 + t1 );
468  			*py2 = t2;
469  			break;
470  
471  		case 6:
472  			j0 = (xsb0 + 0x4000) & 0xffff8000;
473  			j1 = n1 & 1;
474  			j2 = n2 & 1;
475  			HI(&t0) = j0;
476  			LO(&t0) = 0;
477  			x1_or_one[0] = x1;
478  			x1_or_one[2] = -x1;
479  			x2_or_one[0] = x2;
480  			x2_or_one[2] = -x2;
481  			x0 = (x0 - t0) + y0;
482  			y1_or_zero[0] = y1;
483  			y1_or_zero[2] = -y1;
484  			y2_or_zero[0] = y2;
485  			y2_or_zero[2] = -y2;
486  			z0 = x0 * x0;
487  			z1 = x1 * x1;
488  			z2 = x2 * x2;
489  			t0 = z0 * (qq1 + z0 * qq2);
490  			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
491  			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
492  			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
493  			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
494  			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
495  			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
496  			xsb0 = (xsb0 >> 30) & 2;
497  			n0 ^= (xsb0 & ~(n0 << 1));
498  			xsb0 |= 1;
499  			a0 = __vlibm_TBL_sincos_hi[j0+n0];
500  			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
501  			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
502  			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
503  			*py0 = ( a0 + t0 );
504  			*py1 = t1;
505  			*py2 = t2;
506  			break;
507  
508  		case 7:
509  			j0 = n0 & 1;
510  			j1 = n1 & 1;
511  			j2 = n2 & 1;
512  			x0_or_one[0] = x0;
513  			x0_or_one[2] = -x0;
514  			x1_or_one[0] = x1;
515  			x1_or_one[2] = -x1;
516  			x2_or_one[0] = x2;
517  			x2_or_one[2] = -x2;
518  			y0_or_zero[0] = y0;
519  			y0_or_zero[2] = -y0;
520  			y1_or_zero[0] = y1;
521  			y1_or_zero[2] = -y1;
522  			y2_or_zero[0] = y2;
523  			y2_or_zero[2] = -y2;
524  			z0 = x0 * x0;
525  			z1 = x1 * x1;
526  			z2 = x2 * x2;
527  			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
528  			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
529  			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
530  			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
531  			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
532  			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
533  			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
534  			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
535  			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
536  			*py0 = t0;
537  			*py1 = t1;
538  			*py2 = t2;
539  			break;
540  		}
541  
542  		x += stridex;
543  		y += stridey;
544  		i = 0;
545  	} while (--n > 0);
546  
547  	if (i > 0)
548  	{
549  		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
550  		double		t0, t1, z0, z1;
551  		unsigned	j0, j1;
552  		int			n0, n1;
553  
554  		if (i > 1)
555  		{
556  			n1 = (int) (x1 * invpio2 + half[xsb1]);
557  			fn1 = (double) n1;
558  			n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
559  			a1 = x1 - fn1 * pio2_1;
560  			w1 = fn1 * pio2_2;
561  			x1 = a1 - w1;
562  			y1 = (a1 - x1) - w1;
563  			a1 = x1;
564  			w1 = fn1 * pio2_3 - y1;
565  			x1 = a1 - w1;
566  			y1 = (a1 - x1) - w1;
567  			a1 = x1;
568  			w1 = fn1 * pio2_3t - y1;
569  			x1 = a1 - w1;
570  			y1 = (a1 - x1) - w1;
571  			xsb1 = HI(&x1);
572  			if ((xsb1 & ~0x80000000) < thresh[n1&1])
573  			{
574  				j1 = n1 & 1;
575  				x1_or_one[0] = x1;
576  				x1_or_one[2] = -x1;
577  				y1_or_zero[0] = y1;
578  				y1_or_zero[2] = -y1;
579  				z1 = x1 * x1;
580  				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
581  				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
582  				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
583  				*py1 = t1;
584  			}
585  			else
586  			{
587  				j1 = (xsb1 + 0x4000) & 0xffff8000;
588  				HI(&t1) = j1;
589  				LO(&t1) = 0;
590  				x1 = (x1 - t1) + y1;
591  				z1 = x1 * x1;
592  				t1 = z1 * (qq1 + z1 * qq2);
593  				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
594  				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
595  				xsb1 = (xsb1 >> 30) & 2;
596  				n1 ^= (xsb1 & ~(n1 << 1));
597  				xsb1 |= 1;
598  				a1 = __vlibm_TBL_sincos_hi[j1+n1];
599  				t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
600  				*py1 = ( a1 + t1 );
601  			}
602  		}
603  		n0 = (int) (x0 * invpio2 + half[xsb0]);
604  		fn0 = (double) n0;
605  		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
606  		a0 = x0 - fn0 * pio2_1;
607  		w0 = fn0 * pio2_2;
608  		x0 = a0 - w0;
609  		y0 = (a0 - x0) - w0;
610  		a0 = x0;
611  		w0 = fn0 * pio2_3 - y0;
612  		x0 = a0 - w0;
613  		y0 = (a0 - x0) - w0;
614  		a0 = x0;
615  		w0 = fn0 * pio2_3t - y0;
616  		x0 = a0 - w0;
617  		y0 = (a0 - x0) - w0;
618  		xsb0 = HI(&x0);
619  		if ((xsb0 & ~0x80000000) < thresh[n0&1])
620  		{
621  			j0 = n0 & 1;
622  			x0_or_one[0] = x0;
623  			x0_or_one[2] = -x0;
624  			y0_or_zero[0] = y0;
625  			y0_or_zero[2] = -y0;
626  			z0 = x0 * x0;
627  			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
628  			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
629  			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
630  			*py0 = t0;
631  		}
632  		else
633  		{
634  			j0 = (xsb0 + 0x4000) & 0xffff8000;
635  			HI(&t0) = j0;
636  			LO(&t0) = 0;
637  			x0 = (x0 - t0) + y0;
638  			z0 = x0 * x0;
639  			t0 = z0 * (qq1 + z0 * qq2);
640  			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
641  			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
642  			xsb0 = (xsb0 >> 30) & 2;
643  			n0 ^= (xsb0 & ~(n0 << 1));
644  			xsb0 |= 1;
645  			a0 = __vlibm_TBL_sincos_hi[j0+n0];
646  			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
647  			*py0 = ( a0 + t0 );
648  		}
649  	}
650  
651  	if (biguns)
652  		__vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
653  }
654