xref: /titanic_52/usr/src/lib/libmvec/common/__vsincos.c (revision c702eacaf4d917acee909844383924df2accf8f5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32 
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x)	*(1+(int*)x)
35 #define LO(x)	*(unsigned*)x
36 #else
37 #define HI(x)	*(int*)x
38 #define LO(x)	*(1+(unsigned*)x)
39 #endif
40 
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46 
47 /*
48  * vsincos.c
49  *
50  * Vector sine and cosine function.  Just slight modifications to vcos.c.
51  */
52 
53 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
54 
55 static const double
56 	half[2]	= { 0.5, -0.5 },
57 	one		= 1.0,
58 	invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */
59 	pio2_1	= 1.570796326734125614166,  /* first 33 bits of pi/2 */
60 	pio2_2	= 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
61 	pio2_3	= 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
62 	pio2_3t	= 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
63 	pp1		= -1.666666666605760465276263943134982554676e-0001,
64 	pp2		=  8.333261209690963126718376566146180944442e-0003,
65 	qq1		= -4.999999999977710986407023955908711557870e-0001,
66 	qq2		=  4.166654863857219350645055881018842089580e-0002,
67 	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
68 				-4.999999999999931701464060878888294524481e-0001 },
69 	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
70 				 4.166666666394861917535640593963708222319e-0002 },
71 	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
72 				-1.388888552656142867832756687736851681462e-0003 },
73 	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
74 				 2.478519423681460796618128289454530524759e-0005 };
75 
76 /* Don't __ the following; acomp will handle it */
77 extern double fabs(double);
78 extern void __vlibm_vsincos_big(int, double *, int, double *, int, double *, int, int);
79 
80 /*
81  * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n.
82  * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
83  *
84  * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
85  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
86  *
87  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
88  */
89 void
90 __vsincos(int n, double * restrict x, int stridex,
91 				double * restrict y, int stridey,
92 				double * restrict c, int stridec)
93 {
94 	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
95 	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
96 	double		x0, x1, x2,
97 			*py0, *py1, *py2,
98 			*pc0, *pc1, *pc2,
99 			*xsave, *ysave, *csave;
100 	unsigned	hx0, hx1, hx2, xsb0, xsb1, xsb2;
101 	int		i, biguns, nsave, sxsave, sysave, scsave;
102 	volatile int	v __unused;
103 	nsave = n;
104 	xsave = x;
105 	sxsave = stridex;
106 	ysave = y;
107 	sysave = stridey;
108 	csave = c;
109 	scsave = stridec;
110 	biguns = 0;
111 
112 	do /* MAIN LOOP */
113 	{
114 
115 		/* Gotos here so _break_ exits MAIN LOOP. */
116 LOOP0:  /* Find first arg in right range. */
117 		xsb0 = HI(x); /* get most significant word */
118 		hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
119 		if (hx0 > 0x3fe921fb) {
120 			/* Too big: arg reduction needed, so leave for second part */
121 			biguns = 1;
122 			x += stridex;
123 			y += stridey;
124 			c += stridec;
125 			i = 0;
126 			if (--n <= 0)
127 				break;
128 			goto LOOP0;
129 		}
130 		if (hx0 < 0x3e400000) {
131 			/* Too small.  cos x ~ 1, sin x ~ x. */
132 			v = *x;
133 			*c = 1.0;
134 			*y = *x;
135 			x += stridex;
136 			y += stridey;
137 			c += stridec;
138 			i = 0;
139 			if (--n <= 0)
140 				break;
141 			goto LOOP0;
142 		}
143 		x0 = *x;
144 		py0 = y;
145 		pc0 = c;
146 		x += stridex;
147 		y += stridey;
148 		c += stridec;
149 		i = 1;
150 		if (--n <= 0)
151 			break;
152 
153 LOOP1: /* Get second arg, same as above. */
154 		xsb1 = HI(x);
155 		hx1 = xsb1 & ~0x80000000;
156 		if (hx1 > 0x3fe921fb)
157 		{
158 			biguns = 1;
159 			x += stridex;
160 			y += stridey;
161 			c += stridec;
162 			i = 1;
163 			if (--n <= 0)
164 				break;
165 			goto LOOP1;
166 		}
167 		if (hx1 < 0x3e400000)
168 		{
169 			v = *x;
170 			*c = 1.0;
171 			*y = *x;
172 			x += stridex;
173 			y += stridey;
174 			c += stridec;
175 			i = 1;
176 			if (--n <= 0)
177 				break;
178 			goto LOOP1;
179 		}
180 		x1 = *x;
181 		py1 = y;
182 		pc1 = c;
183 		x += stridex;
184 		y += stridey;
185 		c += stridec;
186 		i = 2;
187 		if (--n <= 0)
188 			break;
189 
190 LOOP2: /* Get third arg, same as above. */
191 		xsb2 = HI(x);
192 		hx2 = xsb2 & ~0x80000000;
193 		if (hx2 > 0x3fe921fb)
194 		{
195 			biguns = 1;
196 			x += stridex;
197 			y += stridey;
198 			c += stridec;
199 			i = 2;
200 			if (--n <= 0)
201 				break;
202 			goto LOOP2;
203 		}
204 		if (hx2 < 0x3e400000)
205 		{
206 			v = *x;
207 			*c = 1.0;
208 			*y = *x;
209 			x += stridex;
210 			y += stridey;
211 			c += stridec;
212 			i = 2;
213 			if (--n <= 0)
214 				break;
215 			goto LOOP2;
216 		}
217 		x2 = *x;
218 		py2 = y;
219 		pc2 = c;
220 
221 		/*
222 		 * 0x3fc40000 = 5/32 ~ 0.15625
223 		 * Get msb after subtraction.  Will be 1 only if
224 		 * hx0 - 5/32 is negative.
225 		 */
226 		i = (hx2 - 0x3fc40000) >> 31;
227 		i |= ((hx1 - 0x3fc40000) >> 30) & 2;
228 		i |= ((hx0 - 0x3fc40000) >> 29) & 4;
229 		switch (i)
230 		{
231 			double		a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
232 			double		w0, w1, w2;
233 			double		t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
234 			double		z0, z1, z2;
235 			unsigned	j0, j1, j2;
236 
237 		case 0: /* All are > 5/32 */
238 			j0 = (xsb0 + 0x4000) & 0xffff8000;
239 			j1 = (xsb1 + 0x4000) & 0xffff8000;
240 			j2 = (xsb2 + 0x4000) & 0xffff8000;
241 
242 			HI(&t0) = j0;
243 			HI(&t1) = j1;
244 			HI(&t2) = j2;
245 			LO(&t0) = 0;
246 			LO(&t1) = 0;
247 			LO(&t2) = 0;
248 
249 			x0 -= t0;
250 			x1 -= t1;
251 			x2 -= t2;
252 
253 			z0 = x0 * x0;
254 			z1 = x1 * x1;
255 			z2 = x2 * x2;
256 
257 			t0 = z0 * (qq1 + z0 * qq2);
258 			t1 = z1 * (qq1 + z1 * qq2);
259 			t2 = z2 * (qq1 + z2 * qq2);
260 
261 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
262 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
263 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
264 
265 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
266 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
267 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
268 
269 			xsb0 = (xsb0 >> 30) & 2;
270 			xsb1 = (xsb1 >> 30) & 2;
271 			xsb2 = (xsb2 >> 30) & 2;
272 
273 			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
274 			a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
275 			a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
276 
277 			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
278 			a2_1 = __vlibm_TBL_sincos_hi[j1+1];
279 			a2_2 = __vlibm_TBL_sincos_hi[j2+1];
280 				/* cos_lo(t) */
281 			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
282 			t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
283 			t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
284 
285 			*pc0 = a2_0 + t2_0;
286 			*pc1 = a2_1 + t2_1;
287 			*pc2 = a2_2 + t2_2;
288 
289 			t1_0 = a2_0*w0 + a1_0*t0;
290 			t1_1 = a2_1*w1 + a1_1*t1;
291 			t1_2 = a2_2*w2 + a1_2*t2;
292 
293 			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
294 			t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
295 			t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
296 
297 			*py0 = a1_0 + t1_0;
298 			*py1 = a1_1 + t1_1;
299 			*py2 = a1_2 + t1_2;
300 
301 			break;
302 
303 		case 1:
304 			j0 = (xsb0 + 0x4000) & 0xffff8000;
305 			j1 = (xsb1 + 0x4000) & 0xffff8000;
306 			HI(&t0) = j0;
307 			HI(&t1) = j1;
308 			LO(&t0) = 0;
309 			LO(&t1) = 0;
310 			x0 -= t0;
311 			x1 -= t1;
312 			z0 = x0 * x0;
313 			z1 = x1 * x1;
314 			z2 = x2 * x2;
315 			t0 = z0 * (qq1 + z0 * qq2);
316 			t1 = z1 * (qq1 + z1 * qq2);
317 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
318 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
319 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
320 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
321 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
322 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
323 			xsb0 = (xsb0 >> 30) & 2;
324 			xsb1 = (xsb1 >> 30) & 2;
325 
326 			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
327 			a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
328 
329 			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
330 			a2_1 = __vlibm_TBL_sincos_hi[j1+1];
331 				/* cos_lo(t) */
332 			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
333 			t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
334 
335 			*pc0 = a2_0 + t2_0;
336 			*pc1 = a2_1 + t2_1;
337 			*pc2 = one + t2;
338 
339 			t1_0 = a2_0*w0 + a1_0*t0;
340 			t1_1 = a2_1*w1 + a1_1*t1;
341 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
342 
343 			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
344 			t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
345 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
346 
347 			*py0 = a1_0 + t1_0;
348 			*py1 = a1_1 + t1_1;
349 			t2 = x2 + x2 * t2;
350 			*py2 = t2;
351 
352 			break;
353 
354 		case 2:
355 			j0 = (xsb0 + 0x4000) & 0xffff8000;
356 			j2 = (xsb2 + 0x4000) & 0xffff8000;
357 			HI(&t0) = j0;
358 			HI(&t2) = j2;
359 			LO(&t0) = 0;
360 			LO(&t2) = 0;
361 			x0 -= t0;
362 			x2 -= t2;
363 			z0 = x0 * x0;
364 			z1 = x1 * x1;
365 			z2 = x2 * x2;
366 			t0 = z0 * (qq1 + z0 * qq2);
367 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
368 			t2 = z2 * (qq1 + z2 * qq2);
369 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
370 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
371 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
372 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
373 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
374 			xsb0 = (xsb0 >> 30) & 2;
375 			xsb2 = (xsb2 >> 30) & 2;
376 
377 			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
378 			a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
379 
380 			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
381 			a2_2 = __vlibm_TBL_sincos_hi[j2+1];
382 				/* cos_lo(t) */
383 			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
384 			t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
385 
386 			*pc0 = a2_0 + t2_0;
387 			*pc1 = one + t1;
388 			*pc2 = a2_2 + t2_2;
389 
390 			t1_0 = a2_0*w0 + a1_0*t0;
391 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
392 			t1_2 = a2_2*w2 + a1_2*t2;
393 
394 			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
395 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
396 			t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
397 
398 			*py0 = a1_0 + t1_0;
399 			t1 = x1 + x1 * t1;
400 			*py1 = t1;
401 			*py2 = a1_2 + t1_2;
402 
403 			break;
404 
405 		case 3:
406 			j0 = (xsb0 + 0x4000) & 0xffff8000;
407 			HI(&t0) = j0;
408 			LO(&t0) = 0;
409 			x0 -= t0;
410 			z0 = x0 * x0;
411 			z1 = x1 * x1;
412 			z2 = x2 * x2;
413 			t0 = z0 * (qq1 + z0 * qq2);
414 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
415 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
416 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
417 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
418 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
419 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
420 			xsb0 = (xsb0 >> 30) & 2;
421 			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
422 
423 			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
424 
425 			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
426 
427 			*pc0 = a2_0 + t2_0;
428 			*pc1 = one + t1;
429 			*pc2 = one + t2;
430 
431 			t1_0 = a2_0*w0 + a1_0*t0;
432 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
433 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
434 
435 			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
436 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
437 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
438 
439 			*py0 = a1_0 + t1_0;
440 			t1 = x1 + x1 * t1;
441 			*py1 = t1;
442 			t2 = x2 + x2 * t2;
443 			*py2 = t2;
444 
445 			break;
446 
447 		case 4:
448 			j1 = (xsb1 + 0x4000) & 0xffff8000;
449 			j2 = (xsb2 + 0x4000) & 0xffff8000;
450 			HI(&t1) = j1;
451 			HI(&t2) = j2;
452 			LO(&t1) = 0;
453 			LO(&t2) = 0;
454 			x1 -= t1;
455 			x2 -= t2;
456 			z0 = x0 * x0;
457 			z1 = x1 * x1;
458 			z2 = x2 * x2;
459 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
460 			t1 = z1 * (qq1 + z1 * qq2);
461 			t2 = z2 * (qq1 + z2 * qq2);
462 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
463 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
464 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
465 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
466 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
467 			xsb1 = (xsb1 >> 30) & 2;
468 			xsb2 = (xsb2 >> 30) & 2;
469 
470 			a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
471 			a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
472 
473 			a2_1 = __vlibm_TBL_sincos_hi[j1+1];
474 			a2_2 = __vlibm_TBL_sincos_hi[j2+1];
475 				/* cos_lo(t) */
476 			t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
477 			t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
478 
479 			*pc0 = one + t0;
480 			*pc1 = a2_1 + t2_1;
481 			*pc2 = a2_2 + t2_2;
482 
483 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
484 			t1_1 = a2_1*w1 + a1_1*t1;
485 			t1_2 = a2_2*w2 + a1_2*t2;
486 
487 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
488 			t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
489 			t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
490 
491 			t0 = x0 + x0 * t0;
492 			*py0 = t0;
493 			*py1 = a1_1 + t1_1;
494 			*py2 = a1_2 + t1_2;
495 
496 			break;
497 
498 		case 5:
499 			j1 = (xsb1 + 0x4000) & 0xffff8000;
500 			HI(&t1) = j1;
501 			LO(&t1) = 0;
502 			x1 -= t1;
503 			z0 = x0 * x0;
504 			z1 = x1 * x1;
505 			z2 = x2 * x2;
506 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
507 			t1 = z1 * (qq1 + z1 * qq2);
508 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
509 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
510 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
511 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
512 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
513 			xsb1 = (xsb1 >> 30) & 2;
514 
515 			a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
516 
517 			a2_1 = __vlibm_TBL_sincos_hi[j1+1];
518 
519 			t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
520 
521 			*pc0 = one + t0;
522 			*pc1 = a2_1 + t2_1;
523 			*pc2 = one + t2;
524 
525 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
526 			t1_1 = a2_1*w1 + a1_1*t1;
527 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
528 
529 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
530 			t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
531 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
532 
533 			t0 = x0 + x0 * t0;
534 			*py0 = t0;
535 			*py1 = a1_1 + t1_1;
536 			t2 = x2 + x2 * t2;
537 			*py2 = t2;
538 
539 			break;
540 
541 		case 6:
542 			j2 = (xsb2 + 0x4000) & 0xffff8000;
543 			HI(&t2) = j2;
544 			LO(&t2) = 0;
545 			x2 -= t2;
546 			z0 = x0 * x0;
547 			z1 = x1 * x1;
548 			z2 = x2 * x2;
549 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
550 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
551 			t2 = z2 * (qq1 + z2 * qq2);
552 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
553 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
554 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
555 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
556 			xsb2 = (xsb2 >> 30) & 2;
557 			a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
558 
559 			a2_2 = __vlibm_TBL_sincos_hi[j2+1];
560 
561 			t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
562 
563 			*pc0 = one + t0;
564 			*pc1 = one + t1;
565 			*pc2 = a2_2 + t2_2;
566 
567 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
568 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
569 			t1_2 = a2_2*w2 + a1_2*t2;
570 
571 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
572 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
573 			t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
574 
575 			t0 = x0 + x0 * t0;
576 			*py0 = t0;
577 			t1 = x1 + x1 * t1;
578 			*py1 = t1;
579 			*py2 = a1_2 + t1_2;
580 
581 			break;
582 
583 		case 7: /* All are < 5/32 */
584 			z0 = x0 * x0;
585 			z1 = x1 * x1;
586 			z2 = x2 * x2;
587 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
588 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
589 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
590 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
591 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
592 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
593 			*pc0 = one + t0;
594 			*pc1 = one + t1;
595 			*pc2 = one + t2;
596 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
597 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
598 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
599 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
600 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
601 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
602 			t0 = x0 + x0 * t0;
603 			t1 = x1 + x1 * t1;
604 			t2 = x2 + x2 * t2;
605 			*py0 = t0;
606 			*py1 = t1;
607 			*py2 = t2;
608 			break;
609 		}
610 
611 		x += stridex;
612 		y += stridey;
613 		c += stridec;
614 		i = 0;
615 	} while (--n > 0); /* END MAIN LOOP */
616 
617 	/*
618 	 * CLEAN UP last 0, 1, or 2 elts.
619 	 */
620 	if (i > 0) /* Clean up elts at tail.  i < 3. */
621 	{
622 		double		a1_0, a1_1, a2_0, a2_1;
623 		double		w0, w1;
624 		double		t0, t1, t1_0, t1_1, t2_0, t2_1;
625 		double		z0, z1;
626 		unsigned	j0, j1;
627 
628 		if (i > 1)
629 		{
630 			if (hx1 < 0x3fc40000)
631 			{
632 				z1 = x1 * x1;
633 				t1 = z1 * (poly3[1] + z1 * poly4[1]);
634 				t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
635 				t1 = one + t1;
636 				*pc1 = t1;
637 				t1 = z1 * (poly3[0] + z1 * poly4[0]);
638 				t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
639 				t1 = x1 + x1 * t1;
640 				*py1 = t1;
641 			}
642 			else
643 			{
644 				j1 = (xsb1 + 0x4000) & 0xffff8000;
645 				HI(&t1) = j1;
646 				LO(&t1) = 0;
647 				x1 -= t1;
648 				z1 = x1 * x1;
649 				t1 = z1 * (qq1 + z1 * qq2);
650 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
651 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
652 				xsb1 = (xsb1 >> 30) & 2;
653 				a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
654 				a2_1 = __vlibm_TBL_sincos_hi[j1+1];
655 				t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
656 				*pc1 = a2_1 + t2_1;
657 				t1_1 = a2_1*w1 + a1_1*t1;
658 				t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
659 				*py1 = a1_1 + t1_1;
660 			}
661 		}
662 		if (hx0 < 0x3fc40000)
663 		{
664 			z0 = x0 * x0;
665 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
666 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
667 			t0 = one + t0;
668 			*pc0 = t0;
669 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
670 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
671 			t0 = x0 + x0 * t0;
672 			*py0 = t0;
673 		}
674 		else
675 		{
676 			j0 = (xsb0 + 0x4000) & 0xffff8000;
677 			HI(&t0) = j0;
678 			LO(&t0) = 0;
679 			x0 -= t0;
680 			z0 = x0 * x0;
681 			t0 = z0 * (qq1 + z0 * qq2);
682 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
683 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
684 			xsb0 = (xsb0 >> 30) & 2;
685 			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
686 			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
687 			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
688 			*pc0 = a2_0 + t2_0;
689 			t1_0 = a2_0*w0 + a1_0*t0;
690 			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
691 			*py0 = a1_0 + t1_0;
692 		}
693 	} /* END CLEAN UP */
694 
695 	if (!biguns)
696 		return;
697 
698 	/*
699 	 * Take care of BIGUNS.
700 	 */
701 	n = nsave;
702 	x = xsave;
703 	stridex = sxsave;
704 	y = ysave;
705 	stridey = sysave;
706 	c = csave;
707 	stridec = scsave;
708 	biguns = 0;
709 
710 	x0_or_one[1] = 1.0;
711 	x1_or_one[1] = 1.0;
712 	x2_or_one[1] = 1.0;
713 	x0_or_one[3] = -1.0;
714 	x1_or_one[3] = -1.0;
715 	x2_or_one[3] = -1.0;
716 	y0_or_zero[1] = 0.0;
717 	y1_or_zero[1] = 0.0;
718 	y2_or_zero[1] = 0.0;
719 	y0_or_zero[3] = 0.0;
720 	y1_or_zero[3] = 0.0;
721 	y2_or_zero[3] = 0.0;
722 
723 	do
724 	{
725 		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
726 		unsigned	hx;
727 		int			n0, n1, n2;
728 
729 		/*
730 		 * Find 3 more to work on: Not already done, not too big.
731 		 */
732 loop0:
733 		hx = HI(x);
734 		xsb0 = hx >> 31;
735 		hx &= ~0x80000000;
736 		if (hx <= 0x3fe921fb) /* Done above. */
737 		{
738 			x += stridex;
739 			y += stridey;
740 			c += stridec;
741 			i = 0;
742 			if (--n <= 0)
743 				break;
744 			goto loop0;
745 		}
746 		if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
747 		{
748 			if (hx >= 0x7ff00000) /* Inf or NaN */
749 			{
750 				x0 = *x;
751 				*y = x0 - x0;
752 				*c = x0 - x0;
753 			}
754 			else {
755 				biguns = 1;
756 			}
757 			x += stridex;
758 			y += stridey;
759 			c += stridec;
760 			i = 0;
761 			if (--n <= 0)
762 				break;
763 			goto loop0;
764 		}
765 		x0 = *x;
766 		py0 = y;
767 		pc0 = c;
768 		x += stridex;
769 		y += stridey;
770 		c += stridec;
771 		i = 1;
772 		if (--n <= 0)
773 			break;
774 
775 loop1:
776 		hx = HI(x);
777 		xsb1 = hx >> 31;
778 		hx &= ~0x80000000;
779 		if (hx <= 0x3fe921fb)
780 		{
781 			x += stridex;
782 			y += stridey;
783 			c += stridec;
784 			i = 1;
785 			if (--n <= 0)
786 				break;
787 			goto loop1;
788 		}
789 		if (hx > 0x413921fb)
790 		{
791 			if (hx >= 0x7ff00000)
792 			{
793 				x1 = *x;
794 				*y = x1 - x1;
795 				*c = x1 - x1;
796 			}
797 			else {
798 				biguns = 1;
799 			}
800 			x += stridex;
801 			y += stridey;
802 			c += stridec;
803 			i = 1;
804 			if (--n <= 0)
805 				break;
806 			goto loop1;
807 		}
808 		x1 = *x;
809 		py1 = y;
810 		pc1 = c;
811 		x += stridex;
812 		y += stridey;
813 		c += stridec;
814 		i = 2;
815 		if (--n <= 0)
816 			break;
817 
818 loop2:
819 		hx = HI(x);
820 		xsb2 = hx >> 31;
821 		hx &= ~0x80000000;
822 		if (hx <= 0x3fe921fb)
823 		{
824 			x += stridex;
825 			y += stridey;
826 			c += stridec;
827 			i = 2;
828 			if (--n <= 0)
829 				break;
830 			goto loop2;
831 		}
832 		if (hx > 0x413921fb)
833 		{
834 			if (hx >= 0x7ff00000)
835 			{
836 				x2 = *x;
837 				*y = x2 - x2;
838 				*c = x2 - x2;
839 			}
840 			else {
841 				biguns = 1;
842 			}
843 			x += stridex;
844 			y += stridey;
845 			c += stridec;
846 			i = 2;
847 			if (--n <= 0)
848 				break;
849 			goto loop2;
850 		}
851 		x2 = *x;
852 		py2 = y;
853 		pc2 = c;
854 
855 		n0 = (int) (x0 * invpio2 + half[xsb0]);
856 		n1 = (int) (x1 * invpio2 + half[xsb1]);
857 		n2 = (int) (x2 * invpio2 + half[xsb2]);
858 		fn0 = (double) n0;
859 		fn1 = (double) n1;
860 		fn2 = (double) n2;
861 		n0 &= 3;
862 		n1 &= 3;
863 		n2 &= 3;
864 		a0 = x0 - fn0 * pio2_1;
865 		a1 = x1 - fn1 * pio2_1;
866 		a2 = x2 - fn2 * pio2_1;
867 		w0 = fn0 * pio2_2;
868 		w1 = fn1 * pio2_2;
869 		w2 = fn2 * pio2_2;
870 		x0 = a0 - w0;
871 		x1 = a1 - w1;
872 		x2 = a2 - w2;
873 		y0 = (a0 - x0) - w0;
874 		y1 = (a1 - x1) - w1;
875 		y2 = (a2 - x2) - w2;
876 		a0 = x0;
877 		a1 = x1;
878 		a2 = x2;
879 		w0 = fn0 * pio2_3 - y0;
880 		w1 = fn1 * pio2_3 - y1;
881 		w2 = fn2 * pio2_3 - y2;
882 		x0 = a0 - w0;
883 		x1 = a1 - w1;
884 		x2 = a2 - w2;
885 		y0 = (a0 - x0) - w0;
886 		y1 = (a1 - x1) - w1;
887 		y2 = (a2 - x2) - w2;
888 		a0 = x0;
889 		a1 = x1;
890 		a2 = x2;
891 		w0 = fn0 * pio2_3t - y0;
892 		w1 = fn1 * pio2_3t - y1;
893 		w2 = fn2 * pio2_3t - y2;
894 		x0 = a0 - w0;
895 		x1 = a1 - w1;
896 		x2 = a2 - w2;
897 		y0 = (a0 - x0) - w0;
898 		y1 = (a1 - x1) - w1;
899 		y2 = (a2 - x2) - w2;
900 		xsb2 = HI(&x2);
901 		i = ((xsb2 & ~0x80000000) - 0x3fc40000) >> 31;
902 		xsb1 = HI(&x1);
903 		i |= (((xsb1 & ~0x80000000) - 0x3fc40000) >> 30) & 2;
904 		xsb0 = HI(&x0);
905 		i |= (((xsb0 & ~0x80000000) - 0x3fc40000) >> 29) & 4;
906 		switch (i)
907 		{
908 			double		a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
909 			double		t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
910 			double		z0, z1, z2;
911 			unsigned	j0, j1, j2;
912 
913 		case 0:
914 			j0 = (xsb0 + 0x4000) & 0xffff8000;
915 			j1 = (xsb1 + 0x4000) & 0xffff8000;
916 			j2 = (xsb2 + 0x4000) & 0xffff8000;
917 			HI(&t0) = j0;
918 			HI(&t1) = j1;
919 			HI(&t2) = j2;
920 			LO(&t0) = 0;
921 			LO(&t1) = 0;
922 			LO(&t2) = 0;
923 			x0 = (x0 - t0) + y0;
924 			x1 = (x1 - t1) + y1;
925 			x2 = (x2 - t2) + y2;
926 			z0 = x0 * x0;
927 			z1 = x1 * x1;
928 			z2 = x2 * x2;
929 			t0 = z0 * (qq1 + z0 * qq2);
930 			t1 = z1 * (qq1 + z1 * qq2);
931 			t2 = z2 * (qq1 + z2 * qq2);
932 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
933 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
934 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
935 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
936 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
937 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
938 			xsb0 = (xsb0 >> 30) & 2;
939 			xsb1 = (xsb1 >> 30) & 2;
940 			xsb2 = (xsb2 >> 30) & 2;
941 			n0 ^= (xsb0 & ~(n0 << 1));
942 			n1 ^= (xsb1 & ~(n1 << 1));
943 			n2 ^= (xsb2 & ~(n2 << 1));
944 			xsb0 |= 1;
945 			xsb1 |= 1;
946 			xsb2 |= 1;
947 
948 			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
949 			a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
950 			a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
951 
952 			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
953 			a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
954 			a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
955 
956 			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
957 			t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
958 			t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
959 
960 			w0 *= a2_0;
961 			w1 *= a2_1;
962 			w2 *= a2_2;
963 
964 			*pc0 = a2_0 + t2_0;
965 			*pc1 = a2_1 + t2_1;
966 			*pc2 = a2_2 + t2_2;
967 
968 			t1_0 = w0 + a1_0*t0;
969 			t1_1 = w1 + a1_1*t1;
970 			t1_2 = w2 + a1_2*t2;
971 
972 			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
973 			t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
974 			t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
975 
976 			*py0 = a1_0 + t1_0;
977 			*py1 = a1_1 + t1_1;
978 			*py2 = a1_2 + t1_2;
979 
980 			break;
981 
982 		case 1:
983 			j0 = (xsb0 + 0x4000) & 0xffff8000;
984 			j1 = (xsb1 + 0x4000) & 0xffff8000;
985 			j2 = n2 & 1;
986 			HI(&t0) = j0;
987 			HI(&t1) = j1;
988 			LO(&t0) = 0;
989 			LO(&t1) = 0;
990 			x2_or_one[0] = x2;
991 			x2_or_one[2] = -x2;
992 			x0 = (x0 - t0) + y0;
993 			x1 = (x1 - t1) + y1;
994 			y2_or_zero[0] = y2;
995 			y2_or_zero[2] = -y2;
996 			z0 = x0 * x0;
997 			z1 = x1 * x1;
998 			z2 = x2 * x2;
999 			t0 = z0 * (qq1 + z0 * qq2);
1000 			t1 = z1 * (qq1 + z1 * qq2);
1001 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1002 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1003 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1004 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1005 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1006 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1007 			xsb0 = (xsb0 >> 30) & 2;
1008 			xsb1 = (xsb1 >> 30) & 2;
1009 			n0 ^= (xsb0 & ~(n0 << 1));
1010 			n1 ^= (xsb1 & ~(n1 << 1));
1011 			xsb0 |= 1;
1012 			xsb1 |= 1;
1013 			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1014 			a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1015 
1016 			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1017 			a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1018 
1019 			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1020 			t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1021 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1022 
1023 			*pc0 = a2_0 + t2_0;
1024 			*pc1 = a2_1 + t2_1;
1025 			*py2 = t2;
1026 
1027 			n2 = (n2 + 1) & 3;
1028 			j2 = (j2 + 1) & 1;
1029 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1030 
1031 			t1_0 = a2_0*w0 + a1_0*t0;
1032 			t1_1 = a2_1*w1 + a1_1*t1;
1033 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1034 
1035 			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1036 			t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1037 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1038 
1039 			*py0 = a1_0 + t1_0;
1040 			*py1 = a1_1 + t1_1;
1041 			*pc2 = t2;
1042 
1043 			break;
1044 
1045 		case 2:
1046 			j0 = (xsb0 + 0x4000) & 0xffff8000;
1047 			j1 = n1 & 1;
1048 			j2 = (xsb2 + 0x4000) & 0xffff8000;
1049 			HI(&t0) = j0;
1050 			HI(&t2) = j2;
1051 			LO(&t0) = 0;
1052 			LO(&t2) = 0;
1053 			x1_or_one[0] = x1;
1054 			x1_or_one[2] = -x1;
1055 			x0 = (x0 - t0) + y0;
1056 			y1_or_zero[0] = y1;
1057 			y1_or_zero[2] = -y1;
1058 			x2 = (x2 - t2) + y2;
1059 			z0 = x0 * x0;
1060 			z1 = x1 * x1;
1061 			z2 = x2 * x2;
1062 			t0 = z0 * (qq1 + z0 * qq2);
1063 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1064 			t2 = z2 * (qq1 + z2 * qq2);
1065 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1066 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1067 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
1068 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1069 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1070 			xsb0 = (xsb0 >> 30) & 2;
1071 			xsb2 = (xsb2 >> 30) & 2;
1072 			n0 ^= (xsb0 & ~(n0 << 1));
1073 			n2 ^= (xsb2 & ~(n2 << 1));
1074 			xsb0 |= 1;
1075 			xsb2 |= 1;
1076 
1077 			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1078 			a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1079 
1080 			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1081 			a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1082 
1083 			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1084 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1085 			t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
1086 
1087 			*pc0 = a2_0 + t2_0;
1088 			*py1 = t1;
1089 			*pc2 = a2_2 + t2_2;
1090 
1091 			n1 = (n1 + 1) & 3;
1092 			j1 = (j1 + 1) & 1;
1093 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1094 
1095 			t1_0 = a2_0*w0 + a1_0*t0;
1096 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1097 			t1_2 = a2_2*w2 + a1_2*t2;
1098 
1099 			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1100 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1101 			t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1102 
1103 			*py0 = a1_0 + t1_0;
1104 			*pc1 = t1;
1105 			*py2 = a1_2 + t1_2;
1106 
1107 			break;
1108 
1109 		case 3:
1110 			j0 = (xsb0 + 0x4000) & 0xffff8000;
1111 			j1 = n1 & 1;
1112 			j2 = n2 & 1;
1113 			HI(&t0) = j0;
1114 			LO(&t0) = 0;
1115 			x1_or_one[0] = x1;
1116 			x1_or_one[2] = -x1;
1117 			x2_or_one[0] = x2;
1118 			x2_or_one[2] = -x2;
1119 			x0 = (x0 - t0) + y0;
1120 			y1_or_zero[0] = y1;
1121 			y1_or_zero[2] = -y1;
1122 			y2_or_zero[0] = y2;
1123 			y2_or_zero[2] = -y2;
1124 			z0 = x0 * x0;
1125 			z1 = x1 * x1;
1126 			z2 = x2 * x2;
1127 			t0 = z0 * (qq1 + z0 * qq2);
1128 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1129 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1130 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1131 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1132 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1133 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1134 			xsb0 = (xsb0 >> 30) & 2;
1135 			n0 ^= (xsb0 & ~(n0 << 1));
1136 			xsb0 |= 1;
1137 
1138 			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1139 			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1140 
1141 			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1142 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1143 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1144 
1145 			*pc0 = a2_0 + t2_0;
1146 			*py1 = t1;
1147 			*py2 = t2;
1148 
1149 			n1 = (n1 + 1) & 3;
1150 			n2 = (n2 + 1) & 3;
1151 			j1 = (j1 + 1) & 1;
1152 			j2 = (j2 + 1) & 1;
1153 
1154 			t1_0 = a2_0*w0 + a1_0*t0;
1155 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1156 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1157 
1158 			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1159 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1160 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1161 
1162 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1163 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1164 
1165 			*py0 = a1_0 + t1_0;
1166 			*pc1 = t1;
1167 			*pc2 = t2;
1168 
1169 			break;
1170 
1171 		case 4:
1172 			j0 = n0 & 1;
1173 			j1 = (xsb1 + 0x4000) & 0xffff8000;
1174 			j2 = (xsb2 + 0x4000) & 0xffff8000;
1175 			HI(&t1) = j1;
1176 			HI(&t2) = j2;
1177 			LO(&t1) = 0;
1178 			LO(&t2) = 0;
1179 			x0_or_one[0] = x0;
1180 			x0_or_one[2] = -x0;
1181 			y0_or_zero[0] = y0;
1182 			y0_or_zero[2] = -y0;
1183 			x1 = (x1 - t1) + y1;
1184 			x2 = (x2 - t2) + y2;
1185 			z0 = x0 * x0;
1186 			z1 = x1 * x1;
1187 			z2 = x2 * x2;
1188 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1189 			t1 = z1 * (qq1 + z1 * qq2);
1190 			t2 = z2 * (qq1 + z2 * qq2);
1191 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1192 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1193 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
1194 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1195 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1196 			xsb1 = (xsb1 >> 30) & 2;
1197 			xsb2 = (xsb2 >> 30) & 2;
1198 			n1 ^= (xsb1 & ~(n1 << 1));
1199 			n2 ^= (xsb2 & ~(n2 << 1));
1200 			xsb1 |= 1;
1201 			xsb2 |= 1;
1202 
1203 			a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1204 			a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1205 
1206 			a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1207 			a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1208 
1209 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1210 			t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1211 			t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
1212 
1213 			*py0 = t0;
1214 			*pc1 = a2_1 + t2_1;
1215 			*pc2 = a2_2 + t2_2;
1216 
1217 			n0 = (n0 + 1) & 3;
1218 			j0 = (j0 + 1) & 1;
1219 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1220 
1221 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1222 			t1_1 = a2_1*w1 + a1_1*t1;
1223 			t1_2 = a2_2*w2 + a1_2*t2;
1224 
1225 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1226 			t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1227 			t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1228 
1229 			*py1 = a1_1 + t1_1;
1230 			*py2 = a1_2 + t1_2;
1231 			*pc0 = t0;
1232 
1233 			break;
1234 
1235 		case 5:
1236 			j0 = n0 & 1;
1237 			j1 = (xsb1 + 0x4000) & 0xffff8000;
1238 			j2 = n2 & 1;
1239 			HI(&t1) = j1;
1240 			LO(&t1) = 0;
1241 			x0_or_one[0] = x0;
1242 			x0_or_one[2] = -x0;
1243 			x2_or_one[0] = x2;
1244 			x2_or_one[2] = -x2;
1245 			y0_or_zero[0] = y0;
1246 			y0_or_zero[2] = -y0;
1247 			x1 = (x1 - t1) + y1;
1248 			y2_or_zero[0] = y2;
1249 			y2_or_zero[2] = -y2;
1250 			z0 = x0 * x0;
1251 			z1 = x1 * x1;
1252 			z2 = x2 * x2;
1253 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1254 			t1 = z1 * (qq1 + z1 * qq2);
1255 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1256 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1257 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1258 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1259 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1260 			xsb1 = (xsb1 >> 30) & 2;
1261 			n1 ^= (xsb1 & ~(n1 << 1));
1262 			xsb1 |= 1;
1263 
1264 			a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1265 			a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1266 
1267 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1268 			t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1269 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1270 
1271 			*py0 = t0;
1272 			*pc1 = a2_1 + t2_1;
1273 			*py2 = t2;
1274 
1275 			n0 = (n0 + 1) & 3;
1276 			n2 = (n2 + 1) & 3;
1277 			j0 = (j0 + 1) & 1;
1278 			j2 = (j2 + 1) & 1;
1279 
1280 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1281 			t1_1 = a2_1*w1 + a1_1*t1;
1282 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1283 
1284 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1285 			t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1286 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1287 
1288 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1289 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1290 
1291 			*pc0 = t0;
1292 			*py1 = a1_1 + t1_1;
1293 			*pc2 = t2;
1294 
1295 			break;
1296 
1297 		case 6:
1298 			j0 = n0 & 1;
1299 			j1 = n1 & 1;
1300 			j2 = (xsb2 + 0x4000) & 0xffff8000;
1301 			HI(&t2) = j2;
1302 			LO(&t2) = 0;
1303 			x0_or_one[0] = x0;
1304 			x0_or_one[2] = -x0;
1305 			x1_or_one[0] = x1;
1306 			x1_or_one[2] = -x1;
1307 			y0_or_zero[0] = y0;
1308 			y0_or_zero[2] = -y0;
1309 			y1_or_zero[0] = y1;
1310 			y1_or_zero[2] = -y1;
1311 			x2 = (x2 - t2) + y2;
1312 			z0 = x0 * x0;
1313 			z1 = x1 * x1;
1314 			z2 = x2 * x2;
1315 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1316 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1317 			t2 = z2 * (qq1 + z2 * qq2);
1318 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1319 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1320 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
1321 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1322 			xsb2 = (xsb2 >> 30) & 2;
1323 			n2 ^= (xsb2 & ~(n2 << 1));
1324 			xsb2 |= 1;
1325 
1326 			a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1327 			a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1328 
1329 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1330 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1331 			t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
1332 
1333 			*py0 = t0;
1334 			*py1 = t1;
1335 			*pc2 = a2_2 + t2_2;
1336 
1337 			n0 = (n0 + 1) & 3;
1338 			n1 = (n1 + 1) & 3;
1339 			j0 = (j0 + 1) & 1;
1340 			j1 = (j1 + 1) & 1;
1341 
1342 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1343 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1344 			t1_2 = a2_2*w2 + a1_2*t2;
1345 
1346 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1347 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1348 			t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1349 
1350 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1351 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1352 
1353 			*pc0 = t0;
1354 			*pc1 = t1;
1355 			*py2 = a1_2 + t1_2;
1356 
1357 			break;
1358 
1359 		case 7:
1360 			j0 = n0 & 1;
1361 			j1 = n1 & 1;
1362 			j2 = n2 & 1;
1363 			x0_or_one[0] = x0;
1364 			x0_or_one[2] = -x0;
1365 			x1_or_one[0] = x1;
1366 			x1_or_one[2] = -x1;
1367 			x2_or_one[0] = x2;
1368 			x2_or_one[2] = -x2;
1369 			y0_or_zero[0] = y0;
1370 			y0_or_zero[2] = -y0;
1371 			y1_or_zero[0] = y1;
1372 			y1_or_zero[2] = -y1;
1373 			y2_or_zero[0] = y2;
1374 			y2_or_zero[2] = -y2;
1375 			z0 = x0 * x0;
1376 			z1 = x1 * x1;
1377 			z2 = x2 * x2;
1378 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1379 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1380 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1381 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1382 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1383 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1384 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1385 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1386 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1387 			*py0 = t0;
1388 			*py1 = t1;
1389 			*py2 = t2;
1390 
1391 			n0 = (n0 + 1) & 3;
1392 			n1 = (n1 + 1) & 3;
1393 			n2 = (n2 + 1) & 3;
1394 			j0 = (j0 + 1) & 1;
1395 			j1 = (j1 + 1) & 1;
1396 			j2 = (j2 + 1) & 1;
1397 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1398 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1399 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1400 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1401 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1402 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1403 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1404 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1405 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1406 			*pc0 = t0;
1407 			*pc1 = t1;
1408 			*pc2 = t2;
1409 			break;
1410 		}
1411 
1412 		x += stridex;
1413 		y += stridey;
1414 		c += stridec;
1415 		i = 0;
1416 	} while (--n > 0);
1417 
1418 	if (i > 0)
1419 	{
1420 		double		a1_0, a1_1, a2_0, a2_1;
1421 		double		t0, t1, t1_0, t1_1, t2_0, t2_1;
1422 		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
1423 		double		z0, z1;
1424 		unsigned	j0, j1;
1425 		int		n0, n1;
1426 
1427 		if (i > 1)
1428 		{
1429 			n1 = (int) (x1 * invpio2 + half[xsb1]);
1430 			fn1 = (double) n1;
1431 			n1 &= 3;
1432 			a1 = x1 - fn1 * pio2_1;
1433 			w1 = fn1 * pio2_2;
1434 			x1 = a1 - w1;
1435 			y1 = (a1 - x1) - w1;
1436 			a1 = x1;
1437 			w1 = fn1 * pio2_3 - y1;
1438 			x1 = a1 - w1;
1439 			y1 = (a1 - x1) - w1;
1440 			a1 = x1;
1441 			w1 = fn1 * pio2_3t - y1;
1442 			x1 = a1 - w1;
1443 			y1 = (a1 - x1) - w1;
1444 			xsb1 = HI(&x1);
1445 			if ((xsb1 & ~0x80000000) < 0x3fc40000)
1446 			{
1447 				j1 = n1 & 1;
1448 				x1_or_one[0] = x1;
1449 				x1_or_one[2] = -x1;
1450 				y1_or_zero[0] = y1;
1451 				y1_or_zero[2] = -y1;
1452 				z1 = x1 * x1;
1453 				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1454 				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1455 				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1456 				*py1 = t1;
1457 				n1 = (n1 + 1) & 3;
1458 				j1 = (j1 + 1) & 1;
1459 				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1460 				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1461 				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1462 				*pc1 = t1;
1463 			}
1464 			else
1465 			{
1466 				j1 = (xsb1 + 0x4000) & 0xffff8000;
1467 				HI(&t1) = j1;
1468 				LO(&t1) = 0;
1469 				x1 = (x1 - t1) + y1;
1470 				z1 = x1 * x1;
1471 				t1 = z1 * (qq1 + z1 * qq2);
1472 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1473 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1474 				xsb1 = (xsb1 >> 30) & 2;
1475 				n1 ^= (xsb1 & ~(n1 << 1));
1476 				xsb1 |= 1;
1477 				a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1478 				a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1479 				t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1480 				*pc1 = a2_1 + t2_1;
1481 				t1_1 = a2_1*w1 + a1_1*t1;
1482 				t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1483 				*py1 = a1_1 + t1_1;
1484 			}
1485 		}
1486 		n0 = (int) (x0 * invpio2 + half[xsb0]);
1487 		fn0 = (double) n0;
1488 		n0 &= 3;
1489 		a0 = x0 - fn0 * pio2_1;
1490 		w0 = fn0 * pio2_2;
1491 		x0 = a0 - w0;
1492 		y0 = (a0 - x0) - w0;
1493 		a0 = x0;
1494 		w0 = fn0 * pio2_3 - y0;
1495 		x0 = a0 - w0;
1496 		y0 = (a0 - x0) - w0;
1497 		a0 = x0;
1498 		w0 = fn0 * pio2_3t - y0;
1499 		x0 = a0 - w0;
1500 		y0 = (a0 - x0) - w0;
1501 		xsb0 = HI(&x0);
1502 		if ((xsb0 & ~0x80000000) < 0x3fc40000)
1503 		{
1504 			j0 = n0 & 1;
1505 			x0_or_one[0] = x0;
1506 			x0_or_one[2] = -x0;
1507 			y0_or_zero[0] = y0;
1508 			y0_or_zero[2] = -y0;
1509 			z0 = x0 * x0;
1510 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1511 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1512 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1513 			*py0 = t0;
1514 			n0 = (n0 + 1) & 3;
1515 			j0 = (j0 + 1) & 1;
1516 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1517 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1518 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1519 			*pc0 = t0;
1520 		}
1521 		else
1522 		{
1523 			j0 = (xsb0 + 0x4000) & 0xffff8000;
1524 			HI(&t0) = j0;
1525 			LO(&t0) = 0;
1526 			x0 = (x0 - t0) + y0;
1527 			z0 = x0 * x0;
1528 			t0 = z0 * (qq1 + z0 * qq2);
1529 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1530 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1531 			xsb0 = (xsb0 >> 30) & 2;
1532 			n0 ^= (xsb0 & ~(n0 << 1));
1533 			xsb0 |= 1;
1534 			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1535 			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1536 			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1537 			*pc0 = a2_0 + t2_0;
1538 			t1_0 = a2_0*w0 + a1_0*t0;
1539 			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1540 			*py0 = a1_0 + t1_0;
1541 		}
1542 	}
1543 
1544 	if (biguns) {
1545 		__vlibm_vsincos_big(nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb);
1546 	}
1547 }
1548