xref: /illumos-gate/usr/src/lib/libmvec/common/__vcos.c (revision c5749750a3e052f1194f65a303456224c51dea63)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32 
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x)	*(1+(int*)x)
35 #define LO(x)	*(unsigned*)x
36 #else
37 #define HI(x)	*(int*)x
38 #define LO(x)	*(1+(unsigned*)x)
39 #endif
40 
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46 
47 /*
48  * vcos.1.c
49  *
50  * Vector cosine function.  Just slight modifications to vsin.8.c, mainly
51  * in the primary range part.
52  *
53  * Modification to primary range processing.  If an argument that does not
54  * fall in the primary range is encountered, then processing is continued
55  * in the medium range.
56  *
57  */
58 
59 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
60 
61 static const double
62 	half[2]	= { 0.5, -0.5 },
63 	one		= 1.0,
64 	invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */
65 	pio2_1	= 1.570796326734125614166,	/* first 33 bits of pi/2 */
66 	pio2_2	= 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
67 	pio2_3	= 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
68 	pio2_3t	= 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
69 	pp1		= -1.666666666605760465276263943134982554676e-0001,
70 	pp2		=  8.333261209690963126718376566146180944442e-0003,
71 	qq1		= -4.999999999977710986407023955908711557870e-0001,
72 	qq2		=  4.166654863857219350645055881018842089580e-0002,
73 	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
74 				-4.999999999999931701464060878888294524481e-0001 },
75 	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
76 				 4.166666666394861917535640593963708222319e-0002 },
77 	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
78 				-1.388888552656142867832756687736851681462e-0003 },
79 	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
80 				 2.478519423681460796618128289454530524759e-0005 };
81 
82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
83 
84 /* Don't __ the following; acomp will handle it */
85 extern double fabs(double);
86 extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
87 
88 /*
89  * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
90  *
91  * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
92  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
93  *
94  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
95  */
96 void
97 __vcos(int n, double * restrict x, int stridex, double * restrict y,
98 	int stridey)
99 {
100 	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
101 	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
102 	double		x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
103 	unsigned	hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
104 	int		i, biguns, nsave, sxsave, sysave;
105 	volatile int	v __unused;
106 	nsave = n;
107 	xsave = x;
108 	sxsave = stridex;
109 	ysave = y;
110 	sysave = stridey;
111 	biguns = 0;
112 
113 	x0 = *x;	/* 'x0' may be used uninitialized */
114 	do /* MAIN LOOP */
115 	{
116 		/* Gotos here so _break_ exits MAIN LOOP. */
117 LOOP0:  /* Find first arg in right range. */
118 		xsb0 = HI(x); /* get most significant word */
119 		hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
120 		if (hx0 > 0x3fe921fb) {
121 			/* Too big: arg reduction needed, so leave for second part */
122 			biguns = 1;
123 			goto MEDIUM;
124 		}
125 		if (hx0 < 0x3e400000) {
126 			/* Too small.  cos x ~ 1. */
127 			v = *x;
128 			*y = 1.0;
129 			x += stridex;
130 			y += stridey;
131 			i = 0;
132 			if (--n <= 0)
133 				break;
134 			goto LOOP0;
135 		}
136 		x0 = *x;
137 		py0 = y;
138 		x += stridex;
139 		y += stridey;
140 		i = 1;
141 		if (--n <= 0)
142 			break;
143 
144 LOOP1: /* Get second arg, same as above. */
145 		xsb1 = HI(x);
146 		hx1 = xsb1 & ~0x80000000;
147 		if (hx1 > 0x3fe921fb)
148 		{
149 			biguns = 2;
150 			goto MEDIUM;
151 		}
152 		if (hx1 < 0x3e400000)
153 		{
154 			v = *x;
155 			*y = 1.0;
156 			x += stridex;
157 			y += stridey;
158 			i = 1;
159 			if (--n <= 0)
160 				break;
161 			goto LOOP1;
162 		}
163 		x1 = *x;
164 		py1 = y;
165 		x += stridex;
166 		y += stridey;
167 		i = 2;
168 		if (--n <= 0)
169 			break;
170 
171 LOOP2: /* Get third arg, same as above. */
172 		xsb2 = HI(x);
173 		hx2 = xsb2 & ~0x80000000;
174 		if (hx2 > 0x3fe921fb)
175 		{
176 			biguns = 3;
177 			goto MEDIUM;
178 		}
179 		if (hx2 < 0x3e400000)
180 		{
181 			v = *x;
182 			*y = 1.0;
183 			x += stridex;
184 			y += stridey;
185 			i = 2;
186 			if (--n <= 0)
187 				break;
188 			goto LOOP2;
189 		}
190 		x2 = *x;
191 		py2 = y;
192 
193 		/*
194 		 * 0x3fc40000 = 5/32 ~ 0.15625
195 		 * Get msb after subtraction.  Will be 1 only if
196 		 * hx0 - 5/32 is negative.
197 		 */
198 		i = (hx0 - 0x3fc40000) >> 31;
199 		i |= ((hx1 - 0x3fc40000) >> 30) & 2;
200 		i |= ((hx2 - 0x3fc40000) >> 29) & 4;
201 		switch (i)
202 		{
203 			double		a0, a1, a2, w0, w1, w2;
204 			double		t0, t1, t2, z0, z1, z2;
205 			unsigned	j0, j1, j2;
206 
207 		case 0: /* All are > 5/32 */
208 			j0 = (xsb0 + 0x4000) & 0xffff8000;
209 			j1 = (xsb1 + 0x4000) & 0xffff8000;
210 			j2 = (xsb2 + 0x4000) & 0xffff8000;
211 			HI(&t0) = j0;
212 			HI(&t1) = j1;
213 			HI(&t2) = j2;
214 			LO(&t0) = 0;
215 			LO(&t1) = 0;
216 			LO(&t2) = 0;
217 			x0 -= t0;
218 			x1 -= t1;
219 			x2 -= t2;
220 			z0 = x0 * x0;
221 			z1 = x1 * x1;
222 			z2 = x2 * x2;
223 			t0 = z0 * (qq1 + z0 * qq2);
224 			t1 = z1 * (qq1 + z1 * qq2);
225 			t2 = z2 * (qq1 + z2 * qq2);
226 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
227 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
228 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
229 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
230 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
231 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
232 			xsb0 = (xsb0 >> 30) & 2;
233 			xsb1 = (xsb1 >> 30) & 2;
234 			xsb2 = (xsb2 >> 30) & 2;
235 			a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
236 			a1 = __vlibm_TBL_sincos_hi[j1+1];
237 			a2 = __vlibm_TBL_sincos_hi[j2+1];
238 			   /*   cos_lo(t)			 sin_hi(t) */
239 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
240 			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
241 			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
242 
243 			*py0 = a0 + t0;
244 			*py1 = a1 + t1;
245 			*py2 = a2 + t2;
246 			break;
247 
248 		case 1:
249 			j1 = (xsb1 + 0x4000) & 0xffff8000;
250 			j2 = (xsb2 + 0x4000) & 0xffff8000;
251 			HI(&t1) = j1;
252 			HI(&t2) = j2;
253 			LO(&t1) = 0;
254 			LO(&t2) = 0;
255 			x1 -= t1;
256 			x2 -= t2;
257 			z0 = x0 * x0;
258 			z1 = x1 * x1;
259 			z2 = x2 * x2;
260 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
261 			t1 = z1 * (qq1 + z1 * qq2);
262 			t2 = z2 * (qq1 + z2 * qq2);
263 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
264 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
265 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
266 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
267 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
268 			xsb1 = (xsb1 >> 30) & 2;
269 			xsb2 = (xsb2 >> 30) & 2;
270 			a1 = __vlibm_TBL_sincos_hi[j1+1];
271 			a2 = __vlibm_TBL_sincos_hi[j2+1];
272 			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
273 			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
274 			*py0 = one + t0;
275 			*py1 = a1 + t1;
276 			*py2 = a2 + t2;
277 			break;
278 
279 		case 2:
280 			j0 = (xsb0 + 0x4000) & 0xffff8000;
281 			j2 = (xsb2 + 0x4000) & 0xffff8000;
282 			HI(&t0) = j0;
283 			HI(&t2) = j2;
284 			LO(&t0) = 0;
285 			LO(&t2) = 0;
286 			x0 -= t0;
287 			x2 -= t2;
288 			z0 = x0 * x0;
289 			z1 = x1 * x1;
290 			z2 = x2 * x2;
291 			t0 = z0 * (qq1 + z0 * qq2);
292 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
293 			t2 = z2 * (qq1 + z2 * qq2);
294 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
295 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
296 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
297 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299 			xsb0 = (xsb0 >> 30) & 2;
300 			xsb2 = (xsb2 >> 30) & 2;
301 			a0 = __vlibm_TBL_sincos_hi[j0+1];
302 			a2 = __vlibm_TBL_sincos_hi[j2+1];
303 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
304 			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
305 			*py0 = a0 + t0;
306 			*py1 = one + t1;
307 			*py2 = a2 + t2;
308 			break;
309 
310 		case 3:
311 			j2 = (xsb2 + 0x4000) & 0xffff8000;
312 			HI(&t2) = j2;
313 			LO(&t2) = 0;
314 			x2 -= t2;
315 			z0 = x0 * x0;
316 			z1 = x1 * x1;
317 			z2 = x2 * x2;
318 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
319 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
320 			t2 = z2 * (qq1 + z2 * qq2);
321 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
322 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
323 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
324 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
325 			xsb2 = (xsb2 >> 30) & 2;
326 			a2 = __vlibm_TBL_sincos_hi[j2+1];
327 			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
328 			*py0 = one + t0;
329 			*py1 = one + t1;
330 			*py2 = a2 + t2;
331 			break;
332 
333 		case 4:
334 			j0 = (xsb0 + 0x4000) & 0xffff8000;
335 			j1 = (xsb1 + 0x4000) & 0xffff8000;
336 			HI(&t0) = j0;
337 			HI(&t1) = j1;
338 			LO(&t0) = 0;
339 			LO(&t1) = 0;
340 			x0 -= t0;
341 			x1 -= t1;
342 			z0 = x0 * x0;
343 			z1 = x1 * x1;
344 			z2 = x2 * x2;
345 			t0 = z0 * (qq1 + z0 * qq2);
346 			t1 = z1 * (qq1 + z1 * qq2);
347 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
348 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
349 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
350 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
351 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
352 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
353 			xsb0 = (xsb0 >> 30) & 2;
354 			xsb1 = (xsb1 >> 30) & 2;
355 			a0 = __vlibm_TBL_sincos_hi[j0+1];
356 			a1 = __vlibm_TBL_sincos_hi[j1+1];
357 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
358 			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
359 			*py0 = a0 + t0;
360 			*py1 = a1 + t1;
361 			*py2 = one + t2;
362 			break;
363 
364 		case 5:
365 			j1 = (xsb1 + 0x4000) & 0xffff8000;
366 			HI(&t1) = j1;
367 			LO(&t1) = 0;
368 			x1 -= t1;
369 			z0 = x0 * x0;
370 			z1 = x1 * x1;
371 			z2 = x2 * x2;
372 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
373 			t1 = z1 * (qq1 + z1 * qq2);
374 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
375 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
376 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
377 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
378 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
379 			xsb1 = (xsb1 >> 30) & 2;
380 			a1 = __vlibm_TBL_sincos_hi[j1+1];
381 			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
382 			*py0 = one + t0;
383 			*py1 = a1 + t1;
384 			*py2 = one + t2;
385 			break;
386 
387 		case 6:
388 			j0 = (xsb0 + 0x4000) & 0xffff8000;
389 			HI(&t0) = j0;
390 			LO(&t0) = 0;
391 			x0 -= t0;
392 			z0 = x0 * x0;
393 			z1 = x1 * x1;
394 			z2 = x2 * x2;
395 			t0 = z0 * (qq1 + z0 * qq2);
396 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
397 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
398 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
399 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
400 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
401 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
402 			xsb0 = (xsb0 >> 30) & 2;
403 			a0 = __vlibm_TBL_sincos_hi[j0+1];
404 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
405 			*py0 = a0 + t0;
406 			*py1 = one + t1;
407 			*py2 = one + t2;
408 			break;
409 
410 		case 7: /* All are < 5/32 */
411 			z0 = x0 * x0;
412 			z1 = x1 * x1;
413 			z2 = x2 * x2;
414 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
415 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
416 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
417 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
418 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
419 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
420 			*py0 = one + t0;
421 			*py1 = one + t1;
422 			*py2 = one + t2;
423 			break;
424 		}
425 
426 		x += stridex;
427 		y += stridey;
428 		i = 0;
429 	} while (--n > 0); /* END MAIN LOOP */
430 
431 	/*
432 	 * CLEAN UP last 0, 1, or 2 elts.
433 	 */
434 	if (i > 0) /* Clean up elts at tail.  i < 3. */
435 	{
436 		double		a0, a1, w0, w1;
437 		double		t0, t1, z0, z1;
438 		unsigned	j0, j1;
439 
440 		if (i > 1)
441 		{
442 			if (hx1 < 0x3fc40000)
443 			{
444 				z1 = x1 * x1;
445 				t1 = z1 * (poly3[1] + z1 * poly4[1]);
446 				t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
447 				t1 = one + t1;
448 				*py1 = t1;
449 			}
450 			else
451 			{
452 				j1 = (xsb1 + 0x4000) & 0xffff8000;
453 				HI(&t1) = j1;
454 				LO(&t1) = 0;
455 				x1 -= t1;
456 				z1 = x1 * x1;
457 				t1 = z1 * (qq1 + z1 * qq2);
458 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
459 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
460 				xsb1 = (xsb1 >> 30) & 2;
461 				a1 = __vlibm_TBL_sincos_hi[j1+1];
462 				t1 = __vlibm_TBL_sincos_lo[j1+1]
463 					- (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
464 				*py1 = a1 + t1;
465 			}
466 		}
467 		if (hx0 < 0x3fc40000)
468 		{
469 			z0 = x0 * x0;
470 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
471 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
472 			t0 = one + t0;
473 			*py0 = t0;
474 		}
475 		else
476 		{
477 			j0 = (xsb0 + 0x4000) & 0xffff8000;
478 			HI(&t0) = j0;
479 			LO(&t0) = 0;
480 			x0 -= t0;
481 			z0 = x0 * x0;
482 			t0 = z0 * (qq1 + z0 * qq2);
483 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
484 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
485 			xsb0 = (xsb0 >> 30) & 2;
486 			a0 = __vlibm_TBL_sincos_hi[j0+1];
487 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
488 			*py0 = a0 + t0;
489 		}
490 	} /* END CLEAN UP */
491 
492 	return;
493 
494 	/*
495 	 * Take care of BIGUNS.
496 	 *
497 	 * We have jumped here in the middle of processing after having
498 	 * encountered a medium range argument.  Therefore things are in a
499 	 * bit of a tizzy.
500 	 */
501 
502 MEDIUM:
503 
504 	x0_or_one[1] = 1.0;
505 	x1_or_one[1] = 1.0;
506 	x2_or_one[1] = 1.0;
507 	x0_or_one[3] = -1.0;
508 	x1_or_one[3] = -1.0;
509 	x2_or_one[3] = -1.0;
510 	y0_or_zero[1] = 0.0;
511 	y1_or_zero[1] = 0.0;
512 	y2_or_zero[1] = 0.0;
513 	y0_or_zero[3] = 0.0;
514 	y1_or_zero[3] = 0.0;
515 	y2_or_zero[3] = 0.0;
516 
517 	if (biguns == 3)
518 	{
519 		biguns = 0;
520 		xsb0 = xsb0 >> 31;
521 		xsb1 = xsb1 >> 31;
522 		goto loop2;
523 	}
524 	else if (biguns == 2)
525 	{
526 		xsb0 = xsb0 >> 31;
527 		biguns = 0;
528 		goto loop1;
529 	}
530 	biguns = 0;
531 
532 	do
533 	{
534 		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
535 		unsigned	hx;
536 		int			n0, n1, n2;
537 
538 		/*
539 		 * Find 3 more to work on: Not already done, not too big.
540 		 */
541 
542 loop0:
543 		hx = HI(x);
544 		xsb0 = hx >> 31;
545 		hx &= ~0x80000000;
546 		if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
547 		{
548 			if (hx >= 0x7ff00000) /* Inf or NaN */
549 			{
550 				x0 = *x;
551 				*y = x0 - x0;
552 			}
553 			else
554 				biguns = 1;
555 			x += stridex;
556 			y += stridey;
557 			i = 0;
558 			if (--n <= 0)
559 				break;
560 			goto loop0;
561 		}
562 		x0 = *x;
563 		py0 = y;
564 		x += stridex;
565 		y += stridey;
566 		i = 1;
567 		if (--n <= 0)
568 			break;
569 
570 loop1:
571 		hx = HI(x);
572 		xsb1 = hx >> 31;
573 		hx &= ~0x80000000;
574 		if (hx > 0x413921fb)
575 		{
576 			if (hx >= 0x7ff00000)
577 			{
578 				x1 = *x;
579 				*y = x1 - x1;
580 			}
581 			else
582 				biguns = 1;
583 			x += stridex;
584 			y += stridey;
585 			i = 1;
586 			if (--n <= 0)
587 				break;
588 			goto loop1;
589 		}
590 		x1 = *x;
591 		py1 = y;
592 		x += stridex;
593 		y += stridey;
594 		i = 2;
595 		if (--n <= 0)
596 			break;
597 
598 loop2:
599 		hx = HI(x);
600 		xsb2 = hx >> 31;
601 		hx &= ~0x80000000;
602 		if (hx > 0x413921fb)
603 		{
604 			if (hx >= 0x7ff00000)
605 			{
606 				x2 = *x;
607 				*y = x2 - x2;
608 			}
609 			else
610 				biguns = 1;
611 			x += stridex;
612 			y += stridey;
613 			i = 2;
614 			if (--n <= 0)
615 				break;
616 			goto loop2;
617 		}
618 		x2 = *x;
619 		py2 = y;
620 
621 		n0 = (int) (x0 * invpio2 + half[xsb0]);
622 		n1 = (int) (x1 * invpio2 + half[xsb1]);
623 		n2 = (int) (x2 * invpio2 + half[xsb2]);
624 		fn0 = (double) n0;
625 		fn1 = (double) n1;
626 		fn2 = (double) n2;
627 		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
628 		n1 = (n1 + 1) & 3;
629 		n2 = (n2 + 1) & 3;
630 		a0 = x0 - fn0 * pio2_1;
631 		a1 = x1 - fn1 * pio2_1;
632 		a2 = x2 - fn2 * pio2_1;
633 		w0 = fn0 * pio2_2;
634 		w1 = fn1 * pio2_2;
635 		w2 = fn2 * pio2_2;
636 		x0 = a0 - w0;
637 		x1 = a1 - w1;
638 		x2 = a2 - w2;
639 		y0 = (a0 - x0) - w0;
640 		y1 = (a1 - x1) - w1;
641 		y2 = (a2 - x2) - w2;
642 		a0 = x0;
643 		a1 = x1;
644 		a2 = x2;
645 		w0 = fn0 * pio2_3 - y0;
646 		w1 = fn1 * pio2_3 - y1;
647 		w2 = fn2 * pio2_3 - y2;
648 		x0 = a0 - w0;
649 		x1 = a1 - w1;
650 		x2 = a2 - w2;
651 		y0 = (a0 - x0) - w0;
652 		y1 = (a1 - x1) - w1;
653 		y2 = (a2 - x2) - w2;
654 		a0 = x0;
655 		a1 = x1;
656 		a2 = x2;
657 		w0 = fn0 * pio2_3t - y0;
658 		w1 = fn1 * pio2_3t - y1;
659 		w2 = fn2 * pio2_3t - y2;
660 		x0 = a0 - w0;
661 		x1 = a1 - w1;
662 		x2 = a2 - w2;
663 		y0 = (a0 - x0) - w0;
664 		y1 = (a1 - x1) - w1;
665 		y2 = (a2 - x2) - w2;
666 		xsb0 = HI(&x0);
667 		i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
668 		xsb1 = HI(&x1);
669 		i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
670 		xsb2 = HI(&x2);
671 		i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
672 		switch (i)
673 		{
674 			double		t0, t1, t2, z0, z1, z2;
675 			unsigned	j0, j1, j2;
676 
677 		case 0:
678 			j0 = (xsb0 + 0x4000) & 0xffff8000;
679 			j1 = (xsb1 + 0x4000) & 0xffff8000;
680 			j2 = (xsb2 + 0x4000) & 0xffff8000;
681 			HI(&t0) = j0;
682 			HI(&t1) = j1;
683 			HI(&t2) = j2;
684 			LO(&t0) = 0;
685 			LO(&t1) = 0;
686 			LO(&t2) = 0;
687 			x0 = (x0 - t0) + y0;
688 			x1 = (x1 - t1) + y1;
689 			x2 = (x2 - t2) + y2;
690 			z0 = x0 * x0;
691 			z1 = x1 * x1;
692 			z2 = x2 * x2;
693 			t0 = z0 * (qq1 + z0 * qq2);
694 			t1 = z1 * (qq1 + z1 * qq2);
695 			t2 = z2 * (qq1 + z2 * qq2);
696 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
697 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
698 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
699 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
700 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
701 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
702 			xsb0 = (xsb0 >> 30) & 2;
703 			xsb1 = (xsb1 >> 30) & 2;
704 			xsb2 = (xsb2 >> 30) & 2;
705 			n0 ^= (xsb0 & ~(n0 << 1));
706 			n1 ^= (xsb1 & ~(n1 << 1));
707 			n2 ^= (xsb2 & ~(n2 << 1));
708 			xsb0 |= 1;
709 			xsb1 |= 1;
710 			xsb2 |= 1;
711 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
712 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
713 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
714 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
715 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
716 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
717 			*py0 = ( a0 + t0 );
718 			*py1 = ( a1 + t1 );
719 			*py2 = ( a2 + t2 );
720 			break;
721 
722 		case 1:
723 			j0 = n0 & 1;
724 			j1 = (xsb1 + 0x4000) & 0xffff8000;
725 			j2 = (xsb2 + 0x4000) & 0xffff8000;
726 			HI(&t1) = j1;
727 			HI(&t2) = j2;
728 			LO(&t1) = 0;
729 			LO(&t2) = 0;
730 			x0_or_one[0] = x0;
731 			x0_or_one[2] = -x0;
732 			y0_or_zero[0] = y0;
733 			y0_or_zero[2] = -y0;
734 			x1 = (x1 - t1) + y1;
735 			x2 = (x2 - t2) + y2;
736 			z0 = x0 * x0;
737 			z1 = x1 * x1;
738 			z2 = x2 * x2;
739 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
740 			t1 = z1 * (qq1 + z1 * qq2);
741 			t2 = z2 * (qq1 + z2 * qq2);
742 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
743 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
744 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
745 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
746 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
747 			xsb1 = (xsb1 >> 30) & 2;
748 			xsb2 = (xsb2 >> 30) & 2;
749 			n1 ^= (xsb1 & ~(n1 << 1));
750 			n2 ^= (xsb2 & ~(n2 << 1));
751 			xsb1 |= 1;
752 			xsb2 |= 1;
753 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
754 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
755 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
756 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
757 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
758 			*py0 = t0;
759 			*py1 = ( a1 + t1 );
760 			*py2 = ( a2 + t2 );
761 			break;
762 
763 		case 2:
764 			j0 = (xsb0 + 0x4000) & 0xffff8000;
765 			j1 = n1 & 1;
766 			j2 = (xsb2 + 0x4000) & 0xffff8000;
767 			HI(&t0) = j0;
768 			HI(&t2) = j2;
769 			LO(&t0) = 0;
770 			LO(&t2) = 0;
771 			x1_or_one[0] = x1;
772 			x1_or_one[2] = -x1;
773 			x0 = (x0 - t0) + y0;
774 			y1_or_zero[0] = y1;
775 			y1_or_zero[2] = -y1;
776 			x2 = (x2 - t2) + y2;
777 			z0 = x0 * x0;
778 			z1 = x1 * x1;
779 			z2 = x2 * x2;
780 			t0 = z0 * (qq1 + z0 * qq2);
781 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
782 			t2 = z2 * (qq1 + z2 * qq2);
783 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
784 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
785 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
786 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
787 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
788 			xsb0 = (xsb0 >> 30) & 2;
789 			xsb2 = (xsb2 >> 30) & 2;
790 			n0 ^= (xsb0 & ~(n0 << 1));
791 			n2 ^= (xsb2 & ~(n2 << 1));
792 			xsb0 |= 1;
793 			xsb2 |= 1;
794 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
795 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
796 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
797 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
798 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
799 			*py0 = ( a0 + t0 );
800 			*py1 = t1;
801 			*py2 = ( a2 + t2 );
802 			break;
803 
804 		case 3:
805 			j0 = n0 & 1;
806 			j1 = n1 & 1;
807 			j2 = (xsb2 + 0x4000) & 0xffff8000;
808 			HI(&t2) = j2;
809 			LO(&t2) = 0;
810 			x0_or_one[0] = x0;
811 			x0_or_one[2] = -x0;
812 			x1_or_one[0] = x1;
813 			x1_or_one[2] = -x1;
814 			y0_or_zero[0] = y0;
815 			y0_or_zero[2] = -y0;
816 			y1_or_zero[0] = y1;
817 			y1_or_zero[2] = -y1;
818 			x2 = (x2 - t2) + y2;
819 			z0 = x0 * x0;
820 			z1 = x1 * x1;
821 			z2 = x2 * x2;
822 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
823 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
824 			t2 = z2 * (qq1 + z2 * qq2);
825 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
826 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
827 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
828 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
829 			xsb2 = (xsb2 >> 30) & 2;
830 			n2 ^= (xsb2 & ~(n2 << 1));
831 			xsb2 |= 1;
832 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
833 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
834 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
835 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
836 			*py0 = t0;
837 			*py1 = t1;
838 			*py2 = ( a2 + t2 );
839 			break;
840 
841 		case 4:
842 			j0 = (xsb0 + 0x4000) & 0xffff8000;
843 			j1 = (xsb1 + 0x4000) & 0xffff8000;
844 			j2 = n2 & 1;
845 			HI(&t0) = j0;
846 			HI(&t1) = j1;
847 			LO(&t0) = 0;
848 			LO(&t1) = 0;
849 			x2_or_one[0] = x2;
850 			x2_or_one[2] = -x2;
851 			x0 = (x0 - t0) + y0;
852 			x1 = (x1 - t1) + y1;
853 			y2_or_zero[0] = y2;
854 			y2_or_zero[2] = -y2;
855 			z0 = x0 * x0;
856 			z1 = x1 * x1;
857 			z2 = x2 * x2;
858 			t0 = z0 * (qq1 + z0 * qq2);
859 			t1 = z1 * (qq1 + z1 * qq2);
860 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
861 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
862 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
863 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
864 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
865 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
866 			xsb0 = (xsb0 >> 30) & 2;
867 			xsb1 = (xsb1 >> 30) & 2;
868 			n0 ^= (xsb0 & ~(n0 << 1));
869 			n1 ^= (xsb1 & ~(n1 << 1));
870 			xsb0 |= 1;
871 			xsb1 |= 1;
872 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
873 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
874 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
875 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
876 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
877 			*py0 = ( a0 + t0 );
878 			*py1 = ( a1 + t1 );
879 			*py2 = t2;
880 			break;
881 
882 		case 5:
883 			j0 = n0 & 1;
884 			j1 = (xsb1 + 0x4000) & 0xffff8000;
885 			j2 = n2 & 1;
886 			HI(&t1) = j1;
887 			LO(&t1) = 0;
888 			x0_or_one[0] = x0;
889 			x0_or_one[2] = -x0;
890 			x2_or_one[0] = x2;
891 			x2_or_one[2] = -x2;
892 			y0_or_zero[0] = y0;
893 			y0_or_zero[2] = -y0;
894 			x1 = (x1 - t1) + y1;
895 			y2_or_zero[0] = y2;
896 			y2_or_zero[2] = -y2;
897 			z0 = x0 * x0;
898 			z1 = x1 * x1;
899 			z2 = x2 * x2;
900 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
901 			t1 = z1 * (qq1 + z1 * qq2);
902 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
903 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
904 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
905 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
906 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
907 			xsb1 = (xsb1 >> 30) & 2;
908 			n1 ^= (xsb1 & ~(n1 << 1));
909 			xsb1 |= 1;
910 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
911 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
912 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
913 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
914 			*py0 = t0;
915 			*py1 = ( a1 + t1 );
916 			*py2 = t2;
917 			break;
918 
919 		case 6:
920 			j0 = (xsb0 + 0x4000) & 0xffff8000;
921 			j1 = n1 & 1;
922 			j2 = n2 & 1;
923 			HI(&t0) = j0;
924 			LO(&t0) = 0;
925 			x1_or_one[0] = x1;
926 			x1_or_one[2] = -x1;
927 			x2_or_one[0] = x2;
928 			x2_or_one[2] = -x2;
929 			x0 = (x0 - t0) + y0;
930 			y1_or_zero[0] = y1;
931 			y1_or_zero[2] = -y1;
932 			y2_or_zero[0] = y2;
933 			y2_or_zero[2] = -y2;
934 			z0 = x0 * x0;
935 			z1 = x1 * x1;
936 			z2 = x2 * x2;
937 			t0 = z0 * (qq1 + z0 * qq2);
938 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
939 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
940 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
941 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
942 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
943 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
944 			xsb0 = (xsb0 >> 30) & 2;
945 			n0 ^= (xsb0 & ~(n0 << 1));
946 			xsb0 |= 1;
947 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
948 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
949 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
950 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
951 			*py0 = ( a0 + t0 );
952 			*py1 = t1;
953 			*py2 = t2;
954 			break;
955 
956 		case 7:
957 			j0 = n0 & 1;
958 			j1 = n1 & 1;
959 			j2 = n2 & 1;
960 			x0_or_one[0] = x0;
961 			x0_or_one[2] = -x0;
962 			x1_or_one[0] = x1;
963 			x1_or_one[2] = -x1;
964 			x2_or_one[0] = x2;
965 			x2_or_one[2] = -x2;
966 			y0_or_zero[0] = y0;
967 			y0_or_zero[2] = -y0;
968 			y1_or_zero[0] = y1;
969 			y1_or_zero[2] = -y1;
970 			y2_or_zero[0] = y2;
971 			y2_or_zero[2] = -y2;
972 			z0 = x0 * x0;
973 			z1 = x1 * x1;
974 			z2 = x2 * x2;
975 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
976 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
977 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
978 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
979 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
980 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
981 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
982 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
983 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
984 			*py0 = t0;
985 			*py1 = t1;
986 			*py2 = t2;
987 			break;
988 		}
989 
990 		x += stridex;
991 		y += stridey;
992 		i = 0;
993 	} while (--n > 0);
994 
995 	if (i > 0)
996 	{
997 		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
998 		double		t0, t1, z0, z1;
999 		unsigned	j0, j1;
1000 		int			n0, n1;
1001 
1002 		if (i > 1)
1003 		{
1004 			n1 = (int) (x1 * invpio2 + half[xsb1]);
1005 			fn1 = (double) n1;
1006 			n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1007 			a1 = x1 - fn1 * pio2_1;
1008 			w1 = fn1 * pio2_2;
1009 			x1 = a1 - w1;
1010 			y1 = (a1 - x1) - w1;
1011 			a1 = x1;
1012 			w1 = fn1 * pio2_3 - y1;
1013 			x1 = a1 - w1;
1014 			y1 = (a1 - x1) - w1;
1015 			a1 = x1;
1016 			w1 = fn1 * pio2_3t - y1;
1017 			x1 = a1 - w1;
1018 			y1 = (a1 - x1) - w1;
1019 			xsb1 = HI(&x1);
1020 			if ((xsb1 & ~0x80000000) < thresh[n1&1])
1021 			{
1022 				j1 = n1 & 1;
1023 				x1_or_one[0] = x1;
1024 				x1_or_one[2] = -x1;
1025 				y1_or_zero[0] = y1;
1026 				y1_or_zero[2] = -y1;
1027 				z1 = x1 * x1;
1028 				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1029 				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1030 				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1031 				*py1 = t1;
1032 			}
1033 			else
1034 			{
1035 				j1 = (xsb1 + 0x4000) & 0xffff8000;
1036 				HI(&t1) = j1;
1037 				LO(&t1) = 0;
1038 				x1 = (x1 - t1) + y1;
1039 				z1 = x1 * x1;
1040 				t1 = z1 * (qq1 + z1 * qq2);
1041 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1042 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1043 				xsb1 = (xsb1 >> 30) & 2;
1044 				n1 ^= (xsb1 & ~(n1 << 1));
1045 				xsb1 |= 1;
1046 				a1 = __vlibm_TBL_sincos_hi[j1+n1];
1047 				t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1048 				*py1 = ( a1 + t1 );
1049 			}
1050 		}
1051 		n0 = (int) (x0 * invpio2 + half[xsb0]);
1052 		fn0 = (double) n0;
1053 		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1054 		a0 = x0 - fn0 * pio2_1;
1055 		w0 = fn0 * pio2_2;
1056 		x0 = a0 - w0;
1057 		y0 = (a0 - x0) - w0;
1058 		a0 = x0;
1059 		w0 = fn0 * pio2_3 - y0;
1060 		x0 = a0 - w0;
1061 		y0 = (a0 - x0) - w0;
1062 		a0 = x0;
1063 		w0 = fn0 * pio2_3t - y0;
1064 		x0 = a0 - w0;
1065 		y0 = (a0 - x0) - w0;
1066 		xsb0 = HI(&x0);
1067 		if ((xsb0 & ~0x80000000) < thresh[n0&1])
1068 		{
1069 			j0 = n0 & 1;
1070 			x0_or_one[0] = x0;
1071 			x0_or_one[2] = -x0;
1072 			y0_or_zero[0] = y0;
1073 			y0_or_zero[2] = -y0;
1074 			z0 = x0 * x0;
1075 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1076 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1077 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1078 			*py0 = t0;
1079 		}
1080 		else
1081 		{
1082 			j0 = (xsb0 + 0x4000) & 0xffff8000;
1083 			HI(&t0) = j0;
1084 			LO(&t0) = 0;
1085 			x0 = (x0 - t0) + y0;
1086 			z0 = x0 * x0;
1087 			t0 = z0 * (qq1 + z0 * qq2);
1088 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1089 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1090 			xsb0 = (xsb0 >> 30) & 2;
1091 			n0 ^= (xsb0 & ~(n0 << 1));
1092 			xsb0 |= 1;
1093 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
1094 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1095 			*py0 = ( a0 + t0 );
1096 		}
1097 	}
1098 
1099 	if (biguns)
1100 		__vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
1101 }
1102