xref: /titanic_50/usr/src/lib/libmvec/common/__vsin.c (revision 84ba300aaa958c8e8427c2ec66a932d86bee71c4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32 
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x)	*(1+(int*)x)
35 #define LO(x)	*(unsigned*)x
36 #else
37 #define HI(x)	*(int*)x
38 #define LO(x)	*(1+(unsigned*)x)
39 #endif
40 
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46 
47 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
48 
49 static const double
50 	half[2]	= { 0.5, -0.5 },
51 	one		= 1.0,
52 	invpio2 = 0.636619772367581343075535,
53 	pio2_1  = 1.570796326734125614166,
54 	pio2_2  = 6.077100506303965976596e-11,
55 	pio2_3  = 2.022266248711166455796e-21,
56 	pio2_3t = 8.478427660368899643959e-32,
57 	pp1		= -1.666666666605760465276263943134982554676e-0001,
58 	pp2		=  8.333261209690963126718376566146180944442e-0003,
59 	qq1		= -4.999999999977710986407023955908711557870e-0001,
60 	qq2		=  4.166654863857219350645055881018842089580e-0002,
61 	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
62 				-4.999999999999931701464060878888294524481e-0001 },
63 	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
64 				 4.166666666394861917535640593963708222319e-0002 },
65 	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
66 				-1.388888552656142867832756687736851681462e-0003 },
67 	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
68 				 2.478519423681460796618128289454530524759e-0005 };
69 
70 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
71 
72 /* Don't __ the following; acomp will handle it */
73 extern double fabs(double);
74 extern void __vlibm_vsin_big(int, double *, int, double *, int, int);
75 
76 void
77 __vsin(int n, double * restrict x, int stridex, double * restrict y,
78 	int stridey)
79 {
80 	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
81 	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
82 	double		x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
83 	unsigned	hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
84 	int		i, biguns, nsave, sxsave, sysave;
85 	volatile int	v __GNU_UNUSED;
86 	nsave = n;
87 	xsave = x;
88 	sxsave = stridex;
89 	ysave = y;
90 	sysave = stridey;
91 	biguns = 0;
92 
93 	do
94 	{
95 LOOP0:
96 		xsb0 = HI(x);
97 		hx0 = xsb0 & ~0x80000000;
98 		if (hx0 > 0x3fe921fb)
99 		{
100 			biguns = 1;
101 			goto MEDIUM;
102 		}
103 		if (hx0 < 0x3e400000)
104 		{
105 			v = *x;
106 			*y = *x;
107 			x += stridex;
108 			y += stridey;
109 			i = 0;
110 			if (--n <= 0)
111 				break;
112 			goto LOOP0;
113 		}
114 		x0 = *x;
115 		py0 = y;
116 		x += stridex;
117 		y += stridey;
118 		i = 1;
119 		if (--n <= 0)
120 			break;
121 
122 LOOP1:
123 		xsb1 = HI(x);
124 		hx1 = xsb1 & ~0x80000000;
125 		if (hx1 > 0x3fe921fb)
126 		{
127 			biguns = 2;
128 			goto MEDIUM;
129 		}
130 		if (hx1 < 0x3e400000)
131 		{
132 			v = *x;
133 			*y = *x;
134 			x += stridex;
135 			y += stridey;
136 			i = 1;
137 			if (--n <= 0)
138 				break;
139 			goto LOOP1;
140 		}
141 		x1 = *x;
142 		py1 = y;
143 		x += stridex;
144 		y += stridey;
145 		i = 2;
146 		if (--n <= 0)
147 			break;
148 
149 LOOP2:
150 		xsb2 = HI(x);
151 		hx2 = xsb2 & ~0x80000000;
152 		if (hx2 > 0x3fe921fb)
153 		{
154 			biguns = 3;
155 			goto MEDIUM;
156 		}
157 		if (hx2 < 0x3e400000)
158 		{
159 			v = *x;
160 			*y = *x;
161 			x += stridex;
162 			y += stridey;
163 			i = 2;
164 			if (--n <= 0)
165 				break;
166 			goto LOOP2;
167 		}
168 		x2 = *x;
169 		py2 = y;
170 
171 		i = (hx0 - 0x3fc90000) >> 31;
172 		i |= ((hx1 - 0x3fc90000) >> 30) & 2;
173 		i |= ((hx2 - 0x3fc90000) >> 29) & 4;
174 		switch (i)
175 		{
176 			double		a0, a1, a2, w0, w1, w2;
177 			double		t0, t1, t2, z0, z1, z2;
178 			unsigned	j0, j1, j2;
179 
180 		case 0:
181 			j0 = (xsb0 + 0x4000) & 0xffff8000;
182 			j1 = (xsb1 + 0x4000) & 0xffff8000;
183 			j2 = (xsb2 + 0x4000) & 0xffff8000;
184 			HI(&t0) = j0;
185 			HI(&t1) = j1;
186 			HI(&t2) = j2;
187 			LO(&t0) = 0;
188 			LO(&t1) = 0;
189 			LO(&t2) = 0;
190 			x0 -= t0;
191 			x1 -= t1;
192 			x2 -= t2;
193 			z0 = x0 * x0;
194 			z1 = x1 * x1;
195 			z2 = x2 * x2;
196 			t0 = z0 * (qq1 + z0 * qq2);
197 			t1 = z1 * (qq1 + z1 * qq2);
198 			t2 = z2 * (qq1 + z2 * qq2);
199 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
200 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
201 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
202 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
203 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
204 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
205 			xsb0 = (xsb0 >> 30) & 2;
206 			xsb1 = (xsb1 >> 30) & 2;
207 			xsb2 = (xsb2 >> 30) & 2;
208 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
209 			a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
210 			a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
211 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
212 			t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
213 			t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
214 			*py0 = a0 + t0;
215 			*py1 = a1 + t1;
216 			*py2 = a2 + t2;
217 			break;
218 
219 		case 1:
220 			j1 = (xsb1 + 0x4000) & 0xffff8000;
221 			j2 = (xsb2 + 0x4000) & 0xffff8000;
222 			HI(&t1) = j1;
223 			HI(&t2) = j2;
224 			LO(&t1) = 0;
225 			LO(&t2) = 0;
226 			x1 -= t1;
227 			x2 -= t2;
228 			z0 = x0 * x0;
229 			z1 = x1 * x1;
230 			z2 = x2 * x2;
231 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
232 			t1 = z1 * (qq1 + z1 * qq2);
233 			t2 = z2 * (qq1 + z2 * qq2);
234 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
235 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
236 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
237 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
238 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
239 			xsb1 = (xsb1 >> 30) & 2;
240 			xsb2 = (xsb2 >> 30) & 2;
241 			a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
242 			a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
243 			t0 = x0 + x0 * t0;
244 			t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
245 			t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
246 			*py0 = t0;
247 			*py1 = a1 + t1;
248 			*py2 = a2 + t2;
249 			break;
250 
251 		case 2:
252 			j0 = (xsb0 + 0x4000) & 0xffff8000;
253 			j2 = (xsb2 + 0x4000) & 0xffff8000;
254 			HI(&t0) = j0;
255 			HI(&t2) = j2;
256 			LO(&t0) = 0;
257 			LO(&t2) = 0;
258 			x0 -= t0;
259 			x2 -= t2;
260 			z0 = x0 * x0;
261 			z1 = x1 * x1;
262 			z2 = x2 * x2;
263 			t0 = z0 * (qq1 + z0 * qq2);
264 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
265 			t2 = z2 * (qq1 + z2 * qq2);
266 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
267 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
268 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
269 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
270 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
271 			xsb0 = (xsb0 >> 30) & 2;
272 			xsb2 = (xsb2 >> 30) & 2;
273 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
274 			a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
275 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
276 			t1 = x1 + x1 * t1;
277 			t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
278 			*py0 = a0 + t0;
279 			*py1 = t1;
280 			*py2 = a2 + t2;
281 			break;
282 
283 		case 3:
284 			j2 = (xsb2 + 0x4000) & 0xffff8000;
285 			HI(&t2) = j2;
286 			LO(&t2) = 0;
287 			x2 -= t2;
288 			z0 = x0 * x0;
289 			z1 = x1 * x1;
290 			z2 = x2 * x2;
291 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
292 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
293 			t2 = z2 * (qq1 + z2 * qq2);
294 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
295 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
296 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
297 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298 			xsb2 = (xsb2 >> 30) & 2;
299 			a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
300 			t0 = x0 + x0 * t0;
301 			t1 = x1 + x1 * t1;
302 			t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
303 			*py0 = t0;
304 			*py1 = t1;
305 			*py2 = a2 + t2;
306 			break;
307 
308 		case 4:
309 			j0 = (xsb0 + 0x4000) & 0xffff8000;
310 			j1 = (xsb1 + 0x4000) & 0xffff8000;
311 			HI(&t0) = j0;
312 			HI(&t1) = j1;
313 			LO(&t0) = 0;
314 			LO(&t1) = 0;
315 			x0 -= t0;
316 			x1 -= t1;
317 			z0 = x0 * x0;
318 			z1 = x1 * x1;
319 			z2 = x2 * x2;
320 			t0 = z0 * (qq1 + z0 * qq2);
321 			t1 = z1 * (qq1 + z1 * qq2);
322 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
323 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
324 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
325 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
326 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
327 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
328 			xsb0 = (xsb0 >> 30) & 2;
329 			xsb1 = (xsb1 >> 30) & 2;
330 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
331 			a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
332 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
333 			t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
334 			t2 = x2 + x2 * t2;
335 			*py0 = a0 + t0;
336 			*py1 = a1 + t1;
337 			*py2 = t2;
338 			break;
339 
340 		case 5:
341 			j1 = (xsb1 + 0x4000) & 0xffff8000;
342 			HI(&t1) = j1;
343 			LO(&t1) = 0;
344 			x1 -= t1;
345 			z0 = x0 * x0;
346 			z1 = x1 * x1;
347 			z2 = x2 * x2;
348 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
349 			t1 = z1 * (qq1 + z1 * qq2);
350 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
351 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
352 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
353 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
354 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
355 			xsb1 = (xsb1 >> 30) & 2;
356 			a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
357 			t0 = x0 + x0 * t0;
358 			t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
359 			t2 = x2 + x2 * t2;
360 			*py0 = t0;
361 			*py1 = a1 + t1;
362 			*py2 = t2;
363 			break;
364 
365 		case 6:
366 			j0 = (xsb0 + 0x4000) & 0xffff8000;
367 			HI(&t0) = j0;
368 			LO(&t0) = 0;
369 			x0 -= t0;
370 			z0 = x0 * x0;
371 			z1 = x1 * x1;
372 			z2 = x2 * x2;
373 			t0 = z0 * (qq1 + z0 * qq2);
374 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
375 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
376 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
377 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
378 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
379 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
380 			xsb0 = (xsb0 >> 30) & 2;
381 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
382 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
383 			t1 = x1 + x1 * t1;
384 			t2 = x2 + x2 * t2;
385 			*py0 = a0 + t0;
386 			*py1 = t1;
387 			*py2 = t2;
388 			break;
389 
390 		case 7:
391 			z0 = x0 * x0;
392 			z1 = x1 * x1;
393 			z2 = x2 * x2;
394 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
395 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
396 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
397 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
398 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
399 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
400 			t0 = x0 + x0 * t0;
401 			t1 = x1 + x1 * t1;
402 			t2 = x2 + x2 * t2;
403 			*py0 = t0;
404 			*py1 = t1;
405 			*py2 = t2;
406 			break;
407 		}
408 
409 		x += stridex;
410 		y += stridey;
411 		i = 0;
412 	} while (--n > 0);
413 
414 	if (i > 0)
415 	{
416 		double		a0, a1, w0, w1;
417 		double		t0, t1, z0, z1;
418 		unsigned	j0, j1;
419 
420 		if (i > 1)
421 		{
422 			if (hx1 < 0x3fc90000)
423 			{
424 				z1 = x1 * x1;
425 				t1 = z1 * (poly3[0] + z1 * poly4[0]);
426 				t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
427 				t1 = x1 + x1 * t1;
428 				*py1 = t1;
429 			}
430 			else
431 			{
432 				j1 = (xsb1 + 0x4000) & 0xffff8000;
433 				HI(&t1) = j1;
434 				LO(&t1) = 0;
435 				x1 -= t1;
436 				z1 = x1 * x1;
437 				t1 = z1 * (qq1 + z1 * qq2);
438 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
439 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
440 				xsb1 = (xsb1 >> 30) & 2;
441 				a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
442 				t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
443 				*py1 = a1 + t1;
444 			}
445 		}
446 		if (hx0 < 0x3fc90000)
447 		{
448 			z0 = x0 * x0;
449 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
450 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
451 			t0 = x0 + x0 * t0;
452 			*py0 = t0;
453 		}
454 		else
455 		{
456 			j0 = (xsb0 + 0x4000) & 0xffff8000;
457 			HI(&t0) = j0;
458 			LO(&t0) = 0;
459 			x0 -= t0;
460 			z0 = x0 * x0;
461 			t0 = z0 * (qq1 + z0 * qq2);
462 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
463 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
464 			xsb0 = (xsb0 >> 30) & 2;
465 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
466 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
467 			*py0 = a0 + t0;
468 		}
469 	}
470 
471 	return;
472 
473 	/*
474 	 * MEDIUM RANGE PROCESSING
475 	 * Jump here at first sign of medium range argument.  We are a bit
476 	 * confused due to the jump.. fix up several variables and jump into
477 	 * the nth loop, same as was being processed above.
478 	 */
479 
480 MEDIUM:
481 
482 	x0_or_one[1] = 1.0;
483 	x1_or_one[1] = 1.0;
484 	x2_or_one[1] = 1.0;
485 	x0_or_one[3] = -1.0;
486 	x1_or_one[3] = -1.0;
487 	x2_or_one[3] = -1.0;
488 	y0_or_zero[1] = 0.0;
489 	y1_or_zero[1] = 0.0;
490 	y2_or_zero[1] = 0.0;
491 	y0_or_zero[3] = 0.0;
492 	y1_or_zero[3] = 0.0;
493 	y2_or_zero[3] = 0.0;
494 
495 	if (biguns == 3)
496 	{
497 		biguns = 0;
498 		xsb0 = xsb0 >> 31;
499 		xsb1 = xsb1 >> 31;
500 		goto loop2;
501 	}
502 	else if (biguns == 2)
503 	{
504 		xsb0 = xsb0 >> 31;
505 		biguns = 0;
506 		goto loop1;
507 	}
508 	biguns = 0;
509 
510 	do
511 	{
512 		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
513 		unsigned	hx;
514 		int		n0, n1, n2;
515 
516 loop0:
517 		hx = HI(x);
518 		xsb0 = hx >> 31;
519 		hx &= ~0x80000000;
520 		if (hx < 0x3e400000)
521 		{
522 			v = *x;
523 			*y = *x;
524 			x += stridex;
525 			y += stridey;
526 			i = 0;
527 			if (--n <= 0)
528 				break;
529 			goto loop0;
530 		}
531 		if (hx > 0x413921fb)
532 		{
533 			if (hx >= 0x7ff00000)
534 			{
535 				x0 = *x;
536 				*y = x0 - x0;
537 			}
538 			else
539 				biguns = 1;
540 			x += stridex;
541 			y += stridey;
542 			i = 0;
543 			if (--n <= 0)
544 				break;
545 			goto loop0;
546 		}
547 		x0 = *x;
548 		py0 = y;
549 		x += stridex;
550 		y += stridey;
551 		i = 1;
552 		if (--n <= 0)
553 			break;
554 
555 loop1:
556 		hx = HI(x);
557 		xsb1 = hx >> 31;
558 		hx &= ~0x80000000;
559 		if (hx < 0x3e400000)
560 		{
561 			v = *x;
562 			*y = *x;
563 			x += stridex;
564 			y += stridey;
565 			i = 1;
566 			if (--n <= 0)
567 				break;
568 			goto loop1;
569 		}
570 		if (hx > 0x413921fb)
571 		{
572 			if (hx >= 0x7ff00000)
573 			{
574 				x1 = *x;
575 				*y = x1 - x1;
576 			}
577 			else
578 				biguns = 1;
579 			x += stridex;
580 			y += stridey;
581 			i = 1;
582 			if (--n <= 0)
583 				break;
584 			goto loop1;
585 		}
586 		x1 = *x;
587 		py1 = y;
588 		x += stridex;
589 		y += stridey;
590 		i = 2;
591 		if (--n <= 0)
592 			break;
593 
594 loop2:
595 		hx = HI(x);
596 		xsb2 = hx >> 31;
597 		hx &= ~0x80000000;
598 		if (hx < 0x3e400000)
599 		{
600 			v = *x;
601 			*y = *x;
602 			x += stridex;
603 			y += stridey;
604 			i = 2;
605 			if (--n <= 0)
606 				break;
607 			goto loop2;
608 		}
609 		if (hx > 0x413921fb)
610 		{
611 			if (hx >= 0x7ff00000)
612 			{
613 				x2 = *x;
614 				*y = x2 - x2;
615 			}
616 			else
617 				biguns = 1;
618 			x += stridex;
619 			y += stridey;
620 			i = 2;
621 			if (--n <= 0)
622 				break;
623 			goto loop2;
624 		}
625 		x2 = *x;
626 		py2 = y;
627 
628 		n0 = (int) (x0 * invpio2 + half[xsb0]);
629 		n1 = (int) (x1 * invpio2 + half[xsb1]);
630 		n2 = (int) (x2 * invpio2 + half[xsb2]);
631 		fn0 = (double) n0;
632 		fn1 = (double) n1;
633 		fn2 = (double) n2;
634 		n0 &= 3;
635 		n1 &= 3;
636 		n2 &= 3;
637 		a0 = x0 - fn0 * pio2_1;
638 		a1 = x1 - fn1 * pio2_1;
639 		a2 = x2 - fn2 * pio2_1;
640 		w0 = fn0 * pio2_2;
641 		w1 = fn1 * pio2_2;
642 		w2 = fn2 * pio2_2;
643 		x0 = a0 - w0;
644 		x1 = a1 - w1;
645 		x2 = a2 - w2;
646 		y0 = (a0 - x0) - w0;
647 		y1 = (a1 - x1) - w1;
648 		y2 = (a2 - x2) - w2;
649 		a0 = x0;
650 		a1 = x1;
651 		a2 = x2;
652 		w0 = fn0 * pio2_3 - y0;
653 		w1 = fn1 * pio2_3 - y1;
654 		w2 = fn2 * pio2_3 - y2;
655 		x0 = a0 - w0;
656 		x1 = a1 - w1;
657 		x2 = a2 - w2;
658 		y0 = (a0 - x0) - w0;
659 		y1 = (a1 - x1) - w1;
660 		y2 = (a2 - x2) - w2;
661 		a0 = x0;
662 		a1 = x1;
663 		a2 = x2;
664 		w0 = fn0 * pio2_3t - y0;
665 		w1 = fn1 * pio2_3t - y1;
666 		w2 = fn2 * pio2_3t - y2;
667 		x0 = a0 - w0;
668 		x1 = a1 - w1;
669 		x2 = a2 - w2;
670 		y0 = (a0 - x0) - w0;
671 		y1 = (a1 - x1) - w1;
672 		y2 = (a2 - x2) - w2;
673 		xsb0 = HI(&x0);
674 		i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
675 		xsb1 = HI(&x1);
676 		i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
677 		xsb2 = HI(&x2);
678 		i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
679 		switch (i)
680 		{
681 			double		t0, t1, t2, z0, z1, z2;
682 			unsigned	j0, j1, j2;
683 
684 		case 0:
685 			j0 = (xsb0 + 0x4000) & 0xffff8000;
686 			j1 = (xsb1 + 0x4000) & 0xffff8000;
687 			j2 = (xsb2 + 0x4000) & 0xffff8000;
688 			HI(&t0) = j0;
689 			HI(&t1) = j1;
690 			HI(&t2) = j2;
691 			LO(&t0) = 0;
692 			LO(&t1) = 0;
693 			LO(&t2) = 0;
694 			x0 = (x0 - t0) + y0;
695 			x1 = (x1 - t1) + y1;
696 			x2 = (x2 - t2) + y2;
697 			z0 = x0 * x0;
698 			z1 = x1 * x1;
699 			z2 = x2 * x2;
700 			t0 = z0 * (qq1 + z0 * qq2);
701 			t1 = z1 * (qq1 + z1 * qq2);
702 			t2 = z2 * (qq1 + z2 * qq2);
703 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
704 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
705 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
706 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
707 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
708 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
709 			xsb0 = (xsb0 >> 30) & 2;
710 			xsb1 = (xsb1 >> 30) & 2;
711 			xsb2 = (xsb2 >> 30) & 2;
712 			n0 ^= (xsb0 & ~(n0 << 1));
713 			n1 ^= (xsb1 & ~(n1 << 1));
714 			n2 ^= (xsb2 & ~(n2 << 1));
715 			xsb0 |= 1;
716 			xsb1 |= 1;
717 			xsb2 |= 1;
718 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
719 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
720 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
721 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
722 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
723 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
724 			*py0 = ( a0 + t0 );
725 			*py1 = ( a1 + t1 );
726 			*py2 = ( a2 + t2 );
727 			break;
728 
729 		case 1:
730 			j0 = n0 & 1;
731 			j1 = (xsb1 + 0x4000) & 0xffff8000;
732 			j2 = (xsb2 + 0x4000) & 0xffff8000;
733 			HI(&t1) = j1;
734 			HI(&t2) = j2;
735 			LO(&t1) = 0;
736 			LO(&t2) = 0;
737 			x0_or_one[0] = x0;
738 			x0_or_one[2] = -x0;
739 			y0_or_zero[0] = y0;
740 			y0_or_zero[2] = -y0;
741 			x1 = (x1 - t1) + y1;
742 			x2 = (x2 - t2) + y2;
743 			z0 = x0 * x0;
744 			z1 = x1 * x1;
745 			z2 = x2 * x2;
746 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
747 			t1 = z1 * (qq1 + z1 * qq2);
748 			t2 = z2 * (qq1 + z2 * qq2);
749 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
750 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
751 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
752 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
753 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
754 			xsb1 = (xsb1 >> 30) & 2;
755 			xsb2 = (xsb2 >> 30) & 2;
756 			n1 ^= (xsb1 & ~(n1 << 1));
757 			n2 ^= (xsb2 & ~(n2 << 1));
758 			xsb1 |= 1;
759 			xsb2 |= 1;
760 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
761 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
762 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
763 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
764 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
765 			*py0 = t0;
766 			*py1 = ( a1 + t1 );
767 			*py2 = ( a2 + t2 );
768 			break;
769 
770 		case 2:
771 			j0 = (xsb0 + 0x4000) & 0xffff8000;
772 			j1 = n1 & 1;
773 			j2 = (xsb2 + 0x4000) & 0xffff8000;
774 			HI(&t0) = j0;
775 			HI(&t2) = j2;
776 			LO(&t0) = 0;
777 			LO(&t2) = 0;
778 			x1_or_one[0] = x1;
779 			x1_or_one[2] = -x1;
780 			x0 = (x0 - t0) + y0;
781 			y1_or_zero[0] = y1;
782 			y1_or_zero[2] = -y1;
783 			x2 = (x2 - t2) + y2;
784 			z0 = x0 * x0;
785 			z1 = x1 * x1;
786 			z2 = x2 * x2;
787 			t0 = z0 * (qq1 + z0 * qq2);
788 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
789 			t2 = z2 * (qq1 + z2 * qq2);
790 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
791 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
792 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
793 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
794 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
795 			xsb0 = (xsb0 >> 30) & 2;
796 			xsb2 = (xsb2 >> 30) & 2;
797 			n0 ^= (xsb0 & ~(n0 << 1));
798 			n2 ^= (xsb2 & ~(n2 << 1));
799 			xsb0 |= 1;
800 			xsb2 |= 1;
801 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
802 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
803 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
804 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
805 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
806 			*py0 = ( a0 + t0 );
807 			*py1 = t1;
808 			*py2 = ( a2 + t2 );
809 			break;
810 
811 		case 3:
812 			j0 = n0 & 1;
813 			j1 = n1 & 1;
814 			j2 = (xsb2 + 0x4000) & 0xffff8000;
815 			HI(&t2) = j2;
816 			LO(&t2) = 0;
817 			x0_or_one[0] = x0;
818 			x0_or_one[2] = -x0;
819 			x1_or_one[0] = x1;
820 			x1_or_one[2] = -x1;
821 			y0_or_zero[0] = y0;
822 			y0_or_zero[2] = -y0;
823 			y1_or_zero[0] = y1;
824 			y1_or_zero[2] = -y1;
825 			x2 = (x2 - t2) + y2;
826 			z0 = x0 * x0;
827 			z1 = x1 * x1;
828 			z2 = x2 * x2;
829 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
830 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
831 			t2 = z2 * (qq1 + z2 * qq2);
832 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
833 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
834 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
835 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
836 			xsb2 = (xsb2 >> 30) & 2;
837 			n2 ^= (xsb2 & ~(n2 << 1));
838 			xsb2 |= 1;
839 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
840 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
841 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
842 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
843 			*py0 = t0;
844 			*py1 = t1;
845 			*py2 = ( a2 + t2 );
846 			break;
847 
848 		case 4:
849 			j0 = (xsb0 + 0x4000) & 0xffff8000;
850 			j1 = (xsb1 + 0x4000) & 0xffff8000;
851 			j2 = n2 & 1;
852 			HI(&t0) = j0;
853 			HI(&t1) = j1;
854 			LO(&t0) = 0;
855 			LO(&t1) = 0;
856 			x2_or_one[0] = x2;
857 			x2_or_one[2] = -x2;
858 			x0 = (x0 - t0) + y0;
859 			x1 = (x1 - t1) + y1;
860 			y2_or_zero[0] = y2;
861 			y2_or_zero[2] = -y2;
862 			z0 = x0 * x0;
863 			z1 = x1 * x1;
864 			z2 = x2 * x2;
865 			t0 = z0 * (qq1 + z0 * qq2);
866 			t1 = z1 * (qq1 + z1 * qq2);
867 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
868 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
869 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
870 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
871 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
872 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
873 			xsb0 = (xsb0 >> 30) & 2;
874 			xsb1 = (xsb1 >> 30) & 2;
875 			n0 ^= (xsb0 & ~(n0 << 1));
876 			n1 ^= (xsb1 & ~(n1 << 1));
877 			xsb0 |= 1;
878 			xsb1 |= 1;
879 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
880 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
881 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
882 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
883 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
884 			*py0 = ( a0 + t0 );
885 			*py1 = ( a1 + t1 );
886 			*py2 = t2;
887 			break;
888 
889 		case 5:
890 			j0 = n0 & 1;
891 			j1 = (xsb1 + 0x4000) & 0xffff8000;
892 			j2 = n2 & 1;
893 			HI(&t1) = j1;
894 			LO(&t1) = 0;
895 			x0_or_one[0] = x0;
896 			x0_or_one[2] = -x0;
897 			x2_or_one[0] = x2;
898 			x2_or_one[2] = -x2;
899 			y0_or_zero[0] = y0;
900 			y0_or_zero[2] = -y0;
901 			x1 = (x1 - t1) + y1;
902 			y2_or_zero[0] = y2;
903 			y2_or_zero[2] = -y2;
904 			z0 = x0 * x0;
905 			z1 = x1 * x1;
906 			z2 = x2 * x2;
907 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
908 			t1 = z1 * (qq1 + z1 * qq2);
909 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
910 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
911 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
912 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
913 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
914 			xsb1 = (xsb1 >> 30) & 2;
915 			n1 ^= (xsb1 & ~(n1 << 1));
916 			xsb1 |= 1;
917 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
918 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
919 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
920 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
921 			*py0 = t0;
922 			*py1 = ( a1 + t1 );
923 			*py2 = t2;
924 			break;
925 
926 		case 6:
927 			j0 = (xsb0 + 0x4000) & 0xffff8000;
928 			j1 = n1 & 1;
929 			j2 = n2 & 1;
930 			HI(&t0) = j0;
931 			LO(&t0) = 0;
932 			x1_or_one[0] = x1;
933 			x1_or_one[2] = -x1;
934 			x2_or_one[0] = x2;
935 			x2_or_one[2] = -x2;
936 			x0 = (x0 - t0) + y0;
937 			y1_or_zero[0] = y1;
938 			y1_or_zero[2] = -y1;
939 			y2_or_zero[0] = y2;
940 			y2_or_zero[2] = -y2;
941 			z0 = x0 * x0;
942 			z1 = x1 * x1;
943 			z2 = x2 * x2;
944 			t0 = z0 * (qq1 + z0 * qq2);
945 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
946 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
947 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
948 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
949 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
950 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
951 			xsb0 = (xsb0 >> 30) & 2;
952 			n0 ^= (xsb0 & ~(n0 << 1));
953 			xsb0 |= 1;
954 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
955 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
956 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
957 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
958 			*py0 = ( a0 + t0 );
959 			*py1 = t1;
960 			*py2 = t2;
961 			break;
962 
963 		case 7:
964 			j0 = n0 & 1;
965 			j1 = n1 & 1;
966 			j2 = n2 & 1;
967 			x0_or_one[0] = x0;
968 			x0_or_one[2] = -x0;
969 			x1_or_one[0] = x1;
970 			x1_or_one[2] = -x1;
971 			x2_or_one[0] = x2;
972 			x2_or_one[2] = -x2;
973 			y0_or_zero[0] = y0;
974 			y0_or_zero[2] = -y0;
975 			y1_or_zero[0] = y1;
976 			y1_or_zero[2] = -y1;
977 			y2_or_zero[0] = y2;
978 			y2_or_zero[2] = -y2;
979 			z0 = x0 * x0;
980 			z1 = x1 * x1;
981 			z2 = x2 * x2;
982 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
983 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
984 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
985 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
986 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
987 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
988 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
989 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
990 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
991 			*py0 = t0;
992 			*py1 = t1;
993 			*py2 = t2;
994 			break;
995 		}
996 
997 		x += stridex;
998 		y += stridey;
999 		i = 0;
1000 	} while (--n > 0);
1001 
1002 	if (i > 0)
1003 	{
1004 		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
1005 		double		t0, t1, z0, z1;
1006 		unsigned	j0, j1;
1007 		int		n0, n1;
1008 
1009 		if (i > 1)
1010 		{
1011 			n1 = (int) (x1 * invpio2 + half[xsb1]);
1012 			fn1 = (double) n1;
1013 			n1 &= 3;
1014 			a1 = x1 - fn1 * pio2_1;
1015 			w1 = fn1 * pio2_2;
1016 			x1 = a1 - w1;
1017 			y1 = (a1 - x1) - w1;
1018 			a1 = x1;
1019 			w1 = fn1 * pio2_3 - y1;
1020 			x1 = a1 - w1;
1021 			y1 = (a1 - x1) - w1;
1022 			a1 = x1;
1023 			w1 = fn1 * pio2_3t - y1;
1024 			x1 = a1 - w1;
1025 			y1 = (a1 - x1) - w1;
1026 			xsb1 = HI(&x1);
1027 			if ((xsb1 & ~0x80000000) < thresh[n1&1])
1028 			{
1029 				j1 = n1 & 1;
1030 				x1_or_one[0] = x1;
1031 				x1_or_one[2] = -x1;
1032 				y1_or_zero[0] = y1;
1033 				y1_or_zero[2] = -y1;
1034 				z1 = x1 * x1;
1035 				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1036 				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1037 				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1038 				*py1 = t1;
1039 			}
1040 			else
1041 			{
1042 				j1 = (xsb1 + 0x4000) & 0xffff8000;
1043 				HI(&t1) = j1;
1044 				LO(&t1) = 0;
1045 				x1 = (x1 - t1) + y1;
1046 				z1 = x1 * x1;
1047 				t1 = z1 * (qq1 + z1 * qq2);
1048 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1049 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1050 				xsb1 = (xsb1 >> 30) & 2;
1051 				n1 ^= (xsb1 & ~(n1 << 1));
1052 				xsb1 |= 1;
1053 				a1 = __vlibm_TBL_sincos_hi[j1+n1];
1054 				t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1055 				*py1 = ( a1 + t1 );
1056 			}
1057 		}
1058 		n0 = (int) (x0 * invpio2 + half[xsb0]);
1059 		fn0 = (double) n0;
1060 		n0 &= 3;
1061 		a0 = x0 - fn0 * pio2_1;
1062 		w0 = fn0 * pio2_2;
1063 		x0 = a0 - w0;
1064 		y0 = (a0 - x0) - w0;
1065 		a0 = x0;
1066 		w0 = fn0 * pio2_3 - y0;
1067 		x0 = a0 - w0;
1068 		y0 = (a0 - x0) - w0;
1069 		a0 = x0;
1070 		w0 = fn0 * pio2_3t - y0;
1071 		x0 = a0 - w0;
1072 		y0 = (a0 - x0) - w0;
1073 		xsb0 = HI(&x0);
1074 		if ((xsb0 & ~0x80000000) < thresh[n0&1])
1075 		{
1076 			j0 = n0 & 1;
1077 			x0_or_one[0] = x0;
1078 			x0_or_one[2] = -x0;
1079 			y0_or_zero[0] = y0;
1080 			y0_or_zero[2] = -y0;
1081 			z0 = x0 * x0;
1082 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1083 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1084 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1085 			*py0 = t0;
1086 		}
1087 		else
1088 		{
1089 			j0 = (xsb0 + 0x4000) & 0xffff8000;
1090 			HI(&t0) = j0;
1091 			LO(&t0) = 0;
1092 			x0 = (x0 - t0) + y0;
1093 			z0 = x0 * x0;
1094 			t0 = z0 * (qq1 + z0 * qq2);
1095 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1096 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1097 			xsb0 = (xsb0 >> 30) & 2;
1098 			n0 ^= (xsb0 & ~(n0 << 1));
1099 			xsb0 |= 1;
1100 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
1101 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1102 			*py0 = ( a0 + t0 );
1103 		}
1104 	}
1105 
1106 	if (biguns)
1107 		__vlibm_vsin_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
1108 }
1109