xref: /illumos-gate/usr/src/lib/libmvec/common/__vsin.c (revision 8119dad84d6416f13557b0ba8e2aaf9064cbcfd3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32 
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x)	*(1+(int*)x)
35 #define LO(x)	*(unsigned*)x
36 #else
37 #define HI(x)	*(int*)x
38 #define LO(x)	*(1+(unsigned*)x)
39 #endif
40 
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46 
47 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
48 
49 static const double
50 	half[2]	= { 0.5, -0.5 },
51 	one		= 1.0,
52 	invpio2 = 0.636619772367581343075535,
53 	pio2_1  = 1.570796326734125614166,
54 	pio2_2  = 6.077100506303965976596e-11,
55 	pio2_3  = 2.022266248711166455796e-21,
56 	pio2_3t = 8.478427660368899643959e-32,
57 	pp1		= -1.666666666605760465276263943134982554676e-0001,
58 	pp2		=  8.333261209690963126718376566146180944442e-0003,
59 	qq1		= -4.999999999977710986407023955908711557870e-0001,
60 	qq2		=  4.166654863857219350645055881018842089580e-0002,
61 	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
62 				-4.999999999999931701464060878888294524481e-0001 },
63 	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
64 				 4.166666666394861917535640593963708222319e-0002 },
65 	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
66 				-1.388888552656142867832756687736851681462e-0003 },
67 	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
68 				 2.478519423681460796618128289454530524759e-0005 };
69 
70 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
71 
72 /* Don't __ the following; acomp will handle it */
73 extern double fabs(double);
74 extern void __vlibm_vsin_big(int, double *, int, double *, int, int);
75 
76 void
77 __vsin(int n, double * restrict x, int stridex, double * restrict y,
78 	int stridey)
79 {
80 	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
81 	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
82 	double		x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
83 	unsigned	hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
84 	int		i, biguns, nsave, sxsave, sysave;
85 	volatile int	v __unused;
86 	nsave = n;
87 	xsave = x;
88 	sxsave = stridex;
89 	ysave = y;
90 	sysave = stridey;
91 	biguns = 0;
92 
93 	x0 = *x;	/* error: 'x0' may be used uninitialized */
94 	do
95 	{
96 LOOP0:
97 		xsb0 = HI(x);
98 		hx0 = xsb0 & ~0x80000000;
99 		if (hx0 > 0x3fe921fb)
100 		{
101 			biguns = 1;
102 			goto MEDIUM;
103 		}
104 		if (hx0 < 0x3e400000)
105 		{
106 			v = *x;
107 			*y = *x;
108 			x += stridex;
109 			y += stridey;
110 			i = 0;
111 			if (--n <= 0)
112 				break;
113 			goto LOOP0;
114 		}
115 		x0 = *x;
116 		py0 = y;
117 		x += stridex;
118 		y += stridey;
119 		i = 1;
120 		if (--n <= 0)
121 			break;
122 
123 LOOP1:
124 		xsb1 = HI(x);
125 		hx1 = xsb1 & ~0x80000000;
126 		if (hx1 > 0x3fe921fb)
127 		{
128 			biguns = 2;
129 			goto MEDIUM;
130 		}
131 		if (hx1 < 0x3e400000)
132 		{
133 			v = *x;
134 			*y = *x;
135 			x += stridex;
136 			y += stridey;
137 			i = 1;
138 			if (--n <= 0)
139 				break;
140 			goto LOOP1;
141 		}
142 		x1 = *x;
143 		py1 = y;
144 		x += stridex;
145 		y += stridey;
146 		i = 2;
147 		if (--n <= 0)
148 			break;
149 
150 LOOP2:
151 		xsb2 = HI(x);
152 		hx2 = xsb2 & ~0x80000000;
153 		if (hx2 > 0x3fe921fb)
154 		{
155 			biguns = 3;
156 			goto MEDIUM;
157 		}
158 		if (hx2 < 0x3e400000)
159 		{
160 			v = *x;
161 			*y = *x;
162 			x += stridex;
163 			y += stridey;
164 			i = 2;
165 			if (--n <= 0)
166 				break;
167 			goto LOOP2;
168 		}
169 		x2 = *x;
170 		py2 = y;
171 
172 		i = (hx0 - 0x3fc90000) >> 31;
173 		i |= ((hx1 - 0x3fc90000) >> 30) & 2;
174 		i |= ((hx2 - 0x3fc90000) >> 29) & 4;
175 		switch (i)
176 		{
177 			double		a0, a1, a2, w0, w1, w2;
178 			double		t0, t1, t2, z0, z1, z2;
179 			unsigned	j0, j1, j2;
180 
181 		case 0:
182 			j0 = (xsb0 + 0x4000) & 0xffff8000;
183 			j1 = (xsb1 + 0x4000) & 0xffff8000;
184 			j2 = (xsb2 + 0x4000) & 0xffff8000;
185 			HI(&t0) = j0;
186 			HI(&t1) = j1;
187 			HI(&t2) = j2;
188 			LO(&t0) = 0;
189 			LO(&t1) = 0;
190 			LO(&t2) = 0;
191 			x0 -= t0;
192 			x1 -= t1;
193 			x2 -= t2;
194 			z0 = x0 * x0;
195 			z1 = x1 * x1;
196 			z2 = x2 * x2;
197 			t0 = z0 * (qq1 + z0 * qq2);
198 			t1 = z1 * (qq1 + z1 * qq2);
199 			t2 = z2 * (qq1 + z2 * qq2);
200 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
201 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
202 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
203 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
204 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
205 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
206 			xsb0 = (xsb0 >> 30) & 2;
207 			xsb1 = (xsb1 >> 30) & 2;
208 			xsb2 = (xsb2 >> 30) & 2;
209 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
210 			a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
211 			a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
212 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
213 			t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
214 			t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
215 			*py0 = a0 + t0;
216 			*py1 = a1 + t1;
217 			*py2 = a2 + t2;
218 			break;
219 
220 		case 1:
221 			j1 = (xsb1 + 0x4000) & 0xffff8000;
222 			j2 = (xsb2 + 0x4000) & 0xffff8000;
223 			HI(&t1) = j1;
224 			HI(&t2) = j2;
225 			LO(&t1) = 0;
226 			LO(&t2) = 0;
227 			x1 -= t1;
228 			x2 -= t2;
229 			z0 = x0 * x0;
230 			z1 = x1 * x1;
231 			z2 = x2 * x2;
232 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
233 			t1 = z1 * (qq1 + z1 * qq2);
234 			t2 = z2 * (qq1 + z2 * qq2);
235 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
236 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
237 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
238 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
239 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
240 			xsb1 = (xsb1 >> 30) & 2;
241 			xsb2 = (xsb2 >> 30) & 2;
242 			a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
243 			a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
244 			t0 = x0 + x0 * t0;
245 			t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
246 			t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
247 			*py0 = t0;
248 			*py1 = a1 + t1;
249 			*py2 = a2 + t2;
250 			break;
251 
252 		case 2:
253 			j0 = (xsb0 + 0x4000) & 0xffff8000;
254 			j2 = (xsb2 + 0x4000) & 0xffff8000;
255 			HI(&t0) = j0;
256 			HI(&t2) = j2;
257 			LO(&t0) = 0;
258 			LO(&t2) = 0;
259 			x0 -= t0;
260 			x2 -= t2;
261 			z0 = x0 * x0;
262 			z1 = x1 * x1;
263 			z2 = x2 * x2;
264 			t0 = z0 * (qq1 + z0 * qq2);
265 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
266 			t2 = z2 * (qq1 + z2 * qq2);
267 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
268 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
269 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
270 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
271 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
272 			xsb0 = (xsb0 >> 30) & 2;
273 			xsb2 = (xsb2 >> 30) & 2;
274 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
275 			a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
276 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
277 			t1 = x1 + x1 * t1;
278 			t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
279 			*py0 = a0 + t0;
280 			*py1 = t1;
281 			*py2 = a2 + t2;
282 			break;
283 
284 		case 3:
285 			j2 = (xsb2 + 0x4000) & 0xffff8000;
286 			HI(&t2) = j2;
287 			LO(&t2) = 0;
288 			x2 -= t2;
289 			z0 = x0 * x0;
290 			z1 = x1 * x1;
291 			z2 = x2 * x2;
292 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
293 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
294 			t2 = z2 * (qq1 + z2 * qq2);
295 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
296 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
297 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
298 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299 			xsb2 = (xsb2 >> 30) & 2;
300 			a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
301 			t0 = x0 + x0 * t0;
302 			t1 = x1 + x1 * t1;
303 			t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
304 			*py0 = t0;
305 			*py1 = t1;
306 			*py2 = a2 + t2;
307 			break;
308 
309 		case 4:
310 			j0 = (xsb0 + 0x4000) & 0xffff8000;
311 			j1 = (xsb1 + 0x4000) & 0xffff8000;
312 			HI(&t0) = j0;
313 			HI(&t1) = j1;
314 			LO(&t0) = 0;
315 			LO(&t1) = 0;
316 			x0 -= t0;
317 			x1 -= t1;
318 			z0 = x0 * x0;
319 			z1 = x1 * x1;
320 			z2 = x2 * x2;
321 			t0 = z0 * (qq1 + z0 * qq2);
322 			t1 = z1 * (qq1 + z1 * qq2);
323 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
324 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
325 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
326 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
327 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
328 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
329 			xsb0 = (xsb0 >> 30) & 2;
330 			xsb1 = (xsb1 >> 30) & 2;
331 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
332 			a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
333 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
334 			t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
335 			t2 = x2 + x2 * t2;
336 			*py0 = a0 + t0;
337 			*py1 = a1 + t1;
338 			*py2 = t2;
339 			break;
340 
341 		case 5:
342 			j1 = (xsb1 + 0x4000) & 0xffff8000;
343 			HI(&t1) = j1;
344 			LO(&t1) = 0;
345 			x1 -= t1;
346 			z0 = x0 * x0;
347 			z1 = x1 * x1;
348 			z2 = x2 * x2;
349 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
350 			t1 = z1 * (qq1 + z1 * qq2);
351 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
352 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
353 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
354 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
355 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
356 			xsb1 = (xsb1 >> 30) & 2;
357 			a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
358 			t0 = x0 + x0 * t0;
359 			t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
360 			t2 = x2 + x2 * t2;
361 			*py0 = t0;
362 			*py1 = a1 + t1;
363 			*py2 = t2;
364 			break;
365 
366 		case 6:
367 			j0 = (xsb0 + 0x4000) & 0xffff8000;
368 			HI(&t0) = j0;
369 			LO(&t0) = 0;
370 			x0 -= t0;
371 			z0 = x0 * x0;
372 			z1 = x1 * x1;
373 			z2 = x2 * x2;
374 			t0 = z0 * (qq1 + z0 * qq2);
375 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
376 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
377 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
378 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
379 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
380 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
381 			xsb0 = (xsb0 >> 30) & 2;
382 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
383 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
384 			t1 = x1 + x1 * t1;
385 			t2 = x2 + x2 * t2;
386 			*py0 = a0 + t0;
387 			*py1 = t1;
388 			*py2 = t2;
389 			break;
390 
391 		case 7:
392 			z0 = x0 * x0;
393 			z1 = x1 * x1;
394 			z2 = x2 * x2;
395 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
396 			t1 = z1 * (poly3[0] + z1 * poly4[0]);
397 			t2 = z2 * (poly3[0] + z2 * poly4[0]);
398 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
399 			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
400 			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
401 			t0 = x0 + x0 * t0;
402 			t1 = x1 + x1 * t1;
403 			t2 = x2 + x2 * t2;
404 			*py0 = t0;
405 			*py1 = t1;
406 			*py2 = t2;
407 			break;
408 		}
409 
410 		x += stridex;
411 		y += stridey;
412 		i = 0;
413 	} while (--n > 0);
414 
415 	if (i > 0)
416 	{
417 		double		a0, a1, w0, w1;
418 		double		t0, t1, z0, z1;
419 		unsigned	j0, j1;
420 
421 		if (i > 1)
422 		{
423 			if (hx1 < 0x3fc90000)
424 			{
425 				z1 = x1 * x1;
426 				t1 = z1 * (poly3[0] + z1 * poly4[0]);
427 				t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
428 				t1 = x1 + x1 * t1;
429 				*py1 = t1;
430 			}
431 			else
432 			{
433 				j1 = (xsb1 + 0x4000) & 0xffff8000;
434 				HI(&t1) = j1;
435 				LO(&t1) = 0;
436 				x1 -= t1;
437 				z1 = x1 * x1;
438 				t1 = z1 * (qq1 + z1 * qq2);
439 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
440 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
441 				xsb1 = (xsb1 >> 30) & 2;
442 				a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
443 				t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
444 				*py1 = a1 + t1;
445 			}
446 		}
447 		if (hx0 < 0x3fc90000)
448 		{
449 			z0 = x0 * x0;
450 			t0 = z0 * (poly3[0] + z0 * poly4[0]);
451 			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
452 			t0 = x0 + x0 * t0;
453 			*py0 = t0;
454 		}
455 		else
456 		{
457 			j0 = (xsb0 + 0x4000) & 0xffff8000;
458 			HI(&t0) = j0;
459 			LO(&t0) = 0;
460 			x0 -= t0;
461 			z0 = x0 * x0;
462 			t0 = z0 * (qq1 + z0 * qq2);
463 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
464 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
465 			xsb0 = (xsb0 >> 30) & 2;
466 			a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
467 			t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
468 			*py0 = a0 + t0;
469 		}
470 	}
471 
472 	return;
473 
474 	/*
475 	 * MEDIUM RANGE PROCESSING
476 	 * Jump here at first sign of medium range argument.  We are a bit
477 	 * confused due to the jump.. fix up several variables and jump into
478 	 * the nth loop, same as was being processed above.
479 	 */
480 
481 MEDIUM:
482 
483 	x0_or_one[1] = 1.0;
484 	x1_or_one[1] = 1.0;
485 	x2_or_one[1] = 1.0;
486 	x0_or_one[3] = -1.0;
487 	x1_or_one[3] = -1.0;
488 	x2_or_one[3] = -1.0;
489 	y0_or_zero[1] = 0.0;
490 	y1_or_zero[1] = 0.0;
491 	y2_or_zero[1] = 0.0;
492 	y0_or_zero[3] = 0.0;
493 	y1_or_zero[3] = 0.0;
494 	y2_or_zero[3] = 0.0;
495 
496 	if (biguns == 3)
497 	{
498 		biguns = 0;
499 		xsb0 = xsb0 >> 31;
500 		xsb1 = xsb1 >> 31;
501 		goto loop2;
502 	}
503 	else if (biguns == 2)
504 	{
505 		xsb0 = xsb0 >> 31;
506 		biguns = 0;
507 		goto loop1;
508 	}
509 	biguns = 0;
510 
511 	do
512 	{
513 		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
514 		unsigned	hx;
515 		int		n0, n1, n2;
516 
517 loop0:
518 		hx = HI(x);
519 		xsb0 = hx >> 31;
520 		hx &= ~0x80000000;
521 		if (hx < 0x3e400000)
522 		{
523 			v = *x;
524 			*y = *x;
525 			x += stridex;
526 			y += stridey;
527 			i = 0;
528 			if (--n <= 0)
529 				break;
530 			goto loop0;
531 		}
532 		if (hx > 0x413921fb)
533 		{
534 			if (hx >= 0x7ff00000)
535 			{
536 				x0 = *x;
537 				*y = x0 - x0;
538 			}
539 			else
540 				biguns = 1;
541 			x += stridex;
542 			y += stridey;
543 			i = 0;
544 			if (--n <= 0)
545 				break;
546 			goto loop0;
547 		}
548 		x0 = *x;
549 		py0 = y;
550 		x += stridex;
551 		y += stridey;
552 		i = 1;
553 		if (--n <= 0)
554 			break;
555 
556 loop1:
557 		hx = HI(x);
558 		xsb1 = hx >> 31;
559 		hx &= ~0x80000000;
560 		if (hx < 0x3e400000)
561 		{
562 			v = *x;
563 			*y = *x;
564 			x += stridex;
565 			y += stridey;
566 			i = 1;
567 			if (--n <= 0)
568 				break;
569 			goto loop1;
570 		}
571 		if (hx > 0x413921fb)
572 		{
573 			if (hx >= 0x7ff00000)
574 			{
575 				x1 = *x;
576 				*y = x1 - x1;
577 			}
578 			else
579 				biguns = 1;
580 			x += stridex;
581 			y += stridey;
582 			i = 1;
583 			if (--n <= 0)
584 				break;
585 			goto loop1;
586 		}
587 		x1 = *x;
588 		py1 = y;
589 		x += stridex;
590 		y += stridey;
591 		i = 2;
592 		if (--n <= 0)
593 			break;
594 
595 loop2:
596 		hx = HI(x);
597 		xsb2 = hx >> 31;
598 		hx &= ~0x80000000;
599 		if (hx < 0x3e400000)
600 		{
601 			v = *x;
602 			*y = *x;
603 			x += stridex;
604 			y += stridey;
605 			i = 2;
606 			if (--n <= 0)
607 				break;
608 			goto loop2;
609 		}
610 		if (hx > 0x413921fb)
611 		{
612 			if (hx >= 0x7ff00000)
613 			{
614 				x2 = *x;
615 				*y = x2 - x2;
616 			}
617 			else
618 				biguns = 1;
619 			x += stridex;
620 			y += stridey;
621 			i = 2;
622 			if (--n <= 0)
623 				break;
624 			goto loop2;
625 		}
626 		x2 = *x;
627 		py2 = y;
628 
629 		n0 = (int) (x0 * invpio2 + half[xsb0]);
630 		n1 = (int) (x1 * invpio2 + half[xsb1]);
631 		n2 = (int) (x2 * invpio2 + half[xsb2]);
632 		fn0 = (double) n0;
633 		fn1 = (double) n1;
634 		fn2 = (double) n2;
635 		n0 &= 3;
636 		n1 &= 3;
637 		n2 &= 3;
638 		a0 = x0 - fn0 * pio2_1;
639 		a1 = x1 - fn1 * pio2_1;
640 		a2 = x2 - fn2 * pio2_1;
641 		w0 = fn0 * pio2_2;
642 		w1 = fn1 * pio2_2;
643 		w2 = fn2 * pio2_2;
644 		x0 = a0 - w0;
645 		x1 = a1 - w1;
646 		x2 = a2 - w2;
647 		y0 = (a0 - x0) - w0;
648 		y1 = (a1 - x1) - w1;
649 		y2 = (a2 - x2) - w2;
650 		a0 = x0;
651 		a1 = x1;
652 		a2 = x2;
653 		w0 = fn0 * pio2_3 - y0;
654 		w1 = fn1 * pio2_3 - y1;
655 		w2 = fn2 * pio2_3 - y2;
656 		x0 = a0 - w0;
657 		x1 = a1 - w1;
658 		x2 = a2 - w2;
659 		y0 = (a0 - x0) - w0;
660 		y1 = (a1 - x1) - w1;
661 		y2 = (a2 - x2) - w2;
662 		a0 = x0;
663 		a1 = x1;
664 		a2 = x2;
665 		w0 = fn0 * pio2_3t - y0;
666 		w1 = fn1 * pio2_3t - y1;
667 		w2 = fn2 * pio2_3t - y2;
668 		x0 = a0 - w0;
669 		x1 = a1 - w1;
670 		x2 = a2 - w2;
671 		y0 = (a0 - x0) - w0;
672 		y1 = (a1 - x1) - w1;
673 		y2 = (a2 - x2) - w2;
674 		xsb0 = HI(&x0);
675 		i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
676 		xsb1 = HI(&x1);
677 		i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
678 		xsb2 = HI(&x2);
679 		i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
680 		switch (i)
681 		{
682 			double		t0, t1, t2, z0, z1, z2;
683 			unsigned	j0, j1, j2;
684 
685 		case 0:
686 			j0 = (xsb0 + 0x4000) & 0xffff8000;
687 			j1 = (xsb1 + 0x4000) & 0xffff8000;
688 			j2 = (xsb2 + 0x4000) & 0xffff8000;
689 			HI(&t0) = j0;
690 			HI(&t1) = j1;
691 			HI(&t2) = j2;
692 			LO(&t0) = 0;
693 			LO(&t1) = 0;
694 			LO(&t2) = 0;
695 			x0 = (x0 - t0) + y0;
696 			x1 = (x1 - t1) + y1;
697 			x2 = (x2 - t2) + y2;
698 			z0 = x0 * x0;
699 			z1 = x1 * x1;
700 			z2 = x2 * x2;
701 			t0 = z0 * (qq1 + z0 * qq2);
702 			t1 = z1 * (qq1 + z1 * qq2);
703 			t2 = z2 * (qq1 + z2 * qq2);
704 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
705 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
706 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
707 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
708 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
709 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
710 			xsb0 = (xsb0 >> 30) & 2;
711 			xsb1 = (xsb1 >> 30) & 2;
712 			xsb2 = (xsb2 >> 30) & 2;
713 			n0 ^= (xsb0 & ~(n0 << 1));
714 			n1 ^= (xsb1 & ~(n1 << 1));
715 			n2 ^= (xsb2 & ~(n2 << 1));
716 			xsb0 |= 1;
717 			xsb1 |= 1;
718 			xsb2 |= 1;
719 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
720 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
721 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
722 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
723 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
724 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
725 			*py0 = ( a0 + t0 );
726 			*py1 = ( a1 + t1 );
727 			*py2 = ( a2 + t2 );
728 			break;
729 
730 		case 1:
731 			j0 = n0 & 1;
732 			j1 = (xsb1 + 0x4000) & 0xffff8000;
733 			j2 = (xsb2 + 0x4000) & 0xffff8000;
734 			HI(&t1) = j1;
735 			HI(&t2) = j2;
736 			LO(&t1) = 0;
737 			LO(&t2) = 0;
738 			x0_or_one[0] = x0;
739 			x0_or_one[2] = -x0;
740 			y0_or_zero[0] = y0;
741 			y0_or_zero[2] = -y0;
742 			x1 = (x1 - t1) + y1;
743 			x2 = (x2 - t2) + y2;
744 			z0 = x0 * x0;
745 			z1 = x1 * x1;
746 			z2 = x2 * x2;
747 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
748 			t1 = z1 * (qq1 + z1 * qq2);
749 			t2 = z2 * (qq1 + z2 * qq2);
750 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
751 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
752 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
753 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
754 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
755 			xsb1 = (xsb1 >> 30) & 2;
756 			xsb2 = (xsb2 >> 30) & 2;
757 			n1 ^= (xsb1 & ~(n1 << 1));
758 			n2 ^= (xsb2 & ~(n2 << 1));
759 			xsb1 |= 1;
760 			xsb2 |= 1;
761 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
762 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
763 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
764 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
765 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
766 			*py0 = t0;
767 			*py1 = ( a1 + t1 );
768 			*py2 = ( a2 + t2 );
769 			break;
770 
771 		case 2:
772 			j0 = (xsb0 + 0x4000) & 0xffff8000;
773 			j1 = n1 & 1;
774 			j2 = (xsb2 + 0x4000) & 0xffff8000;
775 			HI(&t0) = j0;
776 			HI(&t2) = j2;
777 			LO(&t0) = 0;
778 			LO(&t2) = 0;
779 			x1_or_one[0] = x1;
780 			x1_or_one[2] = -x1;
781 			x0 = (x0 - t0) + y0;
782 			y1_or_zero[0] = y1;
783 			y1_or_zero[2] = -y1;
784 			x2 = (x2 - t2) + y2;
785 			z0 = x0 * x0;
786 			z1 = x1 * x1;
787 			z2 = x2 * x2;
788 			t0 = z0 * (qq1 + z0 * qq2);
789 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
790 			t2 = z2 * (qq1 + z2 * qq2);
791 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
792 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
793 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
794 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
795 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
796 			xsb0 = (xsb0 >> 30) & 2;
797 			xsb2 = (xsb2 >> 30) & 2;
798 			n0 ^= (xsb0 & ~(n0 << 1));
799 			n2 ^= (xsb2 & ~(n2 << 1));
800 			xsb0 |= 1;
801 			xsb2 |= 1;
802 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
803 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
804 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
805 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
806 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
807 			*py0 = ( a0 + t0 );
808 			*py1 = t1;
809 			*py2 = ( a2 + t2 );
810 			break;
811 
812 		case 3:
813 			j0 = n0 & 1;
814 			j1 = n1 & 1;
815 			j2 = (xsb2 + 0x4000) & 0xffff8000;
816 			HI(&t2) = j2;
817 			LO(&t2) = 0;
818 			x0_or_one[0] = x0;
819 			x0_or_one[2] = -x0;
820 			x1_or_one[0] = x1;
821 			x1_or_one[2] = -x1;
822 			y0_or_zero[0] = y0;
823 			y0_or_zero[2] = -y0;
824 			y1_or_zero[0] = y1;
825 			y1_or_zero[2] = -y1;
826 			x2 = (x2 - t2) + y2;
827 			z0 = x0 * x0;
828 			z1 = x1 * x1;
829 			z2 = x2 * x2;
830 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
831 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
832 			t2 = z2 * (qq1 + z2 * qq2);
833 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
834 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
835 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
836 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
837 			xsb2 = (xsb2 >> 30) & 2;
838 			n2 ^= (xsb2 & ~(n2 << 1));
839 			xsb2 |= 1;
840 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
841 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
842 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
843 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
844 			*py0 = t0;
845 			*py1 = t1;
846 			*py2 = ( a2 + t2 );
847 			break;
848 
849 		case 4:
850 			j0 = (xsb0 + 0x4000) & 0xffff8000;
851 			j1 = (xsb1 + 0x4000) & 0xffff8000;
852 			j2 = n2 & 1;
853 			HI(&t0) = j0;
854 			HI(&t1) = j1;
855 			LO(&t0) = 0;
856 			LO(&t1) = 0;
857 			x2_or_one[0] = x2;
858 			x2_or_one[2] = -x2;
859 			x0 = (x0 - t0) + y0;
860 			x1 = (x1 - t1) + y1;
861 			y2_or_zero[0] = y2;
862 			y2_or_zero[2] = -y2;
863 			z0 = x0 * x0;
864 			z1 = x1 * x1;
865 			z2 = x2 * x2;
866 			t0 = z0 * (qq1 + z0 * qq2);
867 			t1 = z1 * (qq1 + z1 * qq2);
868 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
869 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
870 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
871 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
872 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
873 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
874 			xsb0 = (xsb0 >> 30) & 2;
875 			xsb1 = (xsb1 >> 30) & 2;
876 			n0 ^= (xsb0 & ~(n0 << 1));
877 			n1 ^= (xsb1 & ~(n1 << 1));
878 			xsb0 |= 1;
879 			xsb1 |= 1;
880 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
881 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
882 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
883 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
884 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
885 			*py0 = ( a0 + t0 );
886 			*py1 = ( a1 + t1 );
887 			*py2 = t2;
888 			break;
889 
890 		case 5:
891 			j0 = n0 & 1;
892 			j1 = (xsb1 + 0x4000) & 0xffff8000;
893 			j2 = n2 & 1;
894 			HI(&t1) = j1;
895 			LO(&t1) = 0;
896 			x0_or_one[0] = x0;
897 			x0_or_one[2] = -x0;
898 			x2_or_one[0] = x2;
899 			x2_or_one[2] = -x2;
900 			y0_or_zero[0] = y0;
901 			y0_or_zero[2] = -y0;
902 			x1 = (x1 - t1) + y1;
903 			y2_or_zero[0] = y2;
904 			y2_or_zero[2] = -y2;
905 			z0 = x0 * x0;
906 			z1 = x1 * x1;
907 			z2 = x2 * x2;
908 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
909 			t1 = z1 * (qq1 + z1 * qq2);
910 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
911 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
912 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
913 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
914 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
915 			xsb1 = (xsb1 >> 30) & 2;
916 			n1 ^= (xsb1 & ~(n1 << 1));
917 			xsb1 |= 1;
918 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
919 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
920 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
921 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
922 			*py0 = t0;
923 			*py1 = ( a1 + t1 );
924 			*py2 = t2;
925 			break;
926 
927 		case 6:
928 			j0 = (xsb0 + 0x4000) & 0xffff8000;
929 			j1 = n1 & 1;
930 			j2 = n2 & 1;
931 			HI(&t0) = j0;
932 			LO(&t0) = 0;
933 			x1_or_one[0] = x1;
934 			x1_or_one[2] = -x1;
935 			x2_or_one[0] = x2;
936 			x2_or_one[2] = -x2;
937 			x0 = (x0 - t0) + y0;
938 			y1_or_zero[0] = y1;
939 			y1_or_zero[2] = -y1;
940 			y2_or_zero[0] = y2;
941 			y2_or_zero[2] = -y2;
942 			z0 = x0 * x0;
943 			z1 = x1 * x1;
944 			z2 = x2 * x2;
945 			t0 = z0 * (qq1 + z0 * qq2);
946 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
947 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
948 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
949 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
950 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
951 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
952 			xsb0 = (xsb0 >> 30) & 2;
953 			n0 ^= (xsb0 & ~(n0 << 1));
954 			xsb0 |= 1;
955 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
956 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
957 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
958 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
959 			*py0 = ( a0 + t0 );
960 			*py1 = t1;
961 			*py2 = t2;
962 			break;
963 
964 		case 7:
965 			j0 = n0 & 1;
966 			j1 = n1 & 1;
967 			j2 = n2 & 1;
968 			x0_or_one[0] = x0;
969 			x0_or_one[2] = -x0;
970 			x1_or_one[0] = x1;
971 			x1_or_one[2] = -x1;
972 			x2_or_one[0] = x2;
973 			x2_or_one[2] = -x2;
974 			y0_or_zero[0] = y0;
975 			y0_or_zero[2] = -y0;
976 			y1_or_zero[0] = y1;
977 			y1_or_zero[2] = -y1;
978 			y2_or_zero[0] = y2;
979 			y2_or_zero[2] = -y2;
980 			z0 = x0 * x0;
981 			z1 = x1 * x1;
982 			z2 = x2 * x2;
983 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
984 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
985 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
986 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
987 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
988 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
989 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
990 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
991 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
992 			*py0 = t0;
993 			*py1 = t1;
994 			*py2 = t2;
995 			break;
996 		}
997 
998 		x += stridex;
999 		y += stridey;
1000 		i = 0;
1001 	} while (--n > 0);
1002 
1003 	if (i > 0)
1004 	{
1005 		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
1006 		double		t0, t1, z0, z1;
1007 		unsigned	j0, j1;
1008 		int		n0, n1;
1009 
1010 		if (i > 1)
1011 		{
1012 			n1 = (int) (x1 * invpio2 + half[xsb1]);
1013 			fn1 = (double) n1;
1014 			n1 &= 3;
1015 			a1 = x1 - fn1 * pio2_1;
1016 			w1 = fn1 * pio2_2;
1017 			x1 = a1 - w1;
1018 			y1 = (a1 - x1) - w1;
1019 			a1 = x1;
1020 			w1 = fn1 * pio2_3 - y1;
1021 			x1 = a1 - w1;
1022 			y1 = (a1 - x1) - w1;
1023 			a1 = x1;
1024 			w1 = fn1 * pio2_3t - y1;
1025 			x1 = a1 - w1;
1026 			y1 = (a1 - x1) - w1;
1027 			xsb1 = HI(&x1);
1028 			if ((xsb1 & ~0x80000000) < thresh[n1&1])
1029 			{
1030 				j1 = n1 & 1;
1031 				x1_or_one[0] = x1;
1032 				x1_or_one[2] = -x1;
1033 				y1_or_zero[0] = y1;
1034 				y1_or_zero[2] = -y1;
1035 				z1 = x1 * x1;
1036 				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1037 				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1038 				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1039 				*py1 = t1;
1040 			}
1041 			else
1042 			{
1043 				j1 = (xsb1 + 0x4000) & 0xffff8000;
1044 				HI(&t1) = j1;
1045 				LO(&t1) = 0;
1046 				x1 = (x1 - t1) + y1;
1047 				z1 = x1 * x1;
1048 				t1 = z1 * (qq1 + z1 * qq2);
1049 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1050 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1051 				xsb1 = (xsb1 >> 30) & 2;
1052 				n1 ^= (xsb1 & ~(n1 << 1));
1053 				xsb1 |= 1;
1054 				a1 = __vlibm_TBL_sincos_hi[j1+n1];
1055 				t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1056 				*py1 = ( a1 + t1 );
1057 			}
1058 		}
1059 		n0 = (int) (x0 * invpio2 + half[xsb0]);
1060 		fn0 = (double) n0;
1061 		n0 &= 3;
1062 		a0 = x0 - fn0 * pio2_1;
1063 		w0 = fn0 * pio2_2;
1064 		x0 = a0 - w0;
1065 		y0 = (a0 - x0) - w0;
1066 		a0 = x0;
1067 		w0 = fn0 * pio2_3 - y0;
1068 		x0 = a0 - w0;
1069 		y0 = (a0 - x0) - w0;
1070 		a0 = x0;
1071 		w0 = fn0 * pio2_3t - y0;
1072 		x0 = a0 - w0;
1073 		y0 = (a0 - x0) - w0;
1074 		xsb0 = HI(&x0);
1075 		if ((xsb0 & ~0x80000000) < thresh[n0&1])
1076 		{
1077 			j0 = n0 & 1;
1078 			x0_or_one[0] = x0;
1079 			x0_or_one[2] = -x0;
1080 			y0_or_zero[0] = y0;
1081 			y0_or_zero[2] = -y0;
1082 			z0 = x0 * x0;
1083 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1084 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1085 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1086 			*py0 = t0;
1087 		}
1088 		else
1089 		{
1090 			j0 = (xsb0 + 0x4000) & 0xffff8000;
1091 			HI(&t0) = j0;
1092 			LO(&t0) = 0;
1093 			x0 = (x0 - t0) + y0;
1094 			z0 = x0 * x0;
1095 			t0 = z0 * (qq1 + z0 * qq2);
1096 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1097 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1098 			xsb0 = (xsb0 >> 30) & 2;
1099 			n0 ^= (xsb0 & ~(n0 << 1));
1100 			xsb0 |= 1;
1101 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
1102 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1103 			*py0 = ( a0 + t0 );
1104 		}
1105 	}
1106 
1107 	if (biguns)
1108 		__vlibm_vsin_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
1109 }
1110