xref: /titanic_51/usr/src/lib/libmvec/common/__vcosbig_ultra3.c (revision 633aa8079184298dfc2ae6966d0d9cc897712b57)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 
32 #ifdef _LITTLE_ENDIAN
33 #define HI(x)	*(1+(int*)x)
34 #define LO(x)	*(unsigned*)x
35 #else
36 #define HI(x)	*(int*)x
37 #define LO(x)	*(1+(unsigned*)x)
38 #endif
39 
40 #ifdef __RESTRICT
41 #define restrict _Restrict
42 #else
43 #define restrict
44 #endif
45 
46 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
47 
48 static const double
49 	half[2]	= { 0.5, -0.5 },
50 	one		= 1.0,
51 	invpio2	= 0.636619772367581343075535,
52 	pio2_1	= 1.570796326734125614166,
53 	pio2_2	= 6.077100506303965976596e-11,
54 	pio2_3	= 2.022266248711166455796e-21,
55 	pio2_3t	= 8.478427660368899643959e-32,
56 	pp1		= -1.666666666605760465276263943134982554676e-0001,
57 	pp2		=  8.333261209690963126718376566146180944442e-0003,
58 	qq1		= -4.999999999977710986407023955908711557870e-0001,
59 	qq2		=  4.166654863857219350645055881018842089580e-0002,
60 	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
61 				-4.999999999999931701464060878888294524481e-0001 },
62 	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
63 				 4.166666666394861917535640593963708222319e-0002 },
64 	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
65 				-1.388888552656142867832756687736851681462e-0003 },
66 	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
67 				 2.478519423681460796618128289454530524759e-0005 };
68 
69 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
70 
71 extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
72 
73 void
74 __vlibm_vcos_big_ultra3(int n, double * restrict x, int stridex, double * restrict y,
75 	int stridey, int pthresh)
76 {
77 	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
78 	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
79 	double		x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
80 	unsigned	xsb0, xsb1, xsb2;
81 	int			i, biguns, nsave, sxsave, sysave;
82 
83 	nsave = n;
84 	xsave = x;
85 	sxsave = stridex;
86 	ysave = y;
87 	sysave = stridey;
88 	biguns = 0;
89 
90 	x0_or_one[1] = 1.0;
91 	x1_or_one[1] = 1.0;
92 	x2_or_one[1] = 1.0;
93 	x0_or_one[3] = -1.0;
94 	x1_or_one[3] = -1.0;
95 	x2_or_one[3] = -1.0;
96 	y0_or_zero[1] = 0.0;
97 	y1_or_zero[1] = 0.0;
98 	y2_or_zero[1] = 0.0;
99 	y0_or_zero[3] = 0.0;
100 	y1_or_zero[3] = 0.0;
101 	y2_or_zero[3] = 0.0;
102 
103 	do
104 	{
105 		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
106 		unsigned	hx;
107 		int			n0, n1, n2;
108 
109 loop0:
110 		hx = HI(x);
111 		xsb0 = hx >> 31;
112 		hx &= ~0x80000000;
113 		if (hx <= pthresh || hx > 0x413921fb)
114 		{
115 			if (hx > 0x413921fb && hx < 0x7ff00000)
116 				biguns = 1;
117 			x += stridex;
118 			y += stridey;
119 			i = 0;
120 			if (--n <= 0)
121 				break;
122 			goto loop0;
123 		}
124 		x0 = *x;
125 		py0 = y;
126 		x += stridex;
127 		y += stridey;
128 		i = 1;
129 		if (--n <= 0)
130 			break;
131 
132 loop1:
133 		hx = HI(x);
134 		xsb1 = hx >> 31;
135 		hx &= ~0x80000000;
136 		if (hx <= pthresh || hx > 0x413921fb)
137 		{
138 			if (hx > 0x413921fb && hx < 0x7ff00000)
139 				biguns = 1;
140 			x += stridex;
141 			y += stridey;
142 			i = 1;
143 			if (--n <= 0)
144 				break;
145 			goto loop1;
146 		}
147 		x1 = *x;
148 		py1 = y;
149 		x += stridex;
150 		y += stridey;
151 		i = 2;
152 		if (--n <= 0)
153 			break;
154 
155 loop2:
156 		hx = HI(x);
157 		xsb2 = hx >> 31;
158 		hx &= ~0x80000000;
159 		if (hx <= pthresh || hx > 0x413921fb)
160 		{
161 			if (hx > 0x413921fb && hx < 0x7ff00000)
162 				biguns = 1;
163 			x += stridex;
164 			y += stridey;
165 			i = 2;
166 			if (--n <= 0)
167 				break;
168 			goto loop2;
169 		}
170 		x2 = *x;
171 		py2 = y;
172 
173 		n0 = (int) (x0 * invpio2 + half[xsb0]);
174 		n1 = (int) (x1 * invpio2 + half[xsb1]);
175 		n2 = (int) (x2 * invpio2 + half[xsb2]);
176 		fn0 = (double) n0;
177 		fn1 = (double) n1;
178 		fn2 = (double) n2;
179 		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
180 		n1 = (n1 + 1) & 3;
181 		n2 = (n2 + 1) & 3;
182 		a0 = x0 - fn0 * pio2_1;
183 		a1 = x1 - fn1 * pio2_1;
184 		a2 = x2 - fn2 * pio2_1;
185 		w0 = fn0 * pio2_2;
186 		w1 = fn1 * pio2_2;
187 		w2 = fn2 * pio2_2;
188 		x0 = a0 - w0;
189 		x1 = a1 - w1;
190 		x2 = a2 - w2;
191 		y0 = (a0 - x0) - w0;
192 		y1 = (a1 - x1) - w1;
193 		y2 = (a2 - x2) - w2;
194 		a0 = x0;
195 		a1 = x1;
196 		a2 = x2;
197 		w0 = fn0 * pio2_3 - y0;
198 		w1 = fn1 * pio2_3 - y1;
199 		w2 = fn2 * pio2_3 - y2;
200 		x0 = a0 - w0;
201 		x1 = a1 - w1;
202 		x2 = a2 - w2;
203 		y0 = (a0 - x0) - w0;
204 		y1 = (a1 - x1) - w1;
205 		y2 = (a2 - x2) - w2;
206 		a0 = x0;
207 		a1 = x1;
208 		a2 = x2;
209 		w0 = fn0 * pio2_3t - y0;
210 		w1 = fn1 * pio2_3t - y1;
211 		w2 = fn2 * pio2_3t - y2;
212 		x0 = a0 - w0;
213 		x1 = a1 - w1;
214 		x2 = a2 - w2;
215 		y0 = (a0 - x0) - w0;
216 		y1 = (a1 - x1) - w1;
217 		y2 = (a2 - x2) - w2;
218 		xsb0 = HI(&x0);
219 		i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
220 		xsb1 = HI(&x1);
221 		i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
222 		xsb2 = HI(&x2);
223 		i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
224 		switch (i)
225 		{
226 			double		t0, t1, t2, z0, z1, z2;
227 			unsigned	j0, j1, j2;
228 
229 		case 0:
230 			j0 = (xsb0 + 0x4000) & 0xffff8000;
231 			j1 = (xsb1 + 0x4000) & 0xffff8000;
232 			j2 = (xsb2 + 0x4000) & 0xffff8000;
233 			HI(&t0) = j0;
234 			HI(&t1) = j1;
235 			HI(&t2) = j2;
236 			LO(&t0) = 0;
237 			LO(&t1) = 0;
238 			LO(&t2) = 0;
239 			x0 = (x0 - t0) + y0;
240 			x1 = (x1 - t1) + y1;
241 			x2 = (x2 - t2) + y2;
242 			z0 = x0 * x0;
243 			z1 = x1 * x1;
244 			z2 = x2 * x2;
245 			t0 = z0 * (qq1 + z0 * qq2);
246 			t1 = z1 * (qq1 + z1 * qq2);
247 			t2 = z2 * (qq1 + z2 * qq2);
248 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
249 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
250 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
251 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
252 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
253 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
254 			xsb0 = (xsb0 >> 30) & 2;
255 			xsb1 = (xsb1 >> 30) & 2;
256 			xsb2 = (xsb2 >> 30) & 2;
257 			n0 ^= (xsb0 & ~(n0 << 1));
258 			n1 ^= (xsb1 & ~(n1 << 1));
259 			n2 ^= (xsb2 & ~(n2 << 1));
260 			xsb0 |= 1;
261 			xsb1 |= 1;
262 			xsb2 |= 1;
263 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
264 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
265 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
266 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
267 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
268 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
269 			*py0 = ( a0 + t0 );
270 			*py1 = ( a1 + t1 );
271 			*py2 = ( a2 + t2 );
272 			break;
273 
274 		case 1:
275 			j0 = n0 & 1;
276 			j1 = (xsb1 + 0x4000) & 0xffff8000;
277 			j2 = (xsb2 + 0x4000) & 0xffff8000;
278 			HI(&t1) = j1;
279 			HI(&t2) = j2;
280 			LO(&t1) = 0;
281 			LO(&t2) = 0;
282 			x0_or_one[0] = x0;
283 			x0_or_one[2] = -x0;
284 			y0_or_zero[0] = y0;
285 			y0_or_zero[2] = -y0;
286 			x1 = (x1 - t1) + y1;
287 			x2 = (x2 - t2) + y2;
288 			z0 = x0 * x0;
289 			z1 = x1 * x1;
290 			z2 = x2 * x2;
291 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
292 			t1 = z1 * (qq1 + z1 * qq2);
293 			t2 = z2 * (qq1 + z2 * qq2);
294 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
295 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
296 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
297 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299 			xsb1 = (xsb1 >> 30) & 2;
300 			xsb2 = (xsb2 >> 30) & 2;
301 			n1 ^= (xsb1 & ~(n1 << 1));
302 			n2 ^= (xsb2 & ~(n2 << 1));
303 			xsb1 |= 1;
304 			xsb2 |= 1;
305 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
306 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
307 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
308 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
309 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
310 			*py0 = t0;
311 			*py1 = ( a1 + t1 );
312 			*py2 = ( a2 + t2 );
313 			break;
314 
315 		case 2:
316 			j0 = (xsb0 + 0x4000) & 0xffff8000;
317 			j1 = n1 & 1;
318 			j2 = (xsb2 + 0x4000) & 0xffff8000;
319 			HI(&t0) = j0;
320 			HI(&t2) = j2;
321 			LO(&t0) = 0;
322 			LO(&t2) = 0;
323 			x1_or_one[0] = x1;
324 			x1_or_one[2] = -x1;
325 			x0 = (x0 - t0) + y0;
326 			y1_or_zero[0] = y1;
327 			y1_or_zero[2] = -y1;
328 			x2 = (x2 - t2) + y2;
329 			z0 = x0 * x0;
330 			z1 = x1 * x1;
331 			z2 = x2 * x2;
332 			t0 = z0 * (qq1 + z0 * qq2);
333 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
334 			t2 = z2 * (qq1 + z2 * qq2);
335 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
336 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
337 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
338 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
339 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
340 			xsb0 = (xsb0 >> 30) & 2;
341 			xsb2 = (xsb2 >> 30) & 2;
342 			n0 ^= (xsb0 & ~(n0 << 1));
343 			n2 ^= (xsb2 & ~(n2 << 1));
344 			xsb0 |= 1;
345 			xsb2 |= 1;
346 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
347 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
348 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
349 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
350 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
351 			*py0 = ( a0 + t0 );
352 			*py1 = t1;
353 			*py2 = ( a2 + t2 );
354 			break;
355 
356 		case 3:
357 			j0 = n0 & 1;
358 			j1 = n1 & 1;
359 			j2 = (xsb2 + 0x4000) & 0xffff8000;
360 			HI(&t2) = j2;
361 			LO(&t2) = 0;
362 			x0_or_one[0] = x0;
363 			x0_or_one[2] = -x0;
364 			x1_or_one[0] = x1;
365 			x1_or_one[2] = -x1;
366 			y0_or_zero[0] = y0;
367 			y0_or_zero[2] = -y0;
368 			y1_or_zero[0] = y1;
369 			y1_or_zero[2] = -y1;
370 			x2 = (x2 - t2) + y2;
371 			z0 = x0 * x0;
372 			z1 = x1 * x1;
373 			z2 = x2 * x2;
374 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
375 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
376 			t2 = z2 * (qq1 + z2 * qq2);
377 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
378 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
379 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
380 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
381 			xsb2 = (xsb2 >> 30) & 2;
382 			n2 ^= (xsb2 & ~(n2 << 1));
383 			xsb2 |= 1;
384 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
385 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
386 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
387 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
388 			*py0 = t0;
389 			*py1 = t1;
390 			*py2 = ( a2 + t2 );
391 			break;
392 
393 		case 4:
394 			j0 = (xsb0 + 0x4000) & 0xffff8000;
395 			j1 = (xsb1 + 0x4000) & 0xffff8000;
396 			j2 = n2 & 1;
397 			HI(&t0) = j0;
398 			HI(&t1) = j1;
399 			LO(&t0) = 0;
400 			LO(&t1) = 0;
401 			x2_or_one[0] = x2;
402 			x2_or_one[2] = -x2;
403 			x0 = (x0 - t0) + y0;
404 			x1 = (x1 - t1) + y1;
405 			y2_or_zero[0] = y2;
406 			y2_or_zero[2] = -y2;
407 			z0 = x0 * x0;
408 			z1 = x1 * x1;
409 			z2 = x2 * x2;
410 			t0 = z0 * (qq1 + z0 * qq2);
411 			t1 = z1 * (qq1 + z1 * qq2);
412 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
413 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
414 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
415 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
416 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
417 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
418 			xsb0 = (xsb0 >> 30) & 2;
419 			xsb1 = (xsb1 >> 30) & 2;
420 			n0 ^= (xsb0 & ~(n0 << 1));
421 			n1 ^= (xsb1 & ~(n1 << 1));
422 			xsb0 |= 1;
423 			xsb1 |= 1;
424 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
425 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
426 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
427 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
428 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
429 			*py0 = ( a0 + t0 );
430 			*py1 = ( a1 + t1 );
431 			*py2 = t2;
432 			break;
433 
434 		case 5:
435 			j0 = n0 & 1;
436 			j1 = (xsb1 + 0x4000) & 0xffff8000;
437 			j2 = n2 & 1;
438 			HI(&t1) = j1;
439 			LO(&t1) = 0;
440 			x0_or_one[0] = x0;
441 			x0_or_one[2] = -x0;
442 			x2_or_one[0] = x2;
443 			x2_or_one[2] = -x2;
444 			y0_or_zero[0] = y0;
445 			y0_or_zero[2] = -y0;
446 			x1 = (x1 - t1) + y1;
447 			y2_or_zero[0] = y2;
448 			y2_or_zero[2] = -y2;
449 			z0 = x0 * x0;
450 			z1 = x1 * x1;
451 			z2 = x2 * x2;
452 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
453 			t1 = z1 * (qq1 + z1 * qq2);
454 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
455 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
456 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
457 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
458 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
459 			xsb1 = (xsb1 >> 30) & 2;
460 			n1 ^= (xsb1 & ~(n1 << 1));
461 			xsb1 |= 1;
462 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
463 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
464 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
465 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
466 			*py0 = t0;
467 			*py1 = ( a1 + t1 );
468 			*py2 = t2;
469 			break;
470 
471 		case 6:
472 			j0 = (xsb0 + 0x4000) & 0xffff8000;
473 			j1 = n1 & 1;
474 			j2 = n2 & 1;
475 			HI(&t0) = j0;
476 			LO(&t0) = 0;
477 			x1_or_one[0] = x1;
478 			x1_or_one[2] = -x1;
479 			x2_or_one[0] = x2;
480 			x2_or_one[2] = -x2;
481 			x0 = (x0 - t0) + y0;
482 			y1_or_zero[0] = y1;
483 			y1_or_zero[2] = -y1;
484 			y2_or_zero[0] = y2;
485 			y2_or_zero[2] = -y2;
486 			z0 = x0 * x0;
487 			z1 = x1 * x1;
488 			z2 = x2 * x2;
489 			t0 = z0 * (qq1 + z0 * qq2);
490 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
491 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
492 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
493 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
494 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
495 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
496 			xsb0 = (xsb0 >> 30) & 2;
497 			n0 ^= (xsb0 & ~(n0 << 1));
498 			xsb0 |= 1;
499 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
500 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
501 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
502 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
503 			*py0 = ( a0 + t0 );
504 			*py1 = t1;
505 			*py2 = t2;
506 			break;
507 
508 		case 7:
509 			j0 = n0 & 1;
510 			j1 = n1 & 1;
511 			j2 = n2 & 1;
512 			x0_or_one[0] = x0;
513 			x0_or_one[2] = -x0;
514 			x1_or_one[0] = x1;
515 			x1_or_one[2] = -x1;
516 			x2_or_one[0] = x2;
517 			x2_or_one[2] = -x2;
518 			y0_or_zero[0] = y0;
519 			y0_or_zero[2] = -y0;
520 			y1_or_zero[0] = y1;
521 			y1_or_zero[2] = -y1;
522 			y2_or_zero[0] = y2;
523 			y2_or_zero[2] = -y2;
524 			z0 = x0 * x0;
525 			z1 = x1 * x1;
526 			z2 = x2 * x2;
527 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
528 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
529 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
530 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
531 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
532 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
533 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
534 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
535 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
536 			*py0 = t0;
537 			*py1 = t1;
538 			*py2 = t2;
539 			break;
540 		}
541 
542 		x += stridex;
543 		y += stridey;
544 		i = 0;
545 	} while (--n > 0);
546 
547 	if (i > 0)
548 	{
549 		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
550 		double		t0, t1, z0, z1;
551 		unsigned	j0, j1;
552 		int			n0, n1;
553 
554 		if (i > 1)
555 		{
556 			n1 = (int) (x1 * invpio2 + half[xsb1]);
557 			fn1 = (double) n1;
558 			n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
559 			a1 = x1 - fn1 * pio2_1;
560 			w1 = fn1 * pio2_2;
561 			x1 = a1 - w1;
562 			y1 = (a1 - x1) - w1;
563 			a1 = x1;
564 			w1 = fn1 * pio2_3 - y1;
565 			x1 = a1 - w1;
566 			y1 = (a1 - x1) - w1;
567 			a1 = x1;
568 			w1 = fn1 * pio2_3t - y1;
569 			x1 = a1 - w1;
570 			y1 = (a1 - x1) - w1;
571 			xsb1 = HI(&x1);
572 			if ((xsb1 & ~0x80000000) < thresh[n1&1])
573 			{
574 				j1 = n1 & 1;
575 				x1_or_one[0] = x1;
576 				x1_or_one[2] = -x1;
577 				y1_or_zero[0] = y1;
578 				y1_or_zero[2] = -y1;
579 				z1 = x1 * x1;
580 				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
581 				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
582 				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
583 				*py1 = t1;
584 			}
585 			else
586 			{
587 				j1 = (xsb1 + 0x4000) & 0xffff8000;
588 				HI(&t1) = j1;
589 				LO(&t1) = 0;
590 				x1 = (x1 - t1) + y1;
591 				z1 = x1 * x1;
592 				t1 = z1 * (qq1 + z1 * qq2);
593 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
594 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
595 				xsb1 = (xsb1 >> 30) & 2;
596 				n1 ^= (xsb1 & ~(n1 << 1));
597 				xsb1 |= 1;
598 				a1 = __vlibm_TBL_sincos_hi[j1+n1];
599 				t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
600 				*py1 = ( a1 + t1 );
601 			}
602 		}
603 		n0 = (int) (x0 * invpio2 + half[xsb0]);
604 		fn0 = (double) n0;
605 		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
606 		a0 = x0 - fn0 * pio2_1;
607 		w0 = fn0 * pio2_2;
608 		x0 = a0 - w0;
609 		y0 = (a0 - x0) - w0;
610 		a0 = x0;
611 		w0 = fn0 * pio2_3 - y0;
612 		x0 = a0 - w0;
613 		y0 = (a0 - x0) - w0;
614 		a0 = x0;
615 		w0 = fn0 * pio2_3t - y0;
616 		x0 = a0 - w0;
617 		y0 = (a0 - x0) - w0;
618 		xsb0 = HI(&x0);
619 		if ((xsb0 & ~0x80000000) < thresh[n0&1])
620 		{
621 			j0 = n0 & 1;
622 			x0_or_one[0] = x0;
623 			x0_or_one[2] = -x0;
624 			y0_or_zero[0] = y0;
625 			y0_or_zero[2] = -y0;
626 			z0 = x0 * x0;
627 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
628 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
629 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
630 			*py0 = t0;
631 		}
632 		else
633 		{
634 			j0 = (xsb0 + 0x4000) & 0xffff8000;
635 			HI(&t0) = j0;
636 			LO(&t0) = 0;
637 			x0 = (x0 - t0) + y0;
638 			z0 = x0 * x0;
639 			t0 = z0 * (qq1 + z0 * qq2);
640 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
641 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
642 			xsb0 = (xsb0 >> 30) & 2;
643 			n0 ^= (xsb0 & ~(n0 << 1));
644 			xsb0 |= 1;
645 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
646 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
647 			*py0 = ( a0 + t0 );
648 		}
649 	}
650 
651 	if (biguns)
652 		__vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
653 }
654