1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
40
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46
47 /*
48 * vcos.1.c
49 *
50 * Vector cosine function. Just slight modifications to vsin.8.c, mainly
51 * in the primary range part.
52 *
53 * Modification to primary range processing. If an argument that does not
54 * fall in the primary range is encountered, then processing is continued
55 * in the medium range.
56 *
57 */
58
59 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
60
61 static const double
62 half[2] = { 0.5, -0.5 },
63 one = 1.0,
64 invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */
65 pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
66 pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
67 pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
68 pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
69 pp1 = -1.666666666605760465276263943134982554676e-0001,
70 pp2 = 8.333261209690963126718376566146180944442e-0003,
71 qq1 = -4.999999999977710986407023955908711557870e-0001,
72 qq2 = 4.166654863857219350645055881018842089580e-0002,
73 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
74 -4.999999999999931701464060878888294524481e-0001 },
75 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
76 4.166666666394861917535640593963708222319e-0002 },
77 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
78 -1.388888552656142867832756687736851681462e-0003 },
79 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
80 2.478519423681460796618128289454530524759e-0005 };
81
82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
83
84 /* Don't __ the following; acomp will handle it */
85 extern double fabs(double);
86 extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
87
88 /*
89 * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
90 *
91 * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
92 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
93 *
94 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
95 */
96 void
__vcos(int n,double * restrict x,int stridex,double * restrict y,int stridey)97 __vcos(int n, double * restrict x, int stridex, double * restrict y,
98 int stridey)
99 {
100 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
101 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
102 double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
103 unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
104 int i, biguns, nsave, sxsave, sysave;
105 volatile int v __unused;
106 nsave = n;
107 xsave = x;
108 sxsave = stridex;
109 ysave = y;
110 sysave = stridey;
111 biguns = 0;
112
113 x0 = *x; /* 'x0' may be used uninitialized */
114 do /* MAIN LOOP */
115 {
116 /* Gotos here so _break_ exits MAIN LOOP. */
117 LOOP0: /* Find first arg in right range. */
118 xsb0 = HI(x); /* get most significant word */
119 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
120 if (hx0 > 0x3fe921fb) {
121 /* Too big: arg reduction needed, so leave for second part */
122 biguns = 1;
123 goto MEDIUM;
124 }
125 if (hx0 < 0x3e400000) {
126 /* Too small. cos x ~ 1. */
127 v = *x;
128 *y = 1.0;
129 x += stridex;
130 y += stridey;
131 i = 0;
132 if (--n <= 0)
133 break;
134 goto LOOP0;
135 }
136 x0 = *x;
137 py0 = y;
138 x += stridex;
139 y += stridey;
140 i = 1;
141 if (--n <= 0)
142 break;
143
144 LOOP1: /* Get second arg, same as above. */
145 xsb1 = HI(x);
146 hx1 = xsb1 & ~0x80000000;
147 if (hx1 > 0x3fe921fb)
148 {
149 biguns = 2;
150 goto MEDIUM;
151 }
152 if (hx1 < 0x3e400000)
153 {
154 v = *x;
155 *y = 1.0;
156 x += stridex;
157 y += stridey;
158 i = 1;
159 if (--n <= 0)
160 break;
161 goto LOOP1;
162 }
163 x1 = *x;
164 py1 = y;
165 x += stridex;
166 y += stridey;
167 i = 2;
168 if (--n <= 0)
169 break;
170
171 LOOP2: /* Get third arg, same as above. */
172 xsb2 = HI(x);
173 hx2 = xsb2 & ~0x80000000;
174 if (hx2 > 0x3fe921fb)
175 {
176 biguns = 3;
177 goto MEDIUM;
178 }
179 if (hx2 < 0x3e400000)
180 {
181 v = *x;
182 *y = 1.0;
183 x += stridex;
184 y += stridey;
185 i = 2;
186 if (--n <= 0)
187 break;
188 goto LOOP2;
189 }
190 x2 = *x;
191 py2 = y;
192
193 /*
194 * 0x3fc40000 = 5/32 ~ 0.15625
195 * Get msb after subtraction. Will be 1 only if
196 * hx0 - 5/32 is negative.
197 */
198 i = (hx0 - 0x3fc40000) >> 31;
199 i |= ((hx1 - 0x3fc40000) >> 30) & 2;
200 i |= ((hx2 - 0x3fc40000) >> 29) & 4;
201 switch (i)
202 {
203 double a0, a1, a2, w0, w1, w2;
204 double t0, t1, t2, z0, z1, z2;
205 unsigned j0, j1, j2;
206
207 case 0: /* All are > 5/32 */
208 j0 = (xsb0 + 0x4000) & 0xffff8000;
209 j1 = (xsb1 + 0x4000) & 0xffff8000;
210 j2 = (xsb2 + 0x4000) & 0xffff8000;
211 HI(&t0) = j0;
212 HI(&t1) = j1;
213 HI(&t2) = j2;
214 LO(&t0) = 0;
215 LO(&t1) = 0;
216 LO(&t2) = 0;
217 x0 -= t0;
218 x1 -= t1;
219 x2 -= t2;
220 z0 = x0 * x0;
221 z1 = x1 * x1;
222 z2 = x2 * x2;
223 t0 = z0 * (qq1 + z0 * qq2);
224 t1 = z1 * (qq1 + z1 * qq2);
225 t2 = z2 * (qq1 + z2 * qq2);
226 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
227 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
228 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
229 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
230 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
231 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
232 xsb0 = (xsb0 >> 30) & 2;
233 xsb1 = (xsb1 >> 30) & 2;
234 xsb2 = (xsb2 >> 30) & 2;
235 a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
236 a1 = __vlibm_TBL_sincos_hi[j1+1];
237 a2 = __vlibm_TBL_sincos_hi[j2+1];
238 /* cos_lo(t) sin_hi(t) */
239 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
240 t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
241 t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
242
243 *py0 = a0 + t0;
244 *py1 = a1 + t1;
245 *py2 = a2 + t2;
246 break;
247
248 case 1:
249 j1 = (xsb1 + 0x4000) & 0xffff8000;
250 j2 = (xsb2 + 0x4000) & 0xffff8000;
251 HI(&t1) = j1;
252 HI(&t2) = j2;
253 LO(&t1) = 0;
254 LO(&t2) = 0;
255 x1 -= t1;
256 x2 -= t2;
257 z0 = x0 * x0;
258 z1 = x1 * x1;
259 z2 = x2 * x2;
260 t0 = z0 * (poly3[1] + z0 * poly4[1]);
261 t1 = z1 * (qq1 + z1 * qq2);
262 t2 = z2 * (qq1 + z2 * qq2);
263 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
264 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
265 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
266 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
267 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
268 xsb1 = (xsb1 >> 30) & 2;
269 xsb2 = (xsb2 >> 30) & 2;
270 a1 = __vlibm_TBL_sincos_hi[j1+1];
271 a2 = __vlibm_TBL_sincos_hi[j2+1];
272 t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
273 t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
274 *py0 = one + t0;
275 *py1 = a1 + t1;
276 *py2 = a2 + t2;
277 break;
278
279 case 2:
280 j0 = (xsb0 + 0x4000) & 0xffff8000;
281 j2 = (xsb2 + 0x4000) & 0xffff8000;
282 HI(&t0) = j0;
283 HI(&t2) = j2;
284 LO(&t0) = 0;
285 LO(&t2) = 0;
286 x0 -= t0;
287 x2 -= t2;
288 z0 = x0 * x0;
289 z1 = x1 * x1;
290 z2 = x2 * x2;
291 t0 = z0 * (qq1 + z0 * qq2);
292 t1 = z1 * (poly3[1] + z1 * poly4[1]);
293 t2 = z2 * (qq1 + z2 * qq2);
294 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
295 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
296 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
297 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299 xsb0 = (xsb0 >> 30) & 2;
300 xsb2 = (xsb2 >> 30) & 2;
301 a0 = __vlibm_TBL_sincos_hi[j0+1];
302 a2 = __vlibm_TBL_sincos_hi[j2+1];
303 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
304 t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
305 *py0 = a0 + t0;
306 *py1 = one + t1;
307 *py2 = a2 + t2;
308 break;
309
310 case 3:
311 j2 = (xsb2 + 0x4000) & 0xffff8000;
312 HI(&t2) = j2;
313 LO(&t2) = 0;
314 x2 -= t2;
315 z0 = x0 * x0;
316 z1 = x1 * x1;
317 z2 = x2 * x2;
318 t0 = z0 * (poly3[1] + z0 * poly4[1]);
319 t1 = z1 * (poly3[1] + z1 * poly4[1]);
320 t2 = z2 * (qq1 + z2 * qq2);
321 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
322 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
323 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
324 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
325 xsb2 = (xsb2 >> 30) & 2;
326 a2 = __vlibm_TBL_sincos_hi[j2+1];
327 t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
328 *py0 = one + t0;
329 *py1 = one + t1;
330 *py2 = a2 + t2;
331 break;
332
333 case 4:
334 j0 = (xsb0 + 0x4000) & 0xffff8000;
335 j1 = (xsb1 + 0x4000) & 0xffff8000;
336 HI(&t0) = j0;
337 HI(&t1) = j1;
338 LO(&t0) = 0;
339 LO(&t1) = 0;
340 x0 -= t0;
341 x1 -= t1;
342 z0 = x0 * x0;
343 z1 = x1 * x1;
344 z2 = x2 * x2;
345 t0 = z0 * (qq1 + z0 * qq2);
346 t1 = z1 * (qq1 + z1 * qq2);
347 t2 = z2 * (poly3[1] + z2 * poly4[1]);
348 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
349 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
350 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
351 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
352 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
353 xsb0 = (xsb0 >> 30) & 2;
354 xsb1 = (xsb1 >> 30) & 2;
355 a0 = __vlibm_TBL_sincos_hi[j0+1];
356 a1 = __vlibm_TBL_sincos_hi[j1+1];
357 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
358 t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
359 *py0 = a0 + t0;
360 *py1 = a1 + t1;
361 *py2 = one + t2;
362 break;
363
364 case 5:
365 j1 = (xsb1 + 0x4000) & 0xffff8000;
366 HI(&t1) = j1;
367 LO(&t1) = 0;
368 x1 -= t1;
369 z0 = x0 * x0;
370 z1 = x1 * x1;
371 z2 = x2 * x2;
372 t0 = z0 * (poly3[1] + z0 * poly4[1]);
373 t1 = z1 * (qq1 + z1 * qq2);
374 t2 = z2 * (poly3[1] + z2 * poly4[1]);
375 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
376 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
377 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
378 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
379 xsb1 = (xsb1 >> 30) & 2;
380 a1 = __vlibm_TBL_sincos_hi[j1+1];
381 t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
382 *py0 = one + t0;
383 *py1 = a1 + t1;
384 *py2 = one + t2;
385 break;
386
387 case 6:
388 j0 = (xsb0 + 0x4000) & 0xffff8000;
389 HI(&t0) = j0;
390 LO(&t0) = 0;
391 x0 -= t0;
392 z0 = x0 * x0;
393 z1 = x1 * x1;
394 z2 = x2 * x2;
395 t0 = z0 * (qq1 + z0 * qq2);
396 t1 = z1 * (poly3[1] + z1 * poly4[1]);
397 t2 = z2 * (poly3[1] + z2 * poly4[1]);
398 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
399 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
400 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
401 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
402 xsb0 = (xsb0 >> 30) & 2;
403 a0 = __vlibm_TBL_sincos_hi[j0+1];
404 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
405 *py0 = a0 + t0;
406 *py1 = one + t1;
407 *py2 = one + t2;
408 break;
409
410 case 7: /* All are < 5/32 */
411 z0 = x0 * x0;
412 z1 = x1 * x1;
413 z2 = x2 * x2;
414 t0 = z0 * (poly3[1] + z0 * poly4[1]);
415 t1 = z1 * (poly3[1] + z1 * poly4[1]);
416 t2 = z2 * (poly3[1] + z2 * poly4[1]);
417 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
418 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
419 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
420 *py0 = one + t0;
421 *py1 = one + t1;
422 *py2 = one + t2;
423 break;
424 }
425
426 x += stridex;
427 y += stridey;
428 i = 0;
429 } while (--n > 0); /* END MAIN LOOP */
430
431 /*
432 * CLEAN UP last 0, 1, or 2 elts.
433 */
434 if (i > 0) /* Clean up elts at tail. i < 3. */
435 {
436 double a0, a1, w0, w1;
437 double t0, t1, z0, z1;
438 unsigned j0, j1;
439
440 if (i > 1)
441 {
442 if (hx1 < 0x3fc40000)
443 {
444 z1 = x1 * x1;
445 t1 = z1 * (poly3[1] + z1 * poly4[1]);
446 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
447 t1 = one + t1;
448 *py1 = t1;
449 }
450 else
451 {
452 j1 = (xsb1 + 0x4000) & 0xffff8000;
453 HI(&t1) = j1;
454 LO(&t1) = 0;
455 x1 -= t1;
456 z1 = x1 * x1;
457 t1 = z1 * (qq1 + z1 * qq2);
458 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
459 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
460 xsb1 = (xsb1 >> 30) & 2;
461 a1 = __vlibm_TBL_sincos_hi[j1+1];
462 t1 = __vlibm_TBL_sincos_lo[j1+1]
463 - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
464 *py1 = a1 + t1;
465 }
466 }
467 if (hx0 < 0x3fc40000)
468 {
469 z0 = x0 * x0;
470 t0 = z0 * (poly3[1] + z0 * poly4[1]);
471 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
472 t0 = one + t0;
473 *py0 = t0;
474 }
475 else
476 {
477 j0 = (xsb0 + 0x4000) & 0xffff8000;
478 HI(&t0) = j0;
479 LO(&t0) = 0;
480 x0 -= t0;
481 z0 = x0 * x0;
482 t0 = z0 * (qq1 + z0 * qq2);
483 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
484 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
485 xsb0 = (xsb0 >> 30) & 2;
486 a0 = __vlibm_TBL_sincos_hi[j0+1];
487 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
488 *py0 = a0 + t0;
489 }
490 } /* END CLEAN UP */
491
492 return;
493
494 /*
495 * Take care of BIGUNS.
496 *
497 * We have jumped here in the middle of processing after having
498 * encountered a medium range argument. Therefore things are in a
499 * bit of a tizzy.
500 */
501
502 MEDIUM:
503
504 x0_or_one[1] = 1.0;
505 x1_or_one[1] = 1.0;
506 x2_or_one[1] = 1.0;
507 x0_or_one[3] = -1.0;
508 x1_or_one[3] = -1.0;
509 x2_or_one[3] = -1.0;
510 y0_or_zero[1] = 0.0;
511 y1_or_zero[1] = 0.0;
512 y2_or_zero[1] = 0.0;
513 y0_or_zero[3] = 0.0;
514 y1_or_zero[3] = 0.0;
515 y2_or_zero[3] = 0.0;
516
517 if (biguns == 3)
518 {
519 biguns = 0;
520 xsb0 = xsb0 >> 31;
521 xsb1 = xsb1 >> 31;
522 goto loop2;
523 }
524 else if (biguns == 2)
525 {
526 xsb0 = xsb0 >> 31;
527 biguns = 0;
528 goto loop1;
529 }
530 biguns = 0;
531
532 do
533 {
534 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
535 unsigned hx;
536 int n0, n1, n2;
537
538 /*
539 * Find 3 more to work on: Not already done, not too big.
540 */
541
542 loop0:
543 hx = HI(x);
544 xsb0 = hx >> 31;
545 hx &= ~0x80000000;
546 if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
547 {
548 if (hx >= 0x7ff00000) /* Inf or NaN */
549 {
550 x0 = *x;
551 *y = x0 - x0;
552 }
553 else
554 biguns = 1;
555 x += stridex;
556 y += stridey;
557 i = 0;
558 if (--n <= 0)
559 break;
560 goto loop0;
561 }
562 x0 = *x;
563 py0 = y;
564 x += stridex;
565 y += stridey;
566 i = 1;
567 if (--n <= 0)
568 break;
569
570 loop1:
571 hx = HI(x);
572 xsb1 = hx >> 31;
573 hx &= ~0x80000000;
574 if (hx > 0x413921fb)
575 {
576 if (hx >= 0x7ff00000)
577 {
578 x1 = *x;
579 *y = x1 - x1;
580 }
581 else
582 biguns = 1;
583 x += stridex;
584 y += stridey;
585 i = 1;
586 if (--n <= 0)
587 break;
588 goto loop1;
589 }
590 x1 = *x;
591 py1 = y;
592 x += stridex;
593 y += stridey;
594 i = 2;
595 if (--n <= 0)
596 break;
597
598 loop2:
599 hx = HI(x);
600 xsb2 = hx >> 31;
601 hx &= ~0x80000000;
602 if (hx > 0x413921fb)
603 {
604 if (hx >= 0x7ff00000)
605 {
606 x2 = *x;
607 *y = x2 - x2;
608 }
609 else
610 biguns = 1;
611 x += stridex;
612 y += stridey;
613 i = 2;
614 if (--n <= 0)
615 break;
616 goto loop2;
617 }
618 x2 = *x;
619 py2 = y;
620
621 n0 = (int) (x0 * invpio2 + half[xsb0]);
622 n1 = (int) (x1 * invpio2 + half[xsb1]);
623 n2 = (int) (x2 * invpio2 + half[xsb2]);
624 fn0 = (double) n0;
625 fn1 = (double) n1;
626 fn2 = (double) n2;
627 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
628 n1 = (n1 + 1) & 3;
629 n2 = (n2 + 1) & 3;
630 a0 = x0 - fn0 * pio2_1;
631 a1 = x1 - fn1 * pio2_1;
632 a2 = x2 - fn2 * pio2_1;
633 w0 = fn0 * pio2_2;
634 w1 = fn1 * pio2_2;
635 w2 = fn2 * pio2_2;
636 x0 = a0 - w0;
637 x1 = a1 - w1;
638 x2 = a2 - w2;
639 y0 = (a0 - x0) - w0;
640 y1 = (a1 - x1) - w1;
641 y2 = (a2 - x2) - w2;
642 a0 = x0;
643 a1 = x1;
644 a2 = x2;
645 w0 = fn0 * pio2_3 - y0;
646 w1 = fn1 * pio2_3 - y1;
647 w2 = fn2 * pio2_3 - y2;
648 x0 = a0 - w0;
649 x1 = a1 - w1;
650 x2 = a2 - w2;
651 y0 = (a0 - x0) - w0;
652 y1 = (a1 - x1) - w1;
653 y2 = (a2 - x2) - w2;
654 a0 = x0;
655 a1 = x1;
656 a2 = x2;
657 w0 = fn0 * pio2_3t - y0;
658 w1 = fn1 * pio2_3t - y1;
659 w2 = fn2 * pio2_3t - y2;
660 x0 = a0 - w0;
661 x1 = a1 - w1;
662 x2 = a2 - w2;
663 y0 = (a0 - x0) - w0;
664 y1 = (a1 - x1) - w1;
665 y2 = (a2 - x2) - w2;
666 xsb0 = HI(&x0);
667 i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
668 xsb1 = HI(&x1);
669 i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
670 xsb2 = HI(&x2);
671 i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
672 switch (i)
673 {
674 double t0, t1, t2, z0, z1, z2;
675 unsigned j0, j1, j2;
676
677 case 0:
678 j0 = (xsb0 + 0x4000) & 0xffff8000;
679 j1 = (xsb1 + 0x4000) & 0xffff8000;
680 j2 = (xsb2 + 0x4000) & 0xffff8000;
681 HI(&t0) = j0;
682 HI(&t1) = j1;
683 HI(&t2) = j2;
684 LO(&t0) = 0;
685 LO(&t1) = 0;
686 LO(&t2) = 0;
687 x0 = (x0 - t0) + y0;
688 x1 = (x1 - t1) + y1;
689 x2 = (x2 - t2) + y2;
690 z0 = x0 * x0;
691 z1 = x1 * x1;
692 z2 = x2 * x2;
693 t0 = z0 * (qq1 + z0 * qq2);
694 t1 = z1 * (qq1 + z1 * qq2);
695 t2 = z2 * (qq1 + z2 * qq2);
696 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
697 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
698 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
699 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
700 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
701 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
702 xsb0 = (xsb0 >> 30) & 2;
703 xsb1 = (xsb1 >> 30) & 2;
704 xsb2 = (xsb2 >> 30) & 2;
705 n0 ^= (xsb0 & ~(n0 << 1));
706 n1 ^= (xsb1 & ~(n1 << 1));
707 n2 ^= (xsb2 & ~(n2 << 1));
708 xsb0 |= 1;
709 xsb1 |= 1;
710 xsb2 |= 1;
711 a0 = __vlibm_TBL_sincos_hi[j0+n0];
712 a1 = __vlibm_TBL_sincos_hi[j1+n1];
713 a2 = __vlibm_TBL_sincos_hi[j2+n2];
714 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
715 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
716 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
717 *py0 = ( a0 + t0 );
718 *py1 = ( a1 + t1 );
719 *py2 = ( a2 + t2 );
720 break;
721
722 case 1:
723 j0 = n0 & 1;
724 j1 = (xsb1 + 0x4000) & 0xffff8000;
725 j2 = (xsb2 + 0x4000) & 0xffff8000;
726 HI(&t1) = j1;
727 HI(&t2) = j2;
728 LO(&t1) = 0;
729 LO(&t2) = 0;
730 x0_or_one[0] = x0;
731 x0_or_one[2] = -x0;
732 y0_or_zero[0] = y0;
733 y0_or_zero[2] = -y0;
734 x1 = (x1 - t1) + y1;
735 x2 = (x2 - t2) + y2;
736 z0 = x0 * x0;
737 z1 = x1 * x1;
738 z2 = x2 * x2;
739 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
740 t1 = z1 * (qq1 + z1 * qq2);
741 t2 = z2 * (qq1 + z2 * qq2);
742 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
743 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
744 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
745 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
746 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
747 xsb1 = (xsb1 >> 30) & 2;
748 xsb2 = (xsb2 >> 30) & 2;
749 n1 ^= (xsb1 & ~(n1 << 1));
750 n2 ^= (xsb2 & ~(n2 << 1));
751 xsb1 |= 1;
752 xsb2 |= 1;
753 a1 = __vlibm_TBL_sincos_hi[j1+n1];
754 a2 = __vlibm_TBL_sincos_hi[j2+n2];
755 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
756 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
757 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
758 *py0 = t0;
759 *py1 = ( a1 + t1 );
760 *py2 = ( a2 + t2 );
761 break;
762
763 case 2:
764 j0 = (xsb0 + 0x4000) & 0xffff8000;
765 j1 = n1 & 1;
766 j2 = (xsb2 + 0x4000) & 0xffff8000;
767 HI(&t0) = j0;
768 HI(&t2) = j2;
769 LO(&t0) = 0;
770 LO(&t2) = 0;
771 x1_or_one[0] = x1;
772 x1_or_one[2] = -x1;
773 x0 = (x0 - t0) + y0;
774 y1_or_zero[0] = y1;
775 y1_or_zero[2] = -y1;
776 x2 = (x2 - t2) + y2;
777 z0 = x0 * x0;
778 z1 = x1 * x1;
779 z2 = x2 * x2;
780 t0 = z0 * (qq1 + z0 * qq2);
781 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
782 t2 = z2 * (qq1 + z2 * qq2);
783 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
784 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
785 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
786 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
787 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
788 xsb0 = (xsb0 >> 30) & 2;
789 xsb2 = (xsb2 >> 30) & 2;
790 n0 ^= (xsb0 & ~(n0 << 1));
791 n2 ^= (xsb2 & ~(n2 << 1));
792 xsb0 |= 1;
793 xsb2 |= 1;
794 a0 = __vlibm_TBL_sincos_hi[j0+n0];
795 a2 = __vlibm_TBL_sincos_hi[j2+n2];
796 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
797 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
798 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
799 *py0 = ( a0 + t0 );
800 *py1 = t1;
801 *py2 = ( a2 + t2 );
802 break;
803
804 case 3:
805 j0 = n0 & 1;
806 j1 = n1 & 1;
807 j2 = (xsb2 + 0x4000) & 0xffff8000;
808 HI(&t2) = j2;
809 LO(&t2) = 0;
810 x0_or_one[0] = x0;
811 x0_or_one[2] = -x0;
812 x1_or_one[0] = x1;
813 x1_or_one[2] = -x1;
814 y0_or_zero[0] = y0;
815 y0_or_zero[2] = -y0;
816 y1_or_zero[0] = y1;
817 y1_or_zero[2] = -y1;
818 x2 = (x2 - t2) + y2;
819 z0 = x0 * x0;
820 z1 = x1 * x1;
821 z2 = x2 * x2;
822 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
823 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
824 t2 = z2 * (qq1 + z2 * qq2);
825 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
826 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
827 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
828 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
829 xsb2 = (xsb2 >> 30) & 2;
830 n2 ^= (xsb2 & ~(n2 << 1));
831 xsb2 |= 1;
832 a2 = __vlibm_TBL_sincos_hi[j2+n2];
833 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
834 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
835 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
836 *py0 = t0;
837 *py1 = t1;
838 *py2 = ( a2 + t2 );
839 break;
840
841 case 4:
842 j0 = (xsb0 + 0x4000) & 0xffff8000;
843 j1 = (xsb1 + 0x4000) & 0xffff8000;
844 j2 = n2 & 1;
845 HI(&t0) = j0;
846 HI(&t1) = j1;
847 LO(&t0) = 0;
848 LO(&t1) = 0;
849 x2_or_one[0] = x2;
850 x2_or_one[2] = -x2;
851 x0 = (x0 - t0) + y0;
852 x1 = (x1 - t1) + y1;
853 y2_or_zero[0] = y2;
854 y2_or_zero[2] = -y2;
855 z0 = x0 * x0;
856 z1 = x1 * x1;
857 z2 = x2 * x2;
858 t0 = z0 * (qq1 + z0 * qq2);
859 t1 = z1 * (qq1 + z1 * qq2);
860 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
861 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
862 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
863 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
864 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
865 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
866 xsb0 = (xsb0 >> 30) & 2;
867 xsb1 = (xsb1 >> 30) & 2;
868 n0 ^= (xsb0 & ~(n0 << 1));
869 n1 ^= (xsb1 & ~(n1 << 1));
870 xsb0 |= 1;
871 xsb1 |= 1;
872 a0 = __vlibm_TBL_sincos_hi[j0+n0];
873 a1 = __vlibm_TBL_sincos_hi[j1+n1];
874 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
875 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
876 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
877 *py0 = ( a0 + t0 );
878 *py1 = ( a1 + t1 );
879 *py2 = t2;
880 break;
881
882 case 5:
883 j0 = n0 & 1;
884 j1 = (xsb1 + 0x4000) & 0xffff8000;
885 j2 = n2 & 1;
886 HI(&t1) = j1;
887 LO(&t1) = 0;
888 x0_or_one[0] = x0;
889 x0_or_one[2] = -x0;
890 x2_or_one[0] = x2;
891 x2_or_one[2] = -x2;
892 y0_or_zero[0] = y0;
893 y0_or_zero[2] = -y0;
894 x1 = (x1 - t1) + y1;
895 y2_or_zero[0] = y2;
896 y2_or_zero[2] = -y2;
897 z0 = x0 * x0;
898 z1 = x1 * x1;
899 z2 = x2 * x2;
900 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
901 t1 = z1 * (qq1 + z1 * qq2);
902 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
903 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
904 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
905 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
906 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
907 xsb1 = (xsb1 >> 30) & 2;
908 n1 ^= (xsb1 & ~(n1 << 1));
909 xsb1 |= 1;
910 a1 = __vlibm_TBL_sincos_hi[j1+n1];
911 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
912 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
913 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
914 *py0 = t0;
915 *py1 = ( a1 + t1 );
916 *py2 = t2;
917 break;
918
919 case 6:
920 j0 = (xsb0 + 0x4000) & 0xffff8000;
921 j1 = n1 & 1;
922 j2 = n2 & 1;
923 HI(&t0) = j0;
924 LO(&t0) = 0;
925 x1_or_one[0] = x1;
926 x1_or_one[2] = -x1;
927 x2_or_one[0] = x2;
928 x2_or_one[2] = -x2;
929 x0 = (x0 - t0) + y0;
930 y1_or_zero[0] = y1;
931 y1_or_zero[2] = -y1;
932 y2_or_zero[0] = y2;
933 y2_or_zero[2] = -y2;
934 z0 = x0 * x0;
935 z1 = x1 * x1;
936 z2 = x2 * x2;
937 t0 = z0 * (qq1 + z0 * qq2);
938 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
939 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
940 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
941 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
942 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
943 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
944 xsb0 = (xsb0 >> 30) & 2;
945 n0 ^= (xsb0 & ~(n0 << 1));
946 xsb0 |= 1;
947 a0 = __vlibm_TBL_sincos_hi[j0+n0];
948 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
949 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
950 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
951 *py0 = ( a0 + t0 );
952 *py1 = t1;
953 *py2 = t2;
954 break;
955
956 case 7:
957 j0 = n0 & 1;
958 j1 = n1 & 1;
959 j2 = n2 & 1;
960 x0_or_one[0] = x0;
961 x0_or_one[2] = -x0;
962 x1_or_one[0] = x1;
963 x1_or_one[2] = -x1;
964 x2_or_one[0] = x2;
965 x2_or_one[2] = -x2;
966 y0_or_zero[0] = y0;
967 y0_or_zero[2] = -y0;
968 y1_or_zero[0] = y1;
969 y1_or_zero[2] = -y1;
970 y2_or_zero[0] = y2;
971 y2_or_zero[2] = -y2;
972 z0 = x0 * x0;
973 z1 = x1 * x1;
974 z2 = x2 * x2;
975 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
976 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
977 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
978 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
979 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
980 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
981 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
982 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
983 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
984 *py0 = t0;
985 *py1 = t1;
986 *py2 = t2;
987 break;
988 }
989
990 x += stridex;
991 y += stridey;
992 i = 0;
993 } while (--n > 0);
994
995 if (i > 0)
996 {
997 double fn0, fn1, a0, a1, w0, w1, y0, y1;
998 double t0, t1, z0, z1;
999 unsigned j0, j1;
1000 int n0, n1;
1001
1002 if (i > 1)
1003 {
1004 n1 = (int) (x1 * invpio2 + half[xsb1]);
1005 fn1 = (double) n1;
1006 n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1007 a1 = x1 - fn1 * pio2_1;
1008 w1 = fn1 * pio2_2;
1009 x1 = a1 - w1;
1010 y1 = (a1 - x1) - w1;
1011 a1 = x1;
1012 w1 = fn1 * pio2_3 - y1;
1013 x1 = a1 - w1;
1014 y1 = (a1 - x1) - w1;
1015 a1 = x1;
1016 w1 = fn1 * pio2_3t - y1;
1017 x1 = a1 - w1;
1018 y1 = (a1 - x1) - w1;
1019 xsb1 = HI(&x1);
1020 if ((xsb1 & ~0x80000000) < thresh[n1&1])
1021 {
1022 j1 = n1 & 1;
1023 x1_or_one[0] = x1;
1024 x1_or_one[2] = -x1;
1025 y1_or_zero[0] = y1;
1026 y1_or_zero[2] = -y1;
1027 z1 = x1 * x1;
1028 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1029 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1030 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1031 *py1 = t1;
1032 }
1033 else
1034 {
1035 j1 = (xsb1 + 0x4000) & 0xffff8000;
1036 HI(&t1) = j1;
1037 LO(&t1) = 0;
1038 x1 = (x1 - t1) + y1;
1039 z1 = x1 * x1;
1040 t1 = z1 * (qq1 + z1 * qq2);
1041 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1042 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1043 xsb1 = (xsb1 >> 30) & 2;
1044 n1 ^= (xsb1 & ~(n1 << 1));
1045 xsb1 |= 1;
1046 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1047 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1048 *py1 = ( a1 + t1 );
1049 }
1050 }
1051 n0 = (int) (x0 * invpio2 + half[xsb0]);
1052 fn0 = (double) n0;
1053 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1054 a0 = x0 - fn0 * pio2_1;
1055 w0 = fn0 * pio2_2;
1056 x0 = a0 - w0;
1057 y0 = (a0 - x0) - w0;
1058 a0 = x0;
1059 w0 = fn0 * pio2_3 - y0;
1060 x0 = a0 - w0;
1061 y0 = (a0 - x0) - w0;
1062 a0 = x0;
1063 w0 = fn0 * pio2_3t - y0;
1064 x0 = a0 - w0;
1065 y0 = (a0 - x0) - w0;
1066 xsb0 = HI(&x0);
1067 if ((xsb0 & ~0x80000000) < thresh[n0&1])
1068 {
1069 j0 = n0 & 1;
1070 x0_or_one[0] = x0;
1071 x0_or_one[2] = -x0;
1072 y0_or_zero[0] = y0;
1073 y0_or_zero[2] = -y0;
1074 z0 = x0 * x0;
1075 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1076 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1077 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1078 *py0 = t0;
1079 }
1080 else
1081 {
1082 j0 = (xsb0 + 0x4000) & 0xffff8000;
1083 HI(&t0) = j0;
1084 LO(&t0) = 0;
1085 x0 = (x0 - t0) + y0;
1086 z0 = x0 * x0;
1087 t0 = z0 * (qq1 + z0 * qq2);
1088 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1089 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1090 xsb0 = (xsb0 >> 30) & 2;
1091 n0 ^= (xsb0 & ~(n0 << 1));
1092 xsb0 |= 1;
1093 a0 = __vlibm_TBL_sincos_hi[j0+n0];
1094 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1095 *py0 = ( a0 + t0 );
1096 }
1097 }
1098
1099 if (biguns)
1100 __vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
1101 }
1102