1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
40
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46
47 /*
48 * vcos.1.c
49 *
50 * Vector cosine function. Just slight modifications to vsin.8.c, mainly
51 * in the primary range part.
52 *
53 * Modification to primary range processing. If an argument that does not
54 * fall in the primary range is encountered, then processing is continued
55 * in the medium range.
56 *
57 */
58
59 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
60
61 static const double
62 half[2] = { 0.5, -0.5 },
63 one = 1.0,
64 invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */
65 pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
66 pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
67 pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
68 pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
69 pp1 = -1.666666666605760465276263943134982554676e-0001,
70 pp2 = 8.333261209690963126718376566146180944442e-0003,
71 qq1 = -4.999999999977710986407023955908711557870e-0001,
72 qq2 = 4.166654863857219350645055881018842089580e-0002,
73 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
74 -4.999999999999931701464060878888294524481e-0001 },
75 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
76 4.166666666394861917535640593963708222319e-0002 },
77 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
78 -1.388888552656142867832756687736851681462e-0003 },
79 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
80 2.478519423681460796618128289454530524759e-0005 };
81
82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
83
84 /* Don't __ the following; acomp will handle it */
85 extern double fabs(double);
86 extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
87
88 /*
89 * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
90 *
91 * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
92 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
93 *
94 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
95 */
96 void
__vcos(int n,double * restrict x,int stridex,double * restrict y,int stridey)97 __vcos(int n, double * restrict x, int stridex, double * restrict y,
98 int stridey)
99 {
100 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
101 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
102 double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
103 unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
104 int i, biguns, nsave, sxsave, sysave;
105 volatile int v __GNU_UNUSED;
106 nsave = n;
107 xsave = x;
108 sxsave = stridex;
109 ysave = y;
110 sysave = stridey;
111 biguns = 0;
112
113 do /* MAIN LOOP */
114 {
115 /* Gotos here so _break_ exits MAIN LOOP. */
116 LOOP0: /* Find first arg in right range. */
117 xsb0 = HI(x); /* get most significant word */
118 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
119 if (hx0 > 0x3fe921fb) {
120 /* Too big: arg reduction needed, so leave for second part */
121 biguns = 1;
122 goto MEDIUM;
123 }
124 if (hx0 < 0x3e400000) {
125 /* Too small. cos x ~ 1. */
126 v = *x;
127 *y = 1.0;
128 x += stridex;
129 y += stridey;
130 i = 0;
131 if (--n <= 0)
132 break;
133 goto LOOP0;
134 }
135 x0 = *x;
136 py0 = y;
137 x += stridex;
138 y += stridey;
139 i = 1;
140 if (--n <= 0)
141 break;
142
143 LOOP1: /* Get second arg, same as above. */
144 xsb1 = HI(x);
145 hx1 = xsb1 & ~0x80000000;
146 if (hx1 > 0x3fe921fb)
147 {
148 biguns = 2;
149 goto MEDIUM;
150 }
151 if (hx1 < 0x3e400000)
152 {
153 v = *x;
154 *y = 1.0;
155 x += stridex;
156 y += stridey;
157 i = 1;
158 if (--n <= 0)
159 break;
160 goto LOOP1;
161 }
162 x1 = *x;
163 py1 = y;
164 x += stridex;
165 y += stridey;
166 i = 2;
167 if (--n <= 0)
168 break;
169
170 LOOP2: /* Get third arg, same as above. */
171 xsb2 = HI(x);
172 hx2 = xsb2 & ~0x80000000;
173 if (hx2 > 0x3fe921fb)
174 {
175 biguns = 3;
176 goto MEDIUM;
177 }
178 if (hx2 < 0x3e400000)
179 {
180 v = *x;
181 *y = 1.0;
182 x += stridex;
183 y += stridey;
184 i = 2;
185 if (--n <= 0)
186 break;
187 goto LOOP2;
188 }
189 x2 = *x;
190 py2 = y;
191
192 /*
193 * 0x3fc40000 = 5/32 ~ 0.15625
194 * Get msb after subtraction. Will be 1 only if
195 * hx0 - 5/32 is negative.
196 */
197 i = (hx0 - 0x3fc40000) >> 31;
198 i |= ((hx1 - 0x3fc40000) >> 30) & 2;
199 i |= ((hx2 - 0x3fc40000) >> 29) & 4;
200 switch (i)
201 {
202 double a0, a1, a2, w0, w1, w2;
203 double t0, t1, t2, z0, z1, z2;
204 unsigned j0, j1, j2;
205
206 case 0: /* All are > 5/32 */
207 j0 = (xsb0 + 0x4000) & 0xffff8000;
208 j1 = (xsb1 + 0x4000) & 0xffff8000;
209 j2 = (xsb2 + 0x4000) & 0xffff8000;
210 HI(&t0) = j0;
211 HI(&t1) = j1;
212 HI(&t2) = j2;
213 LO(&t0) = 0;
214 LO(&t1) = 0;
215 LO(&t2) = 0;
216 x0 -= t0;
217 x1 -= t1;
218 x2 -= t2;
219 z0 = x0 * x0;
220 z1 = x1 * x1;
221 z2 = x2 * x2;
222 t0 = z0 * (qq1 + z0 * qq2);
223 t1 = z1 * (qq1 + z1 * qq2);
224 t2 = z2 * (qq1 + z2 * qq2);
225 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
226 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
227 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
228 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
229 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
230 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
231 xsb0 = (xsb0 >> 30) & 2;
232 xsb1 = (xsb1 >> 30) & 2;
233 xsb2 = (xsb2 >> 30) & 2;
234 a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
235 a1 = __vlibm_TBL_sincos_hi[j1+1];
236 a2 = __vlibm_TBL_sincos_hi[j2+1];
237 /* cos_lo(t) sin_hi(t) */
238 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
239 t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
240 t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
241
242 *py0 = a0 + t0;
243 *py1 = a1 + t1;
244 *py2 = a2 + t2;
245 break;
246
247 case 1:
248 j1 = (xsb1 + 0x4000) & 0xffff8000;
249 j2 = (xsb2 + 0x4000) & 0xffff8000;
250 HI(&t1) = j1;
251 HI(&t2) = j2;
252 LO(&t1) = 0;
253 LO(&t2) = 0;
254 x1 -= t1;
255 x2 -= t2;
256 z0 = x0 * x0;
257 z1 = x1 * x1;
258 z2 = x2 * x2;
259 t0 = z0 * (poly3[1] + z0 * poly4[1]);
260 t1 = z1 * (qq1 + z1 * qq2);
261 t2 = z2 * (qq1 + z2 * qq2);
262 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
263 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
264 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
265 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
266 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
267 xsb1 = (xsb1 >> 30) & 2;
268 xsb2 = (xsb2 >> 30) & 2;
269 a1 = __vlibm_TBL_sincos_hi[j1+1];
270 a2 = __vlibm_TBL_sincos_hi[j2+1];
271 t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
272 t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
273 *py0 = one + t0;
274 *py1 = a1 + t1;
275 *py2 = a2 + t2;
276 break;
277
278 case 2:
279 j0 = (xsb0 + 0x4000) & 0xffff8000;
280 j2 = (xsb2 + 0x4000) & 0xffff8000;
281 HI(&t0) = j0;
282 HI(&t2) = j2;
283 LO(&t0) = 0;
284 LO(&t2) = 0;
285 x0 -= t0;
286 x2 -= t2;
287 z0 = x0 * x0;
288 z1 = x1 * x1;
289 z2 = x2 * x2;
290 t0 = z0 * (qq1 + z0 * qq2);
291 t1 = z1 * (poly3[1] + z1 * poly4[1]);
292 t2 = z2 * (qq1 + z2 * qq2);
293 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
294 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
295 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
296 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
297 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298 xsb0 = (xsb0 >> 30) & 2;
299 xsb2 = (xsb2 >> 30) & 2;
300 a0 = __vlibm_TBL_sincos_hi[j0+1];
301 a2 = __vlibm_TBL_sincos_hi[j2+1];
302 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
303 t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
304 *py0 = a0 + t0;
305 *py1 = one + t1;
306 *py2 = a2 + t2;
307 break;
308
309 case 3:
310 j2 = (xsb2 + 0x4000) & 0xffff8000;
311 HI(&t2) = j2;
312 LO(&t2) = 0;
313 x2 -= t2;
314 z0 = x0 * x0;
315 z1 = x1 * x1;
316 z2 = x2 * x2;
317 t0 = z0 * (poly3[1] + z0 * poly4[1]);
318 t1 = z1 * (poly3[1] + z1 * poly4[1]);
319 t2 = z2 * (qq1 + z2 * qq2);
320 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
321 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
322 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
323 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
324 xsb2 = (xsb2 >> 30) & 2;
325 a2 = __vlibm_TBL_sincos_hi[j2+1];
326 t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
327 *py0 = one + t0;
328 *py1 = one + t1;
329 *py2 = a2 + t2;
330 break;
331
332 case 4:
333 j0 = (xsb0 + 0x4000) & 0xffff8000;
334 j1 = (xsb1 + 0x4000) & 0xffff8000;
335 HI(&t0) = j0;
336 HI(&t1) = j1;
337 LO(&t0) = 0;
338 LO(&t1) = 0;
339 x0 -= t0;
340 x1 -= t1;
341 z0 = x0 * x0;
342 z1 = x1 * x1;
343 z2 = x2 * x2;
344 t0 = z0 * (qq1 + z0 * qq2);
345 t1 = z1 * (qq1 + z1 * qq2);
346 t2 = z2 * (poly3[1] + z2 * poly4[1]);
347 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
348 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
349 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
350 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
351 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
352 xsb0 = (xsb0 >> 30) & 2;
353 xsb1 = (xsb1 >> 30) & 2;
354 a0 = __vlibm_TBL_sincos_hi[j0+1];
355 a1 = __vlibm_TBL_sincos_hi[j1+1];
356 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
357 t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
358 *py0 = a0 + t0;
359 *py1 = a1 + t1;
360 *py2 = one + t2;
361 break;
362
363 case 5:
364 j1 = (xsb1 + 0x4000) & 0xffff8000;
365 HI(&t1) = j1;
366 LO(&t1) = 0;
367 x1 -= t1;
368 z0 = x0 * x0;
369 z1 = x1 * x1;
370 z2 = x2 * x2;
371 t0 = z0 * (poly3[1] + z0 * poly4[1]);
372 t1 = z1 * (qq1 + z1 * qq2);
373 t2 = z2 * (poly3[1] + z2 * poly4[1]);
374 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
375 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
376 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
377 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
378 xsb1 = (xsb1 >> 30) & 2;
379 a1 = __vlibm_TBL_sincos_hi[j1+1];
380 t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
381 *py0 = one + t0;
382 *py1 = a1 + t1;
383 *py2 = one + t2;
384 break;
385
386 case 6:
387 j0 = (xsb0 + 0x4000) & 0xffff8000;
388 HI(&t0) = j0;
389 LO(&t0) = 0;
390 x0 -= t0;
391 z0 = x0 * x0;
392 z1 = x1 * x1;
393 z2 = x2 * x2;
394 t0 = z0 * (qq1 + z0 * qq2);
395 t1 = z1 * (poly3[1] + z1 * poly4[1]);
396 t2 = z2 * (poly3[1] + z2 * poly4[1]);
397 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
398 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
399 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
400 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
401 xsb0 = (xsb0 >> 30) & 2;
402 a0 = __vlibm_TBL_sincos_hi[j0+1];
403 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
404 *py0 = a0 + t0;
405 *py1 = one + t1;
406 *py2 = one + t2;
407 break;
408
409 case 7: /* All are < 5/32 */
410 z0 = x0 * x0;
411 z1 = x1 * x1;
412 z2 = x2 * x2;
413 t0 = z0 * (poly3[1] + z0 * poly4[1]);
414 t1 = z1 * (poly3[1] + z1 * poly4[1]);
415 t2 = z2 * (poly3[1] + z2 * poly4[1]);
416 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
417 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
418 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
419 *py0 = one + t0;
420 *py1 = one + t1;
421 *py2 = one + t2;
422 break;
423 }
424
425 x += stridex;
426 y += stridey;
427 i = 0;
428 } while (--n > 0); /* END MAIN LOOP */
429
430 /*
431 * CLEAN UP last 0, 1, or 2 elts.
432 */
433 if (i > 0) /* Clean up elts at tail. i < 3. */
434 {
435 double a0, a1, w0, w1;
436 double t0, t1, z0, z1;
437 unsigned j0, j1;
438
439 if (i > 1)
440 {
441 if (hx1 < 0x3fc40000)
442 {
443 z1 = x1 * x1;
444 t1 = z1 * (poly3[1] + z1 * poly4[1]);
445 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
446 t1 = one + t1;
447 *py1 = t1;
448 }
449 else
450 {
451 j1 = (xsb1 + 0x4000) & 0xffff8000;
452 HI(&t1) = j1;
453 LO(&t1) = 0;
454 x1 -= t1;
455 z1 = x1 * x1;
456 t1 = z1 * (qq1 + z1 * qq2);
457 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
458 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
459 xsb1 = (xsb1 >> 30) & 2;
460 a1 = __vlibm_TBL_sincos_hi[j1+1];
461 t1 = __vlibm_TBL_sincos_lo[j1+1]
462 - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
463 *py1 = a1 + t1;
464 }
465 }
466 if (hx0 < 0x3fc40000)
467 {
468 z0 = x0 * x0;
469 t0 = z0 * (poly3[1] + z0 * poly4[1]);
470 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
471 t0 = one + t0;
472 *py0 = t0;
473 }
474 else
475 {
476 j0 = (xsb0 + 0x4000) & 0xffff8000;
477 HI(&t0) = j0;
478 LO(&t0) = 0;
479 x0 -= t0;
480 z0 = x0 * x0;
481 t0 = z0 * (qq1 + z0 * qq2);
482 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
483 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
484 xsb0 = (xsb0 >> 30) & 2;
485 a0 = __vlibm_TBL_sincos_hi[j0+1];
486 t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
487 *py0 = a0 + t0;
488 }
489 } /* END CLEAN UP */
490
491 return;
492
493 /*
494 * Take care of BIGUNS.
495 *
496 * We have jumped here in the middle of processing after having
497 * encountered a medium range argument. Therefore things are in a
498 * bit of a tizzy.
499 */
500
501 MEDIUM:
502
503 x0_or_one[1] = 1.0;
504 x1_or_one[1] = 1.0;
505 x2_or_one[1] = 1.0;
506 x0_or_one[3] = -1.0;
507 x1_or_one[3] = -1.0;
508 x2_or_one[3] = -1.0;
509 y0_or_zero[1] = 0.0;
510 y1_or_zero[1] = 0.0;
511 y2_or_zero[1] = 0.0;
512 y0_or_zero[3] = 0.0;
513 y1_or_zero[3] = 0.0;
514 y2_or_zero[3] = 0.0;
515
516 if (biguns == 3)
517 {
518 biguns = 0;
519 xsb0 = xsb0 >> 31;
520 xsb1 = xsb1 >> 31;
521 goto loop2;
522 }
523 else if (biguns == 2)
524 {
525 xsb0 = xsb0 >> 31;
526 biguns = 0;
527 goto loop1;
528 }
529 biguns = 0;
530
531 do
532 {
533 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
534 unsigned hx;
535 int n0, n1, n2;
536
537 /*
538 * Find 3 more to work on: Not already done, not too big.
539 */
540
541 loop0:
542 hx = HI(x);
543 xsb0 = hx >> 31;
544 hx &= ~0x80000000;
545 if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
546 {
547 if (hx >= 0x7ff00000) /* Inf or NaN */
548 {
549 x0 = *x;
550 *y = x0 - x0;
551 }
552 else
553 biguns = 1;
554 x += stridex;
555 y += stridey;
556 i = 0;
557 if (--n <= 0)
558 break;
559 goto loop0;
560 }
561 x0 = *x;
562 py0 = y;
563 x += stridex;
564 y += stridey;
565 i = 1;
566 if (--n <= 0)
567 break;
568
569 loop1:
570 hx = HI(x);
571 xsb1 = hx >> 31;
572 hx &= ~0x80000000;
573 if (hx > 0x413921fb)
574 {
575 if (hx >= 0x7ff00000)
576 {
577 x1 = *x;
578 *y = x1 - x1;
579 }
580 else
581 biguns = 1;
582 x += stridex;
583 y += stridey;
584 i = 1;
585 if (--n <= 0)
586 break;
587 goto loop1;
588 }
589 x1 = *x;
590 py1 = y;
591 x += stridex;
592 y += stridey;
593 i = 2;
594 if (--n <= 0)
595 break;
596
597 loop2:
598 hx = HI(x);
599 xsb2 = hx >> 31;
600 hx &= ~0x80000000;
601 if (hx > 0x413921fb)
602 {
603 if (hx >= 0x7ff00000)
604 {
605 x2 = *x;
606 *y = x2 - x2;
607 }
608 else
609 biguns = 1;
610 x += stridex;
611 y += stridey;
612 i = 2;
613 if (--n <= 0)
614 break;
615 goto loop2;
616 }
617 x2 = *x;
618 py2 = y;
619
620 n0 = (int) (x0 * invpio2 + half[xsb0]);
621 n1 = (int) (x1 * invpio2 + half[xsb1]);
622 n2 = (int) (x2 * invpio2 + half[xsb2]);
623 fn0 = (double) n0;
624 fn1 = (double) n1;
625 fn2 = (double) n2;
626 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
627 n1 = (n1 + 1) & 3;
628 n2 = (n2 + 1) & 3;
629 a0 = x0 - fn0 * pio2_1;
630 a1 = x1 - fn1 * pio2_1;
631 a2 = x2 - fn2 * pio2_1;
632 w0 = fn0 * pio2_2;
633 w1 = fn1 * pio2_2;
634 w2 = fn2 * pio2_2;
635 x0 = a0 - w0;
636 x1 = a1 - w1;
637 x2 = a2 - w2;
638 y0 = (a0 - x0) - w0;
639 y1 = (a1 - x1) - w1;
640 y2 = (a2 - x2) - w2;
641 a0 = x0;
642 a1 = x1;
643 a2 = x2;
644 w0 = fn0 * pio2_3 - y0;
645 w1 = fn1 * pio2_3 - y1;
646 w2 = fn2 * pio2_3 - y2;
647 x0 = a0 - w0;
648 x1 = a1 - w1;
649 x2 = a2 - w2;
650 y0 = (a0 - x0) - w0;
651 y1 = (a1 - x1) - w1;
652 y2 = (a2 - x2) - w2;
653 a0 = x0;
654 a1 = x1;
655 a2 = x2;
656 w0 = fn0 * pio2_3t - y0;
657 w1 = fn1 * pio2_3t - y1;
658 w2 = fn2 * pio2_3t - y2;
659 x0 = a0 - w0;
660 x1 = a1 - w1;
661 x2 = a2 - w2;
662 y0 = (a0 - x0) - w0;
663 y1 = (a1 - x1) - w1;
664 y2 = (a2 - x2) - w2;
665 xsb0 = HI(&x0);
666 i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
667 xsb1 = HI(&x1);
668 i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
669 xsb2 = HI(&x2);
670 i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
671 switch (i)
672 {
673 double t0, t1, t2, z0, z1, z2;
674 unsigned j0, j1, j2;
675
676 case 0:
677 j0 = (xsb0 + 0x4000) & 0xffff8000;
678 j1 = (xsb1 + 0x4000) & 0xffff8000;
679 j2 = (xsb2 + 0x4000) & 0xffff8000;
680 HI(&t0) = j0;
681 HI(&t1) = j1;
682 HI(&t2) = j2;
683 LO(&t0) = 0;
684 LO(&t1) = 0;
685 LO(&t2) = 0;
686 x0 = (x0 - t0) + y0;
687 x1 = (x1 - t1) + y1;
688 x2 = (x2 - t2) + y2;
689 z0 = x0 * x0;
690 z1 = x1 * x1;
691 z2 = x2 * x2;
692 t0 = z0 * (qq1 + z0 * qq2);
693 t1 = z1 * (qq1 + z1 * qq2);
694 t2 = z2 * (qq1 + z2 * qq2);
695 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
696 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
697 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
698 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
699 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
700 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
701 xsb0 = (xsb0 >> 30) & 2;
702 xsb1 = (xsb1 >> 30) & 2;
703 xsb2 = (xsb2 >> 30) & 2;
704 n0 ^= (xsb0 & ~(n0 << 1));
705 n1 ^= (xsb1 & ~(n1 << 1));
706 n2 ^= (xsb2 & ~(n2 << 1));
707 xsb0 |= 1;
708 xsb1 |= 1;
709 xsb2 |= 1;
710 a0 = __vlibm_TBL_sincos_hi[j0+n0];
711 a1 = __vlibm_TBL_sincos_hi[j1+n1];
712 a2 = __vlibm_TBL_sincos_hi[j2+n2];
713 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
714 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
715 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
716 *py0 = ( a0 + t0 );
717 *py1 = ( a1 + t1 );
718 *py2 = ( a2 + t2 );
719 break;
720
721 case 1:
722 j0 = n0 & 1;
723 j1 = (xsb1 + 0x4000) & 0xffff8000;
724 j2 = (xsb2 + 0x4000) & 0xffff8000;
725 HI(&t1) = j1;
726 HI(&t2) = j2;
727 LO(&t1) = 0;
728 LO(&t2) = 0;
729 x0_or_one[0] = x0;
730 x0_or_one[2] = -x0;
731 y0_or_zero[0] = y0;
732 y0_or_zero[2] = -y0;
733 x1 = (x1 - t1) + y1;
734 x2 = (x2 - t2) + y2;
735 z0 = x0 * x0;
736 z1 = x1 * x1;
737 z2 = x2 * x2;
738 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
739 t1 = z1 * (qq1 + z1 * qq2);
740 t2 = z2 * (qq1 + z2 * qq2);
741 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
742 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
743 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
744 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
745 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
746 xsb1 = (xsb1 >> 30) & 2;
747 xsb2 = (xsb2 >> 30) & 2;
748 n1 ^= (xsb1 & ~(n1 << 1));
749 n2 ^= (xsb2 & ~(n2 << 1));
750 xsb1 |= 1;
751 xsb2 |= 1;
752 a1 = __vlibm_TBL_sincos_hi[j1+n1];
753 a2 = __vlibm_TBL_sincos_hi[j2+n2];
754 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
755 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
756 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
757 *py0 = t0;
758 *py1 = ( a1 + t1 );
759 *py2 = ( a2 + t2 );
760 break;
761
762 case 2:
763 j0 = (xsb0 + 0x4000) & 0xffff8000;
764 j1 = n1 & 1;
765 j2 = (xsb2 + 0x4000) & 0xffff8000;
766 HI(&t0) = j0;
767 HI(&t2) = j2;
768 LO(&t0) = 0;
769 LO(&t2) = 0;
770 x1_or_one[0] = x1;
771 x1_or_one[2] = -x1;
772 x0 = (x0 - t0) + y0;
773 y1_or_zero[0] = y1;
774 y1_or_zero[2] = -y1;
775 x2 = (x2 - t2) + y2;
776 z0 = x0 * x0;
777 z1 = x1 * x1;
778 z2 = x2 * x2;
779 t0 = z0 * (qq1 + z0 * qq2);
780 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
781 t2 = z2 * (qq1 + z2 * qq2);
782 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
783 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
784 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
785 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
786 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
787 xsb0 = (xsb0 >> 30) & 2;
788 xsb2 = (xsb2 >> 30) & 2;
789 n0 ^= (xsb0 & ~(n0 << 1));
790 n2 ^= (xsb2 & ~(n2 << 1));
791 xsb0 |= 1;
792 xsb2 |= 1;
793 a0 = __vlibm_TBL_sincos_hi[j0+n0];
794 a2 = __vlibm_TBL_sincos_hi[j2+n2];
795 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
796 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
797 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
798 *py0 = ( a0 + t0 );
799 *py1 = t1;
800 *py2 = ( a2 + t2 );
801 break;
802
803 case 3:
804 j0 = n0 & 1;
805 j1 = n1 & 1;
806 j2 = (xsb2 + 0x4000) & 0xffff8000;
807 HI(&t2) = j2;
808 LO(&t2) = 0;
809 x0_or_one[0] = x0;
810 x0_or_one[2] = -x0;
811 x1_or_one[0] = x1;
812 x1_or_one[2] = -x1;
813 y0_or_zero[0] = y0;
814 y0_or_zero[2] = -y0;
815 y1_or_zero[0] = y1;
816 y1_or_zero[2] = -y1;
817 x2 = (x2 - t2) + y2;
818 z0 = x0 * x0;
819 z1 = x1 * x1;
820 z2 = x2 * x2;
821 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
822 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
823 t2 = z2 * (qq1 + z2 * qq2);
824 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
825 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
826 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
827 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
828 xsb2 = (xsb2 >> 30) & 2;
829 n2 ^= (xsb2 & ~(n2 << 1));
830 xsb2 |= 1;
831 a2 = __vlibm_TBL_sincos_hi[j2+n2];
832 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
833 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
834 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
835 *py0 = t0;
836 *py1 = t1;
837 *py2 = ( a2 + t2 );
838 break;
839
840 case 4:
841 j0 = (xsb0 + 0x4000) & 0xffff8000;
842 j1 = (xsb1 + 0x4000) & 0xffff8000;
843 j2 = n2 & 1;
844 HI(&t0) = j0;
845 HI(&t1) = j1;
846 LO(&t0) = 0;
847 LO(&t1) = 0;
848 x2_or_one[0] = x2;
849 x2_or_one[2] = -x2;
850 x0 = (x0 - t0) + y0;
851 x1 = (x1 - t1) + y1;
852 y2_or_zero[0] = y2;
853 y2_or_zero[2] = -y2;
854 z0 = x0 * x0;
855 z1 = x1 * x1;
856 z2 = x2 * x2;
857 t0 = z0 * (qq1 + z0 * qq2);
858 t1 = z1 * (qq1 + z1 * qq2);
859 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
860 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
861 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
862 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
863 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
864 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
865 xsb0 = (xsb0 >> 30) & 2;
866 xsb1 = (xsb1 >> 30) & 2;
867 n0 ^= (xsb0 & ~(n0 << 1));
868 n1 ^= (xsb1 & ~(n1 << 1));
869 xsb0 |= 1;
870 xsb1 |= 1;
871 a0 = __vlibm_TBL_sincos_hi[j0+n0];
872 a1 = __vlibm_TBL_sincos_hi[j1+n1];
873 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
874 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
875 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
876 *py0 = ( a0 + t0 );
877 *py1 = ( a1 + t1 );
878 *py2 = t2;
879 break;
880
881 case 5:
882 j0 = n0 & 1;
883 j1 = (xsb1 + 0x4000) & 0xffff8000;
884 j2 = n2 & 1;
885 HI(&t1) = j1;
886 LO(&t1) = 0;
887 x0_or_one[0] = x0;
888 x0_or_one[2] = -x0;
889 x2_or_one[0] = x2;
890 x2_or_one[2] = -x2;
891 y0_or_zero[0] = y0;
892 y0_or_zero[2] = -y0;
893 x1 = (x1 - t1) + y1;
894 y2_or_zero[0] = y2;
895 y2_or_zero[2] = -y2;
896 z0 = x0 * x0;
897 z1 = x1 * x1;
898 z2 = x2 * x2;
899 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
900 t1 = z1 * (qq1 + z1 * qq2);
901 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
902 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
903 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
904 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
905 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
906 xsb1 = (xsb1 >> 30) & 2;
907 n1 ^= (xsb1 & ~(n1 << 1));
908 xsb1 |= 1;
909 a1 = __vlibm_TBL_sincos_hi[j1+n1];
910 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
911 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
912 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
913 *py0 = t0;
914 *py1 = ( a1 + t1 );
915 *py2 = t2;
916 break;
917
918 case 6:
919 j0 = (xsb0 + 0x4000) & 0xffff8000;
920 j1 = n1 & 1;
921 j2 = n2 & 1;
922 HI(&t0) = j0;
923 LO(&t0) = 0;
924 x1_or_one[0] = x1;
925 x1_or_one[2] = -x1;
926 x2_or_one[0] = x2;
927 x2_or_one[2] = -x2;
928 x0 = (x0 - t0) + y0;
929 y1_or_zero[0] = y1;
930 y1_or_zero[2] = -y1;
931 y2_or_zero[0] = y2;
932 y2_or_zero[2] = -y2;
933 z0 = x0 * x0;
934 z1 = x1 * x1;
935 z2 = x2 * x2;
936 t0 = z0 * (qq1 + z0 * qq2);
937 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
938 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
939 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
940 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
941 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
942 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
943 xsb0 = (xsb0 >> 30) & 2;
944 n0 ^= (xsb0 & ~(n0 << 1));
945 xsb0 |= 1;
946 a0 = __vlibm_TBL_sincos_hi[j0+n0];
947 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
948 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
949 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
950 *py0 = ( a0 + t0 );
951 *py1 = t1;
952 *py2 = t2;
953 break;
954
955 case 7:
956 j0 = n0 & 1;
957 j1 = n1 & 1;
958 j2 = n2 & 1;
959 x0_or_one[0] = x0;
960 x0_or_one[2] = -x0;
961 x1_or_one[0] = x1;
962 x1_or_one[2] = -x1;
963 x2_or_one[0] = x2;
964 x2_or_one[2] = -x2;
965 y0_or_zero[0] = y0;
966 y0_or_zero[2] = -y0;
967 y1_or_zero[0] = y1;
968 y1_or_zero[2] = -y1;
969 y2_or_zero[0] = y2;
970 y2_or_zero[2] = -y2;
971 z0 = x0 * x0;
972 z1 = x1 * x1;
973 z2 = x2 * x2;
974 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
975 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
976 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
977 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
978 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
979 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
980 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
981 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
982 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
983 *py0 = t0;
984 *py1 = t1;
985 *py2 = t2;
986 break;
987 }
988
989 x += stridex;
990 y += stridey;
991 i = 0;
992 } while (--n > 0);
993
994 if (i > 0)
995 {
996 double fn0, fn1, a0, a1, w0, w1, y0, y1;
997 double t0, t1, z0, z1;
998 unsigned j0, j1;
999 int n0, n1;
1000
1001 if (i > 1)
1002 {
1003 n1 = (int) (x1 * invpio2 + half[xsb1]);
1004 fn1 = (double) n1;
1005 n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1006 a1 = x1 - fn1 * pio2_1;
1007 w1 = fn1 * pio2_2;
1008 x1 = a1 - w1;
1009 y1 = (a1 - x1) - w1;
1010 a1 = x1;
1011 w1 = fn1 * pio2_3 - y1;
1012 x1 = a1 - w1;
1013 y1 = (a1 - x1) - w1;
1014 a1 = x1;
1015 w1 = fn1 * pio2_3t - y1;
1016 x1 = a1 - w1;
1017 y1 = (a1 - x1) - w1;
1018 xsb1 = HI(&x1);
1019 if ((xsb1 & ~0x80000000) < thresh[n1&1])
1020 {
1021 j1 = n1 & 1;
1022 x1_or_one[0] = x1;
1023 x1_or_one[2] = -x1;
1024 y1_or_zero[0] = y1;
1025 y1_or_zero[2] = -y1;
1026 z1 = x1 * x1;
1027 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1028 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1029 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1030 *py1 = t1;
1031 }
1032 else
1033 {
1034 j1 = (xsb1 + 0x4000) & 0xffff8000;
1035 HI(&t1) = j1;
1036 LO(&t1) = 0;
1037 x1 = (x1 - t1) + y1;
1038 z1 = x1 * x1;
1039 t1 = z1 * (qq1 + z1 * qq2);
1040 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1041 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1042 xsb1 = (xsb1 >> 30) & 2;
1043 n1 ^= (xsb1 & ~(n1 << 1));
1044 xsb1 |= 1;
1045 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1046 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1047 *py1 = ( a1 + t1 );
1048 }
1049 }
1050 n0 = (int) (x0 * invpio2 + half[xsb0]);
1051 fn0 = (double) n0;
1052 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1053 a0 = x0 - fn0 * pio2_1;
1054 w0 = fn0 * pio2_2;
1055 x0 = a0 - w0;
1056 y0 = (a0 - x0) - w0;
1057 a0 = x0;
1058 w0 = fn0 * pio2_3 - y0;
1059 x0 = a0 - w0;
1060 y0 = (a0 - x0) - w0;
1061 a0 = x0;
1062 w0 = fn0 * pio2_3t - y0;
1063 x0 = a0 - w0;
1064 y0 = (a0 - x0) - w0;
1065 xsb0 = HI(&x0);
1066 if ((xsb0 & ~0x80000000) < thresh[n0&1])
1067 {
1068 j0 = n0 & 1;
1069 x0_or_one[0] = x0;
1070 x0_or_one[2] = -x0;
1071 y0_or_zero[0] = y0;
1072 y0_or_zero[2] = -y0;
1073 z0 = x0 * x0;
1074 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1075 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1076 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1077 *py0 = t0;
1078 }
1079 else
1080 {
1081 j0 = (xsb0 + 0x4000) & 0xffff8000;
1082 HI(&t0) = j0;
1083 LO(&t0) = 0;
1084 x0 = (x0 - t0) + y0;
1085 z0 = x0 * x0;
1086 t0 = z0 * (qq1 + z0 * qq2);
1087 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1088 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1089 xsb0 = (xsb0 >> 30) & 2;
1090 n0 ^= (xsb0 & ~(n0 << 1));
1091 xsb0 |= 1;
1092 a0 = __vlibm_TBL_sincos_hi[j0+n0];
1093 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1094 *py0 = ( a0 + t0 );
1095 }
1096 }
1097
1098 if (biguns)
1099 __vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
1100 }
1101