/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #ifdef _LITTLE_ENDIAN #define HI(x) *(1+(int*)x) #define LO(x) *(unsigned*)x #else #define HI(x) *(int*)x #define LO(x) *(1+(unsigned*)x) #endif #ifdef __RESTRICT #define restrict _Restrict #else #define restrict #endif /* * vcos.1.c * * Vector cosine function. Just slight modifications to vsin.8.c, mainly * in the primary range part. * * Modification to primary range processing. If an argument that does not * fall in the primary range is encountered, then processing is continued * in the medium range. * */ extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; static const double half[2] = { 0.5, -0.5 }, one = 1.0, invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */ pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */ pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */ pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */ pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */ pp1 = -1.666666666605760465276263943134982554676e-0001, pp2 = 8.333261209690963126718376566146180944442e-0003, qq1 = -4.999999999977710986407023955908711557870e-0001, qq2 = 4.166654863857219350645055881018842089580e-0002, poly1[2]= { -1.666666666666629669805215138920301589656e-0001, -4.999999999999931701464060878888294524481e-0001 }, poly2[2]= { 8.333333332390951295683993455280336376663e-0003, 4.166666666394861917535640593963708222319e-0002 }, poly3[2]= { -1.984126237997976692791551778230098403960e-0004, -1.388888552656142867832756687736851681462e-0003 }, poly4[2]= { 2.753403624854277237649987622848330351110e-0006, 2.478519423681460796618128289454530524759e-0005 }; static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; /* Don't __ the following; acomp will handle it */ extern double fabs(double); extern void __vlibm_vcos_big(int, double *, int, double *, int, int); /* * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n. * * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06. * Argument reduction is done here for elts pi/4 < arg < 1.647e+06. * * elts < 2^-27 use the approximation 1.0 ~ cos(x). */ void __vcos(int n, double * restrict x, int stridex, double * restrict y, int stridey) { double x0_or_one[4], x1_or_one[4], x2_or_one[4]; double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave; unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2; int i, biguns, nsave, sxsave, sysave; volatile int v __unused; nsave = n; xsave = x; sxsave = stridex; ysave = y; sysave = stridey; biguns = 0; do /* MAIN LOOP */ { /* Gotos here so _break_ exits MAIN LOOP. */ LOOP0: /* Find first arg in right range. */ xsb0 = HI(x); /* get most significant word */ hx0 = xsb0 & ~0x80000000; /* mask off sign bit */ if (hx0 > 0x3fe921fb) { /* Too big: arg reduction needed, so leave for second part */ biguns = 1; goto MEDIUM; } if (hx0 < 0x3e400000) { /* Too small. cos x ~ 1. */ v = *x; *y = 1.0; x += stridex; y += stridey; i = 0; if (--n <= 0) break; goto LOOP0; } x0 = *x; py0 = y; x += stridex; y += stridey; i = 1; if (--n <= 0) break; LOOP1: /* Get second arg, same as above. */ xsb1 = HI(x); hx1 = xsb1 & ~0x80000000; if (hx1 > 0x3fe921fb) { biguns = 2; goto MEDIUM; } if (hx1 < 0x3e400000) { v = *x; *y = 1.0; x += stridex; y += stridey; i = 1; if (--n <= 0) break; goto LOOP1; } x1 = *x; py1 = y; x += stridex; y += stridey; i = 2; if (--n <= 0) break; LOOP2: /* Get third arg, same as above. */ xsb2 = HI(x); hx2 = xsb2 & ~0x80000000; if (hx2 > 0x3fe921fb) { biguns = 3; goto MEDIUM; } if (hx2 < 0x3e400000) { v = *x; *y = 1.0; x += stridex; y += stridey; i = 2; if (--n <= 0) break; goto LOOP2; } x2 = *x; py2 = y; /* * 0x3fc40000 = 5/32 ~ 0.15625 * Get msb after subtraction. Will be 1 only if * hx0 - 5/32 is negative. */ i = (hx0 - 0x3fc40000) >> 31; i |= ((hx1 - 0x3fc40000) >> 30) & 2; i |= ((hx2 - 0x3fc40000) >> 29) & 4; switch (i) { double a0, a1, a2, w0, w1, w2; double t0, t1, t2, z0, z1, z2; unsigned j0, j1, j2; case 0: /* All are > 5/32 */ j0 = (xsb0 + 0x4000) & 0xffff8000; j1 = (xsb1 + 0x4000) & 0xffff8000; j2 = (xsb2 + 0x4000) & 0xffff8000; HI(&t0) = j0; HI(&t1) = j1; HI(&t2) = j2; LO(&t0) = 0; LO(&t1) = 0; LO(&t2) = 0; x0 -= t0; x1 -= t1; x2 -= t2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (qq1 + z0 * qq2); t1 = z1 * (qq1 + z1 * qq2); t2 = z2 * (qq1 + z2 * qq2); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; xsb1 = (xsb1 >> 30) & 2; xsb2 = (xsb2 >> 30) & 2; a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ a1 = __vlibm_TBL_sincos_hi[j1+1]; a2 = __vlibm_TBL_sincos_hi[j2+1]; /* cos_lo(t) sin_hi(t) */ t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); *py0 = a0 + t0; *py1 = a1 + t1; *py2 = a2 + t2; break; case 1: j1 = (xsb1 + 0x4000) & 0xffff8000; j2 = (xsb2 + 0x4000) & 0xffff8000; HI(&t1) = j1; HI(&t2) = j2; LO(&t1) = 0; LO(&t2) = 0; x1 -= t1; x2 -= t2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (poly3[1] + z0 * poly4[1]); t1 = z1 * (qq1 + z1 * qq2); t2 = z2 * (qq1 + z2 * qq2); t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb1 = (xsb1 >> 30) & 2; xsb2 = (xsb2 >> 30) & 2; a1 = __vlibm_TBL_sincos_hi[j1+1]; a2 = __vlibm_TBL_sincos_hi[j2+1]; t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); *py0 = one + t0; *py1 = a1 + t1; *py2 = a2 + t2; break; case 2: j0 = (xsb0 + 0x4000) & 0xffff8000; j2 = (xsb2 + 0x4000) & 0xffff8000; HI(&t0) = j0; HI(&t2) = j2; LO(&t0) = 0; LO(&t2) = 0; x0 -= t0; x2 -= t2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (qq1 + z0 * qq2); t1 = z1 * (poly3[1] + z1 * poly4[1]); t2 = z2 * (qq1 + z2 * qq2); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; xsb2 = (xsb2 >> 30) & 2; a0 = __vlibm_TBL_sincos_hi[j0+1]; a2 = __vlibm_TBL_sincos_hi[j2+1]; t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); *py0 = a0 + t0; *py1 = one + t1; *py2 = a2 + t2; break; case 3: j2 = (xsb2 + 0x4000) & 0xffff8000; HI(&t2) = j2; LO(&t2) = 0; x2 -= t2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (poly3[1] + z0 * poly4[1]); t1 = z1 * (poly3[1] + z1 * poly4[1]); t2 = z2 * (qq1 + z2 * qq2); t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb2 = (xsb2 >> 30) & 2; a2 = __vlibm_TBL_sincos_hi[j2+1]; t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); *py0 = one + t0; *py1 = one + t1; *py2 = a2 + t2; break; case 4: j0 = (xsb0 + 0x4000) & 0xffff8000; j1 = (xsb1 + 0x4000) & 0xffff8000; HI(&t0) = j0; HI(&t1) = j1; LO(&t0) = 0; LO(&t1) = 0; x0 -= t0; x1 -= t1; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (qq1 + z0 * qq2); t1 = z1 * (qq1 + z1 * qq2); t2 = z2 * (poly3[1] + z2 * poly4[1]); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; xsb1 = (xsb1 >> 30) & 2; a0 = __vlibm_TBL_sincos_hi[j0+1]; a1 = __vlibm_TBL_sincos_hi[j1+1]; t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); *py0 = a0 + t0; *py1 = a1 + t1; *py2 = one + t2; break; case 5: j1 = (xsb1 + 0x4000) & 0xffff8000; HI(&t1) = j1; LO(&t1) = 0; x1 -= t1; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (poly3[1] + z0 * poly4[1]); t1 = z1 * (qq1 + z1 * qq2); t2 = z2 * (poly3[1] + z2 * poly4[1]); t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb1 = (xsb1 >> 30) & 2; a1 = __vlibm_TBL_sincos_hi[j1+1]; t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); *py0 = one + t0; *py1 = a1 + t1; *py2 = one + t2; break; case 6: j0 = (xsb0 + 0x4000) & 0xffff8000; HI(&t0) = j0; LO(&t0) = 0; x0 -= t0; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (qq1 + z0 * qq2); t1 = z1 * (poly3[1] + z1 * poly4[1]); t2 = z2 * (poly3[1] + z2 * poly4[1]); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; a0 = __vlibm_TBL_sincos_hi[j0+1]; t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); *py0 = a0 + t0; *py1 = one + t1; *py2 = one + t2; break; case 7: /* All are < 5/32 */ z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (poly3[1] + z0 * poly4[1]); t1 = z1 * (poly3[1] + z1 * poly4[1]); t2 = z2 * (poly3[1] + z2 * poly4[1]); t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); *py0 = one + t0; *py1 = one + t1; *py2 = one + t2; break; } x += stridex; y += stridey; i = 0; } while (--n > 0); /* END MAIN LOOP */ /* * CLEAN UP last 0, 1, or 2 elts. */ if (i > 0) /* Clean up elts at tail. i < 3. */ { double a0, a1, w0, w1; double t0, t1, z0, z1; unsigned j0, j1; if (i > 1) { if (hx1 < 0x3fc40000) { z1 = x1 * x1; t1 = z1 * (poly3[1] + z1 * poly4[1]); t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); t1 = one + t1; *py1 = t1; } else { j1 = (xsb1 + 0x4000) & 0xffff8000; HI(&t1) = j1; LO(&t1) = 0; x1 -= t1; z1 = x1 * x1; t1 = z1 * (qq1 + z1 * qq2); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb1 = (xsb1 >> 30) & 2; a1 = __vlibm_TBL_sincos_hi[j1+1]; t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); *py1 = a1 + t1; } } if (hx0 < 0x3fc40000) { z0 = x0 * x0; t0 = z0 * (poly3[1] + z0 * poly4[1]); t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); t0 = one + t0; *py0 = t0; } else { j0 = (xsb0 + 0x4000) & 0xffff8000; HI(&t0) = j0; LO(&t0) = 0; x0 -= t0; z0 = x0 * x0; t0 = z0 * (qq1 + z0 * qq2); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; a0 = __vlibm_TBL_sincos_hi[j0+1]; t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); *py0 = a0 + t0; } } /* END CLEAN UP */ return; /* * Take care of BIGUNS. * * We have jumped here in the middle of processing after having * encountered a medium range argument. Therefore things are in a * bit of a tizzy. */ MEDIUM: x0_or_one[1] = 1.0; x1_or_one[1] = 1.0; x2_or_one[1] = 1.0; x0_or_one[3] = -1.0; x1_or_one[3] = -1.0; x2_or_one[3] = -1.0; y0_or_zero[1] = 0.0; y1_or_zero[1] = 0.0; y2_or_zero[1] = 0.0; y0_or_zero[3] = 0.0; y1_or_zero[3] = 0.0; y2_or_zero[3] = 0.0; if (biguns == 3) { biguns = 0; xsb0 = xsb0 >> 31; xsb1 = xsb1 >> 31; goto loop2; } else if (biguns == 2) { xsb0 = xsb0 >> 31; biguns = 0; goto loop1; } biguns = 0; do { double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; unsigned hx; int n0, n1, n2; /* * Find 3 more to work on: Not already done, not too big. */ loop0: hx = HI(x); xsb0 = hx >> 31; hx &= ~0x80000000; if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */ { if (hx >= 0x7ff00000) /* Inf or NaN */ { x0 = *x; *y = x0 - x0; } else biguns = 1; x += stridex; y += stridey; i = 0; if (--n <= 0) break; goto loop0; } x0 = *x; py0 = y; x += stridex; y += stridey; i = 1; if (--n <= 0) break; loop1: hx = HI(x); xsb1 = hx >> 31; hx &= ~0x80000000; if (hx > 0x413921fb) { if (hx >= 0x7ff00000) { x1 = *x; *y = x1 - x1; } else biguns = 1; x += stridex; y += stridey; i = 1; if (--n <= 0) break; goto loop1; } x1 = *x; py1 = y; x += stridex; y += stridey; i = 2; if (--n <= 0) break; loop2: hx = HI(x); xsb2 = hx >> 31; hx &= ~0x80000000; if (hx > 0x413921fb) { if (hx >= 0x7ff00000) { x2 = *x; *y = x2 - x2; } else biguns = 1; x += stridex; y += stridey; i = 2; if (--n <= 0) break; goto loop2; } x2 = *x; py2 = y; n0 = (int) (x0 * invpio2 + half[xsb0]); n1 = (int) (x1 * invpio2 + half[xsb1]); n2 = (int) (x2 * invpio2 + half[xsb2]); fn0 = (double) n0; fn1 = (double) n1; fn2 = (double) n2; n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ n1 = (n1 + 1) & 3; n2 = (n2 + 1) & 3; a0 = x0 - fn0 * pio2_1; a1 = x1 - fn1 * pio2_1; a2 = x2 - fn2 * pio2_1; w0 = fn0 * pio2_2; w1 = fn1 * pio2_2; w2 = fn2 * pio2_2; x0 = a0 - w0; x1 = a1 - w1; x2 = a2 - w2; y0 = (a0 - x0) - w0; y1 = (a1 - x1) - w1; y2 = (a2 - x2) - w2; a0 = x0; a1 = x1; a2 = x2; w0 = fn0 * pio2_3 - y0; w1 = fn1 * pio2_3 - y1; w2 = fn2 * pio2_3 - y2; x0 = a0 - w0; x1 = a1 - w1; x2 = a2 - w2; y0 = (a0 - x0) - w0; y1 = (a1 - x1) - w1; y2 = (a2 - x2) - w2; a0 = x0; a1 = x1; a2 = x2; w0 = fn0 * pio2_3t - y0; w1 = fn1 * pio2_3t - y1; w2 = fn2 * pio2_3t - y2; x0 = a0 - w0; x1 = a1 - w1; x2 = a2 - w2; y0 = (a0 - x0) - w0; y1 = (a1 - x1) - w1; y2 = (a2 - x2) - w2; xsb0 = HI(&x0); i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31; xsb1 = HI(&x1); i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2; xsb2 = HI(&x2); i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4; switch (i) { double t0, t1, t2, z0, z1, z2; unsigned j0, j1, j2; case 0: j0 = (xsb0 + 0x4000) & 0xffff8000; j1 = (xsb1 + 0x4000) & 0xffff8000; j2 = (xsb2 + 0x4000) & 0xffff8000; HI(&t0) = j0; HI(&t1) = j1; HI(&t2) = j2; LO(&t0) = 0; LO(&t1) = 0; LO(&t2) = 0; x0 = (x0 - t0) + y0; x1 = (x1 - t1) + y1; x2 = (x2 - t2) + y2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (qq1 + z0 * qq2); t1 = z1 * (qq1 + z1 * qq2); t2 = z2 * (qq1 + z2 * qq2); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; xsb1 = (xsb1 >> 30) & 2; xsb2 = (xsb2 >> 30) & 2; n0 ^= (xsb0 & ~(n0 << 1)); n1 ^= (xsb1 & ~(n1 << 1)); n2 ^= (xsb2 & ~(n2 << 1)); xsb0 |= 1; xsb1 |= 1; xsb2 |= 1; a0 = __vlibm_TBL_sincos_hi[j0+n0]; a1 = __vlibm_TBL_sincos_hi[j1+n1]; a2 = __vlibm_TBL_sincos_hi[j2+n2]; t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; *py0 = ( a0 + t0 ); *py1 = ( a1 + t1 ); *py2 = ( a2 + t2 ); break; case 1: j0 = n0 & 1; j1 = (xsb1 + 0x4000) & 0xffff8000; j2 = (xsb2 + 0x4000) & 0xffff8000; HI(&t1) = j1; HI(&t2) = j2; LO(&t1) = 0; LO(&t2) = 0; x0_or_one[0] = x0; x0_or_one[2] = -x0; y0_or_zero[0] = y0; y0_or_zero[2] = -y0; x1 = (x1 - t1) + y1; x2 = (x2 - t2) + y2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (poly3[j0] + z0 * poly4[j0]); t1 = z1 * (qq1 + z1 * qq2); t2 = z2 * (qq1 + z2 * qq2); t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb1 = (xsb1 >> 30) & 2; xsb2 = (xsb2 >> 30) & 2; n1 ^= (xsb1 & ~(n1 << 1)); n2 ^= (xsb2 & ~(n2 << 1)); xsb1 |= 1; xsb2 |= 1; a1 = __vlibm_TBL_sincos_hi[j1+n1]; a2 = __vlibm_TBL_sincos_hi[j2+n2]; t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; *py0 = t0; *py1 = ( a1 + t1 ); *py2 = ( a2 + t2 ); break; case 2: j0 = (xsb0 + 0x4000) & 0xffff8000; j1 = n1 & 1; j2 = (xsb2 + 0x4000) & 0xffff8000; HI(&t0) = j0; HI(&t2) = j2; LO(&t0) = 0; LO(&t2) = 0; x1_or_one[0] = x1; x1_or_one[2] = -x1; x0 = (x0 - t0) + y0; y1_or_zero[0] = y1; y1_or_zero[2] = -y1; x2 = (x2 - t2) + y2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (qq1 + z0 * qq2); t1 = z1 * (poly3[j1] + z1 * poly4[j1]); t2 = z2 * (qq1 + z2 * qq2); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; xsb2 = (xsb2 >> 30) & 2; n0 ^= (xsb0 & ~(n0 << 1)); n2 ^= (xsb2 & ~(n2 << 1)); xsb0 |= 1; xsb2 |= 1; a0 = __vlibm_TBL_sincos_hi[j0+n0]; a2 = __vlibm_TBL_sincos_hi[j2+n2]; t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; *py0 = ( a0 + t0 ); *py1 = t1; *py2 = ( a2 + t2 ); break; case 3: j0 = n0 & 1; j1 = n1 & 1; j2 = (xsb2 + 0x4000) & 0xffff8000; HI(&t2) = j2; LO(&t2) = 0; x0_or_one[0] = x0; x0_or_one[2] = -x0; x1_or_one[0] = x1; x1_or_one[2] = -x1; y0_or_zero[0] = y0; y0_or_zero[2] = -y0; y1_or_zero[0] = y1; y1_or_zero[2] = -y1; x2 = (x2 - t2) + y2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (poly3[j0] + z0 * poly4[j0]); t1 = z1 * (poly3[j1] + z1 * poly4[j1]); t2 = z2 * (qq1 + z2 * qq2); t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb2 = (xsb2 >> 30) & 2; n2 ^= (xsb2 & ~(n2 << 1)); xsb2 |= 1; a2 = __vlibm_TBL_sincos_hi[j2+n2]; t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; *py0 = t0; *py1 = t1; *py2 = ( a2 + t2 ); break; case 4: j0 = (xsb0 + 0x4000) & 0xffff8000; j1 = (xsb1 + 0x4000) & 0xffff8000; j2 = n2 & 1; HI(&t0) = j0; HI(&t1) = j1; LO(&t0) = 0; LO(&t1) = 0; x2_or_one[0] = x2; x2_or_one[2] = -x2; x0 = (x0 - t0) + y0; x1 = (x1 - t1) + y1; y2_or_zero[0] = y2; y2_or_zero[2] = -y2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (qq1 + z0 * qq2); t1 = z1 * (qq1 + z1 * qq2); t2 = z2 * (poly3[j2] + z2 * poly4[j2]); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; xsb1 = (xsb1 >> 30) & 2; n0 ^= (xsb0 & ~(n0 << 1)); n1 ^= (xsb1 & ~(n1 << 1)); xsb0 |= 1; xsb1 |= 1; a0 = __vlibm_TBL_sincos_hi[j0+n0]; a1 = __vlibm_TBL_sincos_hi[j1+n1]; t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); *py0 = ( a0 + t0 ); *py1 = ( a1 + t1 ); *py2 = t2; break; case 5: j0 = n0 & 1; j1 = (xsb1 + 0x4000) & 0xffff8000; j2 = n2 & 1; HI(&t1) = j1; LO(&t1) = 0; x0_or_one[0] = x0; x0_or_one[2] = -x0; x2_or_one[0] = x2; x2_or_one[2] = -x2; y0_or_zero[0] = y0; y0_or_zero[2] = -y0; x1 = (x1 - t1) + y1; y2_or_zero[0] = y2; y2_or_zero[2] = -y2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (poly3[j0] + z0 * poly4[j0]); t1 = z1 * (qq1 + z1 * qq2); t2 = z2 * (poly3[j2] + z2 * poly4[j2]); t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb1 = (xsb1 >> 30) & 2; n1 ^= (xsb1 & ~(n1 << 1)); xsb1 |= 1; a1 = __vlibm_TBL_sincos_hi[j1+n1]; t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); *py0 = t0; *py1 = ( a1 + t1 ); *py2 = t2; break; case 6: j0 = (xsb0 + 0x4000) & 0xffff8000; j1 = n1 & 1; j2 = n2 & 1; HI(&t0) = j0; LO(&t0) = 0; x1_or_one[0] = x1; x1_or_one[2] = -x1; x2_or_one[0] = x2; x2_or_one[2] = -x2; x0 = (x0 - t0) + y0; y1_or_zero[0] = y1; y1_or_zero[2] = -y1; y2_or_zero[0] = y2; y2_or_zero[2] = -y2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (qq1 + z0 * qq2); t1 = z1 * (poly3[j1] + z1 * poly4[j1]); t2 = z2 * (poly3[j2] + z2 * poly4[j2]); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; n0 ^= (xsb0 & ~(n0 << 1)); xsb0 |= 1; a0 = __vlibm_TBL_sincos_hi[j0+n0]; t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); *py0 = ( a0 + t0 ); *py1 = t1; *py2 = t2; break; case 7: j0 = n0 & 1; j1 = n1 & 1; j2 = n2 & 1; x0_or_one[0] = x0; x0_or_one[2] = -x0; x1_or_one[0] = x1; x1_or_one[2] = -x1; x2_or_one[0] = x2; x2_or_one[2] = -x2; y0_or_zero[0] = y0; y0_or_zero[2] = -y0; y1_or_zero[0] = y1; y1_or_zero[2] = -y1; y2_or_zero[0] = y2; y2_or_zero[2] = -y2; z0 = x0 * x0; z1 = x1 * x1; z2 = x2 * x2; t0 = z0 * (poly3[j0] + z0 * poly4[j0]); t1 = z1 * (poly3[j1] + z1 * poly4[j1]); t2 = z2 * (poly3[j2] + z2 * poly4[j2]); t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); *py0 = t0; *py1 = t1; *py2 = t2; break; } x += stridex; y += stridey; i = 0; } while (--n > 0); if (i > 0) { double fn0, fn1, a0, a1, w0, w1, y0, y1; double t0, t1, z0, z1; unsigned j0, j1; int n0, n1; if (i > 1) { n1 = (int) (x1 * invpio2 + half[xsb1]); fn1 = (double) n1; n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ a1 = x1 - fn1 * pio2_1; w1 = fn1 * pio2_2; x1 = a1 - w1; y1 = (a1 - x1) - w1; a1 = x1; w1 = fn1 * pio2_3 - y1; x1 = a1 - w1; y1 = (a1 - x1) - w1; a1 = x1; w1 = fn1 * pio2_3t - y1; x1 = a1 - w1; y1 = (a1 - x1) - w1; xsb1 = HI(&x1); if ((xsb1 & ~0x80000000) < thresh[n1&1]) { j1 = n1 & 1; x1_or_one[0] = x1; x1_or_one[2] = -x1; y1_or_zero[0] = y1; y1_or_zero[2] = -y1; z1 = x1 * x1; t1 = z1 * (poly3[j1] + z1 * poly4[j1]); t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); *py1 = t1; } else { j1 = (xsb1 + 0x4000) & 0xffff8000; HI(&t1) = j1; LO(&t1) = 0; x1 = (x1 - t1) + y1; z1 = x1 * x1; t1 = z1 * (qq1 + z1 * qq2); w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb1 = (xsb1 >> 30) & 2; n1 ^= (xsb1 & ~(n1 << 1)); xsb1 |= 1; a1 = __vlibm_TBL_sincos_hi[j1+n1]; t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; *py1 = ( a1 + t1 ); } } n0 = (int) (x0 * invpio2 + half[xsb0]); fn0 = (double) n0; n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ a0 = x0 - fn0 * pio2_1; w0 = fn0 * pio2_2; x0 = a0 - w0; y0 = (a0 - x0) - w0; a0 = x0; w0 = fn0 * pio2_3 - y0; x0 = a0 - w0; y0 = (a0 - x0) - w0; a0 = x0; w0 = fn0 * pio2_3t - y0; x0 = a0 - w0; y0 = (a0 - x0) - w0; xsb0 = HI(&x0); if ((xsb0 & ~0x80000000) < thresh[n0&1]) { j0 = n0 & 1; x0_or_one[0] = x0; x0_or_one[2] = -x0; y0_or_zero[0] = y0; y0_or_zero[2] = -y0; z0 = x0 * x0; t0 = z0 * (poly3[j0] + z0 * poly4[j0]); t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); *py0 = t0; } else { j0 = (xsb0 + 0x4000) & 0xffff8000; HI(&t0) = j0; LO(&t0) = 0; x0 = (x0 - t0) + y0; z0 = x0 * x0; t0 = z0 * (qq1 + z0 * qq2); w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; xsb0 = (xsb0 >> 30) & 2; n0 ^= (xsb0 & ~(n0 << 1)); xsb0 |= 1; a0 = __vlibm_TBL_sincos_hi[j0+n0]; t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; *py0 = ( a0 + t0 ); } } if (biguns) __vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb); }