125c28e83SPiotr Jasiukajtis /* 225c28e83SPiotr Jasiukajtis * CDDL HEADER START 325c28e83SPiotr Jasiukajtis * 425c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 525c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 625c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 725c28e83SPiotr Jasiukajtis * 825c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 925c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 1025c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 1125c28e83SPiotr Jasiukajtis * and limitations under the License. 1225c28e83SPiotr Jasiukajtis * 1325c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 1425c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1525c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 1625c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 1725c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 1825c28e83SPiotr Jasiukajtis * 1925c28e83SPiotr Jasiukajtis * CDDL HEADER END 2025c28e83SPiotr Jasiukajtis */ 2125c28e83SPiotr Jasiukajtis 2225c28e83SPiotr Jasiukajtis /* 2325c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 2425c28e83SPiotr Jasiukajtis */ 2525c28e83SPiotr Jasiukajtis /* 2625c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 2725c28e83SPiotr Jasiukajtis * Use is subject to license terms. 2825c28e83SPiotr Jasiukajtis */ 2925c28e83SPiotr Jasiukajtis 3025c28e83SPiotr Jasiukajtis #include <sys/isa_defs.h> 3125c28e83SPiotr Jasiukajtis #include <sys/ccompile.h> 3225c28e83SPiotr Jasiukajtis 3325c28e83SPiotr Jasiukajtis #ifdef _LITTLE_ENDIAN 3425c28e83SPiotr Jasiukajtis #define HI(x) *(1+(int*)x) 3525c28e83SPiotr Jasiukajtis #define LO(x) *(unsigned*)x 3625c28e83SPiotr Jasiukajtis #else 3725c28e83SPiotr Jasiukajtis #define HI(x) *(int*)x 3825c28e83SPiotr Jasiukajtis #define LO(x) *(1+(unsigned*)x) 3925c28e83SPiotr Jasiukajtis #endif 4025c28e83SPiotr Jasiukajtis 4125c28e83SPiotr Jasiukajtis #ifdef __RESTRICT 4225c28e83SPiotr Jasiukajtis #define restrict _Restrict 4325c28e83SPiotr Jasiukajtis #else 4425c28e83SPiotr Jasiukajtis #define restrict 4525c28e83SPiotr Jasiukajtis #endif 4625c28e83SPiotr Jasiukajtis 4725c28e83SPiotr Jasiukajtis /* 4825c28e83SPiotr Jasiukajtis * vcos.1.c 4925c28e83SPiotr Jasiukajtis * 5025c28e83SPiotr Jasiukajtis * Vector cosine function. Just slight modifications to vsin.8.c, mainly 5125c28e83SPiotr Jasiukajtis * in the primary range part. 5225c28e83SPiotr Jasiukajtis * 5325c28e83SPiotr Jasiukajtis * Modification to primary range processing. If an argument that does not 5425c28e83SPiotr Jasiukajtis * fall in the primary range is encountered, then processing is continued 5525c28e83SPiotr Jasiukajtis * in the medium range. 5625c28e83SPiotr Jasiukajtis * 5725c28e83SPiotr Jasiukajtis */ 5825c28e83SPiotr Jasiukajtis 5925c28e83SPiotr Jasiukajtis extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; 6025c28e83SPiotr Jasiukajtis 6125c28e83SPiotr Jasiukajtis static const double 6225c28e83SPiotr Jasiukajtis half[2] = { 0.5, -0.5 }, 6325c28e83SPiotr Jasiukajtis one = 1.0, 6425c28e83SPiotr Jasiukajtis invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */ 6525c28e83SPiotr Jasiukajtis pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */ 6625c28e83SPiotr Jasiukajtis pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */ 6725c28e83SPiotr Jasiukajtis pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */ 6825c28e83SPiotr Jasiukajtis pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */ 6925c28e83SPiotr Jasiukajtis pp1 = -1.666666666605760465276263943134982554676e-0001, 7025c28e83SPiotr Jasiukajtis pp2 = 8.333261209690963126718376566146180944442e-0003, 7125c28e83SPiotr Jasiukajtis qq1 = -4.999999999977710986407023955908711557870e-0001, 7225c28e83SPiotr Jasiukajtis qq2 = 4.166654863857219350645055881018842089580e-0002, 7325c28e83SPiotr Jasiukajtis poly1[2]= { -1.666666666666629669805215138920301589656e-0001, 7425c28e83SPiotr Jasiukajtis -4.999999999999931701464060878888294524481e-0001 }, 7525c28e83SPiotr Jasiukajtis poly2[2]= { 8.333333332390951295683993455280336376663e-0003, 7625c28e83SPiotr Jasiukajtis 4.166666666394861917535640593963708222319e-0002 }, 7725c28e83SPiotr Jasiukajtis poly3[2]= { -1.984126237997976692791551778230098403960e-0004, 7825c28e83SPiotr Jasiukajtis -1.388888552656142867832756687736851681462e-0003 }, 7925c28e83SPiotr Jasiukajtis poly4[2]= { 2.753403624854277237649987622848330351110e-0006, 8025c28e83SPiotr Jasiukajtis 2.478519423681460796618128289454530524759e-0005 }; 8125c28e83SPiotr Jasiukajtis 8225c28e83SPiotr Jasiukajtis static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; 8325c28e83SPiotr Jasiukajtis 8425c28e83SPiotr Jasiukajtis /* Don't __ the following; acomp will handle it */ 8525c28e83SPiotr Jasiukajtis extern double fabs(double); 8625c28e83SPiotr Jasiukajtis extern void __vlibm_vcos_big(int, double *, int, double *, int, int); 8725c28e83SPiotr Jasiukajtis 8825c28e83SPiotr Jasiukajtis /* 8925c28e83SPiotr Jasiukajtis * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n. 9025c28e83SPiotr Jasiukajtis * 9125c28e83SPiotr Jasiukajtis * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06. 9225c28e83SPiotr Jasiukajtis * Argument reduction is done here for elts pi/4 < arg < 1.647e+06. 9325c28e83SPiotr Jasiukajtis * 9425c28e83SPiotr Jasiukajtis * elts < 2^-27 use the approximation 1.0 ~ cos(x). 9525c28e83SPiotr Jasiukajtis */ 9625c28e83SPiotr Jasiukajtis void 9725c28e83SPiotr Jasiukajtis __vcos(int n, double * restrict x, int stridex, double * restrict y, 9825c28e83SPiotr Jasiukajtis int stridey) 9925c28e83SPiotr Jasiukajtis { 10025c28e83SPiotr Jasiukajtis double x0_or_one[4], x1_or_one[4], x2_or_one[4]; 10125c28e83SPiotr Jasiukajtis double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; 10225c28e83SPiotr Jasiukajtis double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave; 10325c28e83SPiotr Jasiukajtis unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2; 10425c28e83SPiotr Jasiukajtis int i, biguns, nsave, sxsave, sysave; 105*c702eacaSToomas Soome volatile int v __unused; 10625c28e83SPiotr Jasiukajtis nsave = n; 10725c28e83SPiotr Jasiukajtis xsave = x; 10825c28e83SPiotr Jasiukajtis sxsave = stridex; 10925c28e83SPiotr Jasiukajtis ysave = y; 11025c28e83SPiotr Jasiukajtis sysave = stridey; 11125c28e83SPiotr Jasiukajtis biguns = 0; 11225c28e83SPiotr Jasiukajtis 11325c28e83SPiotr Jasiukajtis do /* MAIN LOOP */ 11425c28e83SPiotr Jasiukajtis { 11525c28e83SPiotr Jasiukajtis /* Gotos here so _break_ exits MAIN LOOP. */ 11625c28e83SPiotr Jasiukajtis LOOP0: /* Find first arg in right range. */ 11725c28e83SPiotr Jasiukajtis xsb0 = HI(x); /* get most significant word */ 11825c28e83SPiotr Jasiukajtis hx0 = xsb0 & ~0x80000000; /* mask off sign bit */ 11925c28e83SPiotr Jasiukajtis if (hx0 > 0x3fe921fb) { 12025c28e83SPiotr Jasiukajtis /* Too big: arg reduction needed, so leave for second part */ 12125c28e83SPiotr Jasiukajtis biguns = 1; 12225c28e83SPiotr Jasiukajtis goto MEDIUM; 12325c28e83SPiotr Jasiukajtis } 12425c28e83SPiotr Jasiukajtis if (hx0 < 0x3e400000) { 12525c28e83SPiotr Jasiukajtis /* Too small. cos x ~ 1. */ 12625c28e83SPiotr Jasiukajtis v = *x; 12725c28e83SPiotr Jasiukajtis *y = 1.0; 12825c28e83SPiotr Jasiukajtis x += stridex; 12925c28e83SPiotr Jasiukajtis y += stridey; 13025c28e83SPiotr Jasiukajtis i = 0; 13125c28e83SPiotr Jasiukajtis if (--n <= 0) 13225c28e83SPiotr Jasiukajtis break; 13325c28e83SPiotr Jasiukajtis goto LOOP0; 13425c28e83SPiotr Jasiukajtis } 13525c28e83SPiotr Jasiukajtis x0 = *x; 13625c28e83SPiotr Jasiukajtis py0 = y; 13725c28e83SPiotr Jasiukajtis x += stridex; 13825c28e83SPiotr Jasiukajtis y += stridey; 13925c28e83SPiotr Jasiukajtis i = 1; 14025c28e83SPiotr Jasiukajtis if (--n <= 0) 14125c28e83SPiotr Jasiukajtis break; 14225c28e83SPiotr Jasiukajtis 14325c28e83SPiotr Jasiukajtis LOOP1: /* Get second arg, same as above. */ 14425c28e83SPiotr Jasiukajtis xsb1 = HI(x); 14525c28e83SPiotr Jasiukajtis hx1 = xsb1 & ~0x80000000; 14625c28e83SPiotr Jasiukajtis if (hx1 > 0x3fe921fb) 14725c28e83SPiotr Jasiukajtis { 14825c28e83SPiotr Jasiukajtis biguns = 2; 14925c28e83SPiotr Jasiukajtis goto MEDIUM; 15025c28e83SPiotr Jasiukajtis } 15125c28e83SPiotr Jasiukajtis if (hx1 < 0x3e400000) 15225c28e83SPiotr Jasiukajtis { 15325c28e83SPiotr Jasiukajtis v = *x; 15425c28e83SPiotr Jasiukajtis *y = 1.0; 15525c28e83SPiotr Jasiukajtis x += stridex; 15625c28e83SPiotr Jasiukajtis y += stridey; 15725c28e83SPiotr Jasiukajtis i = 1; 15825c28e83SPiotr Jasiukajtis if (--n <= 0) 15925c28e83SPiotr Jasiukajtis break; 16025c28e83SPiotr Jasiukajtis goto LOOP1; 16125c28e83SPiotr Jasiukajtis } 16225c28e83SPiotr Jasiukajtis x1 = *x; 16325c28e83SPiotr Jasiukajtis py1 = y; 16425c28e83SPiotr Jasiukajtis x += stridex; 16525c28e83SPiotr Jasiukajtis y += stridey; 16625c28e83SPiotr Jasiukajtis i = 2; 16725c28e83SPiotr Jasiukajtis if (--n <= 0) 16825c28e83SPiotr Jasiukajtis break; 16925c28e83SPiotr Jasiukajtis 17025c28e83SPiotr Jasiukajtis LOOP2: /* Get third arg, same as above. */ 17125c28e83SPiotr Jasiukajtis xsb2 = HI(x); 17225c28e83SPiotr Jasiukajtis hx2 = xsb2 & ~0x80000000; 17325c28e83SPiotr Jasiukajtis if (hx2 > 0x3fe921fb) 17425c28e83SPiotr Jasiukajtis { 17525c28e83SPiotr Jasiukajtis biguns = 3; 17625c28e83SPiotr Jasiukajtis goto MEDIUM; 17725c28e83SPiotr Jasiukajtis } 17825c28e83SPiotr Jasiukajtis if (hx2 < 0x3e400000) 17925c28e83SPiotr Jasiukajtis { 18025c28e83SPiotr Jasiukajtis v = *x; 18125c28e83SPiotr Jasiukajtis *y = 1.0; 18225c28e83SPiotr Jasiukajtis x += stridex; 18325c28e83SPiotr Jasiukajtis y += stridey; 18425c28e83SPiotr Jasiukajtis i = 2; 18525c28e83SPiotr Jasiukajtis if (--n <= 0) 18625c28e83SPiotr Jasiukajtis break; 18725c28e83SPiotr Jasiukajtis goto LOOP2; 18825c28e83SPiotr Jasiukajtis } 18925c28e83SPiotr Jasiukajtis x2 = *x; 19025c28e83SPiotr Jasiukajtis py2 = y; 19125c28e83SPiotr Jasiukajtis 19225c28e83SPiotr Jasiukajtis /* 19325c28e83SPiotr Jasiukajtis * 0x3fc40000 = 5/32 ~ 0.15625 19425c28e83SPiotr Jasiukajtis * Get msb after subtraction. Will be 1 only if 19525c28e83SPiotr Jasiukajtis * hx0 - 5/32 is negative. 19625c28e83SPiotr Jasiukajtis */ 19725c28e83SPiotr Jasiukajtis i = (hx0 - 0x3fc40000) >> 31; 19825c28e83SPiotr Jasiukajtis i |= ((hx1 - 0x3fc40000) >> 30) & 2; 19925c28e83SPiotr Jasiukajtis i |= ((hx2 - 0x3fc40000) >> 29) & 4; 20025c28e83SPiotr Jasiukajtis switch (i) 20125c28e83SPiotr Jasiukajtis { 20225c28e83SPiotr Jasiukajtis double a0, a1, a2, w0, w1, w2; 20325c28e83SPiotr Jasiukajtis double t0, t1, t2, z0, z1, z2; 20425c28e83SPiotr Jasiukajtis unsigned j0, j1, j2; 20525c28e83SPiotr Jasiukajtis 20625c28e83SPiotr Jasiukajtis case 0: /* All are > 5/32 */ 20725c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 20825c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 20925c28e83SPiotr Jasiukajtis j2 = (xsb2 + 0x4000) & 0xffff8000; 21025c28e83SPiotr Jasiukajtis HI(&t0) = j0; 21125c28e83SPiotr Jasiukajtis HI(&t1) = j1; 21225c28e83SPiotr Jasiukajtis HI(&t2) = j2; 21325c28e83SPiotr Jasiukajtis LO(&t0) = 0; 21425c28e83SPiotr Jasiukajtis LO(&t1) = 0; 21525c28e83SPiotr Jasiukajtis LO(&t2) = 0; 21625c28e83SPiotr Jasiukajtis x0 -= t0; 21725c28e83SPiotr Jasiukajtis x1 -= t1; 21825c28e83SPiotr Jasiukajtis x2 -= t2; 21925c28e83SPiotr Jasiukajtis z0 = x0 * x0; 22025c28e83SPiotr Jasiukajtis z1 = x1 * x1; 22125c28e83SPiotr Jasiukajtis z2 = x2 * x2; 22225c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 22325c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 22425c28e83SPiotr Jasiukajtis t2 = z2 * (qq1 + z2 * qq2); 22525c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 22625c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 22725c28e83SPiotr Jasiukajtis w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 22825c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 22925c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 23025c28e83SPiotr Jasiukajtis j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 23125c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 23225c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 23325c28e83SPiotr Jasiukajtis xsb2 = (xsb2 >> 30) & 2; 23425c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ 23525c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+1]; 23625c28e83SPiotr Jasiukajtis a2 = __vlibm_TBL_sincos_hi[j2+1]; 23725c28e83SPiotr Jasiukajtis /* cos_lo(t) sin_hi(t) */ 23825c28e83SPiotr Jasiukajtis t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); 23925c28e83SPiotr Jasiukajtis t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); 24025c28e83SPiotr Jasiukajtis t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); 24125c28e83SPiotr Jasiukajtis 24225c28e83SPiotr Jasiukajtis *py0 = a0 + t0; 24325c28e83SPiotr Jasiukajtis *py1 = a1 + t1; 24425c28e83SPiotr Jasiukajtis *py2 = a2 + t2; 24525c28e83SPiotr Jasiukajtis break; 24625c28e83SPiotr Jasiukajtis 24725c28e83SPiotr Jasiukajtis case 1: 24825c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 24925c28e83SPiotr Jasiukajtis j2 = (xsb2 + 0x4000) & 0xffff8000; 25025c28e83SPiotr Jasiukajtis HI(&t1) = j1; 25125c28e83SPiotr Jasiukajtis HI(&t2) = j2; 25225c28e83SPiotr Jasiukajtis LO(&t1) = 0; 25325c28e83SPiotr Jasiukajtis LO(&t2) = 0; 25425c28e83SPiotr Jasiukajtis x1 -= t1; 25525c28e83SPiotr Jasiukajtis x2 -= t2; 25625c28e83SPiotr Jasiukajtis z0 = x0 * x0; 25725c28e83SPiotr Jasiukajtis z1 = x1 * x1; 25825c28e83SPiotr Jasiukajtis z2 = x2 * x2; 25925c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[1] + z0 * poly4[1]); 26025c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 26125c28e83SPiotr Jasiukajtis t2 = z2 * (qq1 + z2 * qq2); 26225c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 26325c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 26425c28e83SPiotr Jasiukajtis w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 26525c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 26625c28e83SPiotr Jasiukajtis j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 26725c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 26825c28e83SPiotr Jasiukajtis xsb2 = (xsb2 >> 30) & 2; 26925c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+1]; 27025c28e83SPiotr Jasiukajtis a2 = __vlibm_TBL_sincos_hi[j2+1]; 27125c28e83SPiotr Jasiukajtis t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); 27225c28e83SPiotr Jasiukajtis t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); 27325c28e83SPiotr Jasiukajtis *py0 = one + t0; 27425c28e83SPiotr Jasiukajtis *py1 = a1 + t1; 27525c28e83SPiotr Jasiukajtis *py2 = a2 + t2; 27625c28e83SPiotr Jasiukajtis break; 27725c28e83SPiotr Jasiukajtis 27825c28e83SPiotr Jasiukajtis case 2: 27925c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 28025c28e83SPiotr Jasiukajtis j2 = (xsb2 + 0x4000) & 0xffff8000; 28125c28e83SPiotr Jasiukajtis HI(&t0) = j0; 28225c28e83SPiotr Jasiukajtis HI(&t2) = j2; 28325c28e83SPiotr Jasiukajtis LO(&t0) = 0; 28425c28e83SPiotr Jasiukajtis LO(&t2) = 0; 28525c28e83SPiotr Jasiukajtis x0 -= t0; 28625c28e83SPiotr Jasiukajtis x2 -= t2; 28725c28e83SPiotr Jasiukajtis z0 = x0 * x0; 28825c28e83SPiotr Jasiukajtis z1 = x1 * x1; 28925c28e83SPiotr Jasiukajtis z2 = x2 * x2; 29025c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 29125c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[1] + z1 * poly4[1]); 29225c28e83SPiotr Jasiukajtis t2 = z2 * (qq1 + z2 * qq2); 29325c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 29425c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 29525c28e83SPiotr Jasiukajtis w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 29625c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 29725c28e83SPiotr Jasiukajtis j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 29825c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 29925c28e83SPiotr Jasiukajtis xsb2 = (xsb2 >> 30) & 2; 30025c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+1]; 30125c28e83SPiotr Jasiukajtis a2 = __vlibm_TBL_sincos_hi[j2+1]; 30225c28e83SPiotr Jasiukajtis t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); 30325c28e83SPiotr Jasiukajtis t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); 30425c28e83SPiotr Jasiukajtis *py0 = a0 + t0; 30525c28e83SPiotr Jasiukajtis *py1 = one + t1; 30625c28e83SPiotr Jasiukajtis *py2 = a2 + t2; 30725c28e83SPiotr Jasiukajtis break; 30825c28e83SPiotr Jasiukajtis 30925c28e83SPiotr Jasiukajtis case 3: 31025c28e83SPiotr Jasiukajtis j2 = (xsb2 + 0x4000) & 0xffff8000; 31125c28e83SPiotr Jasiukajtis HI(&t2) = j2; 31225c28e83SPiotr Jasiukajtis LO(&t2) = 0; 31325c28e83SPiotr Jasiukajtis x2 -= t2; 31425c28e83SPiotr Jasiukajtis z0 = x0 * x0; 31525c28e83SPiotr Jasiukajtis z1 = x1 * x1; 31625c28e83SPiotr Jasiukajtis z2 = x2 * x2; 31725c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[1] + z0 * poly4[1]); 31825c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[1] + z1 * poly4[1]); 31925c28e83SPiotr Jasiukajtis t2 = z2 * (qq1 + z2 * qq2); 32025c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 32125c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 32225c28e83SPiotr Jasiukajtis w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 32325c28e83SPiotr Jasiukajtis j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 32425c28e83SPiotr Jasiukajtis xsb2 = (xsb2 >> 30) & 2; 32525c28e83SPiotr Jasiukajtis a2 = __vlibm_TBL_sincos_hi[j2+1]; 32625c28e83SPiotr Jasiukajtis t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); 32725c28e83SPiotr Jasiukajtis *py0 = one + t0; 32825c28e83SPiotr Jasiukajtis *py1 = one + t1; 32925c28e83SPiotr Jasiukajtis *py2 = a2 + t2; 33025c28e83SPiotr Jasiukajtis break; 33125c28e83SPiotr Jasiukajtis 33225c28e83SPiotr Jasiukajtis case 4: 33325c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 33425c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 33525c28e83SPiotr Jasiukajtis HI(&t0) = j0; 33625c28e83SPiotr Jasiukajtis HI(&t1) = j1; 33725c28e83SPiotr Jasiukajtis LO(&t0) = 0; 33825c28e83SPiotr Jasiukajtis LO(&t1) = 0; 33925c28e83SPiotr Jasiukajtis x0 -= t0; 34025c28e83SPiotr Jasiukajtis x1 -= t1; 34125c28e83SPiotr Jasiukajtis z0 = x0 * x0; 34225c28e83SPiotr Jasiukajtis z1 = x1 * x1; 34325c28e83SPiotr Jasiukajtis z2 = x2 * x2; 34425c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 34525c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 34625c28e83SPiotr Jasiukajtis t2 = z2 * (poly3[1] + z2 * poly4[1]); 34725c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 34825c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 34925c28e83SPiotr Jasiukajtis t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); 35025c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 35125c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 35225c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 35325c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 35425c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+1]; 35525c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+1]; 35625c28e83SPiotr Jasiukajtis t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); 35725c28e83SPiotr Jasiukajtis t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); 35825c28e83SPiotr Jasiukajtis *py0 = a0 + t0; 35925c28e83SPiotr Jasiukajtis *py1 = a1 + t1; 36025c28e83SPiotr Jasiukajtis *py2 = one + t2; 36125c28e83SPiotr Jasiukajtis break; 36225c28e83SPiotr Jasiukajtis 36325c28e83SPiotr Jasiukajtis case 5: 36425c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 36525c28e83SPiotr Jasiukajtis HI(&t1) = j1; 36625c28e83SPiotr Jasiukajtis LO(&t1) = 0; 36725c28e83SPiotr Jasiukajtis x1 -= t1; 36825c28e83SPiotr Jasiukajtis z0 = x0 * x0; 36925c28e83SPiotr Jasiukajtis z1 = x1 * x1; 37025c28e83SPiotr Jasiukajtis z2 = x2 * x2; 37125c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[1] + z0 * poly4[1]); 37225c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 37325c28e83SPiotr Jasiukajtis t2 = z2 * (poly3[1] + z2 * poly4[1]); 37425c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 37525c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 37625c28e83SPiotr Jasiukajtis t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); 37725c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 37825c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 37925c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+1]; 38025c28e83SPiotr Jasiukajtis t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); 38125c28e83SPiotr Jasiukajtis *py0 = one + t0; 38225c28e83SPiotr Jasiukajtis *py1 = a1 + t1; 38325c28e83SPiotr Jasiukajtis *py2 = one + t2; 38425c28e83SPiotr Jasiukajtis break; 38525c28e83SPiotr Jasiukajtis 38625c28e83SPiotr Jasiukajtis case 6: 38725c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 38825c28e83SPiotr Jasiukajtis HI(&t0) = j0; 38925c28e83SPiotr Jasiukajtis LO(&t0) = 0; 39025c28e83SPiotr Jasiukajtis x0 -= t0; 39125c28e83SPiotr Jasiukajtis z0 = x0 * x0; 39225c28e83SPiotr Jasiukajtis z1 = x1 * x1; 39325c28e83SPiotr Jasiukajtis z2 = x2 * x2; 39425c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 39525c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[1] + z1 * poly4[1]); 39625c28e83SPiotr Jasiukajtis t2 = z2 * (poly3[1] + z2 * poly4[1]); 39725c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 39825c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 39925c28e83SPiotr Jasiukajtis t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); 40025c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 40125c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 40225c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+1]; 40325c28e83SPiotr Jasiukajtis t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); 40425c28e83SPiotr Jasiukajtis *py0 = a0 + t0; 40525c28e83SPiotr Jasiukajtis *py1 = one + t1; 40625c28e83SPiotr Jasiukajtis *py2 = one + t2; 40725c28e83SPiotr Jasiukajtis break; 40825c28e83SPiotr Jasiukajtis 40925c28e83SPiotr Jasiukajtis case 7: /* All are < 5/32 */ 41025c28e83SPiotr Jasiukajtis z0 = x0 * x0; 41125c28e83SPiotr Jasiukajtis z1 = x1 * x1; 41225c28e83SPiotr Jasiukajtis z2 = x2 * x2; 41325c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[1] + z0 * poly4[1]); 41425c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[1] + z1 * poly4[1]); 41525c28e83SPiotr Jasiukajtis t2 = z2 * (poly3[1] + z2 * poly4[1]); 41625c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 41725c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 41825c28e83SPiotr Jasiukajtis t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); 41925c28e83SPiotr Jasiukajtis *py0 = one + t0; 42025c28e83SPiotr Jasiukajtis *py1 = one + t1; 42125c28e83SPiotr Jasiukajtis *py2 = one + t2; 42225c28e83SPiotr Jasiukajtis break; 42325c28e83SPiotr Jasiukajtis } 42425c28e83SPiotr Jasiukajtis 42525c28e83SPiotr Jasiukajtis x += stridex; 42625c28e83SPiotr Jasiukajtis y += stridey; 42725c28e83SPiotr Jasiukajtis i = 0; 42825c28e83SPiotr Jasiukajtis } while (--n > 0); /* END MAIN LOOP */ 42925c28e83SPiotr Jasiukajtis 43025c28e83SPiotr Jasiukajtis /* 43125c28e83SPiotr Jasiukajtis * CLEAN UP last 0, 1, or 2 elts. 43225c28e83SPiotr Jasiukajtis */ 43325c28e83SPiotr Jasiukajtis if (i > 0) /* Clean up elts at tail. i < 3. */ 43425c28e83SPiotr Jasiukajtis { 43525c28e83SPiotr Jasiukajtis double a0, a1, w0, w1; 43625c28e83SPiotr Jasiukajtis double t0, t1, z0, z1; 43725c28e83SPiotr Jasiukajtis unsigned j0, j1; 43825c28e83SPiotr Jasiukajtis 43925c28e83SPiotr Jasiukajtis if (i > 1) 44025c28e83SPiotr Jasiukajtis { 44125c28e83SPiotr Jasiukajtis if (hx1 < 0x3fc40000) 44225c28e83SPiotr Jasiukajtis { 44325c28e83SPiotr Jasiukajtis z1 = x1 * x1; 44425c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[1] + z1 * poly4[1]); 44525c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 44625c28e83SPiotr Jasiukajtis t1 = one + t1; 44725c28e83SPiotr Jasiukajtis *py1 = t1; 44825c28e83SPiotr Jasiukajtis } 44925c28e83SPiotr Jasiukajtis else 45025c28e83SPiotr Jasiukajtis { 45125c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 45225c28e83SPiotr Jasiukajtis HI(&t1) = j1; 45325c28e83SPiotr Jasiukajtis LO(&t1) = 0; 45425c28e83SPiotr Jasiukajtis x1 -= t1; 45525c28e83SPiotr Jasiukajtis z1 = x1 * x1; 45625c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 45725c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 45825c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 45925c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 46025c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+1]; 46125c28e83SPiotr Jasiukajtis t1 = __vlibm_TBL_sincos_lo[j1+1] 46225c28e83SPiotr Jasiukajtis - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); 46325c28e83SPiotr Jasiukajtis *py1 = a1 + t1; 46425c28e83SPiotr Jasiukajtis } 46525c28e83SPiotr Jasiukajtis } 46625c28e83SPiotr Jasiukajtis if (hx0 < 0x3fc40000) 46725c28e83SPiotr Jasiukajtis { 46825c28e83SPiotr Jasiukajtis z0 = x0 * x0; 46925c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[1] + z0 * poly4[1]); 47025c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 47125c28e83SPiotr Jasiukajtis t0 = one + t0; 47225c28e83SPiotr Jasiukajtis *py0 = t0; 47325c28e83SPiotr Jasiukajtis } 47425c28e83SPiotr Jasiukajtis else 47525c28e83SPiotr Jasiukajtis { 47625c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 47725c28e83SPiotr Jasiukajtis HI(&t0) = j0; 47825c28e83SPiotr Jasiukajtis LO(&t0) = 0; 47925c28e83SPiotr Jasiukajtis x0 -= t0; 48025c28e83SPiotr Jasiukajtis z0 = x0 * x0; 48125c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 48225c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 48325c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 48425c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 48525c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+1]; 48625c28e83SPiotr Jasiukajtis t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); 48725c28e83SPiotr Jasiukajtis *py0 = a0 + t0; 48825c28e83SPiotr Jasiukajtis } 48925c28e83SPiotr Jasiukajtis } /* END CLEAN UP */ 49025c28e83SPiotr Jasiukajtis 49125c28e83SPiotr Jasiukajtis return; 49225c28e83SPiotr Jasiukajtis 49325c28e83SPiotr Jasiukajtis /* 49425c28e83SPiotr Jasiukajtis * Take care of BIGUNS. 49525c28e83SPiotr Jasiukajtis * 49625c28e83SPiotr Jasiukajtis * We have jumped here in the middle of processing after having 49725c28e83SPiotr Jasiukajtis * encountered a medium range argument. Therefore things are in a 49825c28e83SPiotr Jasiukajtis * bit of a tizzy. 49925c28e83SPiotr Jasiukajtis */ 50025c28e83SPiotr Jasiukajtis 50125c28e83SPiotr Jasiukajtis MEDIUM: 50225c28e83SPiotr Jasiukajtis 50325c28e83SPiotr Jasiukajtis x0_or_one[1] = 1.0; 50425c28e83SPiotr Jasiukajtis x1_or_one[1] = 1.0; 50525c28e83SPiotr Jasiukajtis x2_or_one[1] = 1.0; 50625c28e83SPiotr Jasiukajtis x0_or_one[3] = -1.0; 50725c28e83SPiotr Jasiukajtis x1_or_one[3] = -1.0; 50825c28e83SPiotr Jasiukajtis x2_or_one[3] = -1.0; 50925c28e83SPiotr Jasiukajtis y0_or_zero[1] = 0.0; 51025c28e83SPiotr Jasiukajtis y1_or_zero[1] = 0.0; 51125c28e83SPiotr Jasiukajtis y2_or_zero[1] = 0.0; 51225c28e83SPiotr Jasiukajtis y0_or_zero[3] = 0.0; 51325c28e83SPiotr Jasiukajtis y1_or_zero[3] = 0.0; 51425c28e83SPiotr Jasiukajtis y2_or_zero[3] = 0.0; 51525c28e83SPiotr Jasiukajtis 51625c28e83SPiotr Jasiukajtis if (biguns == 3) 51725c28e83SPiotr Jasiukajtis { 51825c28e83SPiotr Jasiukajtis biguns = 0; 51925c28e83SPiotr Jasiukajtis xsb0 = xsb0 >> 31; 52025c28e83SPiotr Jasiukajtis xsb1 = xsb1 >> 31; 52125c28e83SPiotr Jasiukajtis goto loop2; 52225c28e83SPiotr Jasiukajtis } 52325c28e83SPiotr Jasiukajtis else if (biguns == 2) 52425c28e83SPiotr Jasiukajtis { 52525c28e83SPiotr Jasiukajtis xsb0 = xsb0 >> 31; 52625c28e83SPiotr Jasiukajtis biguns = 0; 52725c28e83SPiotr Jasiukajtis goto loop1; 52825c28e83SPiotr Jasiukajtis } 52925c28e83SPiotr Jasiukajtis biguns = 0; 53025c28e83SPiotr Jasiukajtis 53125c28e83SPiotr Jasiukajtis do 53225c28e83SPiotr Jasiukajtis { 53325c28e83SPiotr Jasiukajtis double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; 53425c28e83SPiotr Jasiukajtis unsigned hx; 53525c28e83SPiotr Jasiukajtis int n0, n1, n2; 53625c28e83SPiotr Jasiukajtis 53725c28e83SPiotr Jasiukajtis /* 53825c28e83SPiotr Jasiukajtis * Find 3 more to work on: Not already done, not too big. 53925c28e83SPiotr Jasiukajtis */ 54025c28e83SPiotr Jasiukajtis 54125c28e83SPiotr Jasiukajtis loop0: 54225c28e83SPiotr Jasiukajtis hx = HI(x); 54325c28e83SPiotr Jasiukajtis xsb0 = hx >> 31; 54425c28e83SPiotr Jasiukajtis hx &= ~0x80000000; 54525c28e83SPiotr Jasiukajtis if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */ 54625c28e83SPiotr Jasiukajtis { 54725c28e83SPiotr Jasiukajtis if (hx >= 0x7ff00000) /* Inf or NaN */ 54825c28e83SPiotr Jasiukajtis { 54925c28e83SPiotr Jasiukajtis x0 = *x; 55025c28e83SPiotr Jasiukajtis *y = x0 - x0; 55125c28e83SPiotr Jasiukajtis } 55225c28e83SPiotr Jasiukajtis else 55325c28e83SPiotr Jasiukajtis biguns = 1; 55425c28e83SPiotr Jasiukajtis x += stridex; 55525c28e83SPiotr Jasiukajtis y += stridey; 55625c28e83SPiotr Jasiukajtis i = 0; 55725c28e83SPiotr Jasiukajtis if (--n <= 0) 55825c28e83SPiotr Jasiukajtis break; 55925c28e83SPiotr Jasiukajtis goto loop0; 56025c28e83SPiotr Jasiukajtis } 56125c28e83SPiotr Jasiukajtis x0 = *x; 56225c28e83SPiotr Jasiukajtis py0 = y; 56325c28e83SPiotr Jasiukajtis x += stridex; 56425c28e83SPiotr Jasiukajtis y += stridey; 56525c28e83SPiotr Jasiukajtis i = 1; 56625c28e83SPiotr Jasiukajtis if (--n <= 0) 56725c28e83SPiotr Jasiukajtis break; 56825c28e83SPiotr Jasiukajtis 56925c28e83SPiotr Jasiukajtis loop1: 57025c28e83SPiotr Jasiukajtis hx = HI(x); 57125c28e83SPiotr Jasiukajtis xsb1 = hx >> 31; 57225c28e83SPiotr Jasiukajtis hx &= ~0x80000000; 57325c28e83SPiotr Jasiukajtis if (hx > 0x413921fb) 57425c28e83SPiotr Jasiukajtis { 57525c28e83SPiotr Jasiukajtis if (hx >= 0x7ff00000) 57625c28e83SPiotr Jasiukajtis { 57725c28e83SPiotr Jasiukajtis x1 = *x; 57825c28e83SPiotr Jasiukajtis *y = x1 - x1; 57925c28e83SPiotr Jasiukajtis } 58025c28e83SPiotr Jasiukajtis else 58125c28e83SPiotr Jasiukajtis biguns = 1; 58225c28e83SPiotr Jasiukajtis x += stridex; 58325c28e83SPiotr Jasiukajtis y += stridey; 58425c28e83SPiotr Jasiukajtis i = 1; 58525c28e83SPiotr Jasiukajtis if (--n <= 0) 58625c28e83SPiotr Jasiukajtis break; 58725c28e83SPiotr Jasiukajtis goto loop1; 58825c28e83SPiotr Jasiukajtis } 58925c28e83SPiotr Jasiukajtis x1 = *x; 59025c28e83SPiotr Jasiukajtis py1 = y; 59125c28e83SPiotr Jasiukajtis x += stridex; 59225c28e83SPiotr Jasiukajtis y += stridey; 59325c28e83SPiotr Jasiukajtis i = 2; 59425c28e83SPiotr Jasiukajtis if (--n <= 0) 59525c28e83SPiotr Jasiukajtis break; 59625c28e83SPiotr Jasiukajtis 59725c28e83SPiotr Jasiukajtis loop2: 59825c28e83SPiotr Jasiukajtis hx = HI(x); 59925c28e83SPiotr Jasiukajtis xsb2 = hx >> 31; 60025c28e83SPiotr Jasiukajtis hx &= ~0x80000000; 60125c28e83SPiotr Jasiukajtis if (hx > 0x413921fb) 60225c28e83SPiotr Jasiukajtis { 60325c28e83SPiotr Jasiukajtis if (hx >= 0x7ff00000) 60425c28e83SPiotr Jasiukajtis { 60525c28e83SPiotr Jasiukajtis x2 = *x; 60625c28e83SPiotr Jasiukajtis *y = x2 - x2; 60725c28e83SPiotr Jasiukajtis } 60825c28e83SPiotr Jasiukajtis else 60925c28e83SPiotr Jasiukajtis biguns = 1; 61025c28e83SPiotr Jasiukajtis x += stridex; 61125c28e83SPiotr Jasiukajtis y += stridey; 61225c28e83SPiotr Jasiukajtis i = 2; 61325c28e83SPiotr Jasiukajtis if (--n <= 0) 61425c28e83SPiotr Jasiukajtis break; 61525c28e83SPiotr Jasiukajtis goto loop2; 61625c28e83SPiotr Jasiukajtis } 61725c28e83SPiotr Jasiukajtis x2 = *x; 61825c28e83SPiotr Jasiukajtis py2 = y; 61925c28e83SPiotr Jasiukajtis 62025c28e83SPiotr Jasiukajtis n0 = (int) (x0 * invpio2 + half[xsb0]); 62125c28e83SPiotr Jasiukajtis n1 = (int) (x1 * invpio2 + half[xsb1]); 62225c28e83SPiotr Jasiukajtis n2 = (int) (x2 * invpio2 + half[xsb2]); 62325c28e83SPiotr Jasiukajtis fn0 = (double) n0; 62425c28e83SPiotr Jasiukajtis fn1 = (double) n1; 62525c28e83SPiotr Jasiukajtis fn2 = (double) n2; 62625c28e83SPiotr Jasiukajtis n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ 62725c28e83SPiotr Jasiukajtis n1 = (n1 + 1) & 3; 62825c28e83SPiotr Jasiukajtis n2 = (n2 + 1) & 3; 62925c28e83SPiotr Jasiukajtis a0 = x0 - fn0 * pio2_1; 63025c28e83SPiotr Jasiukajtis a1 = x1 - fn1 * pio2_1; 63125c28e83SPiotr Jasiukajtis a2 = x2 - fn2 * pio2_1; 63225c28e83SPiotr Jasiukajtis w0 = fn0 * pio2_2; 63325c28e83SPiotr Jasiukajtis w1 = fn1 * pio2_2; 63425c28e83SPiotr Jasiukajtis w2 = fn2 * pio2_2; 63525c28e83SPiotr Jasiukajtis x0 = a0 - w0; 63625c28e83SPiotr Jasiukajtis x1 = a1 - w1; 63725c28e83SPiotr Jasiukajtis x2 = a2 - w2; 63825c28e83SPiotr Jasiukajtis y0 = (a0 - x0) - w0; 63925c28e83SPiotr Jasiukajtis y1 = (a1 - x1) - w1; 64025c28e83SPiotr Jasiukajtis y2 = (a2 - x2) - w2; 64125c28e83SPiotr Jasiukajtis a0 = x0; 64225c28e83SPiotr Jasiukajtis a1 = x1; 64325c28e83SPiotr Jasiukajtis a2 = x2; 64425c28e83SPiotr Jasiukajtis w0 = fn0 * pio2_3 - y0; 64525c28e83SPiotr Jasiukajtis w1 = fn1 * pio2_3 - y1; 64625c28e83SPiotr Jasiukajtis w2 = fn2 * pio2_3 - y2; 64725c28e83SPiotr Jasiukajtis x0 = a0 - w0; 64825c28e83SPiotr Jasiukajtis x1 = a1 - w1; 64925c28e83SPiotr Jasiukajtis x2 = a2 - w2; 65025c28e83SPiotr Jasiukajtis y0 = (a0 - x0) - w0; 65125c28e83SPiotr Jasiukajtis y1 = (a1 - x1) - w1; 65225c28e83SPiotr Jasiukajtis y2 = (a2 - x2) - w2; 65325c28e83SPiotr Jasiukajtis a0 = x0; 65425c28e83SPiotr Jasiukajtis a1 = x1; 65525c28e83SPiotr Jasiukajtis a2 = x2; 65625c28e83SPiotr Jasiukajtis w0 = fn0 * pio2_3t - y0; 65725c28e83SPiotr Jasiukajtis w1 = fn1 * pio2_3t - y1; 65825c28e83SPiotr Jasiukajtis w2 = fn2 * pio2_3t - y2; 65925c28e83SPiotr Jasiukajtis x0 = a0 - w0; 66025c28e83SPiotr Jasiukajtis x1 = a1 - w1; 66125c28e83SPiotr Jasiukajtis x2 = a2 - w2; 66225c28e83SPiotr Jasiukajtis y0 = (a0 - x0) - w0; 66325c28e83SPiotr Jasiukajtis y1 = (a1 - x1) - w1; 66425c28e83SPiotr Jasiukajtis y2 = (a2 - x2) - w2; 66525c28e83SPiotr Jasiukajtis xsb0 = HI(&x0); 66625c28e83SPiotr Jasiukajtis i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31; 66725c28e83SPiotr Jasiukajtis xsb1 = HI(&x1); 66825c28e83SPiotr Jasiukajtis i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2; 66925c28e83SPiotr Jasiukajtis xsb2 = HI(&x2); 67025c28e83SPiotr Jasiukajtis i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4; 67125c28e83SPiotr Jasiukajtis switch (i) 67225c28e83SPiotr Jasiukajtis { 67325c28e83SPiotr Jasiukajtis double t0, t1, t2, z0, z1, z2; 67425c28e83SPiotr Jasiukajtis unsigned j0, j1, j2; 67525c28e83SPiotr Jasiukajtis 67625c28e83SPiotr Jasiukajtis case 0: 67725c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 67825c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 67925c28e83SPiotr Jasiukajtis j2 = (xsb2 + 0x4000) & 0xffff8000; 68025c28e83SPiotr Jasiukajtis HI(&t0) = j0; 68125c28e83SPiotr Jasiukajtis HI(&t1) = j1; 68225c28e83SPiotr Jasiukajtis HI(&t2) = j2; 68325c28e83SPiotr Jasiukajtis LO(&t0) = 0; 68425c28e83SPiotr Jasiukajtis LO(&t1) = 0; 68525c28e83SPiotr Jasiukajtis LO(&t2) = 0; 68625c28e83SPiotr Jasiukajtis x0 = (x0 - t0) + y0; 68725c28e83SPiotr Jasiukajtis x1 = (x1 - t1) + y1; 68825c28e83SPiotr Jasiukajtis x2 = (x2 - t2) + y2; 68925c28e83SPiotr Jasiukajtis z0 = x0 * x0; 69025c28e83SPiotr Jasiukajtis z1 = x1 * x1; 69125c28e83SPiotr Jasiukajtis z2 = x2 * x2; 69225c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 69325c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 69425c28e83SPiotr Jasiukajtis t2 = z2 * (qq1 + z2 * qq2); 69525c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 69625c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 69725c28e83SPiotr Jasiukajtis w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 69825c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 69925c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 70025c28e83SPiotr Jasiukajtis j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 70125c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 70225c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 70325c28e83SPiotr Jasiukajtis xsb2 = (xsb2 >> 30) & 2; 70425c28e83SPiotr Jasiukajtis n0 ^= (xsb0 & ~(n0 << 1)); 70525c28e83SPiotr Jasiukajtis n1 ^= (xsb1 & ~(n1 << 1)); 70625c28e83SPiotr Jasiukajtis n2 ^= (xsb2 & ~(n2 << 1)); 70725c28e83SPiotr Jasiukajtis xsb0 |= 1; 70825c28e83SPiotr Jasiukajtis xsb1 |= 1; 70925c28e83SPiotr Jasiukajtis xsb2 |= 1; 71025c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+n0]; 71125c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+n1]; 71225c28e83SPiotr Jasiukajtis a2 = __vlibm_TBL_sincos_hi[j2+n2]; 71325c28e83SPiotr Jasiukajtis t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; 71425c28e83SPiotr Jasiukajtis t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; 71525c28e83SPiotr Jasiukajtis t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; 71625c28e83SPiotr Jasiukajtis *py0 = ( a0 + t0 ); 71725c28e83SPiotr Jasiukajtis *py1 = ( a1 + t1 ); 71825c28e83SPiotr Jasiukajtis *py2 = ( a2 + t2 ); 71925c28e83SPiotr Jasiukajtis break; 72025c28e83SPiotr Jasiukajtis 72125c28e83SPiotr Jasiukajtis case 1: 72225c28e83SPiotr Jasiukajtis j0 = n0 & 1; 72325c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 72425c28e83SPiotr Jasiukajtis j2 = (xsb2 + 0x4000) & 0xffff8000; 72525c28e83SPiotr Jasiukajtis HI(&t1) = j1; 72625c28e83SPiotr Jasiukajtis HI(&t2) = j2; 72725c28e83SPiotr Jasiukajtis LO(&t1) = 0; 72825c28e83SPiotr Jasiukajtis LO(&t2) = 0; 72925c28e83SPiotr Jasiukajtis x0_or_one[0] = x0; 73025c28e83SPiotr Jasiukajtis x0_or_one[2] = -x0; 73125c28e83SPiotr Jasiukajtis y0_or_zero[0] = y0; 73225c28e83SPiotr Jasiukajtis y0_or_zero[2] = -y0; 73325c28e83SPiotr Jasiukajtis x1 = (x1 - t1) + y1; 73425c28e83SPiotr Jasiukajtis x2 = (x2 - t2) + y2; 73525c28e83SPiotr Jasiukajtis z0 = x0 * x0; 73625c28e83SPiotr Jasiukajtis z1 = x1 * x1; 73725c28e83SPiotr Jasiukajtis z2 = x2 * x2; 73825c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 73925c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 74025c28e83SPiotr Jasiukajtis t2 = z2 * (qq1 + z2 * qq2); 74125c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 74225c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 74325c28e83SPiotr Jasiukajtis w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 74425c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 74525c28e83SPiotr Jasiukajtis j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 74625c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 74725c28e83SPiotr Jasiukajtis xsb2 = (xsb2 >> 30) & 2; 74825c28e83SPiotr Jasiukajtis n1 ^= (xsb1 & ~(n1 << 1)); 74925c28e83SPiotr Jasiukajtis n2 ^= (xsb2 & ~(n2 << 1)); 75025c28e83SPiotr Jasiukajtis xsb1 |= 1; 75125c28e83SPiotr Jasiukajtis xsb2 |= 1; 75225c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+n1]; 75325c28e83SPiotr Jasiukajtis a2 = __vlibm_TBL_sincos_hi[j2+n2]; 75425c28e83SPiotr Jasiukajtis t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 75525c28e83SPiotr Jasiukajtis t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; 75625c28e83SPiotr Jasiukajtis t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; 75725c28e83SPiotr Jasiukajtis *py0 = t0; 75825c28e83SPiotr Jasiukajtis *py1 = ( a1 + t1 ); 75925c28e83SPiotr Jasiukajtis *py2 = ( a2 + t2 ); 76025c28e83SPiotr Jasiukajtis break; 76125c28e83SPiotr Jasiukajtis 76225c28e83SPiotr Jasiukajtis case 2: 76325c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 76425c28e83SPiotr Jasiukajtis j1 = n1 & 1; 76525c28e83SPiotr Jasiukajtis j2 = (xsb2 + 0x4000) & 0xffff8000; 76625c28e83SPiotr Jasiukajtis HI(&t0) = j0; 76725c28e83SPiotr Jasiukajtis HI(&t2) = j2; 76825c28e83SPiotr Jasiukajtis LO(&t0) = 0; 76925c28e83SPiotr Jasiukajtis LO(&t2) = 0; 77025c28e83SPiotr Jasiukajtis x1_or_one[0] = x1; 77125c28e83SPiotr Jasiukajtis x1_or_one[2] = -x1; 77225c28e83SPiotr Jasiukajtis x0 = (x0 - t0) + y0; 77325c28e83SPiotr Jasiukajtis y1_or_zero[0] = y1; 77425c28e83SPiotr Jasiukajtis y1_or_zero[2] = -y1; 77525c28e83SPiotr Jasiukajtis x2 = (x2 - t2) + y2; 77625c28e83SPiotr Jasiukajtis z0 = x0 * x0; 77725c28e83SPiotr Jasiukajtis z1 = x1 * x1; 77825c28e83SPiotr Jasiukajtis z2 = x2 * x2; 77925c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 78025c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 78125c28e83SPiotr Jasiukajtis t2 = z2 * (qq1 + z2 * qq2); 78225c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 78325c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 78425c28e83SPiotr Jasiukajtis w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 78525c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 78625c28e83SPiotr Jasiukajtis j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 78725c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 78825c28e83SPiotr Jasiukajtis xsb2 = (xsb2 >> 30) & 2; 78925c28e83SPiotr Jasiukajtis n0 ^= (xsb0 & ~(n0 << 1)); 79025c28e83SPiotr Jasiukajtis n2 ^= (xsb2 & ~(n2 << 1)); 79125c28e83SPiotr Jasiukajtis xsb0 |= 1; 79225c28e83SPiotr Jasiukajtis xsb2 |= 1; 79325c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+n0]; 79425c28e83SPiotr Jasiukajtis a2 = __vlibm_TBL_sincos_hi[j2+n2]; 79525c28e83SPiotr Jasiukajtis t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; 79625c28e83SPiotr Jasiukajtis t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 79725c28e83SPiotr Jasiukajtis t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; 79825c28e83SPiotr Jasiukajtis *py0 = ( a0 + t0 ); 79925c28e83SPiotr Jasiukajtis *py1 = t1; 80025c28e83SPiotr Jasiukajtis *py2 = ( a2 + t2 ); 80125c28e83SPiotr Jasiukajtis break; 80225c28e83SPiotr Jasiukajtis 80325c28e83SPiotr Jasiukajtis case 3: 80425c28e83SPiotr Jasiukajtis j0 = n0 & 1; 80525c28e83SPiotr Jasiukajtis j1 = n1 & 1; 80625c28e83SPiotr Jasiukajtis j2 = (xsb2 + 0x4000) & 0xffff8000; 80725c28e83SPiotr Jasiukajtis HI(&t2) = j2; 80825c28e83SPiotr Jasiukajtis LO(&t2) = 0; 80925c28e83SPiotr Jasiukajtis x0_or_one[0] = x0; 81025c28e83SPiotr Jasiukajtis x0_or_one[2] = -x0; 81125c28e83SPiotr Jasiukajtis x1_or_one[0] = x1; 81225c28e83SPiotr Jasiukajtis x1_or_one[2] = -x1; 81325c28e83SPiotr Jasiukajtis y0_or_zero[0] = y0; 81425c28e83SPiotr Jasiukajtis y0_or_zero[2] = -y0; 81525c28e83SPiotr Jasiukajtis y1_or_zero[0] = y1; 81625c28e83SPiotr Jasiukajtis y1_or_zero[2] = -y1; 81725c28e83SPiotr Jasiukajtis x2 = (x2 - t2) + y2; 81825c28e83SPiotr Jasiukajtis z0 = x0 * x0; 81925c28e83SPiotr Jasiukajtis z1 = x1 * x1; 82025c28e83SPiotr Jasiukajtis z2 = x2 * x2; 82125c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 82225c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 82325c28e83SPiotr Jasiukajtis t2 = z2 * (qq1 + z2 * qq2); 82425c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 82525c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 82625c28e83SPiotr Jasiukajtis w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 82725c28e83SPiotr Jasiukajtis j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 82825c28e83SPiotr Jasiukajtis xsb2 = (xsb2 >> 30) & 2; 82925c28e83SPiotr Jasiukajtis n2 ^= (xsb2 & ~(n2 << 1)); 83025c28e83SPiotr Jasiukajtis xsb2 |= 1; 83125c28e83SPiotr Jasiukajtis a2 = __vlibm_TBL_sincos_hi[j2+n2]; 83225c28e83SPiotr Jasiukajtis t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 83325c28e83SPiotr Jasiukajtis t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 83425c28e83SPiotr Jasiukajtis t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; 83525c28e83SPiotr Jasiukajtis *py0 = t0; 83625c28e83SPiotr Jasiukajtis *py1 = t1; 83725c28e83SPiotr Jasiukajtis *py2 = ( a2 + t2 ); 83825c28e83SPiotr Jasiukajtis break; 83925c28e83SPiotr Jasiukajtis 84025c28e83SPiotr Jasiukajtis case 4: 84125c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 84225c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 84325c28e83SPiotr Jasiukajtis j2 = n2 & 1; 84425c28e83SPiotr Jasiukajtis HI(&t0) = j0; 84525c28e83SPiotr Jasiukajtis HI(&t1) = j1; 84625c28e83SPiotr Jasiukajtis LO(&t0) = 0; 84725c28e83SPiotr Jasiukajtis LO(&t1) = 0; 84825c28e83SPiotr Jasiukajtis x2_or_one[0] = x2; 84925c28e83SPiotr Jasiukajtis x2_or_one[2] = -x2; 85025c28e83SPiotr Jasiukajtis x0 = (x0 - t0) + y0; 85125c28e83SPiotr Jasiukajtis x1 = (x1 - t1) + y1; 85225c28e83SPiotr Jasiukajtis y2_or_zero[0] = y2; 85325c28e83SPiotr Jasiukajtis y2_or_zero[2] = -y2; 85425c28e83SPiotr Jasiukajtis z0 = x0 * x0; 85525c28e83SPiotr Jasiukajtis z1 = x1 * x1; 85625c28e83SPiotr Jasiukajtis z2 = x2 * x2; 85725c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 85825c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 85925c28e83SPiotr Jasiukajtis t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 86025c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 86125c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 86225c28e83SPiotr Jasiukajtis t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 86325c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 86425c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 86525c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 86625c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 86725c28e83SPiotr Jasiukajtis n0 ^= (xsb0 & ~(n0 << 1)); 86825c28e83SPiotr Jasiukajtis n1 ^= (xsb1 & ~(n1 << 1)); 86925c28e83SPiotr Jasiukajtis xsb0 |= 1; 87025c28e83SPiotr Jasiukajtis xsb1 |= 1; 87125c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+n0]; 87225c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+n1]; 87325c28e83SPiotr Jasiukajtis t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; 87425c28e83SPiotr Jasiukajtis t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; 87525c28e83SPiotr Jasiukajtis t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 87625c28e83SPiotr Jasiukajtis *py0 = ( a0 + t0 ); 87725c28e83SPiotr Jasiukajtis *py1 = ( a1 + t1 ); 87825c28e83SPiotr Jasiukajtis *py2 = t2; 87925c28e83SPiotr Jasiukajtis break; 88025c28e83SPiotr Jasiukajtis 88125c28e83SPiotr Jasiukajtis case 5: 88225c28e83SPiotr Jasiukajtis j0 = n0 & 1; 88325c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 88425c28e83SPiotr Jasiukajtis j2 = n2 & 1; 88525c28e83SPiotr Jasiukajtis HI(&t1) = j1; 88625c28e83SPiotr Jasiukajtis LO(&t1) = 0; 88725c28e83SPiotr Jasiukajtis x0_or_one[0] = x0; 88825c28e83SPiotr Jasiukajtis x0_or_one[2] = -x0; 88925c28e83SPiotr Jasiukajtis x2_or_one[0] = x2; 89025c28e83SPiotr Jasiukajtis x2_or_one[2] = -x2; 89125c28e83SPiotr Jasiukajtis y0_or_zero[0] = y0; 89225c28e83SPiotr Jasiukajtis y0_or_zero[2] = -y0; 89325c28e83SPiotr Jasiukajtis x1 = (x1 - t1) + y1; 89425c28e83SPiotr Jasiukajtis y2_or_zero[0] = y2; 89525c28e83SPiotr Jasiukajtis y2_or_zero[2] = -y2; 89625c28e83SPiotr Jasiukajtis z0 = x0 * x0; 89725c28e83SPiotr Jasiukajtis z1 = x1 * x1; 89825c28e83SPiotr Jasiukajtis z2 = x2 * x2; 89925c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 90025c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 90125c28e83SPiotr Jasiukajtis t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 90225c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 90325c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 90425c28e83SPiotr Jasiukajtis t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 90525c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 90625c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 90725c28e83SPiotr Jasiukajtis n1 ^= (xsb1 & ~(n1 << 1)); 90825c28e83SPiotr Jasiukajtis xsb1 |= 1; 90925c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+n1]; 91025c28e83SPiotr Jasiukajtis t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 91125c28e83SPiotr Jasiukajtis t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; 91225c28e83SPiotr Jasiukajtis t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 91325c28e83SPiotr Jasiukajtis *py0 = t0; 91425c28e83SPiotr Jasiukajtis *py1 = ( a1 + t1 ); 91525c28e83SPiotr Jasiukajtis *py2 = t2; 91625c28e83SPiotr Jasiukajtis break; 91725c28e83SPiotr Jasiukajtis 91825c28e83SPiotr Jasiukajtis case 6: 91925c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 92025c28e83SPiotr Jasiukajtis j1 = n1 & 1; 92125c28e83SPiotr Jasiukajtis j2 = n2 & 1; 92225c28e83SPiotr Jasiukajtis HI(&t0) = j0; 92325c28e83SPiotr Jasiukajtis LO(&t0) = 0; 92425c28e83SPiotr Jasiukajtis x1_or_one[0] = x1; 92525c28e83SPiotr Jasiukajtis x1_or_one[2] = -x1; 92625c28e83SPiotr Jasiukajtis x2_or_one[0] = x2; 92725c28e83SPiotr Jasiukajtis x2_or_one[2] = -x2; 92825c28e83SPiotr Jasiukajtis x0 = (x0 - t0) + y0; 92925c28e83SPiotr Jasiukajtis y1_or_zero[0] = y1; 93025c28e83SPiotr Jasiukajtis y1_or_zero[2] = -y1; 93125c28e83SPiotr Jasiukajtis y2_or_zero[0] = y2; 93225c28e83SPiotr Jasiukajtis y2_or_zero[2] = -y2; 93325c28e83SPiotr Jasiukajtis z0 = x0 * x0; 93425c28e83SPiotr Jasiukajtis z1 = x1 * x1; 93525c28e83SPiotr Jasiukajtis z2 = x2 * x2; 93625c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 93725c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 93825c28e83SPiotr Jasiukajtis t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 93925c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 94025c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 94125c28e83SPiotr Jasiukajtis t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 94225c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 94325c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 94425c28e83SPiotr Jasiukajtis n0 ^= (xsb0 & ~(n0 << 1)); 94525c28e83SPiotr Jasiukajtis xsb0 |= 1; 94625c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+n0]; 94725c28e83SPiotr Jasiukajtis t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; 94825c28e83SPiotr Jasiukajtis t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 94925c28e83SPiotr Jasiukajtis t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 95025c28e83SPiotr Jasiukajtis *py0 = ( a0 + t0 ); 95125c28e83SPiotr Jasiukajtis *py1 = t1; 95225c28e83SPiotr Jasiukajtis *py2 = t2; 95325c28e83SPiotr Jasiukajtis break; 95425c28e83SPiotr Jasiukajtis 95525c28e83SPiotr Jasiukajtis case 7: 95625c28e83SPiotr Jasiukajtis j0 = n0 & 1; 95725c28e83SPiotr Jasiukajtis j1 = n1 & 1; 95825c28e83SPiotr Jasiukajtis j2 = n2 & 1; 95925c28e83SPiotr Jasiukajtis x0_or_one[0] = x0; 96025c28e83SPiotr Jasiukajtis x0_or_one[2] = -x0; 96125c28e83SPiotr Jasiukajtis x1_or_one[0] = x1; 96225c28e83SPiotr Jasiukajtis x1_or_one[2] = -x1; 96325c28e83SPiotr Jasiukajtis x2_or_one[0] = x2; 96425c28e83SPiotr Jasiukajtis x2_or_one[2] = -x2; 96525c28e83SPiotr Jasiukajtis y0_or_zero[0] = y0; 96625c28e83SPiotr Jasiukajtis y0_or_zero[2] = -y0; 96725c28e83SPiotr Jasiukajtis y1_or_zero[0] = y1; 96825c28e83SPiotr Jasiukajtis y1_or_zero[2] = -y1; 96925c28e83SPiotr Jasiukajtis y2_or_zero[0] = y2; 97025c28e83SPiotr Jasiukajtis y2_or_zero[2] = -y2; 97125c28e83SPiotr Jasiukajtis z0 = x0 * x0; 97225c28e83SPiotr Jasiukajtis z1 = x1 * x1; 97325c28e83SPiotr Jasiukajtis z2 = x2 * x2; 97425c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 97525c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 97625c28e83SPiotr Jasiukajtis t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 97725c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 97825c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 97925c28e83SPiotr Jasiukajtis t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 98025c28e83SPiotr Jasiukajtis t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 98125c28e83SPiotr Jasiukajtis t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 98225c28e83SPiotr Jasiukajtis t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 98325c28e83SPiotr Jasiukajtis *py0 = t0; 98425c28e83SPiotr Jasiukajtis *py1 = t1; 98525c28e83SPiotr Jasiukajtis *py2 = t2; 98625c28e83SPiotr Jasiukajtis break; 98725c28e83SPiotr Jasiukajtis } 98825c28e83SPiotr Jasiukajtis 98925c28e83SPiotr Jasiukajtis x += stridex; 99025c28e83SPiotr Jasiukajtis y += stridey; 99125c28e83SPiotr Jasiukajtis i = 0; 99225c28e83SPiotr Jasiukajtis } while (--n > 0); 99325c28e83SPiotr Jasiukajtis 99425c28e83SPiotr Jasiukajtis if (i > 0) 99525c28e83SPiotr Jasiukajtis { 99625c28e83SPiotr Jasiukajtis double fn0, fn1, a0, a1, w0, w1, y0, y1; 99725c28e83SPiotr Jasiukajtis double t0, t1, z0, z1; 99825c28e83SPiotr Jasiukajtis unsigned j0, j1; 99925c28e83SPiotr Jasiukajtis int n0, n1; 100025c28e83SPiotr Jasiukajtis 100125c28e83SPiotr Jasiukajtis if (i > 1) 100225c28e83SPiotr Jasiukajtis { 100325c28e83SPiotr Jasiukajtis n1 = (int) (x1 * invpio2 + half[xsb1]); 100425c28e83SPiotr Jasiukajtis fn1 = (double) n1; 100525c28e83SPiotr Jasiukajtis n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ 100625c28e83SPiotr Jasiukajtis a1 = x1 - fn1 * pio2_1; 100725c28e83SPiotr Jasiukajtis w1 = fn1 * pio2_2; 100825c28e83SPiotr Jasiukajtis x1 = a1 - w1; 100925c28e83SPiotr Jasiukajtis y1 = (a1 - x1) - w1; 101025c28e83SPiotr Jasiukajtis a1 = x1; 101125c28e83SPiotr Jasiukajtis w1 = fn1 * pio2_3 - y1; 101225c28e83SPiotr Jasiukajtis x1 = a1 - w1; 101325c28e83SPiotr Jasiukajtis y1 = (a1 - x1) - w1; 101425c28e83SPiotr Jasiukajtis a1 = x1; 101525c28e83SPiotr Jasiukajtis w1 = fn1 * pio2_3t - y1; 101625c28e83SPiotr Jasiukajtis x1 = a1 - w1; 101725c28e83SPiotr Jasiukajtis y1 = (a1 - x1) - w1; 101825c28e83SPiotr Jasiukajtis xsb1 = HI(&x1); 101925c28e83SPiotr Jasiukajtis if ((xsb1 & ~0x80000000) < thresh[n1&1]) 102025c28e83SPiotr Jasiukajtis { 102125c28e83SPiotr Jasiukajtis j1 = n1 & 1; 102225c28e83SPiotr Jasiukajtis x1_or_one[0] = x1; 102325c28e83SPiotr Jasiukajtis x1_or_one[2] = -x1; 102425c28e83SPiotr Jasiukajtis y1_or_zero[0] = y1; 102525c28e83SPiotr Jasiukajtis y1_or_zero[2] = -y1; 102625c28e83SPiotr Jasiukajtis z1 = x1 * x1; 102725c28e83SPiotr Jasiukajtis t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 102825c28e83SPiotr Jasiukajtis t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 102925c28e83SPiotr Jasiukajtis t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 103025c28e83SPiotr Jasiukajtis *py1 = t1; 103125c28e83SPiotr Jasiukajtis } 103225c28e83SPiotr Jasiukajtis else 103325c28e83SPiotr Jasiukajtis { 103425c28e83SPiotr Jasiukajtis j1 = (xsb1 + 0x4000) & 0xffff8000; 103525c28e83SPiotr Jasiukajtis HI(&t1) = j1; 103625c28e83SPiotr Jasiukajtis LO(&t1) = 0; 103725c28e83SPiotr Jasiukajtis x1 = (x1 - t1) + y1; 103825c28e83SPiotr Jasiukajtis z1 = x1 * x1; 103925c28e83SPiotr Jasiukajtis t1 = z1 * (qq1 + z1 * qq2); 104025c28e83SPiotr Jasiukajtis w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 104125c28e83SPiotr Jasiukajtis j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 104225c28e83SPiotr Jasiukajtis xsb1 = (xsb1 >> 30) & 2; 104325c28e83SPiotr Jasiukajtis n1 ^= (xsb1 & ~(n1 << 1)); 104425c28e83SPiotr Jasiukajtis xsb1 |= 1; 104525c28e83SPiotr Jasiukajtis a1 = __vlibm_TBL_sincos_hi[j1+n1]; 104625c28e83SPiotr Jasiukajtis t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; 104725c28e83SPiotr Jasiukajtis *py1 = ( a1 + t1 ); 104825c28e83SPiotr Jasiukajtis } 104925c28e83SPiotr Jasiukajtis } 105025c28e83SPiotr Jasiukajtis n0 = (int) (x0 * invpio2 + half[xsb0]); 105125c28e83SPiotr Jasiukajtis fn0 = (double) n0; 105225c28e83SPiotr Jasiukajtis n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ 105325c28e83SPiotr Jasiukajtis a0 = x0 - fn0 * pio2_1; 105425c28e83SPiotr Jasiukajtis w0 = fn0 * pio2_2; 105525c28e83SPiotr Jasiukajtis x0 = a0 - w0; 105625c28e83SPiotr Jasiukajtis y0 = (a0 - x0) - w0; 105725c28e83SPiotr Jasiukajtis a0 = x0; 105825c28e83SPiotr Jasiukajtis w0 = fn0 * pio2_3 - y0; 105925c28e83SPiotr Jasiukajtis x0 = a0 - w0; 106025c28e83SPiotr Jasiukajtis y0 = (a0 - x0) - w0; 106125c28e83SPiotr Jasiukajtis a0 = x0; 106225c28e83SPiotr Jasiukajtis w0 = fn0 * pio2_3t - y0; 106325c28e83SPiotr Jasiukajtis x0 = a0 - w0; 106425c28e83SPiotr Jasiukajtis y0 = (a0 - x0) - w0; 106525c28e83SPiotr Jasiukajtis xsb0 = HI(&x0); 106625c28e83SPiotr Jasiukajtis if ((xsb0 & ~0x80000000) < thresh[n0&1]) 106725c28e83SPiotr Jasiukajtis { 106825c28e83SPiotr Jasiukajtis j0 = n0 & 1; 106925c28e83SPiotr Jasiukajtis x0_or_one[0] = x0; 107025c28e83SPiotr Jasiukajtis x0_or_one[2] = -x0; 107125c28e83SPiotr Jasiukajtis y0_or_zero[0] = y0; 107225c28e83SPiotr Jasiukajtis y0_or_zero[2] = -y0; 107325c28e83SPiotr Jasiukajtis z0 = x0 * x0; 107425c28e83SPiotr Jasiukajtis t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 107525c28e83SPiotr Jasiukajtis t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 107625c28e83SPiotr Jasiukajtis t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 107725c28e83SPiotr Jasiukajtis *py0 = t0; 107825c28e83SPiotr Jasiukajtis } 107925c28e83SPiotr Jasiukajtis else 108025c28e83SPiotr Jasiukajtis { 108125c28e83SPiotr Jasiukajtis j0 = (xsb0 + 0x4000) & 0xffff8000; 108225c28e83SPiotr Jasiukajtis HI(&t0) = j0; 108325c28e83SPiotr Jasiukajtis LO(&t0) = 0; 108425c28e83SPiotr Jasiukajtis x0 = (x0 - t0) + y0; 108525c28e83SPiotr Jasiukajtis z0 = x0 * x0; 108625c28e83SPiotr Jasiukajtis t0 = z0 * (qq1 + z0 * qq2); 108725c28e83SPiotr Jasiukajtis w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 108825c28e83SPiotr Jasiukajtis j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 108925c28e83SPiotr Jasiukajtis xsb0 = (xsb0 >> 30) & 2; 109025c28e83SPiotr Jasiukajtis n0 ^= (xsb0 & ~(n0 << 1)); 109125c28e83SPiotr Jasiukajtis xsb0 |= 1; 109225c28e83SPiotr Jasiukajtis a0 = __vlibm_TBL_sincos_hi[j0+n0]; 109325c28e83SPiotr Jasiukajtis t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; 109425c28e83SPiotr Jasiukajtis *py0 = ( a0 + t0 ); 109525c28e83SPiotr Jasiukajtis } 109625c28e83SPiotr Jasiukajtis } 109725c28e83SPiotr Jasiukajtis 109825c28e83SPiotr Jasiukajtis if (biguns) 109925c28e83SPiotr Jasiukajtis __vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb); 110025c28e83SPiotr Jasiukajtis } 1101