1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include <sys/isa_defs.h> 31 #include <sys/ccompile.h> 32 33 #ifdef _LITTLE_ENDIAN 34 #define HI(x) *(1+(int*)x) 35 #define LO(x) *(unsigned*)x 36 #else 37 #define HI(x) *(int*)x 38 #define LO(x) *(1+(unsigned*)x) 39 #endif 40 41 #ifdef __RESTRICT 42 #define restrict _Restrict 43 #else 44 #define restrict 45 #endif 46 47 /* 48 * vsincos.c 49 * 50 * Vector sine and cosine function. Just slight modifications to vcos.c. 51 */ 52 53 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; 54 55 static const double 56 half[2] = { 0.5, -0.5 }, 57 one = 1.0, 58 invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */ 59 pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */ 60 pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */ 61 pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */ 62 pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */ 63 pp1 = -1.666666666605760465276263943134982554676e-0001, 64 pp2 = 8.333261209690963126718376566146180944442e-0003, 65 qq1 = -4.999999999977710986407023955908711557870e-0001, 66 qq2 = 4.166654863857219350645055881018842089580e-0002, 67 poly1[2]= { -1.666666666666629669805215138920301589656e-0001, 68 -4.999999999999931701464060878888294524481e-0001 }, 69 poly2[2]= { 8.333333332390951295683993455280336376663e-0003, 70 4.166666666394861917535640593963708222319e-0002 }, 71 poly3[2]= { -1.984126237997976692791551778230098403960e-0004, 72 -1.388888552656142867832756687736851681462e-0003 }, 73 poly4[2]= { 2.753403624854277237649987622848330351110e-0006, 74 2.478519423681460796618128289454530524759e-0005 }; 75 76 /* Don't __ the following; acomp will handle it */ 77 extern double fabs(double); 78 extern void __vlibm_vsincos_big(int, double *, int, double *, int, double *, int, int); 79 80 /* 81 * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n. 82 * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n. 83 * 84 * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06. 85 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06. 86 * 87 * elts < 2^-27 use the approximation 1.0 ~ cos(x). 88 */ 89 void 90 __vsincos(int n, double * restrict x, int stridex, 91 double * restrict y, int stridey, 92 double * restrict c, int stridec) 93 { 94 double x0_or_one[4], x1_or_one[4], x2_or_one[4]; 95 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; 96 double x0, x1, x2, 97 *py0, *py1, *py2, 98 *pc0, *pc1, *pc2, 99 *xsave, *ysave, *csave; 100 unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2; 101 int i, biguns, nsave, sxsave, sysave, scsave; 102 volatile int v __GNU_UNUSED; 103 nsave = n; 104 xsave = x; 105 sxsave = stridex; 106 ysave = y; 107 sysave = stridey; 108 csave = c; 109 scsave = stridec; 110 biguns = 0; 111 112 do /* MAIN LOOP */ 113 { 114 115 /* Gotos here so _break_ exits MAIN LOOP. */ 116 LOOP0: /* Find first arg in right range. */ 117 xsb0 = HI(x); /* get most significant word */ 118 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */ 119 if (hx0 > 0x3fe921fb) { 120 /* Too big: arg reduction needed, so leave for second part */ 121 biguns = 1; 122 x += stridex; 123 y += stridey; 124 c += stridec; 125 i = 0; 126 if (--n <= 0) 127 break; 128 goto LOOP0; 129 } 130 if (hx0 < 0x3e400000) { 131 /* Too small. cos x ~ 1, sin x ~ x. */ 132 v = *x; 133 *c = 1.0; 134 *y = *x; 135 x += stridex; 136 y += stridey; 137 c += stridec; 138 i = 0; 139 if (--n <= 0) 140 break; 141 goto LOOP0; 142 } 143 x0 = *x; 144 py0 = y; 145 pc0 = c; 146 x += stridex; 147 y += stridey; 148 c += stridec; 149 i = 1; 150 if (--n <= 0) 151 break; 152 153 LOOP1: /* Get second arg, same as above. */ 154 xsb1 = HI(x); 155 hx1 = xsb1 & ~0x80000000; 156 if (hx1 > 0x3fe921fb) 157 { 158 biguns = 1; 159 x += stridex; 160 y += stridey; 161 c += stridec; 162 i = 1; 163 if (--n <= 0) 164 break; 165 goto LOOP1; 166 } 167 if (hx1 < 0x3e400000) 168 { 169 v = *x; 170 *c = 1.0; 171 *y = *x; 172 x += stridex; 173 y += stridey; 174 c += stridec; 175 i = 1; 176 if (--n <= 0) 177 break; 178 goto LOOP1; 179 } 180 x1 = *x; 181 py1 = y; 182 pc1 = c; 183 x += stridex; 184 y += stridey; 185 c += stridec; 186 i = 2; 187 if (--n <= 0) 188 break; 189 190 LOOP2: /* Get third arg, same as above. */ 191 xsb2 = HI(x); 192 hx2 = xsb2 & ~0x80000000; 193 if (hx2 > 0x3fe921fb) 194 { 195 biguns = 1; 196 x += stridex; 197 y += stridey; 198 c += stridec; 199 i = 2; 200 if (--n <= 0) 201 break; 202 goto LOOP2; 203 } 204 if (hx2 < 0x3e400000) 205 { 206 v = *x; 207 *c = 1.0; 208 *y = *x; 209 x += stridex; 210 y += stridey; 211 c += stridec; 212 i = 2; 213 if (--n <= 0) 214 break; 215 goto LOOP2; 216 } 217 x2 = *x; 218 py2 = y; 219 pc2 = c; 220 221 /* 222 * 0x3fc40000 = 5/32 ~ 0.15625 223 * Get msb after subtraction. Will be 1 only if 224 * hx0 - 5/32 is negative. 225 */ 226 i = (hx2 - 0x3fc40000) >> 31; 227 i |= ((hx1 - 0x3fc40000) >> 30) & 2; 228 i |= ((hx0 - 0x3fc40000) >> 29) & 4; 229 switch (i) 230 { 231 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2; 232 double w0, w1, w2; 233 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2; 234 double z0, z1, z2; 235 unsigned j0, j1, j2; 236 237 case 0: /* All are > 5/32 */ 238 j0 = (xsb0 + 0x4000) & 0xffff8000; 239 j1 = (xsb1 + 0x4000) & 0xffff8000; 240 j2 = (xsb2 + 0x4000) & 0xffff8000; 241 242 HI(&t0) = j0; 243 HI(&t1) = j1; 244 HI(&t2) = j2; 245 LO(&t0) = 0; 246 LO(&t1) = 0; 247 LO(&t2) = 0; 248 249 x0 -= t0; 250 x1 -= t1; 251 x2 -= t2; 252 253 z0 = x0 * x0; 254 z1 = x1 * x1; 255 z2 = x2 * x2; 256 257 t0 = z0 * (qq1 + z0 * qq2); 258 t1 = z1 * (qq1 + z1 * qq2); 259 t2 = z2 * (qq1 + z2 * qq2); 260 261 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 262 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 263 w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 264 265 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 266 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 267 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 268 269 xsb0 = (xsb0 >> 30) & 2; 270 xsb1 = (xsb1 >> 30) & 2; 271 xsb2 = (xsb2 >> 30) & 2; 272 273 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ 274 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; 275 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; 276 277 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ 278 a2_1 = __vlibm_TBL_sincos_hi[j1+1]; 279 a2_2 = __vlibm_TBL_sincos_hi[j2+1]; 280 /* cos_lo(t) */ 281 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); 282 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1); 283 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2); 284 285 *pc0 = a2_0 + t2_0; 286 *pc1 = a2_1 + t2_1; 287 *pc2 = a2_2 + t2_2; 288 289 t1_0 = a2_0*w0 + a1_0*t0; 290 t1_1 = a2_1*w1 + a1_1*t1; 291 t1_2 = a2_2*w2 + a1_2*t2; 292 293 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ 294 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; 295 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; 296 297 *py0 = a1_0 + t1_0; 298 *py1 = a1_1 + t1_1; 299 *py2 = a1_2 + t1_2; 300 301 break; 302 303 case 1: 304 j0 = (xsb0 + 0x4000) & 0xffff8000; 305 j1 = (xsb1 + 0x4000) & 0xffff8000; 306 HI(&t0) = j0; 307 HI(&t1) = j1; 308 LO(&t0) = 0; 309 LO(&t1) = 0; 310 x0 -= t0; 311 x1 -= t1; 312 z0 = x0 * x0; 313 z1 = x1 * x1; 314 z2 = x2 * x2; 315 t0 = z0 * (qq1 + z0 * qq2); 316 t1 = z1 * (qq1 + z1 * qq2); 317 t2 = z2 * (poly3[1] + z2 * poly4[1]); 318 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 319 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 320 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); 321 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 322 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 323 xsb0 = (xsb0 >> 30) & 2; 324 xsb1 = (xsb1 >> 30) & 2; 325 326 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ 327 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; 328 329 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ 330 a2_1 = __vlibm_TBL_sincos_hi[j1+1]; 331 /* cos_lo(t) */ 332 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); 333 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1); 334 335 *pc0 = a2_0 + t2_0; 336 *pc1 = a2_1 + t2_1; 337 *pc2 = one + t2; 338 339 t1_0 = a2_0*w0 + a1_0*t0; 340 t1_1 = a2_1*w1 + a1_1*t1; 341 t2 = z2 * (poly3[0] + z2 * poly4[0]); 342 343 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ 344 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; 345 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); 346 347 *py0 = a1_0 + t1_0; 348 *py1 = a1_1 + t1_1; 349 t2 = x2 + x2 * t2; 350 *py2 = t2; 351 352 break; 353 354 case 2: 355 j0 = (xsb0 + 0x4000) & 0xffff8000; 356 j2 = (xsb2 + 0x4000) & 0xffff8000; 357 HI(&t0) = j0; 358 HI(&t2) = j2; 359 LO(&t0) = 0; 360 LO(&t2) = 0; 361 x0 -= t0; 362 x2 -= t2; 363 z0 = x0 * x0; 364 z1 = x1 * x1; 365 z2 = x2 * x2; 366 t0 = z0 * (qq1 + z0 * qq2); 367 t1 = z1 * (poly3[1] + z1 * poly4[1]); 368 t2 = z2 * (qq1 + z2 * qq2); 369 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 370 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 371 w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 372 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 373 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 374 xsb0 = (xsb0 >> 30) & 2; 375 xsb2 = (xsb2 >> 30) & 2; 376 377 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ 378 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; 379 380 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ 381 a2_2 = __vlibm_TBL_sincos_hi[j2+1]; 382 /* cos_lo(t) */ 383 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); 384 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2); 385 386 *pc0 = a2_0 + t2_0; 387 *pc1 = one + t1; 388 *pc2 = a2_2 + t2_2; 389 390 t1_0 = a2_0*w0 + a1_0*t0; 391 t1 = z1 * (poly3[0] + z1 * poly4[0]); 392 t1_2 = a2_2*w2 + a1_2*t2; 393 394 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ 395 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); 396 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; 397 398 *py0 = a1_0 + t1_0; 399 t1 = x1 + x1 * t1; 400 *py1 = t1; 401 *py2 = a1_2 + t1_2; 402 403 break; 404 405 case 3: 406 j0 = (xsb0 + 0x4000) & 0xffff8000; 407 HI(&t0) = j0; 408 LO(&t0) = 0; 409 x0 -= t0; 410 z0 = x0 * x0; 411 z1 = x1 * x1; 412 z2 = x2 * x2; 413 t0 = z0 * (qq1 + z0 * qq2); 414 t1 = z1 * (poly3[1] + z1 * poly4[1]); 415 t2 = z2 * (poly3[1] + z2 * poly4[1]); 416 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 417 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 418 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); 419 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 420 xsb0 = (xsb0 >> 30) & 2; 421 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ 422 423 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ 424 425 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); 426 427 *pc0 = a2_0 + t2_0; 428 *pc1 = one + t1; 429 *pc2 = one + t2; 430 431 t1_0 = a2_0*w0 + a1_0*t0; 432 t1 = z1 * (poly3[0] + z1 * poly4[0]); 433 t2 = z2 * (poly3[0] + z2 * poly4[0]); 434 435 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ 436 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); 437 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); 438 439 *py0 = a1_0 + t1_0; 440 t1 = x1 + x1 * t1; 441 *py1 = t1; 442 t2 = x2 + x2 * t2; 443 *py2 = t2; 444 445 break; 446 447 case 4: 448 j1 = (xsb1 + 0x4000) & 0xffff8000; 449 j2 = (xsb2 + 0x4000) & 0xffff8000; 450 HI(&t1) = j1; 451 HI(&t2) = j2; 452 LO(&t1) = 0; 453 LO(&t2) = 0; 454 x1 -= t1; 455 x2 -= t2; 456 z0 = x0 * x0; 457 z1 = x1 * x1; 458 z2 = x2 * x2; 459 t0 = z0 * (poly3[1] + z0 * poly4[1]); 460 t1 = z1 * (qq1 + z1 * qq2); 461 t2 = z2 * (qq1 + z2 * qq2); 462 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 463 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 464 w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 465 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 466 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 467 xsb1 = (xsb1 >> 30) & 2; 468 xsb2 = (xsb2 >> 30) & 2; 469 470 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; 471 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; 472 473 a2_1 = __vlibm_TBL_sincos_hi[j1+1]; 474 a2_2 = __vlibm_TBL_sincos_hi[j2+1]; 475 /* cos_lo(t) */ 476 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1); 477 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2); 478 479 *pc0 = one + t0; 480 *pc1 = a2_1 + t2_1; 481 *pc2 = a2_2 + t2_2; 482 483 t0 = z0 * (poly3[0] + z0 * poly4[0]); 484 t1_1 = a2_1*w1 + a1_1*t1; 485 t1_2 = a2_2*w2 + a1_2*t2; 486 487 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); 488 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; 489 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; 490 491 t0 = x0 + x0 * t0; 492 *py0 = t0; 493 *py1 = a1_1 + t1_1; 494 *py2 = a1_2 + t1_2; 495 496 break; 497 498 case 5: 499 j1 = (xsb1 + 0x4000) & 0xffff8000; 500 HI(&t1) = j1; 501 LO(&t1) = 0; 502 x1 -= t1; 503 z0 = x0 * x0; 504 z1 = x1 * x1; 505 z2 = x2 * x2; 506 t0 = z0 * (poly3[1] + z0 * poly4[1]); 507 t1 = z1 * (qq1 + z1 * qq2); 508 t2 = z2 * (poly3[1] + z2 * poly4[1]); 509 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 510 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 511 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); 512 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 513 xsb1 = (xsb1 >> 30) & 2; 514 515 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; 516 517 a2_1 = __vlibm_TBL_sincos_hi[j1+1]; 518 519 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1); 520 521 *pc0 = one + t0; 522 *pc1 = a2_1 + t2_1; 523 *pc2 = one + t2; 524 525 t0 = z0 * (poly3[0] + z0 * poly4[0]); 526 t1_1 = a2_1*w1 + a1_1*t1; 527 t2 = z2 * (poly3[0] + z2 * poly4[0]); 528 529 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); 530 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; 531 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); 532 533 t0 = x0 + x0 * t0; 534 *py0 = t0; 535 *py1 = a1_1 + t1_1; 536 t2 = x2 + x2 * t2; 537 *py2 = t2; 538 539 break; 540 541 case 6: 542 j2 = (xsb2 + 0x4000) & 0xffff8000; 543 HI(&t2) = j2; 544 LO(&t2) = 0; 545 x2 -= t2; 546 z0 = x0 * x0; 547 z1 = x1 * x1; 548 z2 = x2 * x2; 549 t0 = z0 * (poly3[1] + z0 * poly4[1]); 550 t1 = z1 * (poly3[1] + z1 * poly4[1]); 551 t2 = z2 * (qq1 + z2 * qq2); 552 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 553 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 554 w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 555 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 556 xsb2 = (xsb2 >> 30) & 2; 557 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; 558 559 a2_2 = __vlibm_TBL_sincos_hi[j2+1]; 560 561 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2); 562 563 *pc0 = one + t0; 564 *pc1 = one + t1; 565 *pc2 = a2_2 + t2_2; 566 567 t0 = z0 * (poly3[0] + z0 * poly4[0]); 568 t1 = z1 * (poly3[0] + z1 * poly4[0]); 569 t1_2 = a2_2*w2 + a1_2*t2; 570 571 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); 572 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); 573 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; 574 575 t0 = x0 + x0 * t0; 576 *py0 = t0; 577 t1 = x1 + x1 * t1; 578 *py1 = t1; 579 *py2 = a1_2 + t1_2; 580 581 break; 582 583 case 7: /* All are < 5/32 */ 584 z0 = x0 * x0; 585 z1 = x1 * x1; 586 z2 = x2 * x2; 587 t0 = z0 * (poly3[1] + z0 * poly4[1]); 588 t1 = z1 * (poly3[1] + z1 * poly4[1]); 589 t2 = z2 * (poly3[1] + z2 * poly4[1]); 590 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 591 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 592 t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); 593 *pc0 = one + t0; 594 *pc1 = one + t1; 595 *pc2 = one + t2; 596 t0 = z0 * (poly3[0] + z0 * poly4[0]); 597 t1 = z1 * (poly3[0] + z1 * poly4[0]); 598 t2 = z2 * (poly3[0] + z2 * poly4[0]); 599 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); 600 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); 601 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); 602 t0 = x0 + x0 * t0; 603 t1 = x1 + x1 * t1; 604 t2 = x2 + x2 * t2; 605 *py0 = t0; 606 *py1 = t1; 607 *py2 = t2; 608 break; 609 } 610 611 x += stridex; 612 y += stridey; 613 c += stridec; 614 i = 0; 615 } while (--n > 0); /* END MAIN LOOP */ 616 617 /* 618 * CLEAN UP last 0, 1, or 2 elts. 619 */ 620 if (i > 0) /* Clean up elts at tail. i < 3. */ 621 { 622 double a1_0, a1_1, a2_0, a2_1; 623 double w0, w1; 624 double t0, t1, t1_0, t1_1, t2_0, t2_1; 625 double z0, z1; 626 unsigned j0, j1; 627 628 if (i > 1) 629 { 630 if (hx1 < 0x3fc40000) 631 { 632 z1 = x1 * x1; 633 t1 = z1 * (poly3[1] + z1 * poly4[1]); 634 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); 635 t1 = one + t1; 636 *pc1 = t1; 637 t1 = z1 * (poly3[0] + z1 * poly4[0]); 638 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); 639 t1 = x1 + x1 * t1; 640 *py1 = t1; 641 } 642 else 643 { 644 j1 = (xsb1 + 0x4000) & 0xffff8000; 645 HI(&t1) = j1; 646 LO(&t1) = 0; 647 x1 -= t1; 648 z1 = x1 * x1; 649 t1 = z1 * (qq1 + z1 * qq2); 650 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 651 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 652 xsb1 = (xsb1 >> 30) & 2; 653 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; 654 a2_1 = __vlibm_TBL_sincos_hi[j1+1]; 655 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1); 656 *pc1 = a2_1 + t2_1; 657 t1_1 = a2_1*w1 + a1_1*t1; 658 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; 659 *py1 = a1_1 + t1_1; 660 } 661 } 662 if (hx0 < 0x3fc40000) 663 { 664 z0 = x0 * x0; 665 t0 = z0 * (poly3[1] + z0 * poly4[1]); 666 t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); 667 t0 = one + t0; 668 *pc0 = t0; 669 t0 = z0 * (poly3[0] + z0 * poly4[0]); 670 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); 671 t0 = x0 + x0 * t0; 672 *py0 = t0; 673 } 674 else 675 { 676 j0 = (xsb0 + 0x4000) & 0xffff8000; 677 HI(&t0) = j0; 678 LO(&t0) = 0; 679 x0 -= t0; 680 z0 = x0 * x0; 681 t0 = z0 * (qq1 + z0 * qq2); 682 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 683 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 684 xsb0 = (xsb0 >> 30) & 2; 685 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ 686 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ 687 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); 688 *pc0 = a2_0 + t2_0; 689 t1_0 = a2_0*w0 + a1_0*t0; 690 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ 691 *py0 = a1_0 + t1_0; 692 } 693 } /* END CLEAN UP */ 694 695 if (!biguns) 696 return; 697 698 /* 699 * Take care of BIGUNS. 700 */ 701 n = nsave; 702 x = xsave; 703 stridex = sxsave; 704 y = ysave; 705 stridey = sysave; 706 c = csave; 707 stridec = scsave; 708 biguns = 0; 709 710 x0_or_one[1] = 1.0; 711 x1_or_one[1] = 1.0; 712 x2_or_one[1] = 1.0; 713 x0_or_one[3] = -1.0; 714 x1_or_one[3] = -1.0; 715 x2_or_one[3] = -1.0; 716 y0_or_zero[1] = 0.0; 717 y1_or_zero[1] = 0.0; 718 y2_or_zero[1] = 0.0; 719 y0_or_zero[3] = 0.0; 720 y1_or_zero[3] = 0.0; 721 y2_or_zero[3] = 0.0; 722 723 do 724 { 725 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; 726 unsigned hx; 727 int n0, n1, n2; 728 729 /* 730 * Find 3 more to work on: Not already done, not too big. 731 */ 732 loop0: 733 hx = HI(x); 734 xsb0 = hx >> 31; 735 hx &= ~0x80000000; 736 if (hx <= 0x3fe921fb) /* Done above. */ 737 { 738 x += stridex; 739 y += stridey; 740 c += stridec; 741 i = 0; 742 if (--n <= 0) 743 break; 744 goto loop0; 745 } 746 if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */ 747 { 748 if (hx >= 0x7ff00000) /* Inf or NaN */ 749 { 750 x0 = *x; 751 *y = x0 - x0; 752 *c = x0 - x0; 753 } 754 else { 755 biguns = 1; 756 } 757 x += stridex; 758 y += stridey; 759 c += stridec; 760 i = 0; 761 if (--n <= 0) 762 break; 763 goto loop0; 764 } 765 x0 = *x; 766 py0 = y; 767 pc0 = c; 768 x += stridex; 769 y += stridey; 770 c += stridec; 771 i = 1; 772 if (--n <= 0) 773 break; 774 775 loop1: 776 hx = HI(x); 777 xsb1 = hx >> 31; 778 hx &= ~0x80000000; 779 if (hx <= 0x3fe921fb) 780 { 781 x += stridex; 782 y += stridey; 783 c += stridec; 784 i = 1; 785 if (--n <= 0) 786 break; 787 goto loop1; 788 } 789 if (hx > 0x413921fb) 790 { 791 if (hx >= 0x7ff00000) 792 { 793 x1 = *x; 794 *y = x1 - x1; 795 *c = x1 - x1; 796 } 797 else { 798 biguns = 1; 799 } 800 x += stridex; 801 y += stridey; 802 c += stridec; 803 i = 1; 804 if (--n <= 0) 805 break; 806 goto loop1; 807 } 808 x1 = *x; 809 py1 = y; 810 pc1 = c; 811 x += stridex; 812 y += stridey; 813 c += stridec; 814 i = 2; 815 if (--n <= 0) 816 break; 817 818 loop2: 819 hx = HI(x); 820 xsb2 = hx >> 31; 821 hx &= ~0x80000000; 822 if (hx <= 0x3fe921fb) 823 { 824 x += stridex; 825 y += stridey; 826 c += stridec; 827 i = 2; 828 if (--n <= 0) 829 break; 830 goto loop2; 831 } 832 if (hx > 0x413921fb) 833 { 834 if (hx >= 0x7ff00000) 835 { 836 x2 = *x; 837 *y = x2 - x2; 838 *c = x2 - x2; 839 } 840 else { 841 biguns = 1; 842 } 843 x += stridex; 844 y += stridey; 845 c += stridec; 846 i = 2; 847 if (--n <= 0) 848 break; 849 goto loop2; 850 } 851 x2 = *x; 852 py2 = y; 853 pc2 = c; 854 855 n0 = (int) (x0 * invpio2 + half[xsb0]); 856 n1 = (int) (x1 * invpio2 + half[xsb1]); 857 n2 = (int) (x2 * invpio2 + half[xsb2]); 858 fn0 = (double) n0; 859 fn1 = (double) n1; 860 fn2 = (double) n2; 861 n0 &= 3; 862 n1 &= 3; 863 n2 &= 3; 864 a0 = x0 - fn0 * pio2_1; 865 a1 = x1 - fn1 * pio2_1; 866 a2 = x2 - fn2 * pio2_1; 867 w0 = fn0 * pio2_2; 868 w1 = fn1 * pio2_2; 869 w2 = fn2 * pio2_2; 870 x0 = a0 - w0; 871 x1 = a1 - w1; 872 x2 = a2 - w2; 873 y0 = (a0 - x0) - w0; 874 y1 = (a1 - x1) - w1; 875 y2 = (a2 - x2) - w2; 876 a0 = x0; 877 a1 = x1; 878 a2 = x2; 879 w0 = fn0 * pio2_3 - y0; 880 w1 = fn1 * pio2_3 - y1; 881 w2 = fn2 * pio2_3 - y2; 882 x0 = a0 - w0; 883 x1 = a1 - w1; 884 x2 = a2 - w2; 885 y0 = (a0 - x0) - w0; 886 y1 = (a1 - x1) - w1; 887 y2 = (a2 - x2) - w2; 888 a0 = x0; 889 a1 = x1; 890 a2 = x2; 891 w0 = fn0 * pio2_3t - y0; 892 w1 = fn1 * pio2_3t - y1; 893 w2 = fn2 * pio2_3t - y2; 894 x0 = a0 - w0; 895 x1 = a1 - w1; 896 x2 = a2 - w2; 897 y0 = (a0 - x0) - w0; 898 y1 = (a1 - x1) - w1; 899 y2 = (a2 - x2) - w2; 900 xsb2 = HI(&x2); 901 i = ((xsb2 & ~0x80000000) - 0x3fc40000) >> 31; 902 xsb1 = HI(&x1); 903 i |= (((xsb1 & ~0x80000000) - 0x3fc40000) >> 30) & 2; 904 xsb0 = HI(&x0); 905 i |= (((xsb0 & ~0x80000000) - 0x3fc40000) >> 29) & 4; 906 switch (i) 907 { 908 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2; 909 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2; 910 double z0, z1, z2; 911 unsigned j0, j1, j2; 912 913 case 0: 914 j0 = (xsb0 + 0x4000) & 0xffff8000; 915 j1 = (xsb1 + 0x4000) & 0xffff8000; 916 j2 = (xsb2 + 0x4000) & 0xffff8000; 917 HI(&t0) = j0; 918 HI(&t1) = j1; 919 HI(&t2) = j2; 920 LO(&t0) = 0; 921 LO(&t1) = 0; 922 LO(&t2) = 0; 923 x0 = (x0 - t0) + y0; 924 x1 = (x1 - t1) + y1; 925 x2 = (x2 - t2) + y2; 926 z0 = x0 * x0; 927 z1 = x1 * x1; 928 z2 = x2 * x2; 929 t0 = z0 * (qq1 + z0 * qq2); 930 t1 = z1 * (qq1 + z1 * qq2); 931 t2 = z2 * (qq1 + z2 * qq2); 932 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 933 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 934 w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 935 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 936 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 937 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 938 xsb0 = (xsb0 >> 30) & 2; 939 xsb1 = (xsb1 >> 30) & 2; 940 xsb2 = (xsb2 >> 30) & 2; 941 n0 ^= (xsb0 & ~(n0 << 1)); 942 n1 ^= (xsb1 & ~(n1 << 1)); 943 n2 ^= (xsb2 & ~(n2 << 1)); 944 xsb0 |= 1; 945 xsb1 |= 1; 946 xsb2 |= 1; 947 948 a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; 949 a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; 950 a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; 951 952 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; 953 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; 954 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; 955 956 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); 957 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); 958 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2); 959 960 w0 *= a2_0; 961 w1 *= a2_1; 962 w2 *= a2_2; 963 964 *pc0 = a2_0 + t2_0; 965 *pc1 = a2_1 + t2_1; 966 *pc2 = a2_2 + t2_2; 967 968 t1_0 = w0 + a1_0*t0; 969 t1_1 = w1 + a1_1*t1; 970 t1_2 = w2 + a1_2*t2; 971 972 t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; 973 t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; 974 t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; 975 976 *py0 = a1_0 + t1_0; 977 *py1 = a1_1 + t1_1; 978 *py2 = a1_2 + t1_2; 979 980 break; 981 982 case 1: 983 j0 = (xsb0 + 0x4000) & 0xffff8000; 984 j1 = (xsb1 + 0x4000) & 0xffff8000; 985 j2 = n2 & 1; 986 HI(&t0) = j0; 987 HI(&t1) = j1; 988 LO(&t0) = 0; 989 LO(&t1) = 0; 990 x2_or_one[0] = x2; 991 x2_or_one[2] = -x2; 992 x0 = (x0 - t0) + y0; 993 x1 = (x1 - t1) + y1; 994 y2_or_zero[0] = y2; 995 y2_or_zero[2] = -y2; 996 z0 = x0 * x0; 997 z1 = x1 * x1; 998 z2 = x2 * x2; 999 t0 = z0 * (qq1 + z0 * qq2); 1000 t1 = z1 * (qq1 + z1 * qq2); 1001 t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 1002 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 1003 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 1004 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 1005 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1006 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1007 xsb0 = (xsb0 >> 30) & 2; 1008 xsb1 = (xsb1 >> 30) & 2; 1009 n0 ^= (xsb0 & ~(n0 << 1)); 1010 n1 ^= (xsb1 & ~(n1 << 1)); 1011 xsb0 |= 1; 1012 xsb1 |= 1; 1013 a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; 1014 a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; 1015 1016 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; 1017 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; 1018 1019 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); 1020 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); 1021 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 1022 1023 *pc0 = a2_0 + t2_0; 1024 *pc1 = a2_1 + t2_1; 1025 *py2 = t2; 1026 1027 n2 = (n2 + 1) & 3; 1028 j2 = (j2 + 1) & 1; 1029 t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 1030 1031 t1_0 = a2_0*w0 + a1_0*t0; 1032 t1_1 = a2_1*w1 + a1_1*t1; 1033 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 1034 1035 t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; 1036 t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; 1037 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 1038 1039 *py0 = a1_0 + t1_0; 1040 *py1 = a1_1 + t1_1; 1041 *pc2 = t2; 1042 1043 break; 1044 1045 case 2: 1046 j0 = (xsb0 + 0x4000) & 0xffff8000; 1047 j1 = n1 & 1; 1048 j2 = (xsb2 + 0x4000) & 0xffff8000; 1049 HI(&t0) = j0; 1050 HI(&t2) = j2; 1051 LO(&t0) = 0; 1052 LO(&t2) = 0; 1053 x1_or_one[0] = x1; 1054 x1_or_one[2] = -x1; 1055 x0 = (x0 - t0) + y0; 1056 y1_or_zero[0] = y1; 1057 y1_or_zero[2] = -y1; 1058 x2 = (x2 - t2) + y2; 1059 z0 = x0 * x0; 1060 z1 = x1 * x1; 1061 z2 = x2 * x2; 1062 t0 = z0 * (qq1 + z0 * qq2); 1063 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1064 t2 = z2 * (qq1 + z2 * qq2); 1065 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 1066 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1067 w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 1068 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1069 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1070 xsb0 = (xsb0 >> 30) & 2; 1071 xsb2 = (xsb2 >> 30) & 2; 1072 n0 ^= (xsb0 & ~(n0 << 1)); 1073 n2 ^= (xsb2 & ~(n2 << 1)); 1074 xsb0 |= 1; 1075 xsb2 |= 1; 1076 1077 a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; 1078 a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; 1079 1080 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; 1081 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; 1082 1083 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); 1084 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1085 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2); 1086 1087 *pc0 = a2_0 + t2_0; 1088 *py1 = t1; 1089 *pc2 = a2_2 + t2_2; 1090 1091 n1 = (n1 + 1) & 3; 1092 j1 = (j1 + 1) & 1; 1093 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1094 1095 t1_0 = a2_0*w0 + a1_0*t0; 1096 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1097 t1_2 = a2_2*w2 + a1_2*t2; 1098 1099 t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; 1100 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1101 t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; 1102 1103 *py0 = a1_0 + t1_0; 1104 *pc1 = t1; 1105 *py2 = a1_2 + t1_2; 1106 1107 break; 1108 1109 case 3: 1110 j0 = (xsb0 + 0x4000) & 0xffff8000; 1111 j1 = n1 & 1; 1112 j2 = n2 & 1; 1113 HI(&t0) = j0; 1114 LO(&t0) = 0; 1115 x1_or_one[0] = x1; 1116 x1_or_one[2] = -x1; 1117 x2_or_one[0] = x2; 1118 x2_or_one[2] = -x2; 1119 x0 = (x0 - t0) + y0; 1120 y1_or_zero[0] = y1; 1121 y1_or_zero[2] = -y1; 1122 y2_or_zero[0] = y2; 1123 y2_or_zero[2] = -y2; 1124 z0 = x0 * x0; 1125 z1 = x1 * x1; 1126 z2 = x2 * x2; 1127 t0 = z0 * (qq1 + z0 * qq2); 1128 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1129 t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 1130 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 1131 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1132 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 1133 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1134 xsb0 = (xsb0 >> 30) & 2; 1135 n0 ^= (xsb0 & ~(n0 << 1)); 1136 xsb0 |= 1; 1137 1138 a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; 1139 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; 1140 1141 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); 1142 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1143 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 1144 1145 *pc0 = a2_0 + t2_0; 1146 *py1 = t1; 1147 *py2 = t2; 1148 1149 n1 = (n1 + 1) & 3; 1150 n2 = (n2 + 1) & 3; 1151 j1 = (j1 + 1) & 1; 1152 j2 = (j2 + 1) & 1; 1153 1154 t1_0 = a2_0*w0 + a1_0*t0; 1155 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1156 t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 1157 1158 t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; 1159 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1160 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 1161 1162 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1163 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 1164 1165 *py0 = a1_0 + t1_0; 1166 *pc1 = t1; 1167 *pc2 = t2; 1168 1169 break; 1170 1171 case 4: 1172 j0 = n0 & 1; 1173 j1 = (xsb1 + 0x4000) & 0xffff8000; 1174 j2 = (xsb2 + 0x4000) & 0xffff8000; 1175 HI(&t1) = j1; 1176 HI(&t2) = j2; 1177 LO(&t1) = 0; 1178 LO(&t2) = 0; 1179 x0_or_one[0] = x0; 1180 x0_or_one[2] = -x0; 1181 y0_or_zero[0] = y0; 1182 y0_or_zero[2] = -y0; 1183 x1 = (x1 - t1) + y1; 1184 x2 = (x2 - t2) + y2; 1185 z0 = x0 * x0; 1186 z1 = x1 * x1; 1187 z2 = x2 * x2; 1188 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1189 t1 = z1 * (qq1 + z1 * qq2); 1190 t2 = z2 * (qq1 + z2 * qq2); 1191 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1192 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 1193 w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 1194 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1195 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1196 xsb1 = (xsb1 >> 30) & 2; 1197 xsb2 = (xsb2 >> 30) & 2; 1198 n1 ^= (xsb1 & ~(n1 << 1)); 1199 n2 ^= (xsb2 & ~(n2 << 1)); 1200 xsb1 |= 1; 1201 xsb2 |= 1; 1202 1203 a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; 1204 a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; 1205 1206 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; 1207 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; 1208 1209 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1210 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); 1211 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2); 1212 1213 *py0 = t0; 1214 *pc1 = a2_1 + t2_1; 1215 *pc2 = a2_2 + t2_2; 1216 1217 n0 = (n0 + 1) & 3; 1218 j0 = (j0 + 1) & 1; 1219 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1220 1221 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1222 t1_1 = a2_1*w1 + a1_1*t1; 1223 t1_2 = a2_2*w2 + a1_2*t2; 1224 1225 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1226 t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; 1227 t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; 1228 1229 *py1 = a1_1 + t1_1; 1230 *py2 = a1_2 + t1_2; 1231 *pc0 = t0; 1232 1233 break; 1234 1235 case 5: 1236 j0 = n0 & 1; 1237 j1 = (xsb1 + 0x4000) & 0xffff8000; 1238 j2 = n2 & 1; 1239 HI(&t1) = j1; 1240 LO(&t1) = 0; 1241 x0_or_one[0] = x0; 1242 x0_or_one[2] = -x0; 1243 x2_or_one[0] = x2; 1244 x2_or_one[2] = -x2; 1245 y0_or_zero[0] = y0; 1246 y0_or_zero[2] = -y0; 1247 x1 = (x1 - t1) + y1; 1248 y2_or_zero[0] = y2; 1249 y2_or_zero[2] = -y2; 1250 z0 = x0 * x0; 1251 z1 = x1 * x1; 1252 z2 = x2 * x2; 1253 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1254 t1 = z1 * (qq1 + z1 * qq2); 1255 t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 1256 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1257 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 1258 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 1259 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1260 xsb1 = (xsb1 >> 30) & 2; 1261 n1 ^= (xsb1 & ~(n1 << 1)); 1262 xsb1 |= 1; 1263 1264 a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; 1265 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; 1266 1267 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1268 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); 1269 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 1270 1271 *py0 = t0; 1272 *pc1 = a2_1 + t2_1; 1273 *py2 = t2; 1274 1275 n0 = (n0 + 1) & 3; 1276 n2 = (n2 + 1) & 3; 1277 j0 = (j0 + 1) & 1; 1278 j2 = (j2 + 1) & 1; 1279 1280 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1281 t1_1 = a2_1*w1 + a1_1*t1; 1282 t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 1283 1284 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1285 t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; 1286 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 1287 1288 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1289 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 1290 1291 *pc0 = t0; 1292 *py1 = a1_1 + t1_1; 1293 *pc2 = t2; 1294 1295 break; 1296 1297 case 6: 1298 j0 = n0 & 1; 1299 j1 = n1 & 1; 1300 j2 = (xsb2 + 0x4000) & 0xffff8000; 1301 HI(&t2) = j2; 1302 LO(&t2) = 0; 1303 x0_or_one[0] = x0; 1304 x0_or_one[2] = -x0; 1305 x1_or_one[0] = x1; 1306 x1_or_one[2] = -x1; 1307 y0_or_zero[0] = y0; 1308 y0_or_zero[2] = -y0; 1309 y1_or_zero[0] = y1; 1310 y1_or_zero[2] = -y1; 1311 x2 = (x2 - t2) + y2; 1312 z0 = x0 * x0; 1313 z1 = x1 * x1; 1314 z2 = x2 * x2; 1315 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1316 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1317 t2 = z2 * (qq1 + z2 * qq2); 1318 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1319 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1320 w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); 1321 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1322 xsb2 = (xsb2 >> 30) & 2; 1323 n2 ^= (xsb2 & ~(n2 << 1)); 1324 xsb2 |= 1; 1325 1326 a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; 1327 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; 1328 1329 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1330 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1331 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2); 1332 1333 *py0 = t0; 1334 *py1 = t1; 1335 *pc2 = a2_2 + t2_2; 1336 1337 n0 = (n0 + 1) & 3; 1338 n1 = (n1 + 1) & 3; 1339 j0 = (j0 + 1) & 1; 1340 j1 = (j1 + 1) & 1; 1341 1342 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1343 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1344 t1_2 = a2_2*w2 + a1_2*t2; 1345 1346 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1347 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1348 t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; 1349 1350 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1351 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1352 1353 *pc0 = t0; 1354 *pc1 = t1; 1355 *py2 = a1_2 + t1_2; 1356 1357 break; 1358 1359 case 7: 1360 j0 = n0 & 1; 1361 j1 = n1 & 1; 1362 j2 = n2 & 1; 1363 x0_or_one[0] = x0; 1364 x0_or_one[2] = -x0; 1365 x1_or_one[0] = x1; 1366 x1_or_one[2] = -x1; 1367 x2_or_one[0] = x2; 1368 x2_or_one[2] = -x2; 1369 y0_or_zero[0] = y0; 1370 y0_or_zero[2] = -y0; 1371 y1_or_zero[0] = y1; 1372 y1_or_zero[2] = -y1; 1373 y2_or_zero[0] = y2; 1374 y2_or_zero[2] = -y2; 1375 z0 = x0 * x0; 1376 z1 = x1 * x1; 1377 z2 = x2 * x2; 1378 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1379 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1380 t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 1381 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1382 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1383 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 1384 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1385 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1386 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 1387 *py0 = t0; 1388 *py1 = t1; 1389 *py2 = t2; 1390 1391 n0 = (n0 + 1) & 3; 1392 n1 = (n1 + 1) & 3; 1393 n2 = (n2 + 1) & 3; 1394 j0 = (j0 + 1) & 1; 1395 j1 = (j1 + 1) & 1; 1396 j2 = (j2 + 1) & 1; 1397 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1398 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1399 t2 = z2 * (poly3[j2] + z2 * poly4[j2]); 1400 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1401 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1402 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); 1403 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1404 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1405 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); 1406 *pc0 = t0; 1407 *pc1 = t1; 1408 *pc2 = t2; 1409 break; 1410 } 1411 1412 x += stridex; 1413 y += stridey; 1414 c += stridec; 1415 i = 0; 1416 } while (--n > 0); 1417 1418 if (i > 0) 1419 { 1420 double a1_0, a1_1, a2_0, a2_1; 1421 double t0, t1, t1_0, t1_1, t2_0, t2_1; 1422 double fn0, fn1, a0, a1, w0, w1, y0, y1; 1423 double z0, z1; 1424 unsigned j0, j1; 1425 int n0, n1; 1426 1427 if (i > 1) 1428 { 1429 n1 = (int) (x1 * invpio2 + half[xsb1]); 1430 fn1 = (double) n1; 1431 n1 &= 3; 1432 a1 = x1 - fn1 * pio2_1; 1433 w1 = fn1 * pio2_2; 1434 x1 = a1 - w1; 1435 y1 = (a1 - x1) - w1; 1436 a1 = x1; 1437 w1 = fn1 * pio2_3 - y1; 1438 x1 = a1 - w1; 1439 y1 = (a1 - x1) - w1; 1440 a1 = x1; 1441 w1 = fn1 * pio2_3t - y1; 1442 x1 = a1 - w1; 1443 y1 = (a1 - x1) - w1; 1444 xsb1 = HI(&x1); 1445 if ((xsb1 & ~0x80000000) < 0x3fc40000) 1446 { 1447 j1 = n1 & 1; 1448 x1_or_one[0] = x1; 1449 x1_or_one[2] = -x1; 1450 y1_or_zero[0] = y1; 1451 y1_or_zero[2] = -y1; 1452 z1 = x1 * x1; 1453 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1454 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1455 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1456 *py1 = t1; 1457 n1 = (n1 + 1) & 3; 1458 j1 = (j1 + 1) & 1; 1459 t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 1460 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); 1461 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); 1462 *pc1 = t1; 1463 } 1464 else 1465 { 1466 j1 = (xsb1 + 0x4000) & 0xffff8000; 1467 HI(&t1) = j1; 1468 LO(&t1) = 0; 1469 x1 = (x1 - t1) + y1; 1470 z1 = x1 * x1; 1471 t1 = z1 * (qq1 + z1 * qq2); 1472 w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); 1473 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1474 xsb1 = (xsb1 >> 30) & 2; 1475 n1 ^= (xsb1 & ~(n1 << 1)); 1476 xsb1 |= 1; 1477 a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; 1478 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; 1479 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); 1480 *pc1 = a2_1 + t2_1; 1481 t1_1 = a2_1*w1 + a1_1*t1; 1482 t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; 1483 *py1 = a1_1 + t1_1; 1484 } 1485 } 1486 n0 = (int) (x0 * invpio2 + half[xsb0]); 1487 fn0 = (double) n0; 1488 n0 &= 3; 1489 a0 = x0 - fn0 * pio2_1; 1490 w0 = fn0 * pio2_2; 1491 x0 = a0 - w0; 1492 y0 = (a0 - x0) - w0; 1493 a0 = x0; 1494 w0 = fn0 * pio2_3 - y0; 1495 x0 = a0 - w0; 1496 y0 = (a0 - x0) - w0; 1497 a0 = x0; 1498 w0 = fn0 * pio2_3t - y0; 1499 x0 = a0 - w0; 1500 y0 = (a0 - x0) - w0; 1501 xsb0 = HI(&x0); 1502 if ((xsb0 & ~0x80000000) < 0x3fc40000) 1503 { 1504 j0 = n0 & 1; 1505 x0_or_one[0] = x0; 1506 x0_or_one[2] = -x0; 1507 y0_or_zero[0] = y0; 1508 y0_or_zero[2] = -y0; 1509 z0 = x0 * x0; 1510 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1511 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1512 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1513 *py0 = t0; 1514 n0 = (n0 + 1) & 3; 1515 j0 = (j0 + 1) & 1; 1516 t0 = z0 * (poly3[j0] + z0 * poly4[j0]); 1517 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); 1518 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); 1519 *pc0 = t0; 1520 } 1521 else 1522 { 1523 j0 = (xsb0 + 0x4000) & 0xffff8000; 1524 HI(&t0) = j0; 1525 LO(&t0) = 0; 1526 x0 = (x0 - t0) + y0; 1527 z0 = x0 * x0; 1528 t0 = z0 * (qq1 + z0 * qq2); 1529 w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); 1530 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 1531 xsb0 = (xsb0 >> 30) & 2; 1532 n0 ^= (xsb0 & ~(n0 << 1)); 1533 xsb0 |= 1; 1534 a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; 1535 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; 1536 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); 1537 *pc0 = a2_0 + t2_0; 1538 t1_0 = a2_0*w0 + a1_0*t0; 1539 t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; 1540 *py0 = a1_0 + t1_0; 1541 } 1542 } 1543 1544 if (biguns) { 1545 __vlibm_vsincos_big(nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb); 1546 } 1547 } 1548