1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include <sys/isa_defs.h> 31 #include "libm_inlines.h" 32 33 #ifdef _LITTLE_ENDIAN 34 #define HI(x) *(1+(int*)x) 35 #define LO(x) *(unsigned*)x 36 #else 37 #define HI(x) *(int*)x 38 #define LO(x) *(1+(unsigned*)x) 39 #endif 40 41 #ifdef __RESTRICT 42 #define restrict _Restrict 43 #else 44 #define restrict 45 #endif 46 47 extern const double __vlibm_TBL_atan2[]; 48 49 static const double 50 zero = 0.0, 51 twom3 = 0.125, 52 one = 1.0, 53 two110 = 1.2980742146337069071e+33, 54 pio4 = 7.8539816339744827900e-01, 55 pio2 = 1.5707963267948965580e+00, 56 pio2_lo = 6.1232339957367658860e-17, 57 pi = 3.1415926535897931160e+00, 58 pi_lo = 1.2246467991473531772e-16, 59 p1 = -3.33333333333327571893331786354179101074860633009e-0001, 60 p2 = 1.99999999942671624230086497610394721817438631379e-0001, 61 p3 = -1.42856965565428636896183013324727205980484158356e-0001, 62 p4 = 1.10894981496317081405107718475040168084164825641e-0001; 63 64 /* Don't __ the following; acomp will handle it */ 65 extern double fabs(double); 66 67 void 68 __vatan2(int n, double * restrict y, int stridey, double * restrict x, 69 int stridex, double * restrict z, int stridez) 70 { 71 double x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2; 72 double ah0, ah1, ah2, al0, al1, al2, t0, t1, t2; 73 double z0, z1, z2, sign0, sign1, sign2, xh; 74 int i, k, hx, hy, sx, sy; 75 76 do 77 { 78 loop0: 79 hy = HI(y); 80 sy = hy & 0x80000000; 81 hy &= ~0x80000000; 82 sign0 = (sy)? -one : one; 83 84 hx = HI(x); 85 sx = hx & 0x80000000; 86 hx &= ~0x80000000; 87 88 if (hy > hx || (hy == hx && LO(y) > LO(x))) 89 { 90 i = hx; 91 hx = hy; 92 hy = i; 93 x0 = fabs(*y); 94 y0 = fabs(*x); 95 if (sx) 96 { 97 ah0 = pio2; 98 al0 = pio2_lo; 99 } 100 else 101 { 102 ah0 = -pio2; 103 al0 = -pio2_lo; 104 sign0 = -sign0; 105 } 106 } 107 else 108 { 109 x0 = fabs(*x); 110 y0 = fabs(*y); 111 if (sx) 112 { 113 ah0 = -pi; 114 al0 = -pi_lo; 115 sign0 = -sign0; 116 } 117 else 118 ah0 = al0 = zero; 119 } 120 121 if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) 122 { 123 if (hx >= 0x7ff00000) 124 { 125 if ((hx ^ 0x7ff00000) | LO(&x0)) /* nan */ 126 ah0 = x0 + y0; 127 else if (hy >= 0x7ff00000) 128 ah0 += pio4; 129 *z = sign0 * ah0; 130 x += stridex; 131 y += stridey; 132 z += stridez; 133 i = 0; 134 if (--n <= 0) 135 break; 136 goto loop0; 137 } 138 if (hx - hy >= 0x03600000) 139 { 140 if ((int) ah0 == 0) 141 ah0 = y0 / x0; 142 *z = sign0 * ah0; 143 x += stridex; 144 y += stridey; 145 z += stridez; 146 i = 0; 147 if (--n <= 0) 148 break; 149 goto loop0; 150 } 151 y0 *= twom3; 152 x0 *= twom3; 153 hy -= 0x00300000; 154 hx -= 0x00300000; 155 } 156 else if (hy < 0x00100000) 157 { 158 if ((hy | LO(&y0)) == 0) 159 { 160 *z = sign0 * ah0; 161 x += stridex; 162 y += stridey; 163 z += stridez; 164 i = 0; 165 if (--n <= 0) 166 break; 167 goto loop0; 168 } 169 y0 *= two110; 170 x0 *= two110; 171 hy = HI(&y0); 172 hx = HI(&x0); 173 } 174 175 k = (((hx - hy) + 0x00004000) >> 13) & ~0x3; 176 if (k > 644) 177 k = 644; 178 ah0 += __vlibm_TBL_atan2[k]; 179 al0 += __vlibm_TBL_atan2[k+1]; 180 t0 = __vlibm_TBL_atan2[k+2]; 181 182 xh = x0; 183 LO(&xh) = 0; 184 z0 = ((y0 - t0 * xh) - t0 * (x0 - xh)) / (x0 + y0 * t0); 185 pz0 = z; 186 x += stridex; 187 y += stridey; 188 z += stridez; 189 i = 1; 190 if (--n <= 0) 191 break; 192 193 loop1: 194 hy = HI(y); 195 sy = hy & 0x80000000; 196 hy &= ~0x80000000; 197 sign1 = (sy)? -one : one; 198 199 hx = HI(x); 200 sx = hx & 0x80000000; 201 hx &= ~0x80000000; 202 203 if (hy > hx || (hy == hx && LO(y) > LO(x))) 204 { 205 i = hx; 206 hx = hy; 207 hy = i; 208 x1 = fabs(*y); 209 y1 = fabs(*x); 210 if (sx) 211 { 212 ah1 = pio2; 213 al1 = pio2_lo; 214 } 215 else 216 { 217 ah1 = -pio2; 218 al1 = -pio2_lo; 219 sign1 = -sign1; 220 } 221 } 222 else 223 { 224 x1 = fabs(*x); 225 y1 = fabs(*y); 226 if (sx) 227 { 228 ah1 = -pi; 229 al1 = -pi_lo; 230 sign1 = -sign1; 231 } 232 else 233 ah1 = al1 = zero; 234 } 235 236 if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) 237 { 238 if (hx >= 0x7ff00000) 239 { 240 if ((hx ^ 0x7ff00000) | LO(&x1)) /* nan */ 241 ah1 = x1 + y1; 242 else if (hy >= 0x7ff00000) 243 ah1 += pio4; 244 *z = sign1 * ah1; 245 x += stridex; 246 y += stridey; 247 z += stridez; 248 i = 1; 249 if (--n <= 0) 250 break; 251 goto loop1; 252 } 253 if (hx - hy >= 0x03600000) 254 { 255 if ((int) ah1 == 0) 256 ah1 = y1 / x1; 257 *z = sign1 * ah1; 258 x += stridex; 259 y += stridey; 260 z += stridez; 261 i = 1; 262 if (--n <= 0) 263 break; 264 goto loop1; 265 } 266 y1 *= twom3; 267 x1 *= twom3; 268 hy -= 0x00300000; 269 hx -= 0x00300000; 270 } 271 else if (hy < 0x00100000) 272 { 273 if ((hy | LO(&y1)) == 0) 274 { 275 *z = sign1 * ah1; 276 x += stridex; 277 y += stridey; 278 z += stridez; 279 i = 1; 280 if (--n <= 0) 281 break; 282 goto loop1; 283 } 284 y1 *= two110; 285 x1 *= two110; 286 hy = HI(&y1); 287 hx = HI(&x1); 288 } 289 290 k = (((hx - hy) + 0x00004000) >> 13) & ~0x3; 291 if (k > 644) 292 k = 644; 293 ah1 += __vlibm_TBL_atan2[k]; 294 al1 += __vlibm_TBL_atan2[k+1]; 295 t1 = __vlibm_TBL_atan2[k+2]; 296 297 xh = x1; 298 LO(&xh) = 0; 299 z1 = ((y1 - t1 * xh) - t1 * (x1 - xh)) / (x1 + y1 * t1); 300 pz1 = z; 301 x += stridex; 302 y += stridey; 303 z += stridez; 304 i = 2; 305 if (--n <= 0) 306 break; 307 308 loop2: 309 hy = HI(y); 310 sy = hy & 0x80000000; 311 hy &= ~0x80000000; 312 sign2 = (sy)? -one : one; 313 314 hx = HI(x); 315 sx = hx & 0x80000000; 316 hx &= ~0x80000000; 317 318 if (hy > hx || (hy == hx && LO(y) > LO(x))) 319 { 320 i = hx; 321 hx = hy; 322 hy = i; 323 x2 = fabs(*y); 324 y2 = fabs(*x); 325 if (sx) 326 { 327 ah2 = pio2; 328 al2 = pio2_lo; 329 } 330 else 331 { 332 ah2 = -pio2; 333 al2 = -pio2_lo; 334 sign2 = -sign2; 335 } 336 } 337 else 338 { 339 x2 = fabs(*x); 340 y2 = fabs(*y); 341 if (sx) 342 { 343 ah2 = -pi; 344 al2 = -pi_lo; 345 sign2 = -sign2; 346 } 347 else 348 ah2 = al2 = zero; 349 } 350 351 if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) 352 { 353 if (hx >= 0x7ff00000) 354 { 355 if ((hx ^ 0x7ff00000) | LO(&x2)) /* nan */ 356 ah2 = x2 + y2; 357 else if (hy >= 0x7ff00000) 358 ah2 += pio4; 359 *z = sign2 * ah2; 360 x += stridex; 361 y += stridey; 362 z += stridez; 363 i = 2; 364 if (--n <= 0) 365 break; 366 goto loop2; 367 } 368 if (hx - hy >= 0x03600000) 369 { 370 if ((int) ah2 == 0) 371 ah2 = y2 / x2; 372 *z = sign2 * ah2; 373 x += stridex; 374 y += stridey; 375 z += stridez; 376 i = 2; 377 if (--n <= 0) 378 break; 379 goto loop2; 380 } 381 y2 *= twom3; 382 x2 *= twom3; 383 hy -= 0x00300000; 384 hx -= 0x00300000; 385 } 386 else if (hy < 0x00100000) 387 { 388 if ((hy | LO(&y2)) == 0) 389 { 390 *z = sign2 * ah2; 391 x += stridex; 392 y += stridey; 393 z += stridez; 394 i = 2; 395 if (--n <= 0) 396 break; 397 goto loop2; 398 } 399 y2 *= two110; 400 x2 *= two110; 401 hy = HI(&y2); 402 hx = HI(&x2); 403 } 404 405 k = (((hx - hy) + 0x00004000) >> 13) & ~0x3; 406 if (k > 644) 407 k = 644; 408 ah2 += __vlibm_TBL_atan2[k]; 409 al2 += __vlibm_TBL_atan2[k+1]; 410 t2 = __vlibm_TBL_atan2[k+2]; 411 412 xh = x2; 413 LO(&xh) = 0; 414 z2 = ((y2 - t2 * xh) - t2 * (x2 - xh)) / (x2 + y2 * t2); 415 pz2 = z; 416 417 x0 = z0 * z0; 418 x1 = z1 * z1; 419 x2 = z2 * z2; 420 421 t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 * 422 (p2 + x0 * (p3 + x0 * p4))))); 423 t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 * 424 (p2 + x1 * (p3 + x1 * p4))))); 425 t2 = ah2 + (z2 + (al2 + (z2 * x2) * (p1 + x2 * 426 (p2 + x2 * (p3 + x2 * p4))))); 427 428 *pz0 = sign0 * t0; 429 *pz1 = sign1 * t1; 430 *pz2 = sign2 * t2; 431 432 x += stridex; 433 y += stridey; 434 z += stridez; 435 i = 0; 436 } while (--n > 0); 437 438 if (i > 0) 439 { 440 if (i > 1) 441 { 442 x1 = z1 * z1; 443 t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 * 444 (p2 + x1 * (p3 + x1 * p4))))); 445 *pz1 = sign1 * t1; 446 } 447 448 x0 = z0 * z0; 449 t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 * 450 (p2 + x0 * (p3 + x0 * p4))))); 451 *pz0 = sign0 * t0; 452 } 453 } 454