1*25c28e83SPiotr Jasiukajtis /* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis 22*25c28e83SPiotr Jasiukajtis /* 23*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24*25c28e83SPiotr Jasiukajtis */ 25*25c28e83SPiotr Jasiukajtis /* 26*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 28*25c28e83SPiotr Jasiukajtis */ 29*25c28e83SPiotr Jasiukajtis 30*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT 31*25c28e83SPiotr Jasiukajtis #define restrict _Restrict 32*25c28e83SPiotr Jasiukajtis #else 33*25c28e83SPiotr Jasiukajtis #define restrict 34*25c28e83SPiotr Jasiukajtis #endif 35*25c28e83SPiotr Jasiukajtis 36*25c28e83SPiotr Jasiukajtis extern const double __vlibm_TBL_atan1[]; 37*25c28e83SPiotr Jasiukajtis 38*25c28e83SPiotr Jasiukajtis static const double 39*25c28e83SPiotr Jasiukajtis pio4 = 7.8539816339744827900e-01, 40*25c28e83SPiotr Jasiukajtis pio2 = 1.5707963267948965580e+00, 41*25c28e83SPiotr Jasiukajtis pi = 3.1415926535897931160e+00; 42*25c28e83SPiotr Jasiukajtis 43*25c28e83SPiotr Jasiukajtis static const float 44*25c28e83SPiotr Jasiukajtis zero = 0.0f, 45*25c28e83SPiotr Jasiukajtis one = 1.0f, 46*25c28e83SPiotr Jasiukajtis q1 = -3.3333333333296428046e-01f, 47*25c28e83SPiotr Jasiukajtis q2 = 1.9999999186853752618e-01f, 48*25c28e83SPiotr Jasiukajtis twop24 = 16777216.0f; 49*25c28e83SPiotr Jasiukajtis 50*25c28e83SPiotr Jasiukajtis void 51*25c28e83SPiotr Jasiukajtis __vatan2f(int n, float * restrict y, int stridey, float * restrict x, 52*25c28e83SPiotr Jasiukajtis int stridex, float * restrict z, int stridez) 53*25c28e83SPiotr Jasiukajtis { 54*25c28e83SPiotr Jasiukajtis float x0, x1, x2, y0, y1, y2, *pz0 = 0, *pz1, *pz2; 55*25c28e83SPiotr Jasiukajtis double ah0, ah1, ah2; 56*25c28e83SPiotr Jasiukajtis double t0, t1, t2; 57*25c28e83SPiotr Jasiukajtis double sx0, sx1, sx2; 58*25c28e83SPiotr Jasiukajtis double sign0, sign1, sign2; 59*25c28e83SPiotr Jasiukajtis int i, k0 = 0, k1, k2, hx, sx, sy; 60*25c28e83SPiotr Jasiukajtis int hy0, hy1, hy2; 61*25c28e83SPiotr Jasiukajtis float base0 = 0.0, base1, base2; 62*25c28e83SPiotr Jasiukajtis double num0, num1, num2; 63*25c28e83SPiotr Jasiukajtis double den0, den1, den2; 64*25c28e83SPiotr Jasiukajtis double dx0, dx1, dx2; 65*25c28e83SPiotr Jasiukajtis double dy0, dy1, dy2; 66*25c28e83SPiotr Jasiukajtis double db0, db1, db2; 67*25c28e83SPiotr Jasiukajtis 68*25c28e83SPiotr Jasiukajtis do 69*25c28e83SPiotr Jasiukajtis { 70*25c28e83SPiotr Jasiukajtis loop0: 71*25c28e83SPiotr Jasiukajtis hy0 = *(int*)y; 72*25c28e83SPiotr Jasiukajtis hx = *(int*)x; 73*25c28e83SPiotr Jasiukajtis sign0 = one; 74*25c28e83SPiotr Jasiukajtis sy = hy0 & 0x80000000; 75*25c28e83SPiotr Jasiukajtis hy0 &= ~0x80000000; 76*25c28e83SPiotr Jasiukajtis 77*25c28e83SPiotr Jasiukajtis sx = hx & 0x80000000; 78*25c28e83SPiotr Jasiukajtis hx &= ~0x80000000; 79*25c28e83SPiotr Jasiukajtis 80*25c28e83SPiotr Jasiukajtis if (hy0 > hx) 81*25c28e83SPiotr Jasiukajtis { 82*25c28e83SPiotr Jasiukajtis x0 = *y; 83*25c28e83SPiotr Jasiukajtis y0 = *x; 84*25c28e83SPiotr Jasiukajtis i = hx; 85*25c28e83SPiotr Jasiukajtis hx = hy0; 86*25c28e83SPiotr Jasiukajtis hy0 = i; 87*25c28e83SPiotr Jasiukajtis if (sy) 88*25c28e83SPiotr Jasiukajtis { 89*25c28e83SPiotr Jasiukajtis x0 = -x0; 90*25c28e83SPiotr Jasiukajtis sign0 = -sign0; 91*25c28e83SPiotr Jasiukajtis } 92*25c28e83SPiotr Jasiukajtis if (sx) 93*25c28e83SPiotr Jasiukajtis { 94*25c28e83SPiotr Jasiukajtis y0 = -y0; 95*25c28e83SPiotr Jasiukajtis ah0 = pio2; 96*25c28e83SPiotr Jasiukajtis } 97*25c28e83SPiotr Jasiukajtis else 98*25c28e83SPiotr Jasiukajtis { 99*25c28e83SPiotr Jasiukajtis ah0 = -pio2; 100*25c28e83SPiotr Jasiukajtis sign0 = -sign0; 101*25c28e83SPiotr Jasiukajtis } 102*25c28e83SPiotr Jasiukajtis } 103*25c28e83SPiotr Jasiukajtis else 104*25c28e83SPiotr Jasiukajtis { 105*25c28e83SPiotr Jasiukajtis y0 = *y; 106*25c28e83SPiotr Jasiukajtis x0 = *x; 107*25c28e83SPiotr Jasiukajtis if (sy) 108*25c28e83SPiotr Jasiukajtis { 109*25c28e83SPiotr Jasiukajtis y0 = -y0; 110*25c28e83SPiotr Jasiukajtis sign0 = -sign0; 111*25c28e83SPiotr Jasiukajtis } 112*25c28e83SPiotr Jasiukajtis if (sx) 113*25c28e83SPiotr Jasiukajtis { 114*25c28e83SPiotr Jasiukajtis x0 = -x0; 115*25c28e83SPiotr Jasiukajtis ah0 = -pi; 116*25c28e83SPiotr Jasiukajtis sign0 = -sign0; 117*25c28e83SPiotr Jasiukajtis } 118*25c28e83SPiotr Jasiukajtis else 119*25c28e83SPiotr Jasiukajtis ah0 = zero; 120*25c28e83SPiotr Jasiukajtis } 121*25c28e83SPiotr Jasiukajtis 122*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000 || hx - hy0 >= 0x0c800000) 123*25c28e83SPiotr Jasiukajtis { 124*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000) 125*25c28e83SPiotr Jasiukajtis { 126*25c28e83SPiotr Jasiukajtis if (hx ^ 0x7f800000) /* nan */ 127*25c28e83SPiotr Jasiukajtis ah0 = x0 + y0; 128*25c28e83SPiotr Jasiukajtis else if (hy0 >= 0x7f800000) 129*25c28e83SPiotr Jasiukajtis ah0 += pio4; 130*25c28e83SPiotr Jasiukajtis } 131*25c28e83SPiotr Jasiukajtis else if ((int) ah0 == 0) 132*25c28e83SPiotr Jasiukajtis ah0 = y0 / x0; 133*25c28e83SPiotr Jasiukajtis *z = (sign0 == one) ? ah0 : -ah0; 134*25c28e83SPiotr Jasiukajtis /* sign0*ah0 would change nan behavior relative to previous release */ 135*25c28e83SPiotr Jasiukajtis x += stridex; 136*25c28e83SPiotr Jasiukajtis y += stridey; 137*25c28e83SPiotr Jasiukajtis z += stridez; 138*25c28e83SPiotr Jasiukajtis i = 0; 139*25c28e83SPiotr Jasiukajtis if (--n <= 0) 140*25c28e83SPiotr Jasiukajtis break; 141*25c28e83SPiotr Jasiukajtis goto loop0; 142*25c28e83SPiotr Jasiukajtis } 143*25c28e83SPiotr Jasiukajtis if (hy0 < 0x00800000) { 144*25c28e83SPiotr Jasiukajtis if (hy0 == 0) 145*25c28e83SPiotr Jasiukajtis { 146*25c28e83SPiotr Jasiukajtis *z = sign0 * (float) ah0; 147*25c28e83SPiotr Jasiukajtis x += stridex; 148*25c28e83SPiotr Jasiukajtis y += stridey; 149*25c28e83SPiotr Jasiukajtis z += stridez; 150*25c28e83SPiotr Jasiukajtis i = 0; 151*25c28e83SPiotr Jasiukajtis if (--n <= 0) 152*25c28e83SPiotr Jasiukajtis break; 153*25c28e83SPiotr Jasiukajtis goto loop0; 154*25c28e83SPiotr Jasiukajtis } 155*25c28e83SPiotr Jasiukajtis y0 *= twop24; /* scale subnormal y */ 156*25c28e83SPiotr Jasiukajtis x0 *= twop24; /* scale possibly subnormal x */ 157*25c28e83SPiotr Jasiukajtis hy0 = *(int*)&y0; 158*25c28e83SPiotr Jasiukajtis hx = *(int*)&x0; 159*25c28e83SPiotr Jasiukajtis } 160*25c28e83SPiotr Jasiukajtis pz0 = z; 161*25c28e83SPiotr Jasiukajtis 162*25c28e83SPiotr Jasiukajtis k0 = (hy0 - hx + 0x3f800000) & 0xfff80000; 163*25c28e83SPiotr Jasiukajtis if (k0 >= 0x3C800000) /* if |x| >= (1/64)... */ 164*25c28e83SPiotr Jasiukajtis { 165*25c28e83SPiotr Jasiukajtis *(int*)&base0 = k0; 166*25c28e83SPiotr Jasiukajtis k0 = (k0 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 167*25c28e83SPiotr Jasiukajtis k0 += 4; 168*25c28e83SPiotr Jasiukajtis /* skip over 0,0,pi/2,pi/2 */ 169*25c28e83SPiotr Jasiukajtis } 170*25c28e83SPiotr Jasiukajtis else /* |x| < 1/64 */ 171*25c28e83SPiotr Jasiukajtis { 172*25c28e83SPiotr Jasiukajtis k0 = 0; 173*25c28e83SPiotr Jasiukajtis base0 = zero; 174*25c28e83SPiotr Jasiukajtis } 175*25c28e83SPiotr Jasiukajtis 176*25c28e83SPiotr Jasiukajtis x += stridex; 177*25c28e83SPiotr Jasiukajtis y += stridey; 178*25c28e83SPiotr Jasiukajtis z += stridez; 179*25c28e83SPiotr Jasiukajtis i = 1; 180*25c28e83SPiotr Jasiukajtis if (--n <= 0) 181*25c28e83SPiotr Jasiukajtis break; 182*25c28e83SPiotr Jasiukajtis 183*25c28e83SPiotr Jasiukajtis 184*25c28e83SPiotr Jasiukajtis loop1: 185*25c28e83SPiotr Jasiukajtis hy1 = *(int*)y; 186*25c28e83SPiotr Jasiukajtis hx = *(int*)x; 187*25c28e83SPiotr Jasiukajtis sign1 = one; 188*25c28e83SPiotr Jasiukajtis sy = hy1 & 0x80000000; 189*25c28e83SPiotr Jasiukajtis hy1 &= ~0x80000000; 190*25c28e83SPiotr Jasiukajtis 191*25c28e83SPiotr Jasiukajtis sx = hx & 0x80000000; 192*25c28e83SPiotr Jasiukajtis hx &= ~0x80000000; 193*25c28e83SPiotr Jasiukajtis 194*25c28e83SPiotr Jasiukajtis if (hy1 > hx) 195*25c28e83SPiotr Jasiukajtis { 196*25c28e83SPiotr Jasiukajtis x1 = *y; 197*25c28e83SPiotr Jasiukajtis y1 = *x; 198*25c28e83SPiotr Jasiukajtis i = hx; 199*25c28e83SPiotr Jasiukajtis hx = hy1; 200*25c28e83SPiotr Jasiukajtis hy1 = i; 201*25c28e83SPiotr Jasiukajtis if (sy) 202*25c28e83SPiotr Jasiukajtis { 203*25c28e83SPiotr Jasiukajtis x1 = -x1; 204*25c28e83SPiotr Jasiukajtis sign1 = -sign1; 205*25c28e83SPiotr Jasiukajtis } 206*25c28e83SPiotr Jasiukajtis if (sx) 207*25c28e83SPiotr Jasiukajtis { 208*25c28e83SPiotr Jasiukajtis y1 = -y1; 209*25c28e83SPiotr Jasiukajtis ah1 = pio2; 210*25c28e83SPiotr Jasiukajtis } 211*25c28e83SPiotr Jasiukajtis else 212*25c28e83SPiotr Jasiukajtis { 213*25c28e83SPiotr Jasiukajtis ah1 = -pio2; 214*25c28e83SPiotr Jasiukajtis sign1 = -sign1; 215*25c28e83SPiotr Jasiukajtis } 216*25c28e83SPiotr Jasiukajtis } 217*25c28e83SPiotr Jasiukajtis else 218*25c28e83SPiotr Jasiukajtis { 219*25c28e83SPiotr Jasiukajtis y1 = *y; 220*25c28e83SPiotr Jasiukajtis x1 = *x; 221*25c28e83SPiotr Jasiukajtis if (sy) 222*25c28e83SPiotr Jasiukajtis { 223*25c28e83SPiotr Jasiukajtis y1 = -y1; 224*25c28e83SPiotr Jasiukajtis sign1 = -sign1; 225*25c28e83SPiotr Jasiukajtis } 226*25c28e83SPiotr Jasiukajtis if (sx) 227*25c28e83SPiotr Jasiukajtis { 228*25c28e83SPiotr Jasiukajtis x1 = -x1; 229*25c28e83SPiotr Jasiukajtis ah1 = -pi; 230*25c28e83SPiotr Jasiukajtis sign1 = -sign1; 231*25c28e83SPiotr Jasiukajtis } 232*25c28e83SPiotr Jasiukajtis else 233*25c28e83SPiotr Jasiukajtis ah1 = zero; 234*25c28e83SPiotr Jasiukajtis } 235*25c28e83SPiotr Jasiukajtis 236*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000 || hx - hy1 >= 0x0c800000) 237*25c28e83SPiotr Jasiukajtis { 238*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000) 239*25c28e83SPiotr Jasiukajtis { 240*25c28e83SPiotr Jasiukajtis if (hx ^ 0x7f800000) /* nan */ 241*25c28e83SPiotr Jasiukajtis ah1 = x1 + y1; 242*25c28e83SPiotr Jasiukajtis else if (hy1 >= 0x7f800000) 243*25c28e83SPiotr Jasiukajtis ah1 += pio4; 244*25c28e83SPiotr Jasiukajtis } 245*25c28e83SPiotr Jasiukajtis else if ((int) ah1 == 0) 246*25c28e83SPiotr Jasiukajtis ah1 = y1 / x1; 247*25c28e83SPiotr Jasiukajtis *z = (sign1 == one)? ah1 : -ah1; 248*25c28e83SPiotr Jasiukajtis x += stridex; 249*25c28e83SPiotr Jasiukajtis y += stridey; 250*25c28e83SPiotr Jasiukajtis z += stridez; 251*25c28e83SPiotr Jasiukajtis i = 1; 252*25c28e83SPiotr Jasiukajtis if (--n <= 0) 253*25c28e83SPiotr Jasiukajtis break; 254*25c28e83SPiotr Jasiukajtis goto loop1; 255*25c28e83SPiotr Jasiukajtis } 256*25c28e83SPiotr Jasiukajtis if (hy1 < 0x00800000) { 257*25c28e83SPiotr Jasiukajtis if (hy1 == 0) 258*25c28e83SPiotr Jasiukajtis { 259*25c28e83SPiotr Jasiukajtis *z = sign1 * (float) ah1; 260*25c28e83SPiotr Jasiukajtis x += stridex; 261*25c28e83SPiotr Jasiukajtis y += stridey; 262*25c28e83SPiotr Jasiukajtis z += stridez; 263*25c28e83SPiotr Jasiukajtis i = 1; 264*25c28e83SPiotr Jasiukajtis if (--n <= 0) 265*25c28e83SPiotr Jasiukajtis break; 266*25c28e83SPiotr Jasiukajtis goto loop1; 267*25c28e83SPiotr Jasiukajtis } 268*25c28e83SPiotr Jasiukajtis y1 *= twop24; /* scale subnormal y */ 269*25c28e83SPiotr Jasiukajtis x1 *= twop24; /* scale possibly subnormal x */ 270*25c28e83SPiotr Jasiukajtis hy1 = *(int*)&y1; 271*25c28e83SPiotr Jasiukajtis hx = *(int*)&x1; 272*25c28e83SPiotr Jasiukajtis } 273*25c28e83SPiotr Jasiukajtis pz1 = z; 274*25c28e83SPiotr Jasiukajtis 275*25c28e83SPiotr Jasiukajtis k1 = (hy1 - hx + 0x3f800000) & 0xfff80000; 276*25c28e83SPiotr Jasiukajtis if (k1 >= 0x3C800000) /* if |x| >= (1/64)... */ 277*25c28e83SPiotr Jasiukajtis { 278*25c28e83SPiotr Jasiukajtis *(int*)&base1 = k1; 279*25c28e83SPiotr Jasiukajtis k1 = (k1 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 280*25c28e83SPiotr Jasiukajtis k1 += 4; 281*25c28e83SPiotr Jasiukajtis /* skip over 0,0,pi/2,pi/2 */ 282*25c28e83SPiotr Jasiukajtis } 283*25c28e83SPiotr Jasiukajtis else /* |x| < 1/64 */ 284*25c28e83SPiotr Jasiukajtis { 285*25c28e83SPiotr Jasiukajtis k1 = 0; 286*25c28e83SPiotr Jasiukajtis base1 = zero; 287*25c28e83SPiotr Jasiukajtis } 288*25c28e83SPiotr Jasiukajtis 289*25c28e83SPiotr Jasiukajtis x += stridex; 290*25c28e83SPiotr Jasiukajtis y += stridey; 291*25c28e83SPiotr Jasiukajtis z += stridez; 292*25c28e83SPiotr Jasiukajtis i = 2; 293*25c28e83SPiotr Jasiukajtis if (--n <= 0) 294*25c28e83SPiotr Jasiukajtis break; 295*25c28e83SPiotr Jasiukajtis 296*25c28e83SPiotr Jasiukajtis loop2: 297*25c28e83SPiotr Jasiukajtis hy2 = *(int*)y; 298*25c28e83SPiotr Jasiukajtis hx = *(int*)x; 299*25c28e83SPiotr Jasiukajtis sign2 = one; 300*25c28e83SPiotr Jasiukajtis sy = hy2 & 0x80000000; 301*25c28e83SPiotr Jasiukajtis hy2 &= ~0x80000000; 302*25c28e83SPiotr Jasiukajtis 303*25c28e83SPiotr Jasiukajtis sx = hx & 0x80000000; 304*25c28e83SPiotr Jasiukajtis hx &= ~0x80000000; 305*25c28e83SPiotr Jasiukajtis 306*25c28e83SPiotr Jasiukajtis if (hy2 > hx) 307*25c28e83SPiotr Jasiukajtis { 308*25c28e83SPiotr Jasiukajtis x2 = *y; 309*25c28e83SPiotr Jasiukajtis y2 = *x; 310*25c28e83SPiotr Jasiukajtis i = hx; 311*25c28e83SPiotr Jasiukajtis hx = hy2; 312*25c28e83SPiotr Jasiukajtis hy2 = i; 313*25c28e83SPiotr Jasiukajtis if (sy) 314*25c28e83SPiotr Jasiukajtis { 315*25c28e83SPiotr Jasiukajtis x2 = -x2; 316*25c28e83SPiotr Jasiukajtis sign2 = -sign2; 317*25c28e83SPiotr Jasiukajtis } 318*25c28e83SPiotr Jasiukajtis if (sx) 319*25c28e83SPiotr Jasiukajtis { 320*25c28e83SPiotr Jasiukajtis y2 = -y2; 321*25c28e83SPiotr Jasiukajtis ah2 = pio2; 322*25c28e83SPiotr Jasiukajtis } 323*25c28e83SPiotr Jasiukajtis else 324*25c28e83SPiotr Jasiukajtis { 325*25c28e83SPiotr Jasiukajtis ah2 = -pio2; 326*25c28e83SPiotr Jasiukajtis sign2 = -sign2; 327*25c28e83SPiotr Jasiukajtis } 328*25c28e83SPiotr Jasiukajtis } 329*25c28e83SPiotr Jasiukajtis else 330*25c28e83SPiotr Jasiukajtis { 331*25c28e83SPiotr Jasiukajtis y2 = *y; 332*25c28e83SPiotr Jasiukajtis x2 = *x; 333*25c28e83SPiotr Jasiukajtis if (sy) 334*25c28e83SPiotr Jasiukajtis { 335*25c28e83SPiotr Jasiukajtis y2 = -y2; 336*25c28e83SPiotr Jasiukajtis sign2 = -sign2; 337*25c28e83SPiotr Jasiukajtis } 338*25c28e83SPiotr Jasiukajtis if (sx) 339*25c28e83SPiotr Jasiukajtis { 340*25c28e83SPiotr Jasiukajtis x2 = -x2; 341*25c28e83SPiotr Jasiukajtis ah2 = -pi; 342*25c28e83SPiotr Jasiukajtis sign2 = -sign2; 343*25c28e83SPiotr Jasiukajtis } 344*25c28e83SPiotr Jasiukajtis else 345*25c28e83SPiotr Jasiukajtis ah2 = zero; 346*25c28e83SPiotr Jasiukajtis } 347*25c28e83SPiotr Jasiukajtis 348*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000 || hx - hy2 >= 0x0c800000) 349*25c28e83SPiotr Jasiukajtis { 350*25c28e83SPiotr Jasiukajtis if (hx >= 0x7f800000) 351*25c28e83SPiotr Jasiukajtis { 352*25c28e83SPiotr Jasiukajtis if (hx ^ 0x7f800000) /* nan */ 353*25c28e83SPiotr Jasiukajtis ah2 = x2 + y2; 354*25c28e83SPiotr Jasiukajtis else if (hy2 >= 0x7f800000) 355*25c28e83SPiotr Jasiukajtis ah2 += pio4; 356*25c28e83SPiotr Jasiukajtis } 357*25c28e83SPiotr Jasiukajtis else if ((int) ah2 == 0) 358*25c28e83SPiotr Jasiukajtis ah2 = y2 / x2; 359*25c28e83SPiotr Jasiukajtis *z = (sign2 == one)? ah2 : -ah2; 360*25c28e83SPiotr Jasiukajtis x += stridex; 361*25c28e83SPiotr Jasiukajtis y += stridey; 362*25c28e83SPiotr Jasiukajtis z += stridez; 363*25c28e83SPiotr Jasiukajtis i = 2; 364*25c28e83SPiotr Jasiukajtis if (--n <= 0) 365*25c28e83SPiotr Jasiukajtis break; 366*25c28e83SPiotr Jasiukajtis goto loop2; 367*25c28e83SPiotr Jasiukajtis } 368*25c28e83SPiotr Jasiukajtis if (hy2 < 0x00800000) { 369*25c28e83SPiotr Jasiukajtis if (hy2 == 0) 370*25c28e83SPiotr Jasiukajtis { 371*25c28e83SPiotr Jasiukajtis *z = sign2 * (float) ah2; 372*25c28e83SPiotr Jasiukajtis x += stridex; 373*25c28e83SPiotr Jasiukajtis y += stridey; 374*25c28e83SPiotr Jasiukajtis z += stridez; 375*25c28e83SPiotr Jasiukajtis i = 2; 376*25c28e83SPiotr Jasiukajtis if (--n <= 0) 377*25c28e83SPiotr Jasiukajtis break; 378*25c28e83SPiotr Jasiukajtis goto loop2; 379*25c28e83SPiotr Jasiukajtis } 380*25c28e83SPiotr Jasiukajtis y2 *= twop24; /* scale subnormal y */ 381*25c28e83SPiotr Jasiukajtis x2 *= twop24; /* scale possibly subnormal x */ 382*25c28e83SPiotr Jasiukajtis hy2 = *(int*)&y2; 383*25c28e83SPiotr Jasiukajtis hx = *(int*)&x2; 384*25c28e83SPiotr Jasiukajtis } 385*25c28e83SPiotr Jasiukajtis 386*25c28e83SPiotr Jasiukajtis pz2 = z; 387*25c28e83SPiotr Jasiukajtis 388*25c28e83SPiotr Jasiukajtis k2 = (hy2 - hx + 0x3f800000) & 0xfff80000; 389*25c28e83SPiotr Jasiukajtis if (k2 >= 0x3C800000) /* if |x| >= (1/64)... */ 390*25c28e83SPiotr Jasiukajtis { 391*25c28e83SPiotr Jasiukajtis *(int*)&base2 = k2; 392*25c28e83SPiotr Jasiukajtis k2 = (k2 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 393*25c28e83SPiotr Jasiukajtis k2 += 4; 394*25c28e83SPiotr Jasiukajtis /* skip over 0,0,pi/2,pi/2 */ 395*25c28e83SPiotr Jasiukajtis } 396*25c28e83SPiotr Jasiukajtis else /* |x| < 1/64 */ 397*25c28e83SPiotr Jasiukajtis { 398*25c28e83SPiotr Jasiukajtis k2 = 0; 399*25c28e83SPiotr Jasiukajtis base2 = zero; 400*25c28e83SPiotr Jasiukajtis } 401*25c28e83SPiotr Jasiukajtis 402*25c28e83SPiotr Jasiukajtis goto endloop; 403*25c28e83SPiotr Jasiukajtis 404*25c28e83SPiotr Jasiukajtis endloop: 405*25c28e83SPiotr Jasiukajtis 406*25c28e83SPiotr Jasiukajtis ah2 += __vlibm_TBL_atan1[k2]; 407*25c28e83SPiotr Jasiukajtis ah1 += __vlibm_TBL_atan1[k1]; 408*25c28e83SPiotr Jasiukajtis ah0 += __vlibm_TBL_atan1[k0]; 409*25c28e83SPiotr Jasiukajtis 410*25c28e83SPiotr Jasiukajtis db2 = base2; 411*25c28e83SPiotr Jasiukajtis db1 = base1; 412*25c28e83SPiotr Jasiukajtis db0 = base0; 413*25c28e83SPiotr Jasiukajtis dy2 = y2; 414*25c28e83SPiotr Jasiukajtis dy1 = y1; 415*25c28e83SPiotr Jasiukajtis dy0 = y0; 416*25c28e83SPiotr Jasiukajtis dx2 = x2; 417*25c28e83SPiotr Jasiukajtis dx1 = x1; 418*25c28e83SPiotr Jasiukajtis dx0 = x0; 419*25c28e83SPiotr Jasiukajtis 420*25c28e83SPiotr Jasiukajtis num2 = dy2 - dx2 * db2; 421*25c28e83SPiotr Jasiukajtis den2 = dx2 + dy2 * db2; 422*25c28e83SPiotr Jasiukajtis 423*25c28e83SPiotr Jasiukajtis num1 = dy1 - dx1 * db1; 424*25c28e83SPiotr Jasiukajtis den1 = dx1 + dy1 * db1; 425*25c28e83SPiotr Jasiukajtis 426*25c28e83SPiotr Jasiukajtis num0 = dy0 - dx0 * db0; 427*25c28e83SPiotr Jasiukajtis den0 = dx0 + dy0 * db0; 428*25c28e83SPiotr Jasiukajtis 429*25c28e83SPiotr Jasiukajtis t2 = num2 / den2; 430*25c28e83SPiotr Jasiukajtis t1 = num1 / den1; 431*25c28e83SPiotr Jasiukajtis t0 = num0 / den0; 432*25c28e83SPiotr Jasiukajtis 433*25c28e83SPiotr Jasiukajtis sx2 = t2 * t2; 434*25c28e83SPiotr Jasiukajtis sx1 = t1 * t1; 435*25c28e83SPiotr Jasiukajtis sx0 = t0 * t0; 436*25c28e83SPiotr Jasiukajtis 437*25c28e83SPiotr Jasiukajtis t2 += t2 * sx2 * (q1 + sx2 * q2); 438*25c28e83SPiotr Jasiukajtis t1 += t1 * sx1 * (q1 + sx1 * q2); 439*25c28e83SPiotr Jasiukajtis t0 += t0 * sx0 * (q1 + sx0 * q2); 440*25c28e83SPiotr Jasiukajtis 441*25c28e83SPiotr Jasiukajtis t2 += ah2; 442*25c28e83SPiotr Jasiukajtis t1 += ah1; 443*25c28e83SPiotr Jasiukajtis t0 += ah0; 444*25c28e83SPiotr Jasiukajtis 445*25c28e83SPiotr Jasiukajtis *pz2 = sign2 * t2; 446*25c28e83SPiotr Jasiukajtis *pz1 = sign1 * t1; 447*25c28e83SPiotr Jasiukajtis *pz0 = sign0 * t0; 448*25c28e83SPiotr Jasiukajtis 449*25c28e83SPiotr Jasiukajtis x += stridex; 450*25c28e83SPiotr Jasiukajtis y += stridey; 451*25c28e83SPiotr Jasiukajtis z += stridez; 452*25c28e83SPiotr Jasiukajtis i = 0; 453*25c28e83SPiotr Jasiukajtis } while (--n > 0); 454*25c28e83SPiotr Jasiukajtis 455*25c28e83SPiotr Jasiukajtis if (i > 1) 456*25c28e83SPiotr Jasiukajtis { 457*25c28e83SPiotr Jasiukajtis ah1 += __vlibm_TBL_atan1[k1]; 458*25c28e83SPiotr Jasiukajtis t1 = (y1 - x1 * (double)base1) / 459*25c28e83SPiotr Jasiukajtis (x1 + y1 * (double)base1); 460*25c28e83SPiotr Jasiukajtis sx1 = t1 * t1; 461*25c28e83SPiotr Jasiukajtis t1 += t1 * sx1 * (q1 + sx1 * q2); 462*25c28e83SPiotr Jasiukajtis t1 += ah1; 463*25c28e83SPiotr Jasiukajtis *pz1 = sign1 * t1; 464*25c28e83SPiotr Jasiukajtis } 465*25c28e83SPiotr Jasiukajtis 466*25c28e83SPiotr Jasiukajtis if (i > 0) 467*25c28e83SPiotr Jasiukajtis { 468*25c28e83SPiotr Jasiukajtis ah0 += __vlibm_TBL_atan1[k0]; 469*25c28e83SPiotr Jasiukajtis t0 = (y0 - x0 * (double)base0) / 470*25c28e83SPiotr Jasiukajtis (x0 + y0 * (double)base0); 471*25c28e83SPiotr Jasiukajtis sx0 = t0 * t0; 472*25c28e83SPiotr Jasiukajtis t0 += t0 * sx0 * (q1 + sx0 * q2); 473*25c28e83SPiotr Jasiukajtis t0 += ah0; 474*25c28e83SPiotr Jasiukajtis *pz0 = sign0 * t0; 475*25c28e83SPiotr Jasiukajtis } 476*25c28e83SPiotr Jasiukajtis } 477