1*25c28e83SPiotr Jasiukajtis /* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis 22*25c28e83SPiotr Jasiukajtis /* 23*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24*25c28e83SPiotr Jasiukajtis */ 25*25c28e83SPiotr Jasiukajtis /* 26*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 28*25c28e83SPiotr Jasiukajtis */ 29*25c28e83SPiotr Jasiukajtis 30*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT 31*25c28e83SPiotr Jasiukajtis #define restrict _Restrict 32*25c28e83SPiotr Jasiukajtis #else 33*25c28e83SPiotr Jasiukajtis #define restrict 34*25c28e83SPiotr Jasiukajtis #endif 35*25c28e83SPiotr Jasiukajtis 36*25c28e83SPiotr Jasiukajtis void 37*25c28e83SPiotr Jasiukajtis __vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey) 38*25c28e83SPiotr Jasiukajtis { 39*25c28e83SPiotr Jasiukajtis extern const double __vlibm_TBL_atan1[]; 40*25c28e83SPiotr Jasiukajtis double conup0, conup1, conup2; 41*25c28e83SPiotr Jasiukajtis float dummy, ansf = 0.0; 42*25c28e83SPiotr Jasiukajtis float f0, f1, f2; 43*25c28e83SPiotr Jasiukajtis float ans0, ans1, ans2; 44*25c28e83SPiotr Jasiukajtis float poly0, poly1, poly2; 45*25c28e83SPiotr Jasiukajtis float sign0, sign1, sign2; 46*25c28e83SPiotr Jasiukajtis int intf, intz, argcount; 47*25c28e83SPiotr Jasiukajtis int index0, index1, index2; 48*25c28e83SPiotr Jasiukajtis float z,*yaddr0,*yaddr1,*yaddr2; 49*25c28e83SPiotr Jasiukajtis int *pz = (int *) &z; 50*25c28e83SPiotr Jasiukajtis #ifdef UNROLL4 51*25c28e83SPiotr Jasiukajtis double conup3; 52*25c28e83SPiotr Jasiukajtis int index3; 53*25c28e83SPiotr Jasiukajtis float f3, ans3, poly3, sign3, *yaddr3; 54*25c28e83SPiotr Jasiukajtis #endif 55*25c28e83SPiotr Jasiukajtis 56*25c28e83SPiotr Jasiukajtis /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 57*25c28e83SPiotr Jasiukajtis * Error = -3.08254E-18 On the interval |x| < 1/64 */ 58*25c28e83SPiotr Jasiukajtis 59*25c28e83SPiotr Jasiukajtis static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ; 60*25c28e83SPiotr Jasiukajtis static const float pone = 1.0f; 61*25c28e83SPiotr Jasiukajtis 62*25c28e83SPiotr Jasiukajtis if (n <= 0) return; /* if no. of elements is 0 or neg, do nothing */ 63*25c28e83SPiotr Jasiukajtis do 64*25c28e83SPiotr Jasiukajtis { 65*25c28e83SPiotr Jasiukajtis LOOP0: 66*25c28e83SPiotr Jasiukajtis 67*25c28e83SPiotr Jasiukajtis intf = *(int *) x; /* upper half of x, as integer */ 68*25c28e83SPiotr Jasiukajtis f0 = *x; 69*25c28e83SPiotr Jasiukajtis sign0 = pone; 70*25c28e83SPiotr Jasiukajtis if (intf < 0) { 71*25c28e83SPiotr Jasiukajtis intf = intf & ~0x80000000; /* abs(upper argument) */ 72*25c28e83SPiotr Jasiukajtis f0 = -f0; 73*25c28e83SPiotr Jasiukajtis sign0 = -sign0; 74*25c28e83SPiotr Jasiukajtis } 75*25c28e83SPiotr Jasiukajtis 76*25c28e83SPiotr Jasiukajtis if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ 77*25c28e83SPiotr Jasiukajtis { 78*25c28e83SPiotr Jasiukajtis if (intf > 0x7f800000) 79*25c28e83SPiotr Jasiukajtis { 80*25c28e83SPiotr Jasiukajtis ansf = f0- f0; /* return NaN if x=NaN*/ 81*25c28e83SPiotr Jasiukajtis } 82*25c28e83SPiotr Jasiukajtis else if (intf < 0x31800000) /* avoid underflow for small arg */ 83*25c28e83SPiotr Jasiukajtis { 84*25c28e83SPiotr Jasiukajtis dummy = 1.0e37 + f0; 85*25c28e83SPiotr Jasiukajtis dummy = dummy; 86*25c28e83SPiotr Jasiukajtis ansf = f0; 87*25c28e83SPiotr Jasiukajtis } 88*25c28e83SPiotr Jasiukajtis else if (intf > 0x5B000000) /* avoid underflow for big arg */ 89*25c28e83SPiotr Jasiukajtis { 90*25c28e83SPiotr Jasiukajtis index0= 2; 91*25c28e83SPiotr Jasiukajtis ansf = __vlibm_TBL_atan1[index0];/* pi/2 up */ 92*25c28e83SPiotr Jasiukajtis } 93*25c28e83SPiotr Jasiukajtis *y = sign0*ansf; /* store answer, with sign bit */ 94*25c28e83SPiotr Jasiukajtis x += stridex; 95*25c28e83SPiotr Jasiukajtis y += stridey; 96*25c28e83SPiotr Jasiukajtis argcount = 0; /* initialize argcount */ 97*25c28e83SPiotr Jasiukajtis if (--n <=0) break; /* we are done */ 98*25c28e83SPiotr Jasiukajtis goto LOOP0; /* otherwise, examine next arg */ 99*25c28e83SPiotr Jasiukajtis } 100*25c28e83SPiotr Jasiukajtis 101*25c28e83SPiotr Jasiukajtis if (intf > 0x42800000) /* if (|x| > 64 */ 102*25c28e83SPiotr Jasiukajtis { 103*25c28e83SPiotr Jasiukajtis f0 = -pone/f0; 104*25c28e83SPiotr Jasiukajtis index0 = 2; /* point to pi/2 upper, lower */ 105*25c28e83SPiotr Jasiukajtis } 106*25c28e83SPiotr Jasiukajtis else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ 107*25c28e83SPiotr Jasiukajtis { 108*25c28e83SPiotr Jasiukajtis intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ 109*25c28e83SPiotr Jasiukajtis pz[0] = intz; /* store as a float (z) */ 110*25c28e83SPiotr Jasiukajtis f0 = (f0 - z)/(pone + f0*z); 111*25c28e83SPiotr Jasiukajtis index0 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 112*25c28e83SPiotr Jasiukajtis index0 = index0+ 4; /* skip over 0,0,pi/2,pi/2 */ 113*25c28e83SPiotr Jasiukajtis } 114*25c28e83SPiotr Jasiukajtis else /* |x| < 1/64 */ 115*25c28e83SPiotr Jasiukajtis { 116*25c28e83SPiotr Jasiukajtis index0 = 0; /* points to 0,0 in table */ 117*25c28e83SPiotr Jasiukajtis } 118*25c28e83SPiotr Jasiukajtis yaddr0 = y; /* address to store this answer */ 119*25c28e83SPiotr Jasiukajtis x += stridex; /* point to next arg */ 120*25c28e83SPiotr Jasiukajtis y += stridey; /* point to next result */ 121*25c28e83SPiotr Jasiukajtis argcount = 1; /* we now have 1 good argument */ 122*25c28e83SPiotr Jasiukajtis if (--n <=0) 123*25c28e83SPiotr Jasiukajtis { 124*25c28e83SPiotr Jasiukajtis goto UNROLL; /* finish up with 1 good arg */ 125*25c28e83SPiotr Jasiukajtis } 126*25c28e83SPiotr Jasiukajtis 127*25c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 128*25c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 129*25c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 130*25c28e83SPiotr Jasiukajtis 131*25c28e83SPiotr Jasiukajtis LOOP1: 132*25c28e83SPiotr Jasiukajtis 133*25c28e83SPiotr Jasiukajtis intf = *(int *) x; /* upper half of x, as integer */ 134*25c28e83SPiotr Jasiukajtis f1 = *x; 135*25c28e83SPiotr Jasiukajtis sign1 = pone; 136*25c28e83SPiotr Jasiukajtis if (intf < 0) { 137*25c28e83SPiotr Jasiukajtis intf = intf & ~0x80000000; /* abs(upper argument) */ 138*25c28e83SPiotr Jasiukajtis f1 = -f1; 139*25c28e83SPiotr Jasiukajtis sign1 = -sign1; 140*25c28e83SPiotr Jasiukajtis } 141*25c28e83SPiotr Jasiukajtis 142*25c28e83SPiotr Jasiukajtis if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ 143*25c28e83SPiotr Jasiukajtis { 144*25c28e83SPiotr Jasiukajtis if (intf > 0x7f800000) 145*25c28e83SPiotr Jasiukajtis { 146*25c28e83SPiotr Jasiukajtis ansf = f1 - f1; /* return NaN if x=NaN*/ 147*25c28e83SPiotr Jasiukajtis } 148*25c28e83SPiotr Jasiukajtis else if (intf < 0x31800000) /* avoid underflow for small arg */ 149*25c28e83SPiotr Jasiukajtis { 150*25c28e83SPiotr Jasiukajtis dummy = 1.0e37 + f1; 151*25c28e83SPiotr Jasiukajtis dummy = dummy; 152*25c28e83SPiotr Jasiukajtis ansf = f1; 153*25c28e83SPiotr Jasiukajtis } 154*25c28e83SPiotr Jasiukajtis else if (intf > 0x5B000000) /* avoid underflow for big arg */ 155*25c28e83SPiotr Jasiukajtis { 156*25c28e83SPiotr Jasiukajtis index1 = 2; 157*25c28e83SPiotr Jasiukajtis ansf = __vlibm_TBL_atan1[index1] ;/* pi/2 up */ 158*25c28e83SPiotr Jasiukajtis } 159*25c28e83SPiotr Jasiukajtis *y = sign1 * ansf; /* store answer, with sign bit */ 160*25c28e83SPiotr Jasiukajtis x += stridex; 161*25c28e83SPiotr Jasiukajtis y += stridey; 162*25c28e83SPiotr Jasiukajtis argcount = 1; /* we still have 1 good arg */ 163*25c28e83SPiotr Jasiukajtis if (--n <=0) 164*25c28e83SPiotr Jasiukajtis { 165*25c28e83SPiotr Jasiukajtis goto UNROLL; /* finish up with 1 good arg */ 166*25c28e83SPiotr Jasiukajtis } 167*25c28e83SPiotr Jasiukajtis goto LOOP1; /* otherwise, examine next arg */ 168*25c28e83SPiotr Jasiukajtis } 169*25c28e83SPiotr Jasiukajtis 170*25c28e83SPiotr Jasiukajtis if (intf > 0x42800000) /* if (|x| > 64 */ 171*25c28e83SPiotr Jasiukajtis { 172*25c28e83SPiotr Jasiukajtis f1 = -pone/f1; 173*25c28e83SPiotr Jasiukajtis index1 = 2; /* point to pi/2 upper, lower */ 174*25c28e83SPiotr Jasiukajtis } 175*25c28e83SPiotr Jasiukajtis else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ 176*25c28e83SPiotr Jasiukajtis { 177*25c28e83SPiotr Jasiukajtis intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ 178*25c28e83SPiotr Jasiukajtis pz[0] = intz; /* store as a float (z) */ 179*25c28e83SPiotr Jasiukajtis f1 = (f1 - z)/(pone + f1*z); 180*25c28e83SPiotr Jasiukajtis index1 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 181*25c28e83SPiotr Jasiukajtis index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */ 182*25c28e83SPiotr Jasiukajtis } 183*25c28e83SPiotr Jasiukajtis else 184*25c28e83SPiotr Jasiukajtis { 185*25c28e83SPiotr Jasiukajtis index1 = 0; /* points to 0,0 in table */ 186*25c28e83SPiotr Jasiukajtis } 187*25c28e83SPiotr Jasiukajtis 188*25c28e83SPiotr Jasiukajtis yaddr1 = y; /* address to store this answer */ 189*25c28e83SPiotr Jasiukajtis x += stridex; /* point to next arg */ 190*25c28e83SPiotr Jasiukajtis y += stridey; /* point to next result */ 191*25c28e83SPiotr Jasiukajtis argcount = 2; /* we now have 2 good arguments */ 192*25c28e83SPiotr Jasiukajtis if (--n <=0) 193*25c28e83SPiotr Jasiukajtis { 194*25c28e83SPiotr Jasiukajtis goto UNROLL; /* finish up with 2 good args */ 195*25c28e83SPiotr Jasiukajtis } 196*25c28e83SPiotr Jasiukajtis 197*25c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 198*25c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 199*25c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 200*25c28e83SPiotr Jasiukajtis 201*25c28e83SPiotr Jasiukajtis LOOP2: 202*25c28e83SPiotr Jasiukajtis 203*25c28e83SPiotr Jasiukajtis intf = *(int *) x; /* upper half of x, as integer */ 204*25c28e83SPiotr Jasiukajtis f2 = *x; 205*25c28e83SPiotr Jasiukajtis sign2 = pone; 206*25c28e83SPiotr Jasiukajtis if (intf < 0) { 207*25c28e83SPiotr Jasiukajtis intf = intf & ~0x80000000; /* abs(upper argument) */ 208*25c28e83SPiotr Jasiukajtis f2 = -f2; 209*25c28e83SPiotr Jasiukajtis sign2 = -sign2; 210*25c28e83SPiotr Jasiukajtis } 211*25c28e83SPiotr Jasiukajtis 212*25c28e83SPiotr Jasiukajtis if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ 213*25c28e83SPiotr Jasiukajtis { 214*25c28e83SPiotr Jasiukajtis if (intf > 0x7f800000) 215*25c28e83SPiotr Jasiukajtis { 216*25c28e83SPiotr Jasiukajtis ansf = f2 - f2; /* return NaN if x=NaN*/ 217*25c28e83SPiotr Jasiukajtis } 218*25c28e83SPiotr Jasiukajtis else if (intf < 0x31800000) /* avoid underflow for small arg */ 219*25c28e83SPiotr Jasiukajtis { 220*25c28e83SPiotr Jasiukajtis dummy = 1.0e37 + f2; 221*25c28e83SPiotr Jasiukajtis dummy = dummy; 222*25c28e83SPiotr Jasiukajtis ansf = f2; 223*25c28e83SPiotr Jasiukajtis } 224*25c28e83SPiotr Jasiukajtis else if (intf > 0x5B000000) /* avoid underflow for big arg */ 225*25c28e83SPiotr Jasiukajtis { 226*25c28e83SPiotr Jasiukajtis index2 = 2; 227*25c28e83SPiotr Jasiukajtis ansf = __vlibm_TBL_atan1[index2] ;/* pi/2 up */ 228*25c28e83SPiotr Jasiukajtis } 229*25c28e83SPiotr Jasiukajtis *y = sign2 * ansf; /* store answer, with sign bit */ 230*25c28e83SPiotr Jasiukajtis x += stridex; 231*25c28e83SPiotr Jasiukajtis y += stridey; 232*25c28e83SPiotr Jasiukajtis argcount = 2; /* we still have 2 good args */ 233*25c28e83SPiotr Jasiukajtis if (--n <=0) 234*25c28e83SPiotr Jasiukajtis { 235*25c28e83SPiotr Jasiukajtis goto UNROLL; /* finish up with 2 good args */ 236*25c28e83SPiotr Jasiukajtis } 237*25c28e83SPiotr Jasiukajtis goto LOOP2; /* otherwise, examine next arg */ 238*25c28e83SPiotr Jasiukajtis } 239*25c28e83SPiotr Jasiukajtis 240*25c28e83SPiotr Jasiukajtis if (intf > 0x42800000) /* if (|x| > 64 */ 241*25c28e83SPiotr Jasiukajtis { 242*25c28e83SPiotr Jasiukajtis f2 = -pone/f2; 243*25c28e83SPiotr Jasiukajtis index2 = 2; /* point to pi/2 upper, lower */ 244*25c28e83SPiotr Jasiukajtis } 245*25c28e83SPiotr Jasiukajtis else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ 246*25c28e83SPiotr Jasiukajtis { 247*25c28e83SPiotr Jasiukajtis intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ 248*25c28e83SPiotr Jasiukajtis pz[0] = intz; /* store as a float (z) */ 249*25c28e83SPiotr Jasiukajtis f2 = (f2 - z)/(pone + f2*z); 250*25c28e83SPiotr Jasiukajtis index2 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 251*25c28e83SPiotr Jasiukajtis index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */ 252*25c28e83SPiotr Jasiukajtis } 253*25c28e83SPiotr Jasiukajtis else 254*25c28e83SPiotr Jasiukajtis { 255*25c28e83SPiotr Jasiukajtis index2 = 0; /* points to 0,0 in table */ 256*25c28e83SPiotr Jasiukajtis } 257*25c28e83SPiotr Jasiukajtis yaddr2 = y; /* address to store this answer */ 258*25c28e83SPiotr Jasiukajtis x += stridex; /* point to next arg */ 259*25c28e83SPiotr Jasiukajtis y += stridey; /* point to next result */ 260*25c28e83SPiotr Jasiukajtis argcount = 3; /* we now have 3 good arguments */ 261*25c28e83SPiotr Jasiukajtis if (--n <=0) 262*25c28e83SPiotr Jasiukajtis { 263*25c28e83SPiotr Jasiukajtis goto UNROLL; /* finish up with 2 good args */ 264*25c28e83SPiotr Jasiukajtis } 265*25c28e83SPiotr Jasiukajtis 266*25c28e83SPiotr Jasiukajtis 267*25c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 268*25c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 269*25c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 270*25c28e83SPiotr Jasiukajtis 271*25c28e83SPiotr Jasiukajtis #ifdef UNROLL4 272*25c28e83SPiotr Jasiukajtis LOOP3: 273*25c28e83SPiotr Jasiukajtis 274*25c28e83SPiotr Jasiukajtis intf = *(int *) x; /* upper half of x, as integer */ 275*25c28e83SPiotr Jasiukajtis f3 = *x; 276*25c28e83SPiotr Jasiukajtis sign3 = pone; 277*25c28e83SPiotr Jasiukajtis if (intf < 0) { 278*25c28e83SPiotr Jasiukajtis intf = intf & ~0x80000000; /* abs(upper argument) */ 279*25c28e83SPiotr Jasiukajtis f3 = -f3; 280*25c28e83SPiotr Jasiukajtis sign3 = -sign3; 281*25c28e83SPiotr Jasiukajtis } 282*25c28e83SPiotr Jasiukajtis 283*25c28e83SPiotr Jasiukajtis if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ 284*25c28e83SPiotr Jasiukajtis { 285*25c28e83SPiotr Jasiukajtis if (intf > 0x7f800000) 286*25c28e83SPiotr Jasiukajtis { 287*25c28e83SPiotr Jasiukajtis ansf = f3 - f3; /* return NaN if x=NaN*/ 288*25c28e83SPiotr Jasiukajtis } 289*25c28e83SPiotr Jasiukajtis else if (intf < 0x31800000) /* avoid underflow for small arg */ 290*25c28e83SPiotr Jasiukajtis { 291*25c28e83SPiotr Jasiukajtis dummy = 1.0e37 + f3; 292*25c28e83SPiotr Jasiukajtis dummy = dummy; 293*25c28e83SPiotr Jasiukajtis ansf = f3; 294*25c28e83SPiotr Jasiukajtis } 295*25c28e83SPiotr Jasiukajtis else if (intf > 0x5B000000) /* avoid underflow for big arg */ 296*25c28e83SPiotr Jasiukajtis { 297*25c28e83SPiotr Jasiukajtis index3 = 2; 298*25c28e83SPiotr Jasiukajtis ansf = __vlibm_TBL_atan1[index3] ;/* pi/2 up */ 299*25c28e83SPiotr Jasiukajtis } 300*25c28e83SPiotr Jasiukajtis *y = sign3 * ansf; /* store answer, with sign bit */ 301*25c28e83SPiotr Jasiukajtis x += stridex; 302*25c28e83SPiotr Jasiukajtis y += stridey; 303*25c28e83SPiotr Jasiukajtis argcount = 3; /* we still have 3 good args */ 304*25c28e83SPiotr Jasiukajtis if (--n <=0) 305*25c28e83SPiotr Jasiukajtis { 306*25c28e83SPiotr Jasiukajtis goto UNROLL; /* finish up with 3 good args */ 307*25c28e83SPiotr Jasiukajtis } 308*25c28e83SPiotr Jasiukajtis goto LOOP3; /* otherwise, examine next arg */ 309*25c28e83SPiotr Jasiukajtis } 310*25c28e83SPiotr Jasiukajtis 311*25c28e83SPiotr Jasiukajtis if (intf > 0x42800000) /* if (|x| > 64 */ 312*25c28e83SPiotr Jasiukajtis { 313*25c28e83SPiotr Jasiukajtis n3 = -pone; 314*25c28e83SPiotr Jasiukajtis d3 = f3; 315*25c28e83SPiotr Jasiukajtis f3 = n3/d3; 316*25c28e83SPiotr Jasiukajtis index3 = 2; /* point to pi/2 upper, lower */ 317*25c28e83SPiotr Jasiukajtis } 318*25c28e83SPiotr Jasiukajtis else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ 319*25c28e83SPiotr Jasiukajtis { 320*25c28e83SPiotr Jasiukajtis intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ 321*25c28e83SPiotr Jasiukajtis pz[0] = intz; /* store as a float (z) */ 322*25c28e83SPiotr Jasiukajtis n3 = (f3 - z); 323*25c28e83SPiotr Jasiukajtis d3 = (pone + f3*z); /* get reduced argument */ 324*25c28e83SPiotr Jasiukajtis f3 = n3/d3; 325*25c28e83SPiotr Jasiukajtis index3 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 326*25c28e83SPiotr Jasiukajtis index3 = index3 + 4; /* skip over 0,0,pi/2,pi/2 */ 327*25c28e83SPiotr Jasiukajtis } 328*25c28e83SPiotr Jasiukajtis else 329*25c28e83SPiotr Jasiukajtis { 330*25c28e83SPiotr Jasiukajtis n3 = f3; 331*25c28e83SPiotr Jasiukajtis d3 = pone; 332*25c28e83SPiotr Jasiukajtis index3 = 0; /* points to 0,0 in table */ 333*25c28e83SPiotr Jasiukajtis } 334*25c28e83SPiotr Jasiukajtis yaddr3 = y; /* address to store this answer */ 335*25c28e83SPiotr Jasiukajtis x += stridex; /* point to next arg */ 336*25c28e83SPiotr Jasiukajtis y += stridey; /* point to next result */ 337*25c28e83SPiotr Jasiukajtis argcount = 4; /* we now have 4 good arguments */ 338*25c28e83SPiotr Jasiukajtis if (--n <=0) 339*25c28e83SPiotr Jasiukajtis { 340*25c28e83SPiotr Jasiukajtis goto UNROLL; /* finish up with 3 good args */ 341*25c28e83SPiotr Jasiukajtis } 342*25c28e83SPiotr Jasiukajtis #endif /* UNROLL4 */ 343*25c28e83SPiotr Jasiukajtis 344*25c28e83SPiotr Jasiukajtis /* here is the n-way unrolled section, 345*25c28e83SPiotr Jasiukajtis but we may actually have less than n 346*25c28e83SPiotr Jasiukajtis arguments at this point 347*25c28e83SPiotr Jasiukajtis */ 348*25c28e83SPiotr Jasiukajtis 349*25c28e83SPiotr Jasiukajtis UNROLL: 350*25c28e83SPiotr Jasiukajtis 351*25c28e83SPiotr Jasiukajtis #ifdef UNROLL4 352*25c28e83SPiotr Jasiukajtis if (argcount == 4) 353*25c28e83SPiotr Jasiukajtis { 354*25c28e83SPiotr Jasiukajtis conup0 = __vlibm_TBL_atan1[index0]; 355*25c28e83SPiotr Jasiukajtis conup1 = __vlibm_TBL_atan1[index1]; 356*25c28e83SPiotr Jasiukajtis conup2 = __vlibm_TBL_atan1[index2]; 357*25c28e83SPiotr Jasiukajtis conup3 = __vlibm_TBL_atan1[index3]; 358*25c28e83SPiotr Jasiukajtis poly0 = p1*f0*f0*f0 + f0; 359*25c28e83SPiotr Jasiukajtis ans0 = sign0 * (float)(conup0 + poly0); 360*25c28e83SPiotr Jasiukajtis poly1 = p1*f1*f1*f1 + f1; 361*25c28e83SPiotr Jasiukajtis ans1 = sign1 * (float)(conup1 + poly1); 362*25c28e83SPiotr Jasiukajtis poly2 = p1*f2*f2*f2 + f2; 363*25c28e83SPiotr Jasiukajtis ans2 = sign2 * (float)(conup2 + poly2); 364*25c28e83SPiotr Jasiukajtis poly3 = p1*f3*f3*f3 + f3; 365*25c28e83SPiotr Jasiukajtis ans3 = sign3 * (float)(conup3 + poly3); 366*25c28e83SPiotr Jasiukajtis *yaddr0 = ans0; 367*25c28e83SPiotr Jasiukajtis *yaddr1 = ans1; 368*25c28e83SPiotr Jasiukajtis *yaddr2 = ans2; 369*25c28e83SPiotr Jasiukajtis *yaddr3 = ans3; 370*25c28e83SPiotr Jasiukajtis } 371*25c28e83SPiotr Jasiukajtis else 372*25c28e83SPiotr Jasiukajtis #endif 373*25c28e83SPiotr Jasiukajtis if (argcount == 3) 374*25c28e83SPiotr Jasiukajtis { 375*25c28e83SPiotr Jasiukajtis conup0 = __vlibm_TBL_atan1[index0]; 376*25c28e83SPiotr Jasiukajtis conup1 = __vlibm_TBL_atan1[index1]; 377*25c28e83SPiotr Jasiukajtis conup2 = __vlibm_TBL_atan1[index2]; 378*25c28e83SPiotr Jasiukajtis poly0 = p1*f0*f0*f0 + f0; 379*25c28e83SPiotr Jasiukajtis poly1 = p1*f1*f1*f1 + f1; 380*25c28e83SPiotr Jasiukajtis poly2 = p1*f2*f2*f2 + f2; 381*25c28e83SPiotr Jasiukajtis ans0 = sign0 * (float)(conup0 + poly0); 382*25c28e83SPiotr Jasiukajtis ans1 = sign1 * (float)(conup1 + poly1); 383*25c28e83SPiotr Jasiukajtis ans2 = sign2 * (float)(conup2 + poly2); 384*25c28e83SPiotr Jasiukajtis *yaddr0 = ans0; 385*25c28e83SPiotr Jasiukajtis *yaddr1 = ans1; 386*25c28e83SPiotr Jasiukajtis *yaddr2 = ans2; 387*25c28e83SPiotr Jasiukajtis } 388*25c28e83SPiotr Jasiukajtis else 389*25c28e83SPiotr Jasiukajtis if (argcount == 2) 390*25c28e83SPiotr Jasiukajtis { 391*25c28e83SPiotr Jasiukajtis conup0 = __vlibm_TBL_atan1[index0]; 392*25c28e83SPiotr Jasiukajtis conup1 = __vlibm_TBL_atan1[index1]; 393*25c28e83SPiotr Jasiukajtis poly0 = p1*f0*f0*f0 + f0; 394*25c28e83SPiotr Jasiukajtis poly1 = p1*f1*f1*f1 + f1; 395*25c28e83SPiotr Jasiukajtis ans0 = sign0 * (float)(conup0 + poly0); 396*25c28e83SPiotr Jasiukajtis ans1 = sign1 * (float)(conup1 + poly1); 397*25c28e83SPiotr Jasiukajtis *yaddr0 = ans0; 398*25c28e83SPiotr Jasiukajtis *yaddr1 = ans1; 399*25c28e83SPiotr Jasiukajtis } 400*25c28e83SPiotr Jasiukajtis else 401*25c28e83SPiotr Jasiukajtis if (argcount == 1) 402*25c28e83SPiotr Jasiukajtis { 403*25c28e83SPiotr Jasiukajtis conup0 = __vlibm_TBL_atan1[index0]; 404*25c28e83SPiotr Jasiukajtis poly0 = p1*f0*f0*f0 + f0; 405*25c28e83SPiotr Jasiukajtis ans0 = sign0 * (float)(conup0 + poly0); 406*25c28e83SPiotr Jasiukajtis *yaddr0 = ans0; 407*25c28e83SPiotr Jasiukajtis } 408*25c28e83SPiotr Jasiukajtis 409*25c28e83SPiotr Jasiukajtis } while (n > 0); 410*25c28e83SPiotr Jasiukajtis 411*25c28e83SPiotr Jasiukajtis } 412