1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #ifdef __RESTRICT 31 #define restrict _Restrict 32 #else 33 #define restrict 34 #endif 35 36 void 37 __vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey) 38 { 39 extern const double __vlibm_TBL_atan1[]; 40 double conup0, conup1, conup2; 41 float dummy, ansf = 0.0; 42 float f0, f1, f2; 43 float ans0, ans1, ans2; 44 float poly0, poly1, poly2; 45 float sign0, sign1, sign2; 46 int intf, intz, argcount; 47 int index0, index1, index2; 48 float z,*yaddr0,*yaddr1,*yaddr2; 49 int *pz = (int *) &z; 50 #ifdef UNROLL4 51 double conup3; 52 int index3; 53 float f3, ans3, poly3, sign3, *yaddr3; 54 #endif 55 56 /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 57 * Error = -3.08254E-18 On the interval |x| < 1/64 */ 58 59 static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ; 60 static const float pone = 1.0f; 61 62 if (n <= 0) return; /* if no. of elements is 0 or neg, do nothing */ 63 do 64 { 65 LOOP0: 66 67 intf = *(int *) x; /* upper half of x, as integer */ 68 f0 = *x; 69 sign0 = pone; 70 if (intf < 0) { 71 intf = intf & ~0x80000000; /* abs(upper argument) */ 72 f0 = -f0; 73 sign0 = -sign0; 74 } 75 76 if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ 77 { 78 if (intf > 0x7f800000) 79 { 80 ansf = f0- f0; /* return NaN if x=NaN*/ 81 } 82 else if (intf < 0x31800000) /* avoid underflow for small arg */ 83 { 84 dummy = 1.0e37 + f0; 85 dummy = dummy; 86 ansf = f0; 87 } 88 else if (intf > 0x5B000000) /* avoid underflow for big arg */ 89 { 90 index0= 2; 91 ansf = __vlibm_TBL_atan1[index0];/* pi/2 up */ 92 } 93 *y = sign0*ansf; /* store answer, with sign bit */ 94 x += stridex; 95 y += stridey; 96 argcount = 0; /* initialize argcount */ 97 if (--n <=0) break; /* we are done */ 98 goto LOOP0; /* otherwise, examine next arg */ 99 } 100 101 if (intf > 0x42800000) /* if (|x| > 64 */ 102 { 103 f0 = -pone/f0; 104 index0 = 2; /* point to pi/2 upper, lower */ 105 } 106 else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ 107 { 108 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ 109 pz[0] = intz; /* store as a float (z) */ 110 f0 = (f0 - z)/(pone + f0*z); 111 index0 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 112 index0 = index0+ 4; /* skip over 0,0,pi/2,pi/2 */ 113 } 114 else /* |x| < 1/64 */ 115 { 116 index0 = 0; /* points to 0,0 in table */ 117 } 118 yaddr0 = y; /* address to store this answer */ 119 x += stridex; /* point to next arg */ 120 y += stridey; /* point to next result */ 121 argcount = 1; /* we now have 1 good argument */ 122 if (--n <=0) 123 { 124 goto UNROLL; /* finish up with 1 good arg */ 125 } 126 127 /*--------------------------------------------------------------------------*/ 128 /*--------------------------------------------------------------------------*/ 129 /*--------------------------------------------------------------------------*/ 130 131 LOOP1: 132 133 intf = *(int *) x; /* upper half of x, as integer */ 134 f1 = *x; 135 sign1 = pone; 136 if (intf < 0) { 137 intf = intf & ~0x80000000; /* abs(upper argument) */ 138 f1 = -f1; 139 sign1 = -sign1; 140 } 141 142 if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ 143 { 144 if (intf > 0x7f800000) 145 { 146 ansf = f1 - f1; /* return NaN if x=NaN*/ 147 } 148 else if (intf < 0x31800000) /* avoid underflow for small arg */ 149 { 150 dummy = 1.0e37 + f1; 151 dummy = dummy; 152 ansf = f1; 153 } 154 else if (intf > 0x5B000000) /* avoid underflow for big arg */ 155 { 156 index1 = 2; 157 ansf = __vlibm_TBL_atan1[index1] ;/* pi/2 up */ 158 } 159 *y = sign1 * ansf; /* store answer, with sign bit */ 160 x += stridex; 161 y += stridey; 162 argcount = 1; /* we still have 1 good arg */ 163 if (--n <=0) 164 { 165 goto UNROLL; /* finish up with 1 good arg */ 166 } 167 goto LOOP1; /* otherwise, examine next arg */ 168 } 169 170 if (intf > 0x42800000) /* if (|x| > 64 */ 171 { 172 f1 = -pone/f1; 173 index1 = 2; /* point to pi/2 upper, lower */ 174 } 175 else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ 176 { 177 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ 178 pz[0] = intz; /* store as a float (z) */ 179 f1 = (f1 - z)/(pone + f1*z); 180 index1 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 181 index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */ 182 } 183 else 184 { 185 index1 = 0; /* points to 0,0 in table */ 186 } 187 188 yaddr1 = y; /* address to store this answer */ 189 x += stridex; /* point to next arg */ 190 y += stridey; /* point to next result */ 191 argcount = 2; /* we now have 2 good arguments */ 192 if (--n <=0) 193 { 194 goto UNROLL; /* finish up with 2 good args */ 195 } 196 197 /*--------------------------------------------------------------------------*/ 198 /*--------------------------------------------------------------------------*/ 199 /*--------------------------------------------------------------------------*/ 200 201 LOOP2: 202 203 intf = *(int *) x; /* upper half of x, as integer */ 204 f2 = *x; 205 sign2 = pone; 206 if (intf < 0) { 207 intf = intf & ~0x80000000; /* abs(upper argument) */ 208 f2 = -f2; 209 sign2 = -sign2; 210 } 211 212 if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ 213 { 214 if (intf > 0x7f800000) 215 { 216 ansf = f2 - f2; /* return NaN if x=NaN*/ 217 } 218 else if (intf < 0x31800000) /* avoid underflow for small arg */ 219 { 220 dummy = 1.0e37 + f2; 221 dummy = dummy; 222 ansf = f2; 223 } 224 else if (intf > 0x5B000000) /* avoid underflow for big arg */ 225 { 226 index2 = 2; 227 ansf = __vlibm_TBL_atan1[index2] ;/* pi/2 up */ 228 } 229 *y = sign2 * ansf; /* store answer, with sign bit */ 230 x += stridex; 231 y += stridey; 232 argcount = 2; /* we still have 2 good args */ 233 if (--n <=0) 234 { 235 goto UNROLL; /* finish up with 2 good args */ 236 } 237 goto LOOP2; /* otherwise, examine next arg */ 238 } 239 240 if (intf > 0x42800000) /* if (|x| > 64 */ 241 { 242 f2 = -pone/f2; 243 index2 = 2; /* point to pi/2 upper, lower */ 244 } 245 else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ 246 { 247 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ 248 pz[0] = intz; /* store as a float (z) */ 249 f2 = (f2 - z)/(pone + f2*z); 250 index2 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 251 index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */ 252 } 253 else 254 { 255 index2 = 0; /* points to 0,0 in table */ 256 } 257 yaddr2 = y; /* address to store this answer */ 258 x += stridex; /* point to next arg */ 259 y += stridey; /* point to next result */ 260 argcount = 3; /* we now have 3 good arguments */ 261 if (--n <=0) 262 { 263 goto UNROLL; /* finish up with 2 good args */ 264 } 265 266 267 /*--------------------------------------------------------------------------*/ 268 /*--------------------------------------------------------------------------*/ 269 /*--------------------------------------------------------------------------*/ 270 271 #ifdef UNROLL4 272 LOOP3: 273 274 intf = *(int *) x; /* upper half of x, as integer */ 275 f3 = *x; 276 sign3 = pone; 277 if (intf < 0) { 278 intf = intf & ~0x80000000; /* abs(upper argument) */ 279 f3 = -f3; 280 sign3 = -sign3; 281 } 282 283 if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ 284 { 285 if (intf > 0x7f800000) 286 { 287 ansf = f3 - f3; /* return NaN if x=NaN*/ 288 } 289 else if (intf < 0x31800000) /* avoid underflow for small arg */ 290 { 291 dummy = 1.0e37 + f3; 292 dummy = dummy; 293 ansf = f3; 294 } 295 else if (intf > 0x5B000000) /* avoid underflow for big arg */ 296 { 297 index3 = 2; 298 ansf = __vlibm_TBL_atan1[index3] ;/* pi/2 up */ 299 } 300 *y = sign3 * ansf; /* store answer, with sign bit */ 301 x += stridex; 302 y += stridey; 303 argcount = 3; /* we still have 3 good args */ 304 if (--n <=0) 305 { 306 goto UNROLL; /* finish up with 3 good args */ 307 } 308 goto LOOP3; /* otherwise, examine next arg */ 309 } 310 311 if (intf > 0x42800000) /* if (|x| > 64 */ 312 { 313 n3 = -pone; 314 d3 = f3; 315 f3 = n3/d3; 316 index3 = 2; /* point to pi/2 upper, lower */ 317 } 318 else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ 319 { 320 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ 321 pz[0] = intz; /* store as a float (z) */ 322 n3 = (f3 - z); 323 d3 = (pone + f3*z); /* get reduced argument */ 324 f3 = n3/d3; 325 index3 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 326 index3 = index3 + 4; /* skip over 0,0,pi/2,pi/2 */ 327 } 328 else 329 { 330 n3 = f3; 331 d3 = pone; 332 index3 = 0; /* points to 0,0 in table */ 333 } 334 yaddr3 = y; /* address to store this answer */ 335 x += stridex; /* point to next arg */ 336 y += stridey; /* point to next result */ 337 argcount = 4; /* we now have 4 good arguments */ 338 if (--n <=0) 339 { 340 goto UNROLL; /* finish up with 3 good args */ 341 } 342 #endif /* UNROLL4 */ 343 344 /* here is the n-way unrolled section, 345 but we may actually have less than n 346 arguments at this point 347 */ 348 349 UNROLL: 350 351 #ifdef UNROLL4 352 if (argcount == 4) 353 { 354 conup0 = __vlibm_TBL_atan1[index0]; 355 conup1 = __vlibm_TBL_atan1[index1]; 356 conup2 = __vlibm_TBL_atan1[index2]; 357 conup3 = __vlibm_TBL_atan1[index3]; 358 poly0 = p1*f0*f0*f0 + f0; 359 ans0 = sign0 * (float)(conup0 + poly0); 360 poly1 = p1*f1*f1*f1 + f1; 361 ans1 = sign1 * (float)(conup1 + poly1); 362 poly2 = p1*f2*f2*f2 + f2; 363 ans2 = sign2 * (float)(conup2 + poly2); 364 poly3 = p1*f3*f3*f3 + f3; 365 ans3 = sign3 * (float)(conup3 + poly3); 366 *yaddr0 = ans0; 367 *yaddr1 = ans1; 368 *yaddr2 = ans2; 369 *yaddr3 = ans3; 370 } 371 else 372 #endif 373 if (argcount == 3) 374 { 375 conup0 = __vlibm_TBL_atan1[index0]; 376 conup1 = __vlibm_TBL_atan1[index1]; 377 conup2 = __vlibm_TBL_atan1[index2]; 378 poly0 = p1*f0*f0*f0 + f0; 379 poly1 = p1*f1*f1*f1 + f1; 380 poly2 = p1*f2*f2*f2 + f2; 381 ans0 = sign0 * (float)(conup0 + poly0); 382 ans1 = sign1 * (float)(conup1 + poly1); 383 ans2 = sign2 * (float)(conup2 + poly2); 384 *yaddr0 = ans0; 385 *yaddr1 = ans1; 386 *yaddr2 = ans2; 387 } 388 else 389 if (argcount == 2) 390 { 391 conup0 = __vlibm_TBL_atan1[index0]; 392 conup1 = __vlibm_TBL_atan1[index1]; 393 poly0 = p1*f0*f0*f0 + f0; 394 poly1 = p1*f1*f1*f1 + f1; 395 ans0 = sign0 * (float)(conup0 + poly0); 396 ans1 = sign1 * (float)(conup1 + poly1); 397 *yaddr0 = ans0; 398 *yaddr1 = ans1; 399 } 400 else 401 if (argcount == 1) 402 { 403 conup0 = __vlibm_TBL_atan1[index0]; 404 poly0 = p1*f0*f0*f0 + f0; 405 ans0 = sign0 * (float)(conup0 + poly0); 406 *yaddr0 = ans0; 407 } 408 409 } while (n > 0); 410 411 } 412