1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #pragma weak tgammaf = __tgammaf 31 32 /* 33 * True gamma function 34 * 35 * float tgammaf(float x) 36 * 37 * Algorithm: see tgamma.c 38 * 39 * Maximum error observed: 0.87ulp (both positive and negative arguments) 40 */ 41 42 #include "libm.h" 43 #include "libm_synonyms.h" 44 #include <math.h> 45 #if defined(__SUNPRO_C) 46 #include <sunmath.h> 47 #endif 48 #include <sys/isa_defs.h> 49 50 #if defined(_BIG_ENDIAN) 51 #define HIWORD 0 52 #define LOWORD 1 53 #else 54 #define HIWORD 1 55 #define LOWORD 0 56 #endif 57 #define __HI(x) ((int *) &x)[HIWORD] 58 #define __LO(x) ((unsigned *) &x)[LOWORD] 59 60 /* Coefficients for primary intervals GTi() */ 61 static const double cr[] = { 62 /* p1 */ 63 +7.09087253435088360271451613398019280077561279443e-0001, 64 -5.17229560788652108545141978238701790105241761089e-0001, 65 +5.23403394528150789405825222323770647162337764327e-0001, 66 -4.54586308717075010784041566069480411732634814899e-0001, 67 +4.20596490915239085459964590559256913498190955233e-0001, 68 -3.57307589712377520978332185838241458642142185789e-0001, 69 70 /* p2 */ 71 +4.28486983980295198166056119223984284434264344578e-0001, 72 -1.30704539487709138528680121627899735386650103914e-0001, 73 +1.60856285038051955072861219352655851542955430871e-0001, 74 -9.22285161346010583774458802067371182158937943507e-0002, 75 +7.19240511767225260740890292605070595560626179357e-0002, 76 -4.88158265593355093703112238534484636193260459574e-0002, 77 78 /* p3 */ 79 +3.82409531118807759081121479786092134814808872880e-0001, 80 +2.65309888180188647956400403013495759365167853426e-0002, 81 +8.06815109775079171923561169415370309376296739835e-0002, 82 -1.54821591666137613928840890835174351674007764799e-0002, 83 +1.76308239242717268530498313416899188157165183405e-0002, 84 85 /* GZi and TZi */ 86 +0.9382046279096824494097535615803269576988, /* GZ1 */ 87 +0.8856031944108887002788159005825887332080, /* GZ2 */ 88 +0.9367814114636523216188468970808378497426, /* GZ3 */ 89 -0.3517214357852935791015625, /* TZ1 */ 90 +0.280530631542205810546875, /* TZ3 */ 91 }; 92 93 #define P10 cr[0] 94 #define P11 cr[1] 95 #define P12 cr[2] 96 #define P13 cr[3] 97 #define P14 cr[4] 98 #define P15 cr[5] 99 #define P20 cr[6] 100 #define P21 cr[7] 101 #define P22 cr[8] 102 #define P23 cr[9] 103 #define P24 cr[10] 104 #define P25 cr[11] 105 #define P30 cr[12] 106 #define P31 cr[13] 107 #define P32 cr[14] 108 #define P33 cr[15] 109 #define P34 cr[16] 110 #define GZ1 cr[17] 111 #define GZ2 cr[18] 112 #define GZ3 cr[19] 113 #define TZ1 cr[20] 114 #define TZ3 cr[21] 115 116 /* compute gamma(y) for y in GT1 = [1.0000, 1.2845] */ 117 static double 118 GT1(double y) { 119 double z, r; 120 121 z = y * y; 122 r = TZ1 * y + z * ((P10 + y * P11 + z * P12) + (z * y) * (P13 + y * 123 P14 + z * P15)); 124 return (GZ1 + r); 125 } 126 127 /* compute gamma(y) for y in GT2 = [1.2844, 1.6374] */ 128 static double 129 GT2(double y) { 130 double z; 131 132 z = y * y; 133 return (GZ2 + z * ((P20 + y * P21 + z * P22) + (z * y) * (P23 + y * 134 P24 + z * P25))); 135 } 136 137 /* compute gamma(y) for y in GT3 = [1.6373, 2.0000] */ 138 static double 139 GT3(double y) { 140 double z, r; 141 142 z = y * y; 143 r = TZ3 * y + z * ((P30 + y * P31 + z * P32) + (z * y) * (P33 + y * 144 P34)); 145 return (GZ3 + r); 146 } 147 148 /* INDENT OFF */ 149 static const double c[] = { 150 +1.0, 151 +2.0, 152 +0.5, 153 +1.0e-300, 154 +6.666717231848518054693623697539230e-0001, /* A1=T3[0] */ 155 +8.33333330959694065245736888749042811909994573178e-0002, /* GP[0] */ 156 -2.77765545601667179767706600890361535225507762168e-0003, /* GP[1] */ 157 +7.77830853479775281781085278324621033523037489883e-0004, /* GP[2] */ 158 +4.18938533204672741744150788368695779923320328369e-0001, /* hln2pi */ 159 +2.16608493924982901946e-02, /* ln2_32 */ 160 +4.61662413084468283841e+01, /* invln2_32 */ 161 +5.00004103388988968841156421415669985414073453720e-0001, /* Et1 */ 162 +1.66667656752800761782778277828110208108687545908e-0001, /* Et2 */ 163 }; 164 165 #define one c[0] 166 #define two c[1] 167 #define half c[2] 168 #define tiny c[3] 169 #define A1 c[4] 170 #define GP0 c[5] 171 #define GP1 c[6] 172 #define GP2 c[7] 173 #define hln2pi c[8] 174 #define ln2_32 c[9] 175 #define invln2_32 c[10] 176 #define Et1 c[11] 177 #define Et2 c[12] 178 179 /* S[j] = 2**(j/32.) for the final computation of exp(w) */ 180 static const double S[] = { 181 +1.00000000000000000000e+00, /* 3FF0000000000000 */ 182 +1.02189714865411662714e+00, /* 3FF059B0D3158574 */ 183 +1.04427378242741375480e+00, /* 3FF0B5586CF9890F */ 184 +1.06714040067682369717e+00, /* 3FF11301D0125B51 */ 185 +1.09050773266525768967e+00, /* 3FF172B83C7D517B */ 186 +1.11438674259589243221e+00, /* 3FF1D4873168B9AA */ 187 +1.13878863475669156458e+00, /* 3FF2387A6E756238 */ 188 +1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */ 189 +1.18920711500272102690e+00, /* 3FF306FE0A31B715 */ 190 +1.21524735998046895524e+00, /* 3FF371A7373AA9CB */ 191 +1.24185781207348400201e+00, /* 3FF3DEA64C123422 */ 192 +1.26905095719173321989e+00, /* 3FF44E086061892D */ 193 +1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */ 194 +1.32523664315974132322e+00, /* 3FF5342B569D4F82 */ 195 +1.35425554693689265129e+00, /* 3FF5AB07DD485429 */ 196 +1.38390988196383202258e+00, /* 3FF6247EB03A5585 */ 197 +1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */ 198 +1.44518080697704665027e+00, /* 3FF71F75E8EC5F74 */ 199 +1.47682614593949934623e+00, /* 3FF7A11473EB0187 */ 200 +1.50916442759342284141e+00, /* 3FF82589994CCE13 */ 201 +1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */ 202 +1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */ 203 +1.61049033194925428347e+00, /* 3FF9C49182A3F090 */ 204 +1.64575547815396494578e+00, /* 3FFA5503B23E255D */ 205 +1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */ 206 +1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */ 207 +1.75625216037329945351e+00, /* 3FFC199BDD85529C */ 208 +1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */ 209 +1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */ 210 +1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */ 211 +1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */ 212 +1.95714412417540017941e+00, /* 3FFF50765B6E4540 */ 213 }; 214 /* INDENT ON */ 215 216 /* INDENT OFF */ 217 /* 218 * return tgammaf(x) in double for 8<x<=35.040096283... using Stirling's formula 219 * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) 220 */ 221 /* 222 * compute ss = log(x)-1 223 * 224 * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, 225 * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and 226 * T1(n-3) = n*log(2)-1, n=3,4,5 227 * T2(j) = log(z[j]), 228 * T3(s) = 2s + A1*s^3 229 * Note 230 * (1) Remez error for T3(s) is bounded by 2**(-35.8) 231 * (see mpremez/work/Log/tgamma_log_2_outr1) 232 */ 233 234 static const double T1[] = { /* T1[j]=(j+3)*log(2)-1 */ 235 +1.079441541679835928251696364375e+00, 236 +1.772588722239781237668928485833e+00, 237 +2.465735902799726547086160607291e+00, 238 }; 239 240 static const double T2[] = { /* T2[j]=log(1+j/64+1/128) */ 241 +7.782140442054948947462900061137e-03, 242 +2.316705928153437822879916096229e-02, 243 +3.831886430213659919375532512380e-02, 244 +5.324451451881228286587019378653e-02, 245 +6.795066190850774939456527777263e-02, 246 +8.244366921107459126816006866831e-02, 247 +9.672962645855111229557105648746e-02, 248 +1.108143663402901141948061693232e-01, 249 +1.247034785009572358634065153809e-01, 250 +1.384023228591191356853258736016e-01, 251 +1.519160420258419750718034248969e-01, 252 +1.652495728953071628756114492772e-01, 253 +1.784076574728182971194002415109e-01, 254 +1.913948529996294546092988075613e-01, 255 +2.042155414286908915038203861962e-01, 256 +2.168739383006143596190895257443e-01, 257 +2.293741010648458299914807250461e-01, 258 +2.417199368871451681443075159135e-01, 259 +2.539152099809634441373232979066e-01, 260 +2.659635484971379413391259265375e-01, 261 +2.778684510034563061863500329234e-01, 262 +2.896332925830426768788930555257e-01, 263 +3.012613305781617810128755382338e-01, 264 +3.127557100038968883862465596883e-01, 265 +3.241194686542119760906707604350e-01, 266 +3.353555419211378302571795798142e-01, 267 +3.464667673462085809184621884258e-01, 268 +3.574558889218037742260094901409e-01, 269 +3.683255611587076530482301540504e-01, 270 +3.790783529349694583908533456310e-01, 271 +3.897167511400252133704636040035e-01, 272 +4.002431641270127069293251019951e-01, 273 +4.106599249852683859343062031758e-01, 274 +4.209692946441296361288671615068e-01, 275 +4.311734648183713408591724789556e-01, 276 +4.412745608048752294894964416613e-01, 277 +4.512746441394585851446923830790e-01, 278 +4.611757151221701663679999255979e-01, 279 +4.709797152187910125468978560564e-01, 280 +4.806885293457519076766184554480e-01, 281 +4.903039880451938381503461596457e-01, 282 +4.998278695564493298213314152470e-01, 283 +5.092619017898079468040749192283e-01, 284 +5.186077642080456321529769963648e-01, 285 +5.278670896208423851138922177783e-01, 286 +5.370414658968836545667292441538e-01, 287 +5.461324375981356503823972092312e-01, 288 +5.551415075405015927154803595159e-01, 289 +5.640701382848029660713842900902e-01, 290 +5.729197535617855090927567266263e-01, 291 +5.816917396346224825206107537254e-01, 292 +5.903874466021763746419167081236e-01, 293 +5.990081896460833993816000244617e-01, 294 +6.075552502245417955010851527911e-01, 295 +6.160298772155140196475659281967e-01, 296 +6.244332880118935010425387440547e-01, 297 +6.327666695710378295457864685036e-01, 298 +6.410311794209312910556013344054e-01, 299 +6.492279466251098188908399699053e-01, 300 +6.573580727083600301418900232459e-01, 301 +6.654226325450904489500926100067e-01, 302 +6.734226752121667202979603888010e-01, 303 +6.813592248079030689480715595681e-01, 304 +6.892332812388089803249143378146e-01, 305 }; 306 /* INDENT ON */ 307 308 static double 309 large_gam(double x) { 310 double ss, zz, z, t1, t2, w, y, u; 311 unsigned lx; 312 int k, ix, j, m; 313 314 ix = __HI(x); 315 lx = __LO(x); 316 m = (ix >> 20) - 0x3ff; /* exponent of x, range:3-5 */ 317 ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */ 318 __HI(y) = ix; 319 __LO(y) = lx; 320 __HI(z) = (ix & 0xffffc000) | 0x2000; /* z[j]=1+j/64+1/128 */ 321 __LO(z) = 0; 322 j = (ix >> 14) & 0x3f; 323 t1 = y + z; 324 t2 = y - z; 325 u = t2 / t1; 326 ss = T1[m - 3] + T2[j] + u * (two + A1 * (u * u)); 327 /* ss = log(x)-1 */ 328 /* 329 * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2))) 330 * where ss = log(x) - 1 331 */ 332 z = one / x; 333 zz = z * z; 334 w = ((x - half) * ss + hln2pi) + z * (GP0 + zz * GP1 + (zz * zz) * GP2); 335 k = (int) (w * invln2_32 + half); 336 337 /* compute the exponential of w */ 338 j = k & 0x1f; 339 m = k >> 5; 340 z = w - (double) k *ln2_32; 341 zz = S[j] * (one + z + (z * z) * (Et1 + z * Et2)); 342 __HI(zz) += m << 20; 343 return (zz); 344 } 345 /* INDENT OFF */ 346 /* 347 * kpsin(x)= sin(pi*x)/pi 348 * 3 5 7 9 349 * = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x 350 */ 351 static const double ks[] = { 352 -1.64493404985645811354476665052005342839447790544e+0000, 353 +8.11740794458351064092797249069438269367389272270e-0001, 354 -1.90703144603551216933075809162889536878854055202e-0001, 355 +2.55742333994264563281155312271481108635575331201e-0002, 356 }; 357 /* INDENT ON */ 358 359 static double 360 kpsin(double x) { 361 double z; 362 363 z = x * x; 364 return (x + (x * z) * ((ks[0] + z * ks[1]) + (z * z) * (ks[2] + z * 365 ks[3]))); 366 } 367 368 /* INDENT OFF */ 369 /* 370 * kpcos(x)= cos(pi*x)/pi 371 * 2 4 6 372 * = kc[0]+kc[1]*x +kc[2]*x +kc[3]*x 373 */ 374 static const double kc[] = { 375 +3.18309886183790671537767526745028724068919291480e-0001, 376 -1.57079581447762568199467875065854538626594937791e+0000, 377 +1.29183528092558692844073004029568674027807393862e+0000, 378 -4.20232949771307685981015914425195471602739075537e-0001, 379 }; 380 /* INDENT ON */ 381 382 static double 383 kpcos(double x) { 384 double z; 385 386 z = x * x; 387 return (kc[0] + z * (kc[1] + z * kc[2] + (z * z) * kc[3])); 388 } 389 390 /* INDENT OFF */ 391 static const double 392 t0z1 = 0.134861805732790769689793935774652917006, 393 t0z2 = 0.461632144968362341262659542325721328468, 394 t0z3 = 0.819773101100500601787868704921606996312; 395 /* 1.134861805732790769689793935774652917006 */ 396 /* INDENT ON */ 397 398 /* 399 * gamma(x+i) for 0 <= x < 1 400 */ 401 static double 402 gam_n(int i, double x) { 403 double rr = 0.0L, yy; 404 double z1, z2; 405 406 /* compute yy = gamma(x+1) */ 407 if (x > 0.2845) { 408 if (x > 0.6374) 409 yy = GT3(x - t0z3); 410 else 411 yy = GT2(x - t0z2); 412 } else 413 yy = GT1(x - t0z1); 414 415 /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */ 416 switch (i) { 417 case 0: /* yy/x */ 418 rr = yy / x; 419 break; 420 case 1: /* yy */ 421 rr = yy; 422 break; 423 case 2: /* (x+1)*yy */ 424 rr = (x + one) * yy; 425 break; 426 case 3: /* (x+2)*(x+1)*yy */ 427 rr = (x + one) * (x + two) * yy; 428 break; 429 430 case 4: /* (x+1)*(x+3)*(x+2)*yy */ 431 rr = (x + one) * (x + two) * ((x + 3.0) * yy); 432 break; 433 case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */ 434 z1 = (x + two) * (x + 3.0) * yy; 435 z2 = (x + one) * (x + 4.0); 436 rr = z1 * z2; 437 break; 438 case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */ 439 z1 = (x + two) * (x + 3.0); 440 z2 = (x + 5.0) * yy; 441 rr = z1 * (z1 - two) * z2; 442 break; 443 case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */ 444 z1 = (x + two) * (x + 3.0); 445 z2 = (x + 5.0) * (x + 6.0) * yy; 446 rr = z1 * (z1 - two) * z2; 447 break; 448 } 449 return (rr); 450 } 451 452 float 453 tgammaf(float xf) { 454 float zf; 455 double ss, ww; 456 double x, y, z; 457 int i, j, k, ix, hx, xk; 458 459 hx = *(int *) &xf; 460 ix = hx & 0x7fffffff; 461 462 x = (double) xf; 463 if (ix < 0x33800000) 464 return (1.0F / xf); /* |x| < 2**-24 */ 465 466 if (ix >= 0x7f800000) 467 return (xf * ((hx < 0)? 0.0F : xf)); /* +-Inf or NaN */ 468 469 if (hx > 0x420C290F) /* x > 35.040096283... overflow */ 470 return (float)(x / tiny); 471 472 if (hx >= 0x41000000) /* x >= 8 */ 473 return ((float) large_gam(x)); 474 475 if (hx > 0) { /* 0 < x < 8 */ 476 i = (int) xf; 477 return ((float) gam_n(i, x - (double) i)); 478 } 479 480 /* negative x */ 481 /* INDENT OFF */ 482 /* 483 * compute xk = 484 * -2 ... x is an even int (-inf is considered even) 485 * -1 ... x is an odd int 486 * +0 ... x is not an int but chopped to an even int 487 * +1 ... x is not an int but chopped to an odd int 488 */ 489 /* INDENT ON */ 490 xk = 0; 491 if (ix >= 0x4b000000) { 492 if (ix > 0x4b000000) 493 xk = -2; 494 else 495 xk = -2 + (ix & 1); 496 } else if (ix >= 0x3f800000) { 497 k = (ix >> 23) - 0x7f; 498 j = ix >> (23 - k); 499 if ((j << (23 - k)) == ix) 500 xk = -2 + (j & 1); 501 else 502 xk = j & 1; 503 } 504 if (xk < 0) { 505 /* 0/0 invalid NaN, ideally gamma(-n)= (-1)**(n+1) * inf */ 506 zf = xf - xf; 507 return (zf / zf); 508 } 509 510 /* negative underflow thresold */ 511 if (ix > 0x4224000B) { /* x < -(41+11ulp) */ 512 if (xk == 0) 513 z = -tiny; 514 else 515 z = tiny; 516 return ((float)z); 517 } 518 519 /* INDENT OFF */ 520 /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */ 521 /* 522 * First compute ss = -sin(pi*y)/pi , so that 523 * gamma(x) = 1/(ss*gamma(1+y)) 524 */ 525 /* INDENT ON */ 526 y = -x; 527 j = (int) y; 528 z = y - (double) j; 529 if (z > 0.3183098861837906715377675) 530 if (z > 0.6816901138162093284622325) 531 ss = kpsin(one - z); 532 else 533 ss = kpcos(0.5 - z); 534 else 535 ss = kpsin(z); 536 if (xk == 0) 537 ss = -ss; 538 539 /* Then compute ww = gamma(1+y) */ 540 if (j < 7) 541 ww = gam_n(j + 1, z); 542 else 543 ww = large_gam(y + one); 544 545 /* return 1/(ss*ww) */ 546 return ((float) (one / (ww * ss))); 547 } 548