xref: /illumos-gate/usr/src/lib/libm/common/m9x/tgammaf.c (revision 177d5b5f8c0e969013441207a0a705ae66b08cf7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #pragma weak tgammaf = __tgammaf
31 
32 /*
33  * True gamma function
34  *
35  * float tgammaf(float x)
36  *
37  * Algorithm: see tgamma.c
38  *
39  * Maximum error observed: 0.87ulp (both positive and negative arguments)
40  */
41 
42 #include "libm.h"
43 #include "libm_synonyms.h"
44 #include <math.h>
45 #if defined(__SUNPRO_C)
46 #include <sunmath.h>
47 #endif
48 #include <sys/isa_defs.h>
49 
50 #if defined(_BIG_ENDIAN)
51 #define	HIWORD	0
52 #define	LOWORD	1
53 #else
54 #define	HIWORD	1
55 #define	LOWORD	0
56 #endif
57 #define	__HI(x)	((int *) &x)[HIWORD]
58 #define	__LO(x)	((unsigned *) &x)[LOWORD]
59 
/*
 * Coefficients for primary intervals GTi()
 *
 * cr[] holds, in order:
 *	p1	six polynomial coefficients read by GT1() (P10..P15)
 *	p2	six polynomial coefficients read by GT2() (P20..P25)
 *	p3	five polynomial coefficients read by GT3() (P30..P34)
 *	GZi	the constant term each GTi() adds to its polynomial
 *	TZi	the coefficient of the linear term in GT1() and GT3();
 *		there is no TZ2 because GT2() has no linear term
 */
static const double cr[] = {
	/* p1 */
	+7.09087253435088360271451613398019280077561279443e-0001,
	-5.17229560788652108545141978238701790105241761089e-0001,
	+5.23403394528150789405825222323770647162337764327e-0001,
	-4.54586308717075010784041566069480411732634814899e-0001,
	+4.20596490915239085459964590559256913498190955233e-0001,
	-3.57307589712377520978332185838241458642142185789e-0001,

	/* p2 */
	+4.28486983980295198166056119223984284434264344578e-0001,
	-1.30704539487709138528680121627899735386650103914e-0001,
	+1.60856285038051955072861219352655851542955430871e-0001,
	-9.22285161346010583774458802067371182158937943507e-0002,
	+7.19240511767225260740890292605070595560626179357e-0002,
	-4.88158265593355093703112238534484636193260459574e-0002,

	/* p3 */
	+3.82409531118807759081121479786092134814808872880e-0001,
	+2.65309888180188647956400403013495759365167853426e-0002,
	+8.06815109775079171923561169415370309376296739835e-0002,
	-1.54821591666137613928840890835174351674007764799e-0002,
	+1.76308239242717268530498313416899188157165183405e-0002,

	/* GZi and TZi */
	+0.9382046279096824494097535615803269576988,	/* GZ1 */
	+0.8856031944108887002788159005825887332080,	/* GZ2 */
	+0.9367814114636523216188468970808378497426,	/* GZ3 */
	-0.3517214357852935791015625,	/* TZ1 */
	+0.280530631542205810546875,	/* TZ3 */
};

/*
 * Symbolic names for the cr[] entries.  Pij is coefficient j of the
 * polynomial evaluated by GTi(); the indices below must stay in step with
 * the order of the initializers above.
 */
#define	P10	cr[0]
#define	P11	cr[1]
#define	P12	cr[2]
#define	P13	cr[3]
#define	P14	cr[4]
#define	P15	cr[5]
#define	P20	cr[6]
#define	P21	cr[7]
#define	P22	cr[8]
#define	P23	cr[9]
#define	P24	cr[10]
#define	P25	cr[11]
#define	P30	cr[12]
#define	P31	cr[13]
#define	P32	cr[14]
#define	P33	cr[15]
#define	P34	cr[16]
#define	GZ1	cr[17]
#define	GZ2	cr[18]
#define	GZ3	cr[19]
#define	TZ1	cr[20]
#define	TZ3	cr[21]
115 
116 /* compute gamma(y) for y in GT1 = [1.0000, 1.2845] */
117 static double
118 GT1(double y) {
119 	double z, r;
120 
121 	z = y * y;
122 	r = TZ1 * y + z * ((P10 + y * P11 + z * P12) + (z * y) * (P13 + y *
123 		P14 + z * P15));
124 	return (GZ1 + r);
125 }
126 
127 /* compute gamma(y) for y in GT2 = [1.2844, 1.6374] */
128 static double
129 GT2(double y) {
130 	double z;
131 
132 	z = y * y;
133 	return (GZ2 + z * ((P20 + y * P21 + z * P22) + (z * y) * (P23 + y *
134 		P24 + z * P25)));
135 }
136 
137 /* compute gamma(y) for y in GT3 = [1.6373, 2.0000] */
138 static double
139 GT3(double y) {
140 double z, r;
141 
142 	z = y * y;
143 	r = TZ3 * y + z * ((P30 + y * P31 + z * P32) + (z * y) * (P33 + y *
144 		P34));
145 	return (GZ3 + r);
146 }
147 
148 /* INDENT OFF */
/*
 * Scalar constants used by large_gam(), gam_n() and tgammaf().  They are
 * kept in one table and reached through the macros that follow it, so the
 * order of the initializers must match the macro indices.
 */
static const double c[] = {
+1.0,
+2.0,
+0.5,
+1.0e-300,
+6.666717231848518054693623697539230e-0001,			/* A1=T3[0] */
+8.33333330959694065245736888749042811909994573178e-0002,	/* GP[0] */
-2.77765545601667179767706600890361535225507762168e-0003,	/* GP[1] */
+7.77830853479775281781085278324621033523037489883e-0004,	/* GP[2] */
+4.18938533204672741744150788368695779923320328369e-0001,	/* hln2pi   */
+2.16608493924982901946e-02,					/* ln2_32 */
+4.61662413084468283841e+01,					/* invln2_32 */
+5.00004103388988968841156421415669985414073453720e-0001,	/* Et1 */
+1.66667656752800761782778277828110208108687545908e-0001,	/* Et2 */
};

/*
 * one, two, half	the literal values 1, 2 and 0.5
 * tiny			1e-300; tgammaf() divides by it or returns it (cast
 *			to float) to produce overflowed/underflowed results
 * A1			cubic coefficient of the log approximation in
 *			large_gam(): u * (two + A1 * u^2)
 * GP0..GP2		coefficients of the 1/x correction polynomial in
 *			large_gam()
 * hln2pi		the .5*(log(2pi)-1) term added in large_gam()
 * ln2_32, invln2_32	log(2)/32 and its reciprocal, used by large_gam() to
 *			reduce the argument of the final exponential
 * Et1, Et2		quadratic and cubic coefficients of the reduced
 *			exponential polynomial in large_gam()
 */
#define	one		c[0]
#define	two		c[1]
#define	half		c[2]
#define	tiny		c[3]
#define	A1		c[4]
#define	GP0		c[5]
#define	GP1		c[6]
#define	GP2		c[7]
#define	hln2pi		c[8]
#define	ln2_32		c[9]
#define	invln2_32	c[10]
#define	Et1		c[11]
#define	Et2		c[12]
178 
/*
 * S[j] = 2**(j/32.) for the final computation of exp(w)
 *
 * large_gam() indexes this table with j = k & 0x1f, the low five bits of
 * the reduced exponent k, and applies the remaining k >> 5 by adjusting
 * the exponent field of the result.  The trailing comment on each entry
 * is presumably the IEEE-754 double bit pattern of the value in hex.
 */
static const double S[] = {
+1.00000000000000000000e+00,	/* 3FF0000000000000 */
+1.02189714865411662714e+00,	/* 3FF059B0D3158574 */
+1.04427378242741375480e+00,	/* 3FF0B5586CF9890F */
+1.06714040067682369717e+00,	/* 3FF11301D0125B51 */
+1.09050773266525768967e+00,	/* 3FF172B83C7D517B */
+1.11438674259589243221e+00,	/* 3FF1D4873168B9AA */
+1.13878863475669156458e+00,	/* 3FF2387A6E756238 */
+1.16372485877757747552e+00,	/* 3FF29E9DF51FDEE1 */
+1.18920711500272102690e+00,	/* 3FF306FE0A31B715 */
+1.21524735998046895524e+00,	/* 3FF371A7373AA9CB */
+1.24185781207348400201e+00,	/* 3FF3DEA64C123422 */
+1.26905095719173321989e+00,	/* 3FF44E086061892D */
+1.29683955465100964055e+00,	/* 3FF4BFDAD5362A27 */
+1.32523664315974132322e+00,	/* 3FF5342B569D4F82 */
+1.35425554693689265129e+00,	/* 3FF5AB07DD485429 */
+1.38390988196383202258e+00,	/* 3FF6247EB03A5585 */
+1.41421356237309514547e+00,	/* 3FF6A09E667F3BCD */
+1.44518080697704665027e+00,	/* 3FF71F75E8EC5F74 */
+1.47682614593949934623e+00,	/* 3FF7A11473EB0187 */
+1.50916442759342284141e+00,	/* 3FF82589994CCE13 */
+1.54221082540794074411e+00,	/* 3FF8ACE5422AA0DB */
+1.57598084510788649659e+00,	/* 3FF93737B0CDC5E5 */
+1.61049033194925428347e+00,	/* 3FF9C49182A3F090 */
+1.64575547815396494578e+00,	/* 3FFA5503B23E255D */
+1.68179283050742900407e+00,	/* 3FFAE89F995AD3AD */
+1.71861929812247793414e+00,	/* 3FFB7F76F2FB5E47 */
+1.75625216037329945351e+00,	/* 3FFC199BDD85529C */
+1.79470907500310716820e+00,	/* 3FFCB720DCEF9069 */
+1.83400808640934243066e+00,	/* 3FFD5818DCFBA487 */
+1.87416763411029996256e+00,	/* 3FFDFC97337B9B5F */
+1.91520656139714740007e+00,	/* 3FFEA4AFA2A490DA */
+1.95714412417540017941e+00,	/* 3FFF50765B6E4540 */
};
214 /* INDENT ON */
215 
216 /* INDENT OFF */
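/*
 * return tgammaf(x) in double for 8<=x<=35.040096283... using Stirling's
 * formula (tgammaf() also calls this with 1-x, x negative, when it reflects
 * a negative argument whose magnitude is 7 or more)
 *     log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
 */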
/*
 * compute ss = log(x)-1
 *
 *  log(x) - 1 = T1(n-3) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
 *  j=[64*(y-1)], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
 *       T1(n-3) = n*log(2)-1,  n=3,4,5
 *       T2(j) = log(z[j]),  j=0,...,63
 *       T3(s) = 2s + A1*s^3
 *  Note
 *  (1) Remez error for T3(s) is bounded by 2**(-35.8)
 *	(see mpremez/work/Log/tgamma_log_2_outr1)
 */
233 
/*
 * T1[j]=(j+3)*log(2)-1
 *
 * large_gam() indexes this table with m - 3, where m is the unbiased
 * binary exponent of its argument; only exponents 3, 4 and 5 have entries,
 * so the argument must lie in [8, 64).
 */
static const double T1[] = { /* T1[j]=(j+3)*log(2)-1 */
+1.079441541679835928251696364375e+00,
+1.772588722239781237668928485833e+00,
+2.465735902799726547086160607291e+00,
};
239 
/*
 * T2[j]=log(1+j/64+1/128), j = 0..63
 *
 * large_gam() indexes this table with the six most significant fraction
 * bits of its argument scaled into [1,2), i.e. the logarithm of the
 * midpoint of the 1/64-wide sub-interval that contains the scaled value.
 */
static const double T2[] = {   /* T2[j]=log(1+j/64+1/128) */
+7.782140442054948947462900061137e-03,
+2.316705928153437822879916096229e-02,
+3.831886430213659919375532512380e-02,
+5.324451451881228286587019378653e-02,
+6.795066190850774939456527777263e-02,
+8.244366921107459126816006866831e-02,
+9.672962645855111229557105648746e-02,
+1.108143663402901141948061693232e-01,
+1.247034785009572358634065153809e-01,
+1.384023228591191356853258736016e-01,
+1.519160420258419750718034248969e-01,
+1.652495728953071628756114492772e-01,
+1.784076574728182971194002415109e-01,
+1.913948529996294546092988075613e-01,
+2.042155414286908915038203861962e-01,
+2.168739383006143596190895257443e-01,
+2.293741010648458299914807250461e-01,
+2.417199368871451681443075159135e-01,
+2.539152099809634441373232979066e-01,
+2.659635484971379413391259265375e-01,
+2.778684510034563061863500329234e-01,
+2.896332925830426768788930555257e-01,
+3.012613305781617810128755382338e-01,
+3.127557100038968883862465596883e-01,
+3.241194686542119760906707604350e-01,
+3.353555419211378302571795798142e-01,
+3.464667673462085809184621884258e-01,
+3.574558889218037742260094901409e-01,
+3.683255611587076530482301540504e-01,
+3.790783529349694583908533456310e-01,
+3.897167511400252133704636040035e-01,
+4.002431641270127069293251019951e-01,
+4.106599249852683859343062031758e-01,
+4.209692946441296361288671615068e-01,
+4.311734648183713408591724789556e-01,
+4.412745608048752294894964416613e-01,
+4.512746441394585851446923830790e-01,
+4.611757151221701663679999255979e-01,
+4.709797152187910125468978560564e-01,
+4.806885293457519076766184554480e-01,
+4.903039880451938381503461596457e-01,
+4.998278695564493298213314152470e-01,
+5.092619017898079468040749192283e-01,
+5.186077642080456321529769963648e-01,
+5.278670896208423851138922177783e-01,
+5.370414658968836545667292441538e-01,
+5.461324375981356503823972092312e-01,
+5.551415075405015927154803595159e-01,
+5.640701382848029660713842900902e-01,
+5.729197535617855090927567266263e-01,
+5.816917396346224825206107537254e-01,
+5.903874466021763746419167081236e-01,
+5.990081896460833993816000244617e-01,
+6.075552502245417955010851527911e-01,
+6.160298772155140196475659281967e-01,
+6.244332880118935010425387440547e-01,
+6.327666695710378295457864685036e-01,
+6.410311794209312910556013344054e-01,
+6.492279466251098188908399699053e-01,
+6.573580727083600301418900232459e-01,
+6.654226325450904489500926100067e-01,
+6.734226752121667202979603888010e-01,
+6.813592248079030689480715595681e-01,
+6.892332812388089803249143378146e-01,
};
306 /* INDENT ON */
307 
308 static double
309 large_gam(double x) {
310 	double ss, zz, z, t1, t2, w, y, u;
311 	unsigned lx;
312 	int k, ix, j, m;
313 
314 	ix = __HI(x);
315 	lx = __LO(x);
316 	m = (ix >> 20) - 0x3ff;			/* exponent of x, range:3-5 */
317 	ix = (ix & 0x000fffff) | 0x3ff00000;	/* y = scale x to [1,2] */
318 	__HI(y) = ix;
319 	__LO(y) = lx;
320 	__HI(z) = (ix & 0xffffc000) | 0x2000;	/* z[j]=1+j/64+1/128 */
321 	__LO(z) = 0;
322 	j = (ix >> 14) & 0x3f;
323 	t1 = y + z;
324 	t2 = y - z;
325 	u = t2 / t1;
326 	ss = T1[m - 3] + T2[j] + u * (two + A1 * (u * u));
327 							/* ss = log(x)-1 */
328 	/*
329 	 * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
330 	 * where ss = log(x) - 1
331 	 */
332 	z = one / x;
333 	zz = z * z;
334 	w = ((x - half) * ss + hln2pi) + z * (GP0 + zz * GP1 + (zz * zz) * GP2);
335 	k = (int) (w * invln2_32 + half);
336 
337 	/* compute the exponential of w */
338 	j = k & 0x1f;
339 	m = k >> 5;
340 	z = w - (double) k *ln2_32;
341 	zz = S[j] * (one + z + (z * z) * (Et1 + z * Et2));
342 	__HI(zz) += m << 20;
343 	return (zz);
344 }
/* INDENT OFF */
/*
 * kpsin(x) = sin(pi*x)/pi, approximated by the odd polynomial
 *	x + ks[0]*x^3 + ks[1]*x^5 + ks[2]*x^7 + ks[3]*x^9
 */
static const double ks[] = {
-1.64493404985645811354476665052005342839447790544e+0000,
+8.11740794458351064092797249069438269367389272270e-0001,
-1.90703144603551216933075809162889536878854055202e-0001,
+2.55742333994264563281155312271481108635575331201e-0002,
};
/* INDENT ON */

/*
 * Evaluate the ks[] polynomial at x.  tgammaf() calls this with the
 * fractional part of -x (or one minus it) when reflecting a negative
 * argument.
 */
static double
kpsin(double x)
{
	const double xsq = x * x;

	return (x + (x * xsq) * ((ks[0] + xsq * ks[1]) +
	    (xsq * xsq) * (ks[2] + xsq * ks[3])));
}
367 
/* INDENT OFF */
/*
 * kpcos(x) = cos(pi*x)/pi, approximated by the even polynomial
 *	kc[0] + kc[1]*x^2 + kc[2]*x^4 + kc[3]*x^6
 */
static const double kc[] = {
+3.18309886183790671537767526745028724068919291480e-0001,
-1.57079581447762568199467875065854538626594937791e+0000,
+1.29183528092558692844073004029568674027807393862e+0000,
-4.20232949771307685981015914425195471602739075537e-0001,
};
/* INDENT ON */

/*
 * Evaluate the kc[] polynomial at x.  tgammaf() calls this with
 * 0.5 - frac(-x) for fractional parts near one half.
 */
static double
kpcos(double x)
{
	const double xsq = x * x;

	return (kc[0] + xsq * (kc[1] + xsq * kc[2] + (xsq * xsq) * kc[3]));
}
389 
/* INDENT OFF */
/*
 * Offsets that gam_n() subtracts from its fractional argument before
 * calling GT1(), GT2() and GT3() respectively.
 * (1 + t0z1 = 1.134861805732790769689793935774652917006)
 */
static const double t0z1 = 0.134861805732790769689793935774652917006;
static const double t0z2 = 0.461632144968362341262659542325721328468;
static const double t0z3 = 0.819773101100500601787868704921606996312;
/* INDENT ON */
397 
398 /*
399  * gamma(x+i) for 0 <= x < 1
400  */
401 static double
402 gam_n(int i, double x) {
403 	double rr = 0.0L, yy;
404 	double z1, z2;
405 
406 	/* compute yy = gamma(x+1) */
407 	if (x > 0.2845) {
408 		if (x > 0.6374)
409 			yy = GT3(x - t0z3);
410 		else
411 			yy = GT2(x - t0z2);
412 	} else
413 		yy = GT1(x - t0z1);
414 
415 	/* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
416 	switch (i) {
417 	case 0:		/* yy/x */
418 		rr = yy / x;
419 		break;
420 	case 1:		/* yy */
421 		rr = yy;
422 		break;
423 	case 2:		/* (x+1)*yy */
424 		rr = (x + one) * yy;
425 		break;
426 	case 3:		/* (x+2)*(x+1)*yy */
427 		rr = (x + one) * (x + two) * yy;
428 		break;
429 
430 	case 4:		/* (x+1)*(x+3)*(x+2)*yy */
431 		rr = (x + one) * (x + two) * ((x + 3.0) * yy);
432 		break;
433 	case 5:		/* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
434 		z1 = (x + two) * (x + 3.0) * yy;
435 		z2 = (x + one) * (x + 4.0);
436 		rr = z1 * z2;
437 		break;
438 	case 6:		/* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
439 		z1 = (x + two) * (x + 3.0);
440 		z2 = (x + 5.0) * yy;
441 		rr = z1 * (z1 - two) * z2;
442 		break;
443 	case 7:		/* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
444 		z1 = (x + two) * (x + 3.0);
445 		z2 = (x + 5.0) * (x + 6.0) * yy;
446 		rr = z1 * (z1 - two) * z2;
447 		break;
448 	}
449 	return (rr);
450 }
451 
452 float
453 tgammaf(float xf) {
454 	float zf;
455 	double ss, ww;
456 	double x, y, z;
457 	int i, j, k, ix, hx, xk;
458 
459 	hx = *(int *) &xf;
460 	ix = hx & 0x7fffffff;
461 
462 	x = (double) xf;
463 	if (ix < 0x33800000)
464 		return (1.0F / xf);	/* |x| < 2**-24 */
465 
466 	if (ix >= 0x7f800000)
467 		return (xf * ((hx < 0)? 0.0F : xf)); /* +-Inf or NaN */
468 
469 	if (hx > 0x420C290F) 	/* x > 35.040096283... overflow */
470 		return (float)(x / tiny);
471 
472 	if (hx >= 0x41000000)	/* x >= 8 */
473 		return ((float) large_gam(x));
474 
475 	if (hx > 0) {		/* 0 < x < 8 */
476 		i = (int) xf;
477 		return ((float) gam_n(i, x - (double) i));
478 	}
479 
480 	/* negative x */
481 	/* INDENT OFF */
482 	/*
483 	 * compute xk =
484 	 *	-2 ... x is an even int (-inf is considered even)
485 	 *	-1 ... x is an odd int
486 	 *	+0 ... x is not an int but chopped to an even int
487 	 *	+1 ... x is not an int but chopped to an odd int
488 	 */
489 	/* INDENT ON */
490 	xk = 0;
491 	if (ix >= 0x4b000000) {
492 		if (ix > 0x4b000000)
493 			xk = -2;
494 		else
495 			xk = -2 + (ix & 1);
496 	} else if (ix >= 0x3f800000) {
497 		k = (ix >> 23) - 0x7f;
498 		j = ix >> (23 - k);
499 		if ((j << (23 - k)) == ix)
500 			xk = -2 + (j & 1);
501 		else
502 			xk = j & 1;
503 	}
504 	if (xk < 0) {
505 		/* 0/0 invalid NaN, ideally gamma(-n)= (-1)**(n+1) * inf */
506 		zf = xf - xf;
507 		return (zf / zf);
508 	}
509 
510 	/* negative underflow thresold */
511 	if (ix > 0x4224000B) {	/* x < -(41+11ulp) */
512 		if (xk == 0)
513 			z = -tiny;
514 		else
515 			z = tiny;
516 		return ((float)z);
517 	}
518 
519 	/* INDENT OFF */
520 	/* now compute gamma(x) by  -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */
521 	/*
522 	 * First compute ss = -sin(pi*y)/pi , so that
523 	 * gamma(x) = 1/(ss*gamma(1+y))
524 	 */
525 	/* INDENT ON */
526 	y = -x;
527 	j = (int) y;
528 	z = y - (double) j;
529 	if (z > 0.3183098861837906715377675)
530 		if (z > 0.6816901138162093284622325)
531 			ss = kpsin(one - z);
532 		else
533 			ss = kpcos(0.5 - z);
534 	else
535 		ss = kpsin(z);
536 	if (xk == 0)
537 		ss = -ss;
538 
539 	/* Then compute ww = gamma(1+y)  */
540 	if (j < 7)
541 		ww = gam_n(j + 1, z);
542 	else
543 		ww = large_gam(y + one);
544 
545 	/* return 1/(ss*ww) */
546 	return ((float) (one / (ww * ss)));
547 }
548