1 /* 2 * Implementation of the true gamma function (as opposed to lgamma) 3 * for 128-bit long double. 4 * 5 * Copyright (c) 2006-2024, Arm Limited. 6 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 */ 8 9 /* 10 * This module implements the float128 gamma function under the name 11 * tgamma128. It's expected to be suitable for integration into system 12 * maths libraries under the standard name tgammal, if long double is 13 * 128-bit. Such a library will probably want to check the error 14 * handling and optimize the initial process of extracting the 15 * exponent, which is done here by simple and portable (but 16 * potentially slower) methods. 17 */ 18 19 #include <float.h> 20 #include <math.h> 21 #include <stdbool.h> 22 #include <stddef.h> 23 24 /* Only binary128 format is supported. */ 25 #if LDBL_MANT_DIG == 113 26 27 #include "tgamma128.h" 28 29 #define lenof(x) (sizeof(x)/sizeof(*(x))) 30 31 /* 32 * Helper routine to evaluate a polynomial via Horner's rule 33 */ 34 static long double poly(const long double *coeffs, size_t n, long double x) 35 { 36 long double result = coeffs[--n]; 37 38 while (n > 0) 39 result = (result * x) + coeffs[--n]; 40 41 return result; 42 } 43 44 /* 45 * Compute sin(pi*x) / pi, for use in the reflection formula that 46 * relates gamma(-x) and gamma(x). 47 */ 48 static long double sin_pi_x_over_pi(long double x) 49 { 50 int quo; 51 long double fracpart = remquol(x, 0.5L, &quo); 52 53 long double sign = 1.0L; 54 if (quo & 2) 55 sign = -sign; 56 quo &= 1; 57 58 if (quo == 0 && fabsl(fracpart) < 0x1.p-58L) { 59 /* For numbers this size, sin(pi*x) is so close to pi*x that 60 * sin(pi*x)/pi is indistinguishable from x in float128 */ 61 return sign * fracpart; 62 } 63 64 if (quo == 0) { 65 return sign * sinl(pi*fracpart) / pi; 66 } else { 67 return sign * cosl(pi*fracpart) / pi; 68 } 69 } 70 71 /* Return tgamma(x) on the assumption that x >= 8. */ 72 static long double tgamma_large(long double x, 73 bool negative, long double negadjust) 74 { 75 /* 76 * In this range we compute gamma(x) as x^(x-1/2) * e^-x * K, 77 * where K is a correction factor computed as a polynomial in 1/x. 78 * 79 * (Vaguely inspired by the form of the Lanczos approximation, but 80 * I tried the Lanczos approximation itself and it suffers badly 81 * from big cancellation leading to loss of significance.) 82 */ 83 long double t = 1/x; 84 long double p = poly(coeffs_large, lenof(coeffs_large), t); 85 86 /* 87 * To avoid overflow in cases where x^(x-0.5) does overflow 88 * but gamma(x) does not, we split x^(x-0.5) in half and 89 * multiply back up _after_ multiplying the shrinking factor 90 * of exp(-(x-0.5)). 91 * 92 * Note that computing x-0.5 and (x-0.5)/2 is exact for the 93 * relevant range of x, so the only sources of error are pow 94 * and exp themselves, plus the multiplications. 95 */ 96 long double powhalf = powl(x, (x-0.5L)/2.0L); 97 long double expret = expl(-(x-0.5L)); 98 99 if (!negative) { 100 return (expret * powhalf) * powhalf * p; 101 } else { 102 /* 103 * Apply the reflection formula as commented below, but 104 * carefully: negadjust has magnitude less than 1, so it can 105 * turn a case where gamma(+x) would overflow into a case 106 * where gamma(-x) doesn't underflow. Not only that, but the 107 * FP format has greater range in the tiny domain due to 108 * denormals. For both reasons, it's not good enough to 109 * compute the positive result and then adjust it. 110 */ 111 long double ret = 1 / ((expret * powhalf) * (x * negadjust) * p); 112 return ret / powhalf; 113 } 114 } 115 116 /* Return tgamma(x) on the assumption that 0 <= x < 1/32. */ 117 static long double tgamma_tiny(long double x, 118 bool negative, long double negadjust) 119 { 120 /* 121 * For x near zero, we use a polynomial approximation to 122 * g = 1/(x*gamma(x)), and then return 1/(g*x). 123 */ 124 long double g = poly(coeffs_tiny, lenof(coeffs_tiny), x); 125 if (!negative) 126 return 1.0L / (g*x); 127 else 128 return g / negadjust; 129 } 130 131 /* Return tgamma(x) on the assumption that 0 <= x < 2^-113. */ 132 static long double tgamma_ultratiny(long double x, bool negative, 133 long double negadjust) 134 { 135 /* On this interval, gamma can't even be distinguished from 1/x, 136 * so we skip the polynomial evaluation in tgamma_tiny, partly to 137 * save time and partly to avoid the tiny intermediate values 138 * setting the underflow exception flag. */ 139 if (!negative) 140 return 1.0L / x; 141 else 142 return 1.0L / negadjust; 143 } 144 145 /* Return tgamma(x) on the assumption that 1 <= x <= 2. */ 146 static long double tgamma_central(long double x) 147 { 148 /* 149 * In this central interval, our strategy is to finding the 150 * difference between x and the point where gamma has a minimum, 151 * and approximate based on that. 152 */ 153 154 /* The difference between the input x and the minimum x. The first 155 * subtraction is expected to be exact, since x and min_hi have 156 * the same exponent (unless x=2, in which case it will still be 157 * exact). */ 158 long double t = (x - min_x_hi) - min_x_lo; 159 160 /* 161 * Now use two different polynomials for the intervals [1,m] and 162 * [m,2]. 163 */ 164 long double p; 165 if (t < 0) 166 p = poly(coeffs_central_neg, lenof(coeffs_central_neg), -t); 167 else 168 p = poly(coeffs_central_pos, lenof(coeffs_central_pos), t); 169 170 return (min_y_lo + p * (t*t)) + min_y_hi; 171 } 172 173 long double tgamma128(long double x) 174 { 175 /* 176 * Start by extracting the number's sign and exponent, and ruling 177 * out cases of non-normalized numbers. 178 * 179 * For an implementation integrated into a system libm, it would 180 * almost certainly be quicker to do this by direct bitwise access 181 * to the input float128 value, using whatever is the local idiom 182 * for knowing its endianness. 183 * 184 * Integration into a system libc may also need to worry about 185 * setting errno, if that's the locally preferred way to report 186 * math.h errors. 187 */ 188 int sign = signbit(x); 189 int exponent; 190 switch (fpclassify(x)) { 191 case FP_NAN: 192 return x+x; /* propagate QNaN, make SNaN throw an exception */ 193 case FP_ZERO: 194 return 1/x; /* divide by zero on purpose to indicate a pole */ 195 case FP_INFINITE: 196 if (sign) { 197 return x-x; /* gamma(-inf) has indeterminate sign, so provoke an 198 * IEEE invalid operation exception to indicate that */ 199 } 200 return x; /* but gamma(+inf) is just +inf with no error */ 201 case FP_SUBNORMAL: 202 exponent = -16384; 203 break; 204 default: 205 frexpl(x, &exponent); 206 exponent--; 207 break; 208 } 209 210 bool negative = false; 211 long double negadjust = 0.0L; 212 213 if (sign) { 214 /* 215 * Euler's reflection formula is 216 * 217 * gamma(1-x) gamma(x) = pi/sin(pi*x) 218 * 219 * pi 220 * => gamma(x) = -------------------- 221 * gamma(1-x) sin(pi*x) 222 * 223 * But computing 1-x is going to lose a lot of accuracy when x 224 * is very small, so instead we transform using the recurrence 225 * gamma(t+1)=t gamma(t). Setting t=-x, this gives us 226 * gamma(1-x) = -x gamma(-x), so we now have 227 * 228 * pi 229 * gamma(x) = ---------------------- 230 * -x gamma(-x) sin(pi*x) 231 * 232 * which relates gamma(x) to gamma(-x), which is much nicer, 233 * since x can be turned into -x without rounding. 234 */ 235 negadjust = sin_pi_x_over_pi(x); 236 negative = true; 237 x = -x; 238 239 /* 240 * Now the ultimate answer we want is 241 * 242 * 1 / (gamma(x) * x * negadjust) 243 * 244 * where x is the positive value we've just turned it into. 245 * 246 * For some of the cases below, we'll compute gamma(x) 247 * normally and then compute this adjusted value afterwards. 248 * But for others, we can implement the reciprocal operation 249 * in this formula by _avoiding_ an inversion that the 250 * sub-case was going to do anyway. 251 */ 252 253 if (negadjust == 0) { 254 /* 255 * Special case for negative integers. Applying the 256 * reflection formula would cause division by zero, but 257 * standards would prefer we treat this error case as an 258 * invalid operation and return NaN instead. (Possibly 259 * because otherwise you'd have to decide which sign of 260 * infinity to return, and unlike the x=0 case, there's no 261 * sign of zero available to disambiguate.) 262 */ 263 return negadjust / negadjust; 264 } 265 } 266 267 /* 268 * Split the positive domain into various cases. For cases where 269 * we do the negative-number adjustment the usual way, we'll leave 270 * the answer in 'g' and drop out of the if statement. 271 */ 272 long double g; 273 274 if (exponent >= 11) { 275 /* 276 * gamma of any positive value this large overflows, and gamma 277 * of any negative value underflows. 278 */ 279 if (!negative) { 280 long double huge = 0x1p+12288L; 281 return huge * huge; /* provoke an overflow */ 282 } else { 283 long double tiny = 0x1p-12288L; 284 return tiny * tiny * negadjust; /* underflow, of the right sign */ 285 } 286 } else if (exponent >= 3) { 287 /* Negative-number adjustment happens inside here */ 288 return tgamma_large(x, negative, negadjust); 289 } else if (exponent < -113) { 290 /* Negative-number adjustment happens inside here */ 291 return tgamma_ultratiny(x, negative, negadjust); 292 } else if (exponent < -5) { 293 /* Negative-number adjustment happens inside here */ 294 return tgamma_tiny(x, negative, negadjust); 295 } else if (exponent == 0) { 296 g = tgamma_central(x); 297 } else if (exponent < 0) { 298 /* 299 * For x in [1/32,1) we range-reduce upwards to the interval 300 * [1,2), using the inverse of the normal recurrence formula: 301 * gamma(x) = gamma(x+1)/x. 302 */ 303 g = tgamma_central(1+x) / x; 304 } else { 305 /* 306 * For x in [2,8) we range-reduce downwards to the interval 307 * [1,2) by repeated application of the recurrence formula. 308 * 309 * Actually multiplying (x-1) by (x-2) by (x-3) and so on 310 * would introduce multiple ULPs of rounding error. We can get 311 * better accuracy by writing x = (k+1/2) + t, where k is an 312 * integer and |t|<1/2, and expanding out the obvious factor 313 * (x-1)(x-2)...(x-k+1) as a polynomial in t. 314 */ 315 long double mult; 316 int i = x; 317 if (i == 2) { /* x in [2,3) */ 318 mult = (x-1); 319 } else { 320 long double t = x - (i + 0.5L); 321 switch (i) { 322 /* E.g. for x=3.5+t, we want 323 * (x-1)(x-2) = (2.5+t)(1.5+t) = 3.75 + 4t + t^2 */ 324 case 3: 325 mult = 3.75L+t*(4.0L+t); 326 break; 327 case 4: 328 mult = 13.125L+t*(17.75L+t*(7.5L+t)); 329 break; 330 case 5: 331 mult = 59.0625L+t*(93.0L+t*(51.50L+t*(12.0L+t))); 332 break; 333 case 6: 334 mult = 324.84375L+t*(570.5625L+t*(376.250L+t*( 335 117.5L+t*(17.5L+t)))); 336 break; 337 case 7: 338 mult = 2111.484375L+t*(4033.5L+t*(3016.1875L+t*( 339 1140.0L+t*(231.25L+t*(24.0L+t))))); 340 break; 341 default: 342 __builtin_unreachable(); 343 } 344 } 345 346 g = tgamma_central(x - (i-1)) * mult; 347 } 348 349 if (!negative) { 350 /* Positive domain: return g unmodified */ 351 return g; 352 } else { 353 /* Negative domain: apply the reflection formula as commented above */ 354 return 1.0L / (g * x * negadjust); 355 } 356 } 357 358 #endif 359