xref: /freebsd/lib/msun/ld80/e_powl.c (revision 3468ddce672350a6d974b4f0fdf3f4a56eaab0a0)
1 /*-
2  * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /*							powl.c
18  *
19  *	Power function, long double precision
20  *
21  *
22  *
23  * SYNOPSIS:
24  *
25  * long double x, y, z, powl();
26  *
27  * z = powl( x, y );
28  *
29  *
30  *
31  * DESCRIPTION:
32  *
33  * Computes x raised to the yth power.  Analytically,
34  *
35  *      x**y  =  exp( y log(x) ).
36  *
37  * Following Cody and Waite, this program uses a lookup table
38  * of 2**-i/32 and pseudo extended precision arithmetic to
39  * obtain several extra bits of accuracy in both the logarithm
40  * and the exponential.
41  *
42  *
43  *
44  * ACCURACY:
45  *
46  * The relative error of pow(x,y) can be estimated
47  * by   y dl ln(2),   where dl is the absolute error of
48  * the internally computed base 2 logarithm.  At the ends
49  * of the approximation interval the logarithm equal 1/32
50  * and its relative error is about 1 lsb = 1.1e-19.  Hence
51  * the predicted relative error in the result is 2.3e-21 y .
52  *
53  *                      Relative error:
54  * arithmetic   domain     # trials      peak         rms
55  *
56  *    IEEE     +-1000       40000      2.8e-18      3.7e-19
57  * .001 < x < 1000, with log(x) uniformly distributed.
58  * -1000 < y < 1000, y uniformly distributed.
59  *
60  *    IEEE     0,8700       60000      6.5e-18      1.0e-18
61  * 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed.
62  *
63  *
64  * ERROR MESSAGES:
65  *
66  *   message         condition      value returned
67  * pow overflow     x**y > MAXNUM      INFINITY
68  * pow underflow   x**y < 1/MAXNUM       0.0
69  * pow domain      x<0 and y noninteger  0.0
70  *
71  */
72 
73 #include <sys/cdefs.h>
74 __FBSDID("$FreeBSD$");
75 
76 #include <float.h>
77 #include <math.h>
78 
79 #include "math_private.h"
80 
81 /* Table size */
82 #define NXT 32
83 /* log2(Table size) */
84 #define LNXT 5
85 
86 /* log(1+x) =  x - .5x^2 + x^3 *  P(z)/Q(z)
87  * on the domain  2^(-1/32) - 1  <=  x  <=  2^(1/32) - 1
88  */
89 static long double P[] = {
90  8.3319510773868690346226E-4L,
91  4.9000050881978028599627E-1L,
92  1.7500123722550302671919E0L,
93  1.4000100839971580279335E0L,
94 };
95 static long double Q[] = {
96 /* 1.0000000000000000000000E0L,*/
97  5.2500282295834889175431E0L,
98  8.4000598057587009834666E0L,
99  4.2000302519914740834728E0L,
100 };
101 /* A[i] = 2^(-i/32), rounded to IEEE long double precision.
102  * If i is even, A[i] + B[i/2] gives additional accuracy.
103  */
104 static long double A[33] = {
105  1.0000000000000000000000E0L,
106  9.7857206208770013448287E-1L,
107  9.5760328069857364691013E-1L,
108  9.3708381705514995065011E-1L,
109  9.1700404320467123175367E-1L,
110  8.9735453750155359320742E-1L,
111  8.7812608018664974155474E-1L,
112  8.5930964906123895780165E-1L,
113  8.4089641525371454301892E-1L,
114  8.2287773907698242225554E-1L,
115  8.0524516597462715409607E-1L,
116  7.8799042255394324325455E-1L,
117  7.7110541270397041179298E-1L,
118  7.5458221379671136985669E-1L,
119  7.3841307296974965571198E-1L,
120  7.2259040348852331001267E-1L,
121  7.0710678118654752438189E-1L,
122  6.9195494098191597746178E-1L,
123  6.7712777346844636413344E-1L,
124  6.6261832157987064729696E-1L,
125  6.4841977732550483296079E-1L,
126  6.3452547859586661129850E-1L,
127  6.2092890603674202431705E-1L,
128  6.0762367999023443907803E-1L,
129  5.9460355750136053334378E-1L,
130  5.8186242938878875689693E-1L,
131  5.6939431737834582684856E-1L,
132  5.5719337129794626814472E-1L,
133  5.4525386633262882960438E-1L,
134  5.3357020033841180906486E-1L,
135  5.2213689121370692017331E-1L,
136  5.1094857432705833910408E-1L,
137  5.0000000000000000000000E-1L,
138 };
139 static long double B[17] = {
140  0.0000000000000000000000E0L,
141  2.6176170809902549338711E-20L,
142 -1.0126791927256478897086E-20L,
143  1.3438228172316276937655E-21L,
144  1.2207982955417546912101E-20L,
145 -6.3084814358060867200133E-21L,
146  1.3164426894366316434230E-20L,
147 -1.8527916071632873716786E-20L,
148  1.8950325588932570796551E-20L,
149  1.5564775779538780478155E-20L,
150  6.0859793637556860974380E-21L,
151 -2.0208749253662532228949E-20L,
152  1.4966292219224761844552E-20L,
153  3.3540909728056476875639E-21L,
154 -8.6987564101742849540743E-22L,
155 -1.2327176863327626135542E-20L,
156  0.0000000000000000000000E0L,
157 };
158 
159 /* 2^x = 1 + x P(x),
160  * on the interval -1/32 <= x <= 0
161  */
162 static long double R[] = {
163  1.5089970579127659901157E-5L,
164  1.5402715328927013076125E-4L,
165  1.3333556028915671091390E-3L,
166  9.6181291046036762031786E-3L,
167  5.5504108664798463044015E-2L,
168  2.4022650695910062854352E-1L,
169  6.9314718055994530931447E-1L,
170 };
171 
172 #define douba(k) A[k]
173 #define doubb(k) B[k]
174 #define MEXP (NXT*16384.0L)
175 /* The following if denormal numbers are supported, else -MEXP: */
176 #define MNEXP (-NXT*(16384.0L+64.0L))
177 /* log2(e) - 1 */
178 #define LOG2EA 0.44269504088896340735992L
179 
180 #define F W
181 #define Fa Wa
182 #define Fb Wb
183 #define G W
184 #define Ga Wa
185 #define Gb u
186 #define H W
187 #define Ha Wb
188 #define Hb Wb
189 
190 static const long double MAXLOGL = 1.1356523406294143949492E4L;
191 static const long double MINLOGL = -1.13994985314888605586758E4L;
192 static const long double LOGE2L = 6.9314718055994530941723E-1L;
193 static volatile long double z;
194 static long double w, W, Wa, Wb, ya, yb, u;
195 static const long double huge = 0x1p10000L;
196 #if 0 /* XXX Prevent gcc from erroneously constant folding this. */
197 static const long double twom10000 = 0x1p-10000L;
198 #else
199 static volatile long double twom10000 = 0x1p-10000L;
200 #endif
201 
202 static long double reducl( long double );
203 static long double powil ( long double, int );
204 
205 long double
206 powl(long double x, long double y)
207 {
208 /* double F, Fa, Fb, G, Ga, Gb, H, Ha, Hb */
209 int i, nflg, iyflg, yoddint;
210 long e;
211 
212 if( y == 0.0L )
213 	return( 1.0L );
214 
215 if( x == 1.0L )
216 	return( 1.0L );
217 
218 if( isnan(x) )
219 	return ( nan_mix(x, y) );
220 if( isnan(y) )
221 	return ( nan_mix(x, y) );
222 
223 if( y == 1.0L )
224 	return( x );
225 
226 if( !isfinite(y) && x == -1.0L )
227 	return( 1.0L );
228 
229 if( y >= LDBL_MAX )
230 	{
231 	if( x > 1.0L )
232 		return( INFINITY );
233 	if( x > 0.0L && x < 1.0L )
234 		return( 0.0L );
235 	if( x < -1.0L )
236 		return( INFINITY );
237 	if( x > -1.0L && x < 0.0L )
238 		return( 0.0L );
239 	}
240 if( y <= -LDBL_MAX )
241 	{
242 	if( x > 1.0L )
243 		return( 0.0L );
244 	if( x > 0.0L && x < 1.0L )
245 		return( INFINITY );
246 	if( x < -1.0L )
247 		return( 0.0L );
248 	if( x > -1.0L && x < 0.0L )
249 		return( INFINITY );
250 	}
251 if( x >= LDBL_MAX )
252 	{
253 	if( y > 0.0L )
254 		return( INFINITY );
255 	return( 0.0L );
256 	}
257 
258 w = floorl(y);
259 /* Set iyflg to 1 if y is an integer.  */
260 iyflg = 0;
261 if( w == y )
262 	iyflg = 1;
263 
264 /* Test for odd integer y.  */
265 yoddint = 0;
266 if( iyflg )
267 	{
268 	ya = fabsl(y);
269 	ya = floorl(0.5L * ya);
270 	yb = 0.5L * fabsl(w);
271 	if( ya != yb )
272 		yoddint = 1;
273 	}
274 
275 if( x <= -LDBL_MAX )
276 	{
277 	if( y > 0.0L )
278 		{
279 		if( yoddint )
280 			return( -INFINITY );
281 		return( INFINITY );
282 		}
283 	if( y < 0.0L )
284 		{
285 		if( yoddint )
286 			return( -0.0L );
287 		return( 0.0 );
288 		}
289 	}
290 
291 
292 nflg = 0;	/* flag = 1 if x<0 raised to integer power */
293 if( x <= 0.0L )
294 	{
295 	if( x == 0.0L )
296 		{
297 		if( y < 0.0 )
298 			{
299 			if( signbit(x) && yoddint )
300 				return( -INFINITY );
301 			return( INFINITY );
302 			}
303 		if( y > 0.0 )
304 			{
305 			if( signbit(x) && yoddint )
306 				return( -0.0L );
307 			return( 0.0 );
308 			}
309 		if( y == 0.0L )
310 			return( 1.0L );  /*   0**0   */
311 		else
312 			return( 0.0L );  /*   0**y   */
313 		}
314 	else
315 		{
316 		if( iyflg == 0 )
317 			return (x - x) / (x - x); /* (x<0)**(non-int) is NaN */
318 		nflg = 1;
319 		}
320 	}
321 
322 /* Integer power of an integer.  */
323 
324 if( iyflg )
325 	{
326 	i = w;
327 	w = floorl(x);
328 	if( (w == x) && (fabsl(y) < 32768.0) )
329 		{
330 		w = powil( x, (int) y );
331 		return( w );
332 		}
333 	}
334 
335 
336 if( nflg )
337 	x = fabsl(x);
338 
339 /* separate significand from exponent */
340 x = frexpl( x, &i );
341 e = i;
342 
343 /* find significand in antilog table A[] */
344 i = 1;
345 if( x <= douba(17) )
346 	i = 17;
347 if( x <= douba(i+8) )
348 	i += 8;
349 if( x <= douba(i+4) )
350 	i += 4;
351 if( x <= douba(i+2) )
352 	i += 2;
353 if( x >= douba(1) )
354 	i = -1;
355 i += 1;
356 
357 
358 /* Find (x - A[i])/A[i]
359  * in order to compute log(x/A[i]):
360  *
361  * log(x) = log( a x/a ) = log(a) + log(x/a)
362  *
363  * log(x/a) = log(1+v),  v = x/a - 1 = (x-a)/a
364  */
365 x -= douba(i);
366 x -= doubb(i/2);
367 x /= douba(i);
368 
369 
370 /* rational approximation for log(1+v):
371  *
372  * log(1+v)  =  v  -  v**2/2  +  v**3 P(v) / Q(v)
373  */
374 z = x*x;
375 w = x * ( z * __polevll( x, P, 3 ) / __p1evll( x, Q, 3 ) );
376 w = w - ldexpl( z, -1 );   /*  w - 0.5 * z  */
377 
378 /* Convert to base 2 logarithm:
379  * multiply by log2(e) = 1 + LOG2EA
380  */
381 z = LOG2EA * w;
382 z += w;
383 z += LOG2EA * x;
384 z += x;
385 
386 /* Compute exponent term of the base 2 logarithm. */
387 w = -i;
388 w = ldexpl( w, -LNXT );	/* divide by NXT */
389 w += e;
390 /* Now base 2 log of x is w + z. */
391 
392 /* Multiply base 2 log by y, in extended precision. */
393 
394 /* separate y into large part ya
395  * and small part yb less than 1/NXT
396  */
397 ya = reducl(y);
398 yb = y - ya;
399 
400 /* (w+z)(ya+yb)
401  * = w*ya + w*yb + z*y
402  */
403 F = z * y  +  w * yb;
404 Fa = reducl(F);
405 Fb = F - Fa;
406 
407 G = Fa + w * ya;
408 Ga = reducl(G);
409 Gb = G - Ga;
410 
411 H = Fb + Gb;
412 Ha = reducl(H);
413 w = ldexpl( Ga+Ha, LNXT );
414 
415 /* Test the power of 2 for overflow */
416 if( w > MEXP )
417 	return (huge * huge);		/* overflow */
418 
419 if( w < MNEXP )
420 	return (twom10000 * twom10000);	/* underflow */
421 
422 e = w;
423 Hb = H - Ha;
424 
425 if( Hb > 0.0L )
426 	{
427 	e += 1;
428 	Hb -= (1.0L/NXT);  /*0.0625L;*/
429 	}
430 
431 /* Now the product y * log2(x)  =  Hb + e/NXT.
432  *
433  * Compute base 2 exponential of Hb,
434  * where -0.0625 <= Hb <= 0.
435  */
436 z = Hb * __polevll( Hb, R, 6 );  /*    z  =  2**Hb - 1    */
437 
438 /* Express e/NXT as an integer plus a negative number of (1/NXT)ths.
439  * Find lookup table entry for the fractional power of 2.
440  */
441 if( e < 0 )
442 	i = 0;
443 else
444 	i = 1;
445 i = e/NXT + i;
446 e = NXT*i - e;
447 w = douba( e );
448 z = w * z;      /*    2**-e * ( 1 + (2**Hb-1) )    */
449 z = z + w;
450 z = ldexpl( z, i );  /* multiply by integer power of 2 */
451 
452 if( nflg )
453 	{
454 /* For negative x,
455  * find out if the integer exponent
456  * is odd or even.
457  */
458 	w = ldexpl( y, -1 );
459 	w = floorl(w);
460 	w = ldexpl( w, 1 );
461 	if( w != y )
462 		z = -z; /* odd exponent */
463 	}
464 
465 return( z );
466 }
467 
468 
469 /* Find a multiple of 1/NXT that is within 1/NXT of x. */
470 static long double
471 reducl(long double x)
472 {
473 long double t;
474 
475 t = ldexpl( x, LNXT );
476 t = floorl( t );
477 t = ldexpl( t, -LNXT );
478 return(t);
479 }
480 
481 /*							powil.c
482  *
483  *	Real raised to integer power, long double precision
484  *
485  *
486  *
487  * SYNOPSIS:
488  *
489  * long double x, y, powil();
490  * int n;
491  *
492  * y = powil( x, n );
493  *
494  *
495  *
496  * DESCRIPTION:
497  *
498  * Returns argument x raised to the nth power.
499  * The routine efficiently decomposes n as a sum of powers of
500  * two. The desired power is a product of two-to-the-kth
501  * powers of x.  Thus to compute the 32767 power of x requires
502  * 28 multiplications instead of 32767 multiplications.
503  *
504  *
505  *
506  * ACCURACY:
507  *
508  *
509  *                      Relative error:
510  * arithmetic   x domain   n domain  # trials      peak         rms
511  *    IEEE     .001,1000  -1022,1023  50000       4.3e-17     7.8e-18
512  *    IEEE        1,2     -1022,1023  20000       3.9e-17     7.6e-18
513  *    IEEE     .99,1.01     0,8700    10000       3.6e-16     7.2e-17
514  *
515  * Returns MAXNUM on overflow, zero on underflow.
516  *
517  */
518 
519 static long double
520 powil(long double x, int nn)
521 {
522 long double ww, y;
523 long double s;
524 int n, e, sign, asign, lx;
525 
526 if( x == 0.0L )
527 	{
528 	if( nn == 0 )
529 		return( 1.0L );
530 	else if( nn < 0 )
531 		return( LDBL_MAX );
532 	else
533 		return( 0.0L );
534 	}
535 
536 if( nn == 0 )
537 	return( 1.0L );
538 
539 
540 if( x < 0.0L )
541 	{
542 	asign = -1;
543 	x = -x;
544 	}
545 else
546 	asign = 0;
547 
548 
549 if( nn < 0 )
550 	{
551 	sign = -1;
552 	n = -nn;
553 	}
554 else
555 	{
556 	sign = 1;
557 	n = nn;
558 	}
559 
560 /* Overflow detection */
561 
562 /* Calculate approximate logarithm of answer */
563 s = x;
564 s = frexpl( s, &lx );
565 e = (lx - 1)*n;
566 if( (e == 0) || (e > 64) || (e < -64) )
567 	{
568 	s = (s - 7.0710678118654752e-1L) / (s +  7.0710678118654752e-1L);
569 	s = (2.9142135623730950L * s - 0.5L + lx) * nn * LOGE2L;
570 	}
571 else
572 	{
573 	s = LOGE2L * e;
574 	}
575 
576 if( s > MAXLOGL )
577 	return (huge * huge);		/* overflow */
578 
579 if( s < MINLOGL )
580 	return (twom10000 * twom10000);	/* underflow */
581 /* Handle tiny denormal answer, but with less accuracy
582  * since roundoff error in 1.0/x will be amplified.
583  * The precise demarcation should be the gradual underflow threshold.
584  */
585 if( s < (-MAXLOGL+2.0L) )
586 	{
587 	x = 1.0L/x;
588 	sign = -sign;
589 	}
590 
591 /* First bit of the power */
592 if( n & 1 )
593 	y = x;
594 
595 else
596 	{
597 	y = 1.0L;
598 	asign = 0;
599 	}
600 
601 ww = x;
602 n >>= 1;
603 while( n )
604 	{
605 	ww = ww * ww;	/* arg to the 2-to-the-kth power */
606 	if( n & 1 )	/* if that bit is set, then include in product */
607 		y *= ww;
608 	n >>= 1;
609 	}
610 
611 if( asign )
612 	y = -y; /* odd power of negative number */
613 if( sign < 0 )
614 	y = 1.0L/y;
615 return(y);
616 }
617