xref: /illumos-gate/usr/src/lib/libm/common/C/pow.c (revision c7b6bf90757979f50e793af7ea98d3120668214f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #if defined(ELFOBJ)
31 #pragma weak pow = __pow
32 #endif
33 
34 /*
35  * pow(x,y) return x**y
36  *		      n
37  * Method:  Let x =  2   * (1+f)
38  *	1. Compute and return log2(x) in two pieces:
39  *		log2(x) = w1 + w2,
40  *	   where w1 has 24 bits trailing zero.
41  *	2. Perform y*log2(x) by simulating muti-precision arithmetic
42  *	3. Return x**y = exp2(y*log(x))
43  *
44  * Special cases:
45  *	1.  (anything) ** +-0 is 1
46  *	1'. 1 ** (anything)   is 1	(C99; 1 ** +-INF/NAN used to be NAN)
47  *	2.  (anything) ** 1   is itself
48  *	3.  (anything except 1) ** NAN is NAN ("except 1" is C99)
49  *	4.  NAN ** (anything except 0) is NAN
50  *	5.  +-(|x| > 1) **  +INF is +INF
51  *	6.  +-(|x| > 1) **  -INF is +0
52  *	7.  +-(|x| < 1) **  +INF is +0
53  *	8.  +-(|x| < 1) **  -INF is +INF
54  *	9.  -1          ** +-INF is 1	(C99; -1 ** +-INF used to be NAN)
55  *	10. +0 ** (+anything except 0, NAN)               is +0
56  *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
57  *	12. +0 ** (-anything except 0, NAN)               is +INF
58  *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
59  *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
60  *	15. +INF ** (+anything except 0,NAN) is +INF
61  *	16. +INF ** (-anything except 0,NAN) is +0
62  *	17. -INF ** (anything)  = -0 ** (-anything)
63  *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
64  *	19. (-anything except 0 and inf) ** (non-integer) is NAN
65  *
66  * Accuracy:
67  *	pow(x,y) returns x**y nearly rounded. In particular
68  *			pow(integer,integer)
69  *	always returns the correct integer provided it is representable.
70  */
71 
72 #include "libm.h"
73 #include "xpg6.h"	/* __xpg6 */
74 #define	_C99SUSv3_pow	_C99SUSv3_pow_treats_Inf_as_an_even_int
75 
76 static const double zero = 0.0, one = 1.0, two = 2.0;
77 
78 extern const double _TBL_log2_hi[], _TBL_log2_lo[];
79 static const double
80 	two53 = 9007199254740992.0,
81 	A1_hi = 2.8853900432586669921875,
82 	A1_lo = 3.8519259825035041963606002e-8,
83 	A1 = 2.885390081777926817222541963606002026086e+0000,
84 	A2 = 9.617966939207270828380543979852286255862e-0001,
85 	A3 = 5.770807680887875964868853124873696201995e-0001,
86 	B0_hi = 2.8853900432586669921875,
87 	B0_lo = 3.8519259822532793056374320585e-8,
88 	B0 = 2.885390081777926814720293056374320585689e+0000,
89 	B1 = 9.617966939259755138949202350396200257632e-0001,
90 	B2 = 5.770780163585687000782112776448797953382e-0001,
91 	B3 = 4.121985488948771523290174512461778354953e-0001,
92 	B4 = 3.207590534812432970433641789022666850193e-0001;
93 
94 static double
95 log2_x(double x, double *w) {
96 	double f, s, z, qn, h, t;
97 	int *px = (int *) &x;
98 	int *pz = (int *) &z;
99 	int i, j, ix, n;
100 
101 	n = 0;
102 	ix = px[HIWORD];
103 	if (ix >= 0x3fef03f1 && ix < 0x3ff08208) {	/* 65/63 > x > 63/65 */
104 		double f1, v;
105 		f = x - one;
106 		if (((ix - 0x3ff00000) | px[LOWORD]) == 0) {
107 			*w = zero;
108 			return (zero);		/* log2(1)= +0 */
109 		}
110 		qn = one / (two + f);
111 		s = f * qn;				/* |s|<2**-6 */
112 		v = s * s;
113 		h = (double) ((float) s);
114 		f1 = (double) ((float) f);
115 		t = qn * (((f - two * h) - h * f1) - h * (f - f1));
116 								/* s = h+t */
117 		f1 = h * B0_lo + s * (v * (B1 + v * (B2 + v * (B3 + v * B4))));
118 		t = f1 + t * B0;
119 		h *= B0_hi;
120 		s = (double) ((float) (h + t));
121 		*w = t - (s - h);
122 		return (s);
123 	}
124 	if (ix < 0x00100000) {				/* subnormal x */
125 		x *= two53;
126 		n = -53;
127 		ix = px[HIWORD];
128 	}
129 	/* LARGE N */
130 	n += ((ix + 0x1000) >> 20) - 0x3ff;
131 	ix = (ix & 0x000fffff) | 0x3ff00000;		/* scale x to [1,2] */
132 	px[HIWORD] = ix;
133 	i = ix + 0x1000;
134 	pz[HIWORD] = i & 0xffffe000;
135 	pz[LOWORD] = 0;
136 	qn = one / (x + z);
137 	f = x - z;
138 	s = f * qn;
139 	h = (double) ((float) s);
140 	t = qn * ((f - (h + h) * z) - h * f);
141 	j = (i >> 13) & 0x7f;
142 	f = s * s;
143 	t = t * A1 + h * A1_lo;
144 	t += (s * f) * (A2 + f * A3);
145 	qn = h * A1_hi;
146 	s = n + _TBL_log2_hi[j];
147 	h = qn + s;
148 	t += _TBL_log2_lo[j] - ((h - s) - qn);
149 	f = (double) ((float) (h + t));
150 	*w = t - (f - h);
151 	return (f);
152 }
153 
154 extern const double _TBL_exp2_hi[], _TBL_exp2_lo[];
155 static const double		/* poly app of 2^x-1 on [-1e-10,2^-7+1e-10] */
156 	E1 = 6.931471805599453100674958533810346197328e-0001,
157 	E2 = 2.402265069587779347846769151717493815979e-0001,
158 	E3 = 5.550410866475410512631124892773937864699e-0002,
159 	E4 = 9.618143209991026824853712740162451423355e-0003,
160 	E5 = 1.333357676549940345096774122231849082991e-0003;
161 
162 double
163 pow(double x, double y) {
164 	double z, ax;
165 	double y1, y2, w1, w2;
166 	int sbx, sby, j, k, yisint;
167 	int hx, hy, ahx, ahy;
168 	unsigned lx, ly;
169 	int *pz = (int *) &z;
170 
171 	hx = ((int *) &x)[HIWORD];
172 	lx = ((unsigned *) &x)[LOWORD];
173 	hy = ((int *) &y)[HIWORD];
174 	ly = ((unsigned *) &y)[LOWORD];
175 	ahx = hx & ~0x80000000;
176 	ahy = hy & ~0x80000000;
177 	if ((ahy | ly) == 0) {	/* y==zero  */
178 		if ((ahx | lx) == 0)
179 			z = _SVID_libm_err(x, y, 20);	/* +-0**+-0 */
180 		else if ((ahx | (((lx | -lx) >> 31) & 1)) > 0x7ff00000)
181 			z = _SVID_libm_err(x, y, 42);	/* NaN**+-0 */
182 		else
183 			z = one;			/* x**+-0 = 1 */
184 		return (z);
185 	} else if (hx == 0x3ff00000 && lx == 0 &&
186 		(__xpg6 & _C99SUSv3_pow) != 0)
187 		return (one);			/* C99: 1**anything = 1 */
188 	else if (ahx > 0x7ff00000 || (ahx == 0x7ff00000 && lx != 0) ||
189 		ahy > 0x7ff00000 || (ahy == 0x7ff00000 && ly != 0))
190 		return (x * y);	/* +-NaN return x*y; + -> * for Cheetah */
191 				/* includes Sun: 1**NaN = NaN */
192 	sbx = (unsigned) hx >> 31;
193 	sby = (unsigned) hy >> 31;
194 	ax = fabs(x);
195 
196 	/*
197 	 * determine if y is an odd int when x < 0
198 	 * yisint = 0 ... y is not an integer
199 	 * yisint = 1 ... y is an odd int
200 	 * yisint = 2 ... y is an even int
201 	 */
202 	yisint = 0;
203 	if (sbx) {
204 		if (ahy >= 0x43400000)
205 			yisint = 2;		/* even integer y */
206 		else if (ahy >= 0x3ff00000) {
207 			k = (ahy >> 20) - 0x3ff;	/* exponent */
208 			if (k > 20) {
209 				j = ly >> (52 - k);
210 				if ((j << (52 - k)) == ly)
211 					yisint = 2 - (j & 1);
212 			} else if (ly == 0) {
213 				j = ahy >> (20 - k);
214 				if ((j << (20 - k)) == ahy)
215 					yisint = 2 - (j & 1);
216 			}
217 		}
218 	}
219 	/* special value of y */
220 	if (ly == 0) {
221 		if (ahy == 0x7ff00000) {	/* y is +-inf */
222 			if (((ahx - 0x3ff00000) | lx) == 0) {
223 				if ((__xpg6 & _C99SUSv3_pow) != 0)
224 					return (one);
225 						/* C99: (-1)**+-inf = 1 */
226 				else
227 					return (y - y);
228 						/* Sun: (+-1)**+-inf = NaN */
229 			} else if (ahx >= 0x3ff00000)
230 						/* (|x|>1)**+,-inf = inf,0 */
231 				return (sby == 0 ? y : zero);
232 			else			/* (|x|<1)**-,+inf = inf,0 */
233 				return (sby != 0 ? -y : zero);
234 		}
235 		if (ahy == 0x3ff00000) {	/* y is  +-1 */
236 			if (sby != 0) {	/* y is -1 */
237 				if (x == zero)	/* divided by zero */
238 					return (_SVID_libm_err(x, y, 23));
239 				else if (ahx < 0x40000 || ((ahx - 0x40000) |
240 					lx) == 0)	/* overflow */
241 					return (_SVID_libm_err(x, y, 21));
242 				else
243 					return (one / x);
244 			} else
245 				return (x);
246 		}
247 		if (hy == 0x40000000) {		/* y is  2 */
248 			if (ahx >= 0x5ff00000 && ahx < 0x7ff00000)
249 				return (_SVID_libm_err(x, y, 21));
250 							/* x*x overflow */
251 			else if ((ahx < 0x1e56a09e && (ahx | lx) != 0) ||
252 				(ahx == 0x1e56a09e && lx < 0x667f3bcd))
253 				return (_SVID_libm_err(x, y, 22));
254 							/* x*x underflow */
255 			else
256 				return (x * x);
257 		}
258 		if (hy == 0x3fe00000) {
259 			if (!((ahx | lx) == 0 || ((ahx - 0x7ff00000) | lx) ==
260 				0 || sbx == 1))
261 				return (sqrt(x));	/* y is 0.5 and x > 0 */
262 		}
263 	}
264 	/* special value of x */
265 	if (lx == 0) {
266 		if (ahx == 0x7ff00000 || ahx == 0 || ahx == 0x3ff00000) {
267 			/* x is +-0,+-inf,-1 */
268 			z = ax;
269 			if (sby == 1) {
270 				z = one / z;	/* z = |x|**y */
271 				if (ahx == 0)
272 					return (_SVID_libm_err(x, y, 23));
273 			}
274 			if (sbx == 1) {
275 				if (ahx == 0x3ff00000 && yisint == 0)
276 					z = _SVID_libm_err(x, y, 24);
277 					/* neg**non-integral is NaN + invalid */
278 				else if (yisint == 1)
279 					z = -z;	/* (x<0)**odd = -(|x|**odd) */
280 			}
281 			return (z);
282 		}
283 	}
284 	/* (x<0)**(non-int) is NaN */
285 	if (sbx == 1 && yisint == 0)
286 		return (_SVID_libm_err(x, y, 24));
287 	/* Now ax is finite, y is finite */
288 	/* first compute log2(ax) = w1+w2, with 24 bits w1 */
289 	w1 = log2_x(ax, &w2);
290 
291 	/* split up y into y1+y2 and compute (y1+y2)*(w1+w2) */
292 	if (((ly & 0x07ffffff) == 0) || ahy >= 0x47e00000 ||
293 		ahy <= 0x38100000) {
294 		/* no need to split if y is short or too large or too small */
295 		y1 = y * w1;
296 		y2 = y * w2;
297 	} else {
298 		y1 = (double) ((float) y);
299 		y2 = (y - y1) * w1 + y * w2;
300 		y1 *= w1;
301 	}
302 	z = y1 + y2;
303 	j = pz[HIWORD];
304 	if (j >= 0x40900000) {				/* z >= 1024 */
305 		if (!(j == 0x40900000 && pz[LOWORD] == 0))	/* z > 1024 */
306 			return (_SVID_libm_err(x, y, 21));	/* overflow */
307 		else {
308 			w2 = y1 - z;
309 			w2 += y2;
310 							/* rounded to inf */
311 			if (w2 >= -8.008566259537296567160e-17)
312 				return (_SVID_libm_err(x, y, 21));
313 								/* overflow */
314 		}
315 	} else if ((j & ~0x80000000) >= 0x4090cc00) {	/* z <= -1075 */
316 		if (!(j == 0xc090cc00 && pz[LOWORD] == 0))	/* z < -1075 */
317 			return (_SVID_libm_err(x, y, 22));	/* underflow */
318 		else {
319 			w2 = y1 - z;
320 			w2 += y2;
321 			if (w2 <= zero)			/* underflow */
322 				return (_SVID_libm_err(x, y, 22));
323 		}
324 	}
325 	/*
326 	 * compute 2**(k+f[j]+g)
327 	 */
328 	k = (int) (z * 64.0 + (((hy ^ (ahx - 0x3ff00000)) > 0) ? 0.5 : -0.5));
329 	j = k & 63;
330 	w1 = y2 - ((double) k * 0.015625 - y1);
331 	w2 = _TBL_exp2_hi[j];
332 	z = _TBL_exp2_lo[j] + (w2 * w1) * (E1 + w1 * (E2 + w1 * (E3 + w1 *
333 		(E4 + w1 * E5))));
334 	z += w2;
335 	k >>= 6;
336 	if (k < -1021)
337 		z = scalbn(z, k);
338 	else			/* subnormal output */
339 		pz[HIWORD] += k << 20;
340 	if (sbx == 1 && yisint == 1)
341 		z = -z;		/* (-ve)**(odd int) */
342 	return (z);
343 }
344