xref: /illumos-gate/usr/src/lib/libm/common/complex/cpow.c (revision ddc0e0b53c661f6e439e3b7072b3ef353eadb4af)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #pragma weak __cpow = cpow
31 
32 /* INDENT OFF */
33 /*
34  * dcomplex cpow(dcomplex z, dcomplex w);
35  *
36  * z**w analytically equivalent to
37  *
38  * cpow(z,w) = cexp(w clog(z))
39  *
40  * Let z = x+iy, w = u+iv.
41  * Since
42  *                        _________
43  *                       / 2    2            -1   y
44  *     log(x+iy) = log(\/ x  + y    ) + i tan   (---)
45  *                                                x
46  *
47  *                  1       2    2         -1   y
48  *               = --- log(x  + y ) + i tan   (---)
49  *                  2                           x
50  *                       u       2    2         -1  y
51  * (u+iv)* log(x+iy) =  --- log(x  + y ) - v tan  (---)  +          (1)
52  *                       2                          x
53  *
54  *                            v       2    2         -1  y
55  *                     i * [ --- log(x  + y ) + u tan  (---) ]      (2)
56  *                            2                          x
57  *
58  *                   = r + i q
59  *
60  * Therefore,
61  *      w     r+iq    r
62  *     z  =  e     = e  (cos(q)+i*sin(q))
63  *
64  * Here e**r = exp(r) can be expressed, using (1), as:
65  *
66  *     hypot(x, y)**u * exp(-v * atan2(y, x))
67  *
68  * Special cases (in the order of appearance):
69  *      1.  (anything) ** 0  is 1
70  *      2.  (anything) ** 1  is itself
71  *      3.  When v = 0, y = 0:
72  *            If x is finite and negative, and u is finite, then
73  *               x ** u = exp(u*pi i) * pow(|x|, u);
74  *            otherwise,
75  *               x ** u = pow(x, u);
76  *      4.  When v = 0, x = 0 or |x| = |y| or x is inf or y is inf:
77  *               (x + y i) ** u = r * exp(q i)
78  *          where
79  *               r = hypot(x,y) ** u
80  *               q = u * atan2pi(y, x)
81  *
82  *      5.  otherwise, z**w is NaN if any of x, y, u, v is a NaN or inf
83  *
84  *      Note: many results of special cases are obtained in terms of
85  *      polar coordinate. In the conversion from polar to rectangle:
86  *                  r exp(q i) = r * cos(q) + r * sin(q) i,
87  *      we regard r * 0 as 0 except when r is a NaN.
88  */
89 /* INDENT ON */
90 
91 #include "libm.h"	/* atan2/exp/fabs/hypot/log/pow/scalbn */
92 			/* atan2pi/exp2/sincos/sincospi/__k_clog_r/__k_atan2 */
93 #include "complex_wrapper.h"
94 
/* libm routines working in units of pi that are used by cpow below */
extern void sincospi(double, double *, double *);
extern double atan2pi(double, double);

/* magnitudes whose squares overflow / underflow in double arithmetic */
static const double huge = 1e300;
static const double tiny = 1e-300;

/* 1/ln(2), and ln(2) split into a high part and a low-order correction */
static const double invln2 = 1.44269504088896338700e+00;
static const double ln2hi = 6.93147180369123816490e-01;	/* 0x3fe62e42, 0xfee00000 */
static const double ln2lo = 1.90821492927058770002e-10;	/* 0x3dea39ef, 0x35793c76 */

static const double one = 1.0;
static const double zero = 0.0;

/* high word of +Inf; a masked high word >= hiinf means Inf or NaN */
static const int hiinf = 0x7ff00000;
108 
/*
 * sum4fp: add four doubles and deliver the sum as a pair of doubles.
 *
 * ta	the four addends.  The caller must guarantee
 *	|ta[0]| > |ta[1]| and |ta[2]| > |ta[3]|.
 * w	receives the low-order part (tail) of the sum.
 *
 * Returns the high-order part (head) of ta[0] + ta[1] + ta[2] + ta[3].
 */
static double
sum4fp(double ta[], double *w)
{
	double t1, t2, t3, t4, w1, w2, t;

	t1 = ta[0];
	t2 = ta[1];
	t3 = ta[2];
	t4 = ta[3];

	/*
	 * Rearrange ti so that |t1| >= |t2| >= |t3| >= |t4|.  Each of the
	 * compensated additions below needs its larger operand first.
	 */
	if (fabs(t4) > fabs(t1)) {
		/* |ta[2]| > |ta[3]| > |ta[0]| > |ta[1]|: swap the pairs */
		t = t1; t1 = t3; t3 = t;
		t = t2; t2 = t4; t4 = t;
	} else if (fabs(t3) > fabs(t1)) {
		/* ta[2] is largest, ta[0] second; order ta[1] and ta[3] */
		t = t1; t1 = t3;
		if (fabs(t4) > fabs(t2)) {
			t3 = t4; t4 = t2; t2 = t;
		} else {
			t3 = t2; t2 = t;
		}
	} else if (fabs(t3) > fabs(t2)) {
		/* ta[0] is largest, ta[2] second; order ta[1] and ta[3] */
		t = t2; t2 = t3;
		/*
		 * Compare against the saved ta[1] (now in t): t2 already
		 * holds ta[2], which always exceeds ta[3] in magnitude.
		 */
		if (fabs(t4) > fabs(t)) {
			t3 = t4; t4 = t;
		} else {
			t3 = t;
		}
	}

	/* summing r = t1 + t2 + t3 + t4 to w1 + w2 */
	w1 = t3 + t4;
	w2 = t4 - (w1 - t3);	/* rounding error of t3 + t4 */
	t  = t2 + w1;
	w2 += w1 - (t - t2);	/* plus rounding error of t2 + w1 */
	w1 = t + w2;
	w2 += t - w1;
	t  = t1 + w1;
	w2 += w1 - (t - t1);	/* plus rounding error of t1 + w1 */
	w1 = t + w2;
	*w = w2 - (w1 - t);
	return (w1);
}
150 
151 dcomplex
cpow(dcomplex z,dcomplex w)152 cpow(dcomplex z, dcomplex w) {
153 	dcomplex ans;
154 	double x, y, u, v, t, c, s, r, x2, y2;
155 	double b[4], t1, t2, t3, t4, w1, w2, u1, v1, x1, y1;
156 	int ix, iy, hx, lx, hy, ly, hv, hu, iu, iv, lu, lv;
157 	int i, j, k;
158 
159 	x = D_RE(z);
160 	y = D_IM(z);
161 	u = D_RE(w);
162 	v = D_IM(w);
163 	hx = ((int *) &x)[HIWORD];
164 	lx = ((int *) &x)[LOWORD];
165 	hy = ((int *) &y)[HIWORD];
166 	ly = ((int *) &y)[LOWORD];
167 	hu = ((int *) &u)[HIWORD];
168 	lu = ((int *) &u)[LOWORD];
169 	hv = ((int *) &v)[HIWORD];
170 	lv = ((int *) &v)[LOWORD];
171 	ix = hx & 0x7fffffff;
172 	iy = hy & 0x7fffffff;
173 	iu = hu & 0x7fffffff;
174 	iv = hv & 0x7fffffff;
175 
176 	j = 0;
177 	if ((iv | lv) == 0) {	/* z**(real) */
178 		if (((hu - 0x3ff00000) | lu) == 0) {	/* z ** 1 = z */
179 			D_RE(ans) = x;
180 			D_IM(ans) = y;
181 		} else if ((iu | lu) == 0) {	/* z ** 0 = 1 */
182 			D_RE(ans) = one;
183 			D_IM(ans) = zero;
184 		} else if ((iy | ly) == 0) {	/* (real)**(real) */
185 			D_IM(ans) = zero;
186 			if (hx < 0 && ix < hiinf && iu < hiinf) {
187 				/* -x ** u  is exp(i*pi*u)*pow(x,u) */
188 				r = pow(-x, u);
189 				sincospi(u, &s, &c);
190 				D_RE(ans) = (c == zero)? c: c * r;
191 				D_IM(ans) = (s == zero)? s: s * r;
192 			} else
193 				D_RE(ans) = pow(x, u);
194 		} else if (((ix | lx) == 0) || ix >= hiinf || iy >= hiinf) {
195 			if (isnan(x) || isnan(y) || isnan(u))
196 				D_RE(ans) = D_IM(ans) = x + y + u;
197 			else {
198 				if ((ix | lx) == 0)
199 					r = fabs(y);
200 				else
201 					r = fabs(x) + fabs(y);
202 				t = atan2pi(y, x);
203 				sincospi(t * u, &s, &c);
204 				D_RE(ans) = (c == zero)? c: c * r;
205 				D_IM(ans) = (s == zero)? s: s * r;
206 			}
207 		} else if (((ix - iy) | (lx - ly)) == 0) {   /* |x| = |y| */
208 			if (hx >= 0) {
209 				t = (hy >= 0)? 0.25 : -0.25;
210 				sincospi(t * u, &s, &c);
211 			} else if ((lu & 3) == 0) {
212 				t = (hy >= 0)? 0.75 : -0.75;
213 				sincospi(t * u, &s, &c);
214 			} else {
215 				r = (hy >= 0)? u : -u;
216 				t = -0.25 * r;
217 				w1 = r + t;
218 				w2 = t - (w1 - r);
219 				sincospi(w1, &t1, &t2);
220 				sincospi(w2, &t3, &t4);
221 				s = t1 * t4 + t3 * t2;
222 				c = t2 * t4 - t1 * t3;
223 			}
224 			if (ix < 0x3fe00000)	/* |x| < 1/2 */
225 				r = pow(fabs(x + x), u) * exp2(-0.5 * u);
226 			else if (ix >= 0x3ff00000 || iu < 0x408ff800)
227 				/* |x| >= 1 or |u| < 1023 */
228 				r = pow(fabs(x), u) * exp2(0.5 * u);
229 			else   /* special treatment */
230 				j = 2;
231 			if (j == 0) {
232 				D_RE(ans) = (c == zero)? c: c * r;
233 				D_IM(ans) = (s == zero)? s: s * r;
234 			}
235 		} else
236 			j = 1;
237 		if (j == 0)
238 			return (ans);
239 	}
240 	if (iu >= hiinf || iv >= hiinf || ix >= hiinf || iy >= hiinf) {
241 		/*
242 		 * non-zero imag part(s) with inf component(s) yields NaN
243 		 */
244 		t = fabs(x) + fabs(y) + fabs(u) + fabs(v);
245 		D_RE(ans) = D_IM(ans) = t - t;
246 	} else {
247 		k = 0;	/* no scaling */
248 		if (iu > 0x7f000000 || iv > 0x7f000000) {
249 			u *= .0009765625; /* scale 2**-10 to avoid overflow */
250 			v *= .0009765625;
251 			k = 1;	/* scale by 2**-10 */
252 		}
253 		/*
254 		 * Use similated higher precision arithmetic to compute:
255 		 * r = u * log(hypot(x, y)) - v * atan2(y, x)
256 		 * q = u * atan2(y, x) + v * log(hypot(x, y))
257 		 */
258 		t1 = __k_clog_r(x, y, &t2);
259 		t3 = __k_atan2(y, x, &t4);
260 		x1 = t1;
261 		y1 = t3;
262 		u1 = u;
263 		v1 = v;
264 		((int *) &u1)[LOWORD] &= 0xf8000000;
265 		((int *) &v1)[LOWORD] &= 0xf8000000;
266 		((int *) &x1)[LOWORD] &= 0xf8000000;
267 		((int *) &y1)[LOWORD] &= 0xf8000000;
268 		x2 = t2 - (x1 - t1);	/* log(hypot(x,y)) = x1 + x2 */
269 		y2 = t4 - (y1 - t3);	/* atan2(y,x) = y1 + y2 */
270 		/* compute q = u * atan2(y, x) + v * log(hypot(x, y)) */
271 		if (j != 2) {
272 			b[0] = u1 * y1;
273 			b[1] = (u - u1) * y1 + u * y2;
274 			if (j == 1) {	/* v = 0 */
275 				w1 = b[0] + b[1];
276 				w2 = b[1] - (w1 - b[0]);
277 			} else {
278 				b[2] = v1 * x1;
279 				b[3] = (v - v1) * x1 + v * x2;
280 				w1 = sum4fp(b, &w2);
281 			}
282 			sincos(w1, &t1, &t2);
283 			sincos(w2, &t3, &t4);
284 			s = t1 * t4 + t3 * t2;
285 			c = t2 * t4 - t1 * t3;
286 			if (k == 1)
287 			/*
288 			 * square (cos(q) + i sin(q)) k times to get
289 			 * (cos(2^k * q + i sin(2^k * q)
290 			 */
291 				for (i = 0; i < 10; i++) {
292 					t1 = s * c;
293 					c = (c + s) * (c - s);
294 					s = t1 + t1;
295 				}
296 		}
297 		/* compute r = u * (t1, t2) - v * (t3, t4) */
298 		b[0] = u1 * x1;
299 		b[1] = (u - u1) * x1 + u * x2;
300 		if (j == 1) {	/* v = 0 */
301 			w1 = b[0] + b[1];
302 			w2 = b[1] - (w1 - b[0]);
303 		} else {
304 			b[2] = -v1 * y1;
305 			b[3] = (v1 - v) * y1 - v * y2;
306 			w1 = sum4fp(b, &w2);
307 		}
308 		/* check over/underflow for exp(w1 + w2) */
309 		if (k && fabs(w1) < 1000.0) {
310 			w1 *= 1024; w2 *= 1024; k = 0;
311 		}
312 		hx = ((int *) &w1)[HIWORD];
313 		lx = ((int *) &w1)[LOWORD];
314 		ix = hx & 0x7fffffff;
315 		/* compute exp(w1 + w2) */
316 		if (ix < 0x3c900000) /* exp(tiny < 2**-54) = 1 */
317 			r = one;
318 		else if (ix >= 0x40880000) /* overflow/underflow */
319 			r = (hx < 0)? tiny * tiny : huge * huge;
320 		else {	/* compute exp(w1 + w2) */
321 			k = (int) (invln2 * w1 + ((hx >= 0)? 0.5 : -0.5));
322 			t1 = (double) k;
323 			t2 = w1 - t1 * ln2hi;
324 			t3 = w2 - t1 * ln2lo;
325 			r = exp(t2 + t3);
326 		}
327 		if (c != zero) c *= r;
328 		if (s != zero) s *= r;
329 		if (k != 0) {
330 			c = scalbn(c, k);
331 			s = scalbn(s, k);
332 		}
333 		D_RE(ans) = c;
334 		D_IM(ans) = s;
335 	}
336 	return (ans);
337 }
338