xref: /freebsd/lib/msun/src/s_cbrt.c (revision ce804bff581bc312583c4487f0167cd12f642a07)
13a8617a8SJordan K. Hubbard /* @(#)s_cbrt.c 5.1 93/09/24 */
23a8617a8SJordan K. Hubbard /*
33a8617a8SJordan K. Hubbard  * ====================================================
43a8617a8SJordan K. Hubbard  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
53a8617a8SJordan K. Hubbard  *
63a8617a8SJordan K. Hubbard  * Developed at SunPro, a Sun Microsystems, Inc. business.
73a8617a8SJordan K. Hubbard  * Permission to use, copy, modify, and distribute this
83a8617a8SJordan K. Hubbard  * software is freely granted, provided that this notice
93a8617a8SJordan K. Hubbard  * is preserved.
103a8617a8SJordan K. Hubbard  * ====================================================
11ec761d75SBruce Evans  *
12ec761d75SBruce Evans  * Optimized by Bruce D. Evans.
133a8617a8SJordan K. Hubbard  */
143a8617a8SJordan K. Hubbard 
153a8617a8SJordan K. Hubbard #ifndef lint
167f3dea24SPeter Wemm static char rcsid[] = "$FreeBSD$";
173a8617a8SJordan K. Hubbard #endif
183a8617a8SJordan K. Hubbard 
193a8617a8SJordan K. Hubbard #include "math.h"
203a8617a8SJordan K. Hubbard #include "math_private.h"
213a8617a8SJordan K. Hubbard 
223a8617a8SJordan K. Hubbard /* cbrt(x)
233a8617a8SJordan K. Hubbard  * Return cube root of x
243a8617a8SJordan K. Hubbard  */
253a8617a8SJordan K. Hubbard static const u_int32_t
26af7f9913SBruce Evans 	B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
27af7f9913SBruce Evans 	B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
283a8617a8SJordan K. Hubbard 
293a8617a8SJordan K. Hubbard static const double
303a8617a8SJordan K. Hubbard C =  5.42857142857142815906e-01, /* 19/35     = 0x3FE15F15, 0xF15F15F1 */
313a8617a8SJordan K. Hubbard D = -7.05306122448979611050e-01, /* -864/1225 = 0xBFE691DE, 0x2532C834 */
323a8617a8SJordan K. Hubbard E =  1.41428571428571436819e+00, /* 99/70     = 0x3FF6A0EA, 0x0EA0EA0F */
333a8617a8SJordan K. Hubbard F =  1.60714285714285720630e+00, /* 45/28     = 0x3FF9B6DB, 0x6DB6DB6E */
343a8617a8SJordan K. Hubbard G =  3.57142857142857150787e-01; /* 5/14      = 0x3FD6DB6D, 0xB6DB6DB7 */
353a8617a8SJordan K. Hubbard 
3659b19ff1SAlfred Perlstein double
3759b19ff1SAlfred Perlstein cbrt(double x)
383a8617a8SJordan K. Hubbard {
393a8617a8SJordan K. Hubbard 	int32_t	hx;
40ce804bffSBruce Evans 	union {
41ce804bffSBruce Evans 	    double value;
42ce804bffSBruce Evans 	    uint64_t bits;
43ce804bffSBruce Evans 	} u;
443a8617a8SJordan K. Hubbard 	double r,s,t=0.0,w;
45ce804bffSBruce Evans 	uint64_t bits;
463a8617a8SJordan K. Hubbard 	u_int32_t sign;
473a8617a8SJordan K. Hubbard 	u_int32_t high,low;
483a8617a8SJordan K. Hubbard 
493a8617a8SJordan K. Hubbard 	GET_HIGH_WORD(hx,x);
503a8617a8SJordan K. Hubbard 	sign=hx&0x80000000; 		/* sign= sign(x) */
513a8617a8SJordan K. Hubbard 	hx  ^=sign;
523a8617a8SJordan K. Hubbard 	if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */
533a8617a8SJordan K. Hubbard 	GET_LOW_WORD(low,x);
543a8617a8SJordan K. Hubbard 	if((hx|low)==0)
553a8617a8SJordan K. Hubbard 	    return(x);			/* cbrt(0) is itself */
563a8617a8SJordan K. Hubbard 
57af7f9913SBruce Evans     /*
58af7f9913SBruce Evans      * Rough cbrt to 5 bits:
59af7f9913SBruce Evans      *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
60af7f9913SBruce Evans      * where e is integral and >= 0, m is real and in [0, 1), and "/" and
61af7f9913SBruce Evans      * "%" are integer division and modulus with rounding towards minus
62af7f9913SBruce Evans      * infinity.  The RHS is always >= the LHS and has a maximum relative
63af7f9913SBruce Evans      * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
64af7f9913SBruce Evans      * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
65af7f9913SBruce Evans      * floating point representation, for finite positive normal values,
66af7f9913SBruce Evans      * ordinary integer divison of the value in bits magically gives
67af7f9913SBruce Evans      * almost exactly the RHS of the above provided we first subtract the
68af7f9913SBruce Evans      * exponent bias (1023 for doubles) and later add it back.  We do the
69af7f9913SBruce Evans      * subtraction virtually to keep e >= 0 so that ordinary integer
70af7f9913SBruce Evans      * division rounds towards minus infinity; this is also efficient.
71af7f9913SBruce Evans      */
727d5a4821SBruce Evans 	if(hx<0x00100000) { 		/* subnormal number */
737d5a4821SBruce Evans 	    SET_HIGH_WORD(t,0x43500000); /* set t= 2**54 */
747d5a4821SBruce Evans 	    t*=x;
757d5a4821SBruce Evans 	    GET_HIGH_WORD(high,t);
76ec761d75SBruce Evans 	    SET_HIGH_WORD(t,sign|((high&0x7fffffff)/3+B2));
777d5a4821SBruce Evans 	} else
78ec761d75SBruce Evans 	    SET_HIGH_WORD(t,sign|(hx/3+B1));
793a8617a8SJordan K. Hubbard 
807aac169eSBruce Evans     /*
81ce804bffSBruce Evans      * New cbrt to 25 bits:
827aac169eSBruce Evans      *    cbrt(x) = t*cbrt(x/t**3) ~= t*R(x/t**3)
837aac169eSBruce Evans      * where R(r) = (14*r**2 + 35*r + 5)/(5*r**2 + 35*r + 14) is the
847aac169eSBruce Evans      * (2,2) Pade approximation to cbrt(r) at r = 1.  We replace
857aac169eSBruce Evans      * r = x/t**3 by 1/r = t**3/x since the latter can be evaluated
867aac169eSBruce Evans      * more efficiently, and rearrange the expression for R(r) to use
877aac169eSBruce Evans      * 4 additions and 2 divisions instead of the 4 additions, 4
887aac169eSBruce Evans      * multiplications and 1 division that would be required using
897aac169eSBruce Evans      * Horner's rule on the numerator and denominator.  t being good
907aac169eSBruce Evans      * to 32 bits means that |t/cbrt(x)-1| < 1/32, so |x/t**3-1| < 0.1
917aac169eSBruce Evans      * and for R(r) we can use any approximation to cbrt(r) that is good
927aac169eSBruce Evans      * to 20 bits on [0.9, 1.1].  The (2,2) Pade approximation is not an
937aac169eSBruce Evans      * especially good choice.
947aac169eSBruce Evans      */
953a8617a8SJordan K. Hubbard 	r=t*t/x;
963a8617a8SJordan K. Hubbard 	s=C+r*t;
973a8617a8SJordan K. Hubbard 	t*=G+F/(s+E+D/s);
983a8617a8SJordan K. Hubbard 
99ce804bffSBruce Evans     /*
100ce804bffSBruce Evans      * Round t away from zero to 25 bits (sloppily except for ensuring that
101ce804bffSBruce Evans      * the result is larger in magnitude than cbrt(x) but not much more than
102ce804bffSBruce Evans      * 2 25-bit ulps larger).  With rounding towards zero, the error bound
103ce804bffSBruce Evans      * would be ~5/6 instead of ~4/6.  With a maximum error of 1 25-bit ulps
104ce804bffSBruce Evans      * in the rounded t, the infinite-precision error in the Newton
105ce804bffSBruce Evans      * approximation barely affects third digit in the the final error
106ce804bffSBruce Evans      * 0.667; the error in the rounded t can be up to about 12 25-bit ulps
107ce804bffSBruce Evans      * before the final error is larger than 0.667 ulps.
108ce804bffSBruce Evans      */
109ce804bffSBruce Evans 	u.value=t;
110ce804bffSBruce Evans 	u.bits=(u.bits+0x20000000)&0xfffffffff0000000ULL;
111ce804bffSBruce Evans 	t=u.value;
1123a8617a8SJordan K. Hubbard 
113ce804bffSBruce Evans     /* one step Newton iteration to 53 bits with error < 0.667 ulps */
1143a8617a8SJordan K. Hubbard 	s=t*t;				/* t*t is exact */
115ce804bffSBruce Evans 	r=x/s;				/* error <= 0.5 ulps; |r| < |t| */
116ce804bffSBruce Evans 	w=t+t;				/* t+t is exact */
117ce804bffSBruce Evans 	r=(r-t)/(w+r);			/* r-t is exact; w+r ~= 3*t */
118ce804bffSBruce Evans 	t=t+t*r;			/* error <= 0.5 + 0.5/3 + epsilon */
1193a8617a8SJordan K. Hubbard 
1203a8617a8SJordan K. Hubbard 	return(t);
1213a8617a8SJordan K. Hubbard }
122