xref: /freebsd/lib/msun/src/e_sqrt.c (revision 0dd5a5603e7a33d976f8e6015620bbc79839c609)
13f708241SDavid Schultz 
23a8617a8SJordan K. Hubbard /*
33a8617a8SJordan K. Hubbard  * ====================================================
43a8617a8SJordan K. Hubbard  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
53a8617a8SJordan K. Hubbard  *
63f708241SDavid Schultz  * Developed at SunSoft, a Sun Microsystems, Inc. business.
73a8617a8SJordan K. Hubbard  * Permission to use, copy, modify, and distribute this
83a8617a8SJordan K. Hubbard  * software is freely granted, provided that this notice
93a8617a8SJordan K. Hubbard  * is preserved.
103a8617a8SJordan K. Hubbard  * ====================================================
113a8617a8SJordan K. Hubbard  */
123a8617a8SJordan K. Hubbard 
13b2e84316SAndrew Turner #include <float.h>
14b2e84316SAndrew Turner 
15b2e84316SAndrew Turner #include "math.h"
16b2e84316SAndrew Turner #include "math_private.h"
17b2e84316SAndrew Turner 
18b2e84316SAndrew Turner #ifdef USE_BUILTIN_SQRT
19b2e84316SAndrew Turner double
sqrt(double x)20*99843eb8SSteve Kargl sqrt(double x)
21b2e84316SAndrew Turner {
22b2e84316SAndrew Turner 	return (__builtin_sqrt(x));
23b2e84316SAndrew Turner }
24b2e84316SAndrew Turner #else
25*99843eb8SSteve Kargl /* sqrt(x)
263a8617a8SJordan K. Hubbard  * Return correctly rounded sqrt.
273a8617a8SJordan K. Hubbard  *           ------------------------------------------
283a8617a8SJordan K. Hubbard  *	     |  Use the hardware sqrt if you have one |
293a8617a8SJordan K. Hubbard  *           ------------------------------------------
303a8617a8SJordan K. Hubbard  * Method:
313a8617a8SJordan K. Hubbard  *   Bit by bit method using integer arithmetic. (Slow, but portable)
323a8617a8SJordan K. Hubbard  *   1. Normalization
333a8617a8SJordan K. Hubbard  *	Scale x to y in [1,4) with even powers of 2:
343a8617a8SJordan K. Hubbard  *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
353a8617a8SJordan K. Hubbard  *		sqrt(x) = 2^k * sqrt(y)
363a8617a8SJordan K. Hubbard  *   2. Bit by bit computation
373a8617a8SJordan K. Hubbard  *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
383a8617a8SJordan K. Hubbard  *	     i							 0
393a8617a8SJordan K. Hubbard  *                                     i+1         2
403a8617a8SJordan K. Hubbard  *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
413a8617a8SJordan K. Hubbard  *	     i      i            i                 i
423a8617a8SJordan K. Hubbard  *
433a8617a8SJordan K. Hubbard  *	To compute q    from q , one checks whether
443a8617a8SJordan K. Hubbard  *		    i+1       i
453a8617a8SJordan K. Hubbard  *
463a8617a8SJordan K. Hubbard  *			      -(i+1) 2
473a8617a8SJordan K. Hubbard  *			(q + 2      ) <= y.			(2)
483a8617a8SJordan K. Hubbard  *     			  i
493a8617a8SJordan K. Hubbard  *							      -(i+1)
503a8617a8SJordan K. Hubbard  *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
513a8617a8SJordan K. Hubbard  *		 	       i+1   i             i+1   i
523a8617a8SJordan K. Hubbard  *
533a8617a8SJordan K. Hubbard  *	With some algebric manipulation, it is not difficult to see
543a8617a8SJordan K. Hubbard  *	that (2) is equivalent to
553a8617a8SJordan K. Hubbard  *                             -(i+1)
563a8617a8SJordan K. Hubbard  *			s  +  2       <= y			(3)
573a8617a8SJordan K. Hubbard  *			 i                i
583a8617a8SJordan K. Hubbard  *
593a8617a8SJordan K. Hubbard  *	The advantage of (3) is that s  and y  can be computed by
603a8617a8SJordan K. Hubbard  *				      i      i
613a8617a8SJordan K. Hubbard  *	the following recurrence formula:
623a8617a8SJordan K. Hubbard  *	    if (3) is false
633a8617a8SJordan K. Hubbard  *
643a8617a8SJordan K. Hubbard  *	    s     =  s  ,	y    = y   ;			(4)
653a8617a8SJordan K. Hubbard  *	     i+1      i		 i+1    i
663a8617a8SJordan K. Hubbard  *
673a8617a8SJordan K. Hubbard  *	    otherwise,
683a8617a8SJordan K. Hubbard  *                         -i                     -(i+1)
693a8617a8SJordan K. Hubbard  *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
703a8617a8SJordan K. Hubbard  *           i+1      i          i+1    i     i
713a8617a8SJordan K. Hubbard  *
723a8617a8SJordan K. Hubbard  *	One may easily use induction to prove (4) and (5).
733a8617a8SJordan K. Hubbard  *	Note. Since the left hand side of (3) contain only i+2 bits,
743a8617a8SJordan K. Hubbard  *	      it does not necessary to do a full (53-bit) comparison
753a8617a8SJordan K. Hubbard  *	      in (3).
763a8617a8SJordan K. Hubbard  *   3. Final rounding
773a8617a8SJordan K. Hubbard  *	After generating the 53 bits result, we compute one more bit.
783a8617a8SJordan K. Hubbard  *	Together with the remainder, we can decide whether the
793a8617a8SJordan K. Hubbard  *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
803a8617a8SJordan K. Hubbard  *	(it will never equal to 1/2ulp).
813a8617a8SJordan K. Hubbard  *	The rounding mode can be detected by checking whether
823a8617a8SJordan K. Hubbard  *	huge + tiny is equal to huge, and whether huge - tiny is
833a8617a8SJordan K. Hubbard  *	equal to huge for some floating point number "huge" and "tiny".
843a8617a8SJordan K. Hubbard  *
853a8617a8SJordan K. Hubbard  * Special cases:
863a8617a8SJordan K. Hubbard  *	sqrt(+-0) = +-0 	... exact
873a8617a8SJordan K. Hubbard  *	sqrt(inf) = inf
883a8617a8SJordan K. Hubbard  *	sqrt(-ve) = NaN		... with invalid signal
893a8617a8SJordan K. Hubbard  *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
903a8617a8SJordan K. Hubbard  *
913a8617a8SJordan K. Hubbard  * Other methods : see the appended file at the end of the program below.
923a8617a8SJordan K. Hubbard  *---------------
933a8617a8SJordan K. Hubbard  */
943a8617a8SJordan K. Hubbard 
953a8617a8SJordan K. Hubbard static	const double	one	= 1.0, tiny=1.0e-300;
963a8617a8SJordan K. Hubbard 
9759b19ff1SAlfred Perlstein double
sqrt(double x)98*99843eb8SSteve Kargl sqrt(double x)
993a8617a8SJordan K. Hubbard {
1003a8617a8SJordan K. Hubbard 	double z;
1013a8617a8SJordan K. Hubbard 	int32_t sign = (int)0x80000000;
1023a8617a8SJordan K. Hubbard 	int32_t ix0,s0,q,m,t,i;
1033a8617a8SJordan K. Hubbard 	u_int32_t r,t1,s1,ix1,q1;
1043a8617a8SJordan K. Hubbard 
1053a8617a8SJordan K. Hubbard 	EXTRACT_WORDS(ix0,ix1,x);
1063a8617a8SJordan K. Hubbard 
1073a8617a8SJordan K. Hubbard     /* take care of Inf and NaN */
1083a8617a8SJordan K. Hubbard 	if((ix0&0x7ff00000)==0x7ff00000) {
1093a8617a8SJordan K. Hubbard 	    return x*x+x;		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf
1103a8617a8SJordan K. Hubbard 					   sqrt(-inf)=sNaN */
1113a8617a8SJordan K. Hubbard 	}
1123a8617a8SJordan K. Hubbard     /* take care of zero */
1133a8617a8SJordan K. Hubbard 	if(ix0<=0) {
1143a8617a8SJordan K. Hubbard 	    if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
1153a8617a8SJordan K. Hubbard 	    else if(ix0<0)
1163a8617a8SJordan K. Hubbard 		return (x-x)/(x-x);		/* sqrt(-ve) = sNaN */
1173a8617a8SJordan K. Hubbard 	}
1183a8617a8SJordan K. Hubbard     /* normalize x */
1193a8617a8SJordan K. Hubbard 	m = (ix0>>20);
1203a8617a8SJordan K. Hubbard 	if(m==0) {				/* subnormal x */
1213a8617a8SJordan K. Hubbard 	    while(ix0==0) {
1223a8617a8SJordan K. Hubbard 		m -= 21;
1233a8617a8SJordan K. Hubbard 		ix0 |= (ix1>>11); ix1 <<= 21;
1243a8617a8SJordan K. Hubbard 	    }
1253a8617a8SJordan K. Hubbard 	    for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
1263a8617a8SJordan K. Hubbard 	    m -= i-1;
1273a8617a8SJordan K. Hubbard 	    ix0 |= (ix1>>(32-i));
1283a8617a8SJordan K. Hubbard 	    ix1 <<= i;
1293a8617a8SJordan K. Hubbard 	}
1303a8617a8SJordan K. Hubbard 	m -= 1023;	/* unbias exponent */
1313a8617a8SJordan K. Hubbard 	ix0 = (ix0&0x000fffff)|0x00100000;
1323a8617a8SJordan K. Hubbard 	if(m&1){	/* odd m, double x to make it even */
1333a8617a8SJordan K. Hubbard 	    ix0 += ix0 + ((ix1&sign)>>31);
1343a8617a8SJordan K. Hubbard 	    ix1 += ix1;
1353a8617a8SJordan K. Hubbard 	}
1363a8617a8SJordan K. Hubbard 	m >>= 1;	/* m = [m/2] */
1373a8617a8SJordan K. Hubbard 
1383a8617a8SJordan K. Hubbard     /* generate sqrt(x) bit by bit */
1393a8617a8SJordan K. Hubbard 	ix0 += ix0 + ((ix1&sign)>>31);
1403a8617a8SJordan K. Hubbard 	ix1 += ix1;
1413a8617a8SJordan K. Hubbard 	q = q1 = s0 = s1 = 0;	/* [q,q1] = sqrt(x) */
1423a8617a8SJordan K. Hubbard 	r = 0x00200000;		/* r = moving bit from right to left */
1433a8617a8SJordan K. Hubbard 
1443a8617a8SJordan K. Hubbard 	while(r!=0) {
1453a8617a8SJordan K. Hubbard 	    t = s0+r;
1463a8617a8SJordan K. Hubbard 	    if(t<=ix0) {
1473a8617a8SJordan K. Hubbard 		s0   = t+r;
1483a8617a8SJordan K. Hubbard 		ix0 -= t;
1493a8617a8SJordan K. Hubbard 		q   += r;
1503a8617a8SJordan K. Hubbard 	    }
1513a8617a8SJordan K. Hubbard 	    ix0 += ix0 + ((ix1&sign)>>31);
1523a8617a8SJordan K. Hubbard 	    ix1 += ix1;
1533a8617a8SJordan K. Hubbard 	    r>>=1;
1543a8617a8SJordan K. Hubbard 	}
1553a8617a8SJordan K. Hubbard 
1563a8617a8SJordan K. Hubbard 	r = sign;
1573a8617a8SJordan K. Hubbard 	while(r!=0) {
1583a8617a8SJordan K. Hubbard 	    t1 = s1+r;
1593a8617a8SJordan K. Hubbard 	    t  = s0;
1603a8617a8SJordan K. Hubbard 	    if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
1613a8617a8SJordan K. Hubbard 		s1  = t1+r;
1623a8617a8SJordan K. Hubbard 		if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
1633a8617a8SJordan K. Hubbard 		ix0 -= t;
1643a8617a8SJordan K. Hubbard 		if (ix1 < t1) ix0 -= 1;
1653a8617a8SJordan K. Hubbard 		ix1 -= t1;
1663a8617a8SJordan K. Hubbard 		q1  += r;
1673a8617a8SJordan K. Hubbard 	    }
1683a8617a8SJordan K. Hubbard 	    ix0 += ix0 + ((ix1&sign)>>31);
1693a8617a8SJordan K. Hubbard 	    ix1 += ix1;
1703a8617a8SJordan K. Hubbard 	    r>>=1;
1713a8617a8SJordan K. Hubbard 	}
1723a8617a8SJordan K. Hubbard 
1733a8617a8SJordan K. Hubbard     /* use floating add to find out rounding direction */
1743a8617a8SJordan K. Hubbard 	if((ix0|ix1)!=0) {
1753a8617a8SJordan K. Hubbard 	    z = one-tiny; /* trigger inexact flag */
1763a8617a8SJordan K. Hubbard 	    if (z>=one) {
1773a8617a8SJordan K. Hubbard 	        z = one+tiny;
1783a8617a8SJordan K. Hubbard 	        if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
1793a8617a8SJordan K. Hubbard 		else if (z>one) {
1803a8617a8SJordan K. Hubbard 		    if (q1==(u_int32_t)0xfffffffe) q+=1;
1813a8617a8SJordan K. Hubbard 		    q1+=2;
1823a8617a8SJordan K. Hubbard 		} else
1833a8617a8SJordan K. Hubbard 	            q1 += (q1&1);
1843a8617a8SJordan K. Hubbard 	    }
1853a8617a8SJordan K. Hubbard 	}
1863a8617a8SJordan K. Hubbard 	ix0 = (q>>1)+0x3fe00000;
1873a8617a8SJordan K. Hubbard 	ix1 =  q1>>1;
1883a8617a8SJordan K. Hubbard 	if ((q&1)==1) ix1 |= sign;
1893a8617a8SJordan K. Hubbard 	ix0 += (m <<20);
1903a8617a8SJordan K. Hubbard 	INSERT_WORDS(z,ix0,ix1);
1913a8617a8SJordan K. Hubbard 	return z;
1923a8617a8SJordan K. Hubbard }
193b2e84316SAndrew Turner #endif
1943a8617a8SJordan K. Hubbard 
195c6a4447bSDavid Schultz #if (LDBL_MANT_DIG == 53)
196c6a4447bSDavid Schultz __weak_reference(sqrt, sqrtl);
197c6a4447bSDavid Schultz #endif
198c6a4447bSDavid Schultz 
1993a8617a8SJordan K. Hubbard /*
2003a8617a8SJordan K. Hubbard Other methods  (use floating-point arithmetic)
2013a8617a8SJordan K. Hubbard -------------
2023a8617a8SJordan K. Hubbard (This is a copy of a drafted paper by Prof W. Kahan
2033a8617a8SJordan K. Hubbard and K.C. Ng, written in May, 1986)
2043a8617a8SJordan K. Hubbard 
2053a8617a8SJordan K. Hubbard 	Two algorithms are given here to implement sqrt(x)
2063a8617a8SJordan K. Hubbard 	(IEEE double precision arithmetic) in software.
2073a8617a8SJordan K. Hubbard 	Both supply sqrt(x) correctly rounded. The first algorithm (in
2083a8617a8SJordan K. Hubbard 	Section A) uses newton iterations and involves four divisions.
2093a8617a8SJordan K. Hubbard 	The second one uses reciproot iterations to avoid division, but
2103a8617a8SJordan K. Hubbard 	requires more multiplications. Both algorithms need the ability
2113a8617a8SJordan K. Hubbard 	to chop results of arithmetic operations instead of round them,
2123a8617a8SJordan K. Hubbard 	and the INEXACT flag to indicate when an arithmetic operation
2133a8617a8SJordan K. Hubbard 	is executed exactly with no roundoff error, all part of the
2143a8617a8SJordan K. Hubbard 	standard (IEEE 754-1985). The ability to perform shift, add,
2153a8617a8SJordan K. Hubbard 	subtract and logical AND operations upon 32-bit words is needed
2163a8617a8SJordan K. Hubbard 	too, though not part of the standard.
2173a8617a8SJordan K. Hubbard 
2183a8617a8SJordan K. Hubbard A.  sqrt(x) by Newton Iteration
2193a8617a8SJordan K. Hubbard 
2203a8617a8SJordan K. Hubbard    (1)	Initial approximation
2213a8617a8SJordan K. Hubbard 
2223a8617a8SJordan K. Hubbard 	Let x0 and x1 be the leading and the trailing 32-bit words of
2233a8617a8SJordan K. Hubbard 	a floating point number x (in IEEE double format) respectively
2243a8617a8SJordan K. Hubbard 
2253a8617a8SJordan K. Hubbard 	    1    11		     52				  ...widths
2263a8617a8SJordan K. Hubbard 	   ------------------------------------------------------
2273a8617a8SJordan K. Hubbard 	x: |s|	  e     |	      f				|
2283a8617a8SJordan K. Hubbard 	   ------------------------------------------------------
2293a8617a8SJordan K. Hubbard 	      msb    lsb  msb				      lsb ...order
2303a8617a8SJordan K. Hubbard 
2313a8617a8SJordan K. Hubbard 
2323a8617a8SJordan K. Hubbard 	     ------------------------  	     ------------------------
2333a8617a8SJordan K. Hubbard 	x0:  |s|   e    |    f1     |	 x1: |          f2           |
2343a8617a8SJordan K. Hubbard 	     ------------------------  	     ------------------------
2353a8617a8SJordan K. Hubbard 
2363a8617a8SJordan K. Hubbard 	By performing shifts and subtracts on x0 and x1 (both regarded
2373a8617a8SJordan K. Hubbard 	as integers), we obtain an 8-bit approximation of sqrt(x) as
2383a8617a8SJordan K. Hubbard 	follows.
2393a8617a8SJordan K. Hubbard 
2403a8617a8SJordan K. Hubbard 		k  := (x0>>1) + 0x1ff80000;
2413a8617a8SJordan K. Hubbard 		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
2423a8617a8SJordan K. Hubbard 	Here k is a 32-bit integer and T1[] is an integer array containing
2433a8617a8SJordan K. Hubbard 	correction terms. Now magically the floating value of y (y's
2443a8617a8SJordan K. Hubbard 	leading 32-bit word is y0, the value of its trailing word is 0)
2453a8617a8SJordan K. Hubbard 	approximates sqrt(x) to almost 8-bit.
2463a8617a8SJordan K. Hubbard 
2473a8617a8SJordan K. Hubbard 	Value of T1:
2483a8617a8SJordan K. Hubbard 	static int T1[32]= {
2493a8617a8SJordan K. Hubbard 	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
2503a8617a8SJordan K. Hubbard 	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
2513a8617a8SJordan K. Hubbard 	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
2523a8617a8SJordan K. Hubbard 	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
2533a8617a8SJordan K. Hubbard 
2543a8617a8SJordan K. Hubbard     (2)	Iterative refinement
2553a8617a8SJordan K. Hubbard 
2563a8617a8SJordan K. Hubbard 	Apply Heron's rule three times to y, we have y approximates
2573a8617a8SJordan K. Hubbard 	sqrt(x) to within 1 ulp (Unit in the Last Place):
2583a8617a8SJordan K. Hubbard 
2593a8617a8SJordan K. Hubbard 		y := (y+x/y)/2		... almost 17 sig. bits
2603a8617a8SJordan K. Hubbard 		y := (y+x/y)/2		... almost 35 sig. bits
2613a8617a8SJordan K. Hubbard 		y := y-(y-x/y)/2	... within 1 ulp
2623a8617a8SJordan K. Hubbard 
2633a8617a8SJordan K. Hubbard 
2643a8617a8SJordan K. Hubbard 	Remark 1.
2653a8617a8SJordan K. Hubbard 	    Another way to improve y to within 1 ulp is:
2663a8617a8SJordan K. Hubbard 
2673a8617a8SJordan K. Hubbard 		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
2683a8617a8SJordan K. Hubbard 		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
2693a8617a8SJordan K. Hubbard 
2703a8617a8SJordan K. Hubbard 				2
2713a8617a8SJordan K. Hubbard 			    (x-y )*y
2723a8617a8SJordan K. Hubbard 		y := y + 2* ----------	...within 1 ulp
2733a8617a8SJordan K. Hubbard 			       2
2743a8617a8SJordan K. Hubbard 			     3y  + x
2753a8617a8SJordan K. Hubbard 
2763a8617a8SJordan K. Hubbard 
2773a8617a8SJordan K. Hubbard 	This formula has one division fewer than the one above; however,
2783a8617a8SJordan K. Hubbard 	it requires more multiplications and additions. Also x must be
2793a8617a8SJordan K. Hubbard 	scaled in advance to avoid spurious overflow in evaluating the
2803a8617a8SJordan K. Hubbard 	expression 3y*y+x. Hence it is not recommended uless division
2813a8617a8SJordan K. Hubbard 	is slow. If division is very slow, then one should use the
2823a8617a8SJordan K. Hubbard 	reciproot algorithm given in section B.
2833a8617a8SJordan K. Hubbard 
2843a8617a8SJordan K. Hubbard     (3) Final adjustment
2853a8617a8SJordan K. Hubbard 
2863a8617a8SJordan K. Hubbard 	By twiddling y's last bit it is possible to force y to be
2873a8617a8SJordan K. Hubbard 	correctly rounded according to the prevailing rounding mode
2883a8617a8SJordan K. Hubbard 	as follows. Let r and i be copies of the rounding mode and
2893a8617a8SJordan K. Hubbard 	inexact flag before entering the square root program. Also we
2903a8617a8SJordan K. Hubbard 	use the expression y+-ulp for the next representable floating
2913a8617a8SJordan K. Hubbard 	numbers (up and down) of y. Note that y+-ulp = either fixed
2923a8617a8SJordan K. Hubbard 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
2933a8617a8SJordan K. Hubbard 	mode.
2943a8617a8SJordan K. Hubbard 
2953a8617a8SJordan K. Hubbard 		I := FALSE;	... reset INEXACT flag I
2963a8617a8SJordan K. Hubbard 		R := RZ;	... set rounding mode to round-toward-zero
2973a8617a8SJordan K. Hubbard 		z := x/y;	... chopped quotient, possibly inexact
2983a8617a8SJordan K. Hubbard 		If(not I) then {	... if the quotient is exact
2993a8617a8SJordan K. Hubbard 		    if(z=y) {
3003a8617a8SJordan K. Hubbard 		        I := i;	 ... restore inexact flag
3013a8617a8SJordan K. Hubbard 		        R := r;  ... restore rounded mode
3023a8617a8SJordan K. Hubbard 		        return sqrt(x):=y.
3033a8617a8SJordan K. Hubbard 		    } else {
3043a8617a8SJordan K. Hubbard 			z := z - ulp;	... special rounding
3053a8617a8SJordan K. Hubbard 		    }
3063a8617a8SJordan K. Hubbard 		}
3073a8617a8SJordan K. Hubbard 		i := TRUE;		... sqrt(x) is inexact
3083a8617a8SJordan K. Hubbard 		If (r=RN) then z=z+ulp	... rounded-to-nearest
3093a8617a8SJordan K. Hubbard 		If (r=RP) then {	... round-toward-+inf
3103a8617a8SJordan K. Hubbard 		    y = y+ulp; z=z+ulp;
3113a8617a8SJordan K. Hubbard 		}
3123a8617a8SJordan K. Hubbard 		y := y+z;		... chopped sum
3133a8617a8SJordan K. Hubbard 		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
3143a8617a8SJordan K. Hubbard 	        I := i;	 		... restore inexact flag
3153a8617a8SJordan K. Hubbard 	        R := r;  		... restore rounded mode
3163a8617a8SJordan K. Hubbard 	        return sqrt(x):=y.
3173a8617a8SJordan K. Hubbard 
3183a8617a8SJordan K. Hubbard     (4)	Special cases
3193a8617a8SJordan K. Hubbard 
3203a8617a8SJordan K. Hubbard 	Square root of +inf, +-0, or NaN is itself;
3213a8617a8SJordan K. Hubbard 	Square root of a negative number is NaN with invalid signal.
3223a8617a8SJordan K. Hubbard 
3233a8617a8SJordan K. Hubbard 
3243a8617a8SJordan K. Hubbard B.  sqrt(x) by Reciproot Iteration
3253a8617a8SJordan K. Hubbard 
3263a8617a8SJordan K. Hubbard    (1)	Initial approximation
3273a8617a8SJordan K. Hubbard 
3283a8617a8SJordan K. Hubbard 	Let x0 and x1 be the leading and the trailing 32-bit words of
3293a8617a8SJordan K. Hubbard 	a floating point number x (in IEEE double format) respectively
3303a8617a8SJordan K. Hubbard 	(see section A). By performing shifs and subtracts on x0 and y0,
3313a8617a8SJordan K. Hubbard 	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
3323a8617a8SJordan K. Hubbard 
3333a8617a8SJordan K. Hubbard 	    k := 0x5fe80000 - (x0>>1);
3343a8617a8SJordan K. Hubbard 	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
3353a8617a8SJordan K. Hubbard 
3363a8617a8SJordan K. Hubbard 	Here k is a 32-bit integer and T2[] is an integer array
3373a8617a8SJordan K. Hubbard 	containing correction terms. Now magically the floating
3383a8617a8SJordan K. Hubbard 	value of y (y's leading 32-bit word is y0, the value of
3393a8617a8SJordan K. Hubbard 	its trailing word y1 is set to zero) approximates 1/sqrt(x)
3403a8617a8SJordan K. Hubbard 	to almost 7.8-bit.
3413a8617a8SJordan K. Hubbard 
3423a8617a8SJordan K. Hubbard 	Value of T2:
3433a8617a8SJordan K. Hubbard 	static int T2[64]= {
3443a8617a8SJordan K. Hubbard 	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
3453a8617a8SJordan K. Hubbard 	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
3463a8617a8SJordan K. Hubbard 	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
3473a8617a8SJordan K. Hubbard 	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
3483a8617a8SJordan K. Hubbard 	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
3493a8617a8SJordan K. Hubbard 	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
3503a8617a8SJordan K. Hubbard 	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
3513a8617a8SJordan K. Hubbard 	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
3523a8617a8SJordan K. Hubbard 
3533a8617a8SJordan K. Hubbard     (2)	Iterative refinement
3543a8617a8SJordan K. Hubbard 
3553a8617a8SJordan K. Hubbard 	Apply Reciproot iteration three times to y and multiply the
3563a8617a8SJordan K. Hubbard 	result by x to get an approximation z that matches sqrt(x)
3573a8617a8SJordan K. Hubbard 	to about 1 ulp. To be exact, we will have
3583a8617a8SJordan K. Hubbard 		-1ulp < sqrt(x)-z<1.0625ulp.
3593a8617a8SJordan K. Hubbard 
3603a8617a8SJordan K. Hubbard 	... set rounding mode to Round-to-nearest
3613a8617a8SJordan K. Hubbard 	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
3623a8617a8SJordan K. Hubbard 	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
3633a8617a8SJordan K. Hubbard 	... special arrangement for better accuracy
3643a8617a8SJordan K. Hubbard 	   z := x*y			... 29 bits to sqrt(x), with z*y<1
3653a8617a8SJordan K. Hubbard 	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
3663a8617a8SJordan K. Hubbard 
3673a8617a8SJordan K. Hubbard 	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
3683a8617a8SJordan K. Hubbard 	(a) the term z*y in the final iteration is always less than 1;
3693a8617a8SJordan K. Hubbard 	(b) the error in the final result is biased upward so that
3703a8617a8SJordan K. Hubbard 		-1 ulp < sqrt(x) - z < 1.0625 ulp
3713a8617a8SJordan K. Hubbard 	    instead of |sqrt(x)-z|<1.03125ulp.
3723a8617a8SJordan K. Hubbard 
3733a8617a8SJordan K. Hubbard     (3)	Final adjustment
3743a8617a8SJordan K. Hubbard 
3753a8617a8SJordan K. Hubbard 	By twiddling y's last bit it is possible to force y to be
3763a8617a8SJordan K. Hubbard 	correctly rounded according to the prevailing rounding mode
3773a8617a8SJordan K. Hubbard 	as follows. Let r and i be copies of the rounding mode and
3783a8617a8SJordan K. Hubbard 	inexact flag before entering the square root program. Also we
3793a8617a8SJordan K. Hubbard 	use the expression y+-ulp for the next representable floating
3803a8617a8SJordan K. Hubbard 	numbers (up and down) of y. Note that y+-ulp = either fixed
3813a8617a8SJordan K. Hubbard 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
3823a8617a8SJordan K. Hubbard 	mode.
3833a8617a8SJordan K. Hubbard 
3843a8617a8SJordan K. Hubbard 	R := RZ;		... set rounding mode to round-toward-zero
3853a8617a8SJordan K. Hubbard 	switch(r) {
3863a8617a8SJordan K. Hubbard 	    case RN:		... round-to-nearest
3873a8617a8SJordan K. Hubbard 	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
3883a8617a8SJordan K. Hubbard 	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
3893a8617a8SJordan K. Hubbard 	       break;
3903a8617a8SJordan K. Hubbard 	    case RZ:case RM:	... round-to-zero or round-to--inf
3913a8617a8SJordan K. Hubbard 	       R:=RP;		... reset rounding mod to round-to-+inf
3923a8617a8SJordan K. Hubbard 	       if(x<z*z ... rounded up) z = z - ulp; else
3933a8617a8SJordan K. Hubbard 	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
3943a8617a8SJordan K. Hubbard 	       break;
3953a8617a8SJordan K. Hubbard 	    case RP:		... round-to-+inf
3963a8617a8SJordan K. Hubbard 	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
3973a8617a8SJordan K. Hubbard 	       if(x>z*z ...chopped) z = z+ulp;
3983a8617a8SJordan K. Hubbard 	       break;
3993a8617a8SJordan K. Hubbard 	}
4003a8617a8SJordan K. Hubbard 
4013a8617a8SJordan K. Hubbard 	Remark 3. The above comparisons can be done in fixed point. For
4023a8617a8SJordan K. Hubbard 	example, to compare x and w=z*z chopped, it suffices to compare
4033a8617a8SJordan K. Hubbard 	x1 and w1 (the trailing parts of x and w), regarding them as
4043a8617a8SJordan K. Hubbard 	two's complement integers.
4053a8617a8SJordan K. Hubbard 
4063a8617a8SJordan K. Hubbard 	...Is z an exact square root?
4073a8617a8SJordan K. Hubbard 	To determine whether z is an exact square root of x, let z1 be the
4083a8617a8SJordan K. Hubbard 	trailing part of z, and also let x0 and x1 be the leading and
4093a8617a8SJordan K. Hubbard 	trailing parts of x.
4103a8617a8SJordan K. Hubbard 
4113a8617a8SJordan K. Hubbard 	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
4123a8617a8SJordan K. Hubbard 	    I := 1;		... Raise Inexact flag: z is not exact
4133a8617a8SJordan K. Hubbard 	else {
4143a8617a8SJordan K. Hubbard 	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
4153a8617a8SJordan K. Hubbard 	    k := z1 >> 26;		... get z's 25-th and 26-th
4163a8617a8SJordan K. Hubbard 					    fraction bits
4173a8617a8SJordan K. Hubbard 	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
4183a8617a8SJordan K. Hubbard 	}
4193a8617a8SJordan K. Hubbard 	R:= r		... restore rounded mode
4203a8617a8SJordan K. Hubbard 	return sqrt(x):=z.
4213a8617a8SJordan K. Hubbard 
4223a8617a8SJordan K. Hubbard 	If multiplication is cheaper then the foregoing red tape, the
4233a8617a8SJordan K. Hubbard 	Inexact flag can be evaluated by
4243a8617a8SJordan K. Hubbard 
4253a8617a8SJordan K. Hubbard 	    I := i;
4263a8617a8SJordan K. Hubbard 	    I := (z*z!=x) or I.
4273a8617a8SJordan K. Hubbard 
4283a8617a8SJordan K. Hubbard 	Note that z*z can overwrite I; this value must be sensed if it is
4293a8617a8SJordan K. Hubbard 	True.
4303a8617a8SJordan K. Hubbard 
4313a8617a8SJordan K. Hubbard 	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
4323a8617a8SJordan K. Hubbard 	zero.
4333a8617a8SJordan K. Hubbard 
4343a8617a8SJordan K. Hubbard 		    --------------------
4353a8617a8SJordan K. Hubbard 		z1: |        f2        |
4363a8617a8SJordan K. Hubbard 		    --------------------
4373a8617a8SJordan K. Hubbard 		bit 31		   bit 0
4383a8617a8SJordan K. Hubbard 
4393a8617a8SJordan K. Hubbard 	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
4403a8617a8SJordan K. Hubbard 	or even of logb(x) have the following relations:
4413a8617a8SJordan K. Hubbard 
4423a8617a8SJordan K. Hubbard 	-------------------------------------------------
4433a8617a8SJordan K. Hubbard 	bit 27,26 of z1		bit 1,0 of x1	logb(x)
4443a8617a8SJordan K. Hubbard 	-------------------------------------------------
4453a8617a8SJordan K. Hubbard 	00			00		odd and even
4463a8617a8SJordan K. Hubbard 	01			01		even
4473a8617a8SJordan K. Hubbard 	10			10		odd
4483a8617a8SJordan K. Hubbard 	10			00		even
4493a8617a8SJordan K. Hubbard 	11			01		even
4503a8617a8SJordan K. Hubbard 	-------------------------------------------------
4513a8617a8SJordan K. Hubbard 
4523a8617a8SJordan K. Hubbard     (4)	Special cases (see (4) of Section A).
4533a8617a8SJordan K. Hubbard 
4543a8617a8SJordan K. Hubbard  */
4553a8617a8SJordan K. Hubbard 
456