13a8617a8SJordan K. Hubbard /* @(#)e_sqrt.c 5.1 93/09/24 */ 23a8617a8SJordan K. Hubbard /* 33a8617a8SJordan K. Hubbard * ==================================================== 43a8617a8SJordan K. Hubbard * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 53a8617a8SJordan K. Hubbard * 63a8617a8SJordan K. Hubbard * Developed at SunPro, a Sun Microsystems, Inc. business. 73a8617a8SJordan K. Hubbard * Permission to use, copy, modify, and distribute this 83a8617a8SJordan K. Hubbard * software is freely granted, provided that this notice 93a8617a8SJordan K. Hubbard * is preserved. 103a8617a8SJordan K. Hubbard * ==================================================== 113a8617a8SJordan K. Hubbard */ 123a8617a8SJordan K. Hubbard 133a8617a8SJordan K. Hubbard #ifndef lint 143a8617a8SJordan K. Hubbard static char rcsid[] = "$Id: e_sqrt.c,v 1.6 1994/08/18 23:06:06 jtc Exp $"; 153a8617a8SJordan K. Hubbard #endif 163a8617a8SJordan K. Hubbard 173a8617a8SJordan K. Hubbard /* __ieee754_sqrt(x) 183a8617a8SJordan K. Hubbard * Return correctly rounded sqrt. 193a8617a8SJordan K. Hubbard * ------------------------------------------ 203a8617a8SJordan K. Hubbard * | Use the hardware sqrt if you have one | 213a8617a8SJordan K. Hubbard * ------------------------------------------ 223a8617a8SJordan K. Hubbard * Method: 233a8617a8SJordan K. Hubbard * Bit by bit method using integer arithmetic. (Slow, but portable) 243a8617a8SJordan K. Hubbard * 1. Normalization 253a8617a8SJordan K. Hubbard * Scale x to y in [1,4) with even powers of 2: 263a8617a8SJordan K. Hubbard * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 273a8617a8SJordan K. Hubbard * sqrt(x) = 2^k * sqrt(y) 283a8617a8SJordan K. Hubbard * 2. Bit by bit computation 293a8617a8SJordan K. Hubbard * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 303a8617a8SJordan K. Hubbard * i 0 313a8617a8SJordan K. Hubbard * i+1 2 323a8617a8SJordan K. Hubbard * s = 2*q , and y = 2 * ( y - q ). (1) 333a8617a8SJordan K. Hubbard * i i i i 343a8617a8SJordan K. Hubbard * 353a8617a8SJordan K. Hubbard * To compute q from q , one checks whether 363a8617a8SJordan K. Hubbard * i+1 i 373a8617a8SJordan K. Hubbard * 383a8617a8SJordan K. Hubbard * -(i+1) 2 393a8617a8SJordan K. Hubbard * (q + 2 ) <= y. (2) 403a8617a8SJordan K. Hubbard * i 413a8617a8SJordan K. Hubbard * -(i+1) 423a8617a8SJordan K. Hubbard * If (2) is false, then q = q ; otherwise q = q + 2 . 433a8617a8SJordan K. Hubbard * i+1 i i+1 i 443a8617a8SJordan K. Hubbard * 453a8617a8SJordan K. Hubbard * With some algebric manipulation, it is not difficult to see 463a8617a8SJordan K. Hubbard * that (2) is equivalent to 473a8617a8SJordan K. Hubbard * -(i+1) 483a8617a8SJordan K. Hubbard * s + 2 <= y (3) 493a8617a8SJordan K. Hubbard * i i 503a8617a8SJordan K. Hubbard * 513a8617a8SJordan K. Hubbard * The advantage of (3) is that s and y can be computed by 523a8617a8SJordan K. Hubbard * i i 533a8617a8SJordan K. Hubbard * the following recurrence formula: 543a8617a8SJordan K. Hubbard * if (3) is false 553a8617a8SJordan K. Hubbard * 563a8617a8SJordan K. Hubbard * s = s , y = y ; (4) 573a8617a8SJordan K. Hubbard * i+1 i i+1 i 583a8617a8SJordan K. Hubbard * 593a8617a8SJordan K. Hubbard * otherwise, 603a8617a8SJordan K. Hubbard * -i -(i+1) 613a8617a8SJordan K. Hubbard * s = s + 2 , y = y - s - 2 (5) 623a8617a8SJordan K. Hubbard * i+1 i i+1 i i 633a8617a8SJordan K. Hubbard * 643a8617a8SJordan K. Hubbard * One may easily use induction to prove (4) and (5). 653a8617a8SJordan K. Hubbard * Note. Since the left hand side of (3) contain only i+2 bits, 663a8617a8SJordan K. Hubbard * it does not necessary to do a full (53-bit) comparison 673a8617a8SJordan K. Hubbard * in (3). 683a8617a8SJordan K. Hubbard * 3. Final rounding 693a8617a8SJordan K. Hubbard * After generating the 53 bits result, we compute one more bit. 703a8617a8SJordan K. Hubbard * Together with the remainder, we can decide whether the 713a8617a8SJordan K. Hubbard * result is exact, bigger than 1/2ulp, or less than 1/2ulp 723a8617a8SJordan K. Hubbard * (it will never equal to 1/2ulp). 733a8617a8SJordan K. Hubbard * The rounding mode can be detected by checking whether 743a8617a8SJordan K. Hubbard * huge + tiny is equal to huge, and whether huge - tiny is 753a8617a8SJordan K. Hubbard * equal to huge for some floating point number "huge" and "tiny". 763a8617a8SJordan K. Hubbard * 773a8617a8SJordan K. Hubbard * Special cases: 783a8617a8SJordan K. Hubbard * sqrt(+-0) = +-0 ... exact 793a8617a8SJordan K. Hubbard * sqrt(inf) = inf 803a8617a8SJordan K. Hubbard * sqrt(-ve) = NaN ... with invalid signal 813a8617a8SJordan K. Hubbard * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 823a8617a8SJordan K. Hubbard * 833a8617a8SJordan K. Hubbard * Other methods : see the appended file at the end of the program below. 843a8617a8SJordan K. Hubbard *--------------- 853a8617a8SJordan K. Hubbard */ 863a8617a8SJordan K. Hubbard 873a8617a8SJordan K. Hubbard #include "math.h" 883a8617a8SJordan K. Hubbard #include "math_private.h" 893a8617a8SJordan K. Hubbard 903a8617a8SJordan K. Hubbard #ifdef __STDC__ 913a8617a8SJordan K. Hubbard static const double one = 1.0, tiny=1.0e-300; 923a8617a8SJordan K. Hubbard #else 933a8617a8SJordan K. Hubbard static double one = 1.0, tiny=1.0e-300; 943a8617a8SJordan K. Hubbard #endif 953a8617a8SJordan K. Hubbard 963a8617a8SJordan K. Hubbard #ifdef __STDC__ 973a8617a8SJordan K. Hubbard double __ieee754_sqrt(double x) 983a8617a8SJordan K. Hubbard #else 993a8617a8SJordan K. Hubbard double __ieee754_sqrt(x) 1003a8617a8SJordan K. Hubbard double x; 1013a8617a8SJordan K. Hubbard #endif 1023a8617a8SJordan K. Hubbard { 1033a8617a8SJordan K. Hubbard double z; 1043a8617a8SJordan K. Hubbard int32_t sign = (int)0x80000000; 1053a8617a8SJordan K. Hubbard int32_t ix0,s0,q,m,t,i; 1063a8617a8SJordan K. Hubbard u_int32_t r,t1,s1,ix1,q1; 1073a8617a8SJordan K. Hubbard 1083a8617a8SJordan K. Hubbard EXTRACT_WORDS(ix0,ix1,x); 1093a8617a8SJordan K. Hubbard 1103a8617a8SJordan K. Hubbard /* take care of Inf and NaN */ 1113a8617a8SJordan K. Hubbard if((ix0&0x7ff00000)==0x7ff00000) { 1123a8617a8SJordan K. Hubbard return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf 1133a8617a8SJordan K. Hubbard sqrt(-inf)=sNaN */ 1143a8617a8SJordan K. Hubbard } 1153a8617a8SJordan K. Hubbard /* take care of zero */ 1163a8617a8SJordan K. Hubbard if(ix0<=0) { 1173a8617a8SJordan K. Hubbard if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ 1183a8617a8SJordan K. Hubbard else if(ix0<0) 1193a8617a8SJordan K. Hubbard return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ 1203a8617a8SJordan K. Hubbard } 1213a8617a8SJordan K. Hubbard /* normalize x */ 1223a8617a8SJordan K. Hubbard m = (ix0>>20); 1233a8617a8SJordan K. Hubbard if(m==0) { /* subnormal x */ 1243a8617a8SJordan K. Hubbard while(ix0==0) { 1253a8617a8SJordan K. Hubbard m -= 21; 1263a8617a8SJordan K. Hubbard ix0 |= (ix1>>11); ix1 <<= 21; 1273a8617a8SJordan K. Hubbard } 1283a8617a8SJordan K. Hubbard for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; 1293a8617a8SJordan K. Hubbard m -= i-1; 1303a8617a8SJordan K. Hubbard ix0 |= (ix1>>(32-i)); 1313a8617a8SJordan K. Hubbard ix1 <<= i; 1323a8617a8SJordan K. Hubbard } 1333a8617a8SJordan K. Hubbard m -= 1023; /* unbias exponent */ 1343a8617a8SJordan K. Hubbard ix0 = (ix0&0x000fffff)|0x00100000; 1353a8617a8SJordan K. Hubbard if(m&1){ /* odd m, double x to make it even */ 1363a8617a8SJordan K. Hubbard ix0 += ix0 + ((ix1&sign)>>31); 1373a8617a8SJordan K. Hubbard ix1 += ix1; 1383a8617a8SJordan K. Hubbard } 1393a8617a8SJordan K. Hubbard m >>= 1; /* m = [m/2] */ 1403a8617a8SJordan K. Hubbard 1413a8617a8SJordan K. Hubbard /* generate sqrt(x) bit by bit */ 1423a8617a8SJordan K. Hubbard ix0 += ix0 + ((ix1&sign)>>31); 1433a8617a8SJordan K. Hubbard ix1 += ix1; 1443a8617a8SJordan K. Hubbard q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 1453a8617a8SJordan K. Hubbard r = 0x00200000; /* r = moving bit from right to left */ 1463a8617a8SJordan K. Hubbard 1473a8617a8SJordan K. Hubbard while(r!=0) { 1483a8617a8SJordan K. Hubbard t = s0+r; 1493a8617a8SJordan K. Hubbard if(t<=ix0) { 1503a8617a8SJordan K. Hubbard s0 = t+r; 1513a8617a8SJordan K. Hubbard ix0 -= t; 1523a8617a8SJordan K. Hubbard q += r; 1533a8617a8SJordan K. Hubbard } 1543a8617a8SJordan K. Hubbard ix0 += ix0 + ((ix1&sign)>>31); 1553a8617a8SJordan K. Hubbard ix1 += ix1; 1563a8617a8SJordan K. Hubbard r>>=1; 1573a8617a8SJordan K. Hubbard } 1583a8617a8SJordan K. Hubbard 1593a8617a8SJordan K. Hubbard r = sign; 1603a8617a8SJordan K. Hubbard while(r!=0) { 1613a8617a8SJordan K. Hubbard t1 = s1+r; 1623a8617a8SJordan K. Hubbard t = s0; 1633a8617a8SJordan K. Hubbard if((t<ix0)||((t==ix0)&&(t1<=ix1))) { 1643a8617a8SJordan K. Hubbard s1 = t1+r; 1653a8617a8SJordan K. Hubbard if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; 1663a8617a8SJordan K. Hubbard ix0 -= t; 1673a8617a8SJordan K. Hubbard if (ix1 < t1) ix0 -= 1; 1683a8617a8SJordan K. Hubbard ix1 -= t1; 1693a8617a8SJordan K. Hubbard q1 += r; 1703a8617a8SJordan K. Hubbard } 1713a8617a8SJordan K. Hubbard ix0 += ix0 + ((ix1&sign)>>31); 1723a8617a8SJordan K. Hubbard ix1 += ix1; 1733a8617a8SJordan K. Hubbard r>>=1; 1743a8617a8SJordan K. Hubbard } 1753a8617a8SJordan K. Hubbard 1763a8617a8SJordan K. Hubbard /* use floating add to find out rounding direction */ 1773a8617a8SJordan K. Hubbard if((ix0|ix1)!=0) { 1783a8617a8SJordan K. Hubbard z = one-tiny; /* trigger inexact flag */ 1793a8617a8SJordan K. Hubbard if (z>=one) { 1803a8617a8SJordan K. Hubbard z = one+tiny; 1813a8617a8SJordan K. Hubbard if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;} 1823a8617a8SJordan K. Hubbard else if (z>one) { 1833a8617a8SJordan K. Hubbard if (q1==(u_int32_t)0xfffffffe) q+=1; 1843a8617a8SJordan K. Hubbard q1+=2; 1853a8617a8SJordan K. Hubbard } else 1863a8617a8SJordan K. Hubbard q1 += (q1&1); 1873a8617a8SJordan K. Hubbard } 1883a8617a8SJordan K. Hubbard } 1893a8617a8SJordan K. Hubbard ix0 = (q>>1)+0x3fe00000; 1903a8617a8SJordan K. Hubbard ix1 = q1>>1; 1913a8617a8SJordan K. Hubbard if ((q&1)==1) ix1 |= sign; 1923a8617a8SJordan K. Hubbard ix0 += (m <<20); 1933a8617a8SJordan K. Hubbard INSERT_WORDS(z,ix0,ix1); 1943a8617a8SJordan K. Hubbard return z; 1953a8617a8SJordan K. Hubbard } 1963a8617a8SJordan K. Hubbard 1973a8617a8SJordan K. Hubbard /* 1983a8617a8SJordan K. Hubbard Other methods (use floating-point arithmetic) 1993a8617a8SJordan K. Hubbard ------------- 2003a8617a8SJordan K. Hubbard (This is a copy of a drafted paper by Prof W. Kahan 2013a8617a8SJordan K. Hubbard and K.C. Ng, written in May, 1986) 2023a8617a8SJordan K. Hubbard 2033a8617a8SJordan K. Hubbard Two algorithms are given here to implement sqrt(x) 2043a8617a8SJordan K. Hubbard (IEEE double precision arithmetic) in software. 2053a8617a8SJordan K. Hubbard Both supply sqrt(x) correctly rounded. The first algorithm (in 2063a8617a8SJordan K. Hubbard Section A) uses newton iterations and involves four divisions. 2073a8617a8SJordan K. Hubbard The second one uses reciproot iterations to avoid division, but 2083a8617a8SJordan K. Hubbard requires more multiplications. Both algorithms need the ability 2093a8617a8SJordan K. Hubbard to chop results of arithmetic operations instead of round them, 2103a8617a8SJordan K. Hubbard and the INEXACT flag to indicate when an arithmetic operation 2113a8617a8SJordan K. Hubbard is executed exactly with no roundoff error, all part of the 2123a8617a8SJordan K. Hubbard standard (IEEE 754-1985). The ability to perform shift, add, 2133a8617a8SJordan K. Hubbard subtract and logical AND operations upon 32-bit words is needed 2143a8617a8SJordan K. Hubbard too, though not part of the standard. 2153a8617a8SJordan K. Hubbard 2163a8617a8SJordan K. Hubbard A. sqrt(x) by Newton Iteration 2173a8617a8SJordan K. Hubbard 2183a8617a8SJordan K. Hubbard (1) Initial approximation 2193a8617a8SJordan K. Hubbard 2203a8617a8SJordan K. Hubbard Let x0 and x1 be the leading and the trailing 32-bit words of 2213a8617a8SJordan K. Hubbard a floating point number x (in IEEE double format) respectively 2223a8617a8SJordan K. Hubbard 2233a8617a8SJordan K. Hubbard 1 11 52 ...widths 2243a8617a8SJordan K. Hubbard ------------------------------------------------------ 2253a8617a8SJordan K. Hubbard x: |s| e | f | 2263a8617a8SJordan K. Hubbard ------------------------------------------------------ 2273a8617a8SJordan K. Hubbard msb lsb msb lsb ...order 2283a8617a8SJordan K. Hubbard 2293a8617a8SJordan K. Hubbard 2303a8617a8SJordan K. Hubbard ------------------------ ------------------------ 2313a8617a8SJordan K. Hubbard x0: |s| e | f1 | x1: | f2 | 2323a8617a8SJordan K. Hubbard ------------------------ ------------------------ 2333a8617a8SJordan K. Hubbard 2343a8617a8SJordan K. Hubbard By performing shifts and subtracts on x0 and x1 (both regarded 2353a8617a8SJordan K. Hubbard as integers), we obtain an 8-bit approximation of sqrt(x) as 2363a8617a8SJordan K. Hubbard follows. 2373a8617a8SJordan K. Hubbard 2383a8617a8SJordan K. Hubbard k := (x0>>1) + 0x1ff80000; 2393a8617a8SJordan K. Hubbard y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 2403a8617a8SJordan K. Hubbard Here k is a 32-bit integer and T1[] is an integer array containing 2413a8617a8SJordan K. Hubbard correction terms. Now magically the floating value of y (y's 2423a8617a8SJordan K. Hubbard leading 32-bit word is y0, the value of its trailing word is 0) 2433a8617a8SJordan K. Hubbard approximates sqrt(x) to almost 8-bit. 2443a8617a8SJordan K. Hubbard 2453a8617a8SJordan K. Hubbard Value of T1: 2463a8617a8SJordan K. Hubbard static int T1[32]= { 2473a8617a8SJordan K. Hubbard 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 2483a8617a8SJordan K. Hubbard 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 2493a8617a8SJordan K. Hubbard 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 2503a8617a8SJordan K. Hubbard 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 2513a8617a8SJordan K. Hubbard 2523a8617a8SJordan K. Hubbard (2) Iterative refinement 2533a8617a8SJordan K. Hubbard 2543a8617a8SJordan K. Hubbard Apply Heron's rule three times to y, we have y approximates 2553a8617a8SJordan K. Hubbard sqrt(x) to within 1 ulp (Unit in the Last Place): 2563a8617a8SJordan K. Hubbard 2573a8617a8SJordan K. Hubbard y := (y+x/y)/2 ... almost 17 sig. bits 2583a8617a8SJordan K. Hubbard y := (y+x/y)/2 ... almost 35 sig. bits 2593a8617a8SJordan K. Hubbard y := y-(y-x/y)/2 ... within 1 ulp 2603a8617a8SJordan K. Hubbard 2613a8617a8SJordan K. Hubbard 2623a8617a8SJordan K. Hubbard Remark 1. 2633a8617a8SJordan K. Hubbard Another way to improve y to within 1 ulp is: 2643a8617a8SJordan K. Hubbard 2653a8617a8SJordan K. Hubbard y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 2663a8617a8SJordan K. Hubbard y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) 2673a8617a8SJordan K. Hubbard 2683a8617a8SJordan K. Hubbard 2 2693a8617a8SJordan K. Hubbard (x-y )*y 2703a8617a8SJordan K. Hubbard y := y + 2* ---------- ...within 1 ulp 2713a8617a8SJordan K. Hubbard 2 2723a8617a8SJordan K. Hubbard 3y + x 2733a8617a8SJordan K. Hubbard 2743a8617a8SJordan K. Hubbard 2753a8617a8SJordan K. Hubbard This formula has one division fewer than the one above; however, 2763a8617a8SJordan K. Hubbard it requires more multiplications and additions. Also x must be 2773a8617a8SJordan K. Hubbard scaled in advance to avoid spurious overflow in evaluating the 2783a8617a8SJordan K. Hubbard expression 3y*y+x. Hence it is not recommended uless division 2793a8617a8SJordan K. Hubbard is slow. If division is very slow, then one should use the 2803a8617a8SJordan K. Hubbard reciproot algorithm given in section B. 2813a8617a8SJordan K. Hubbard 2823a8617a8SJordan K. Hubbard (3) Final adjustment 2833a8617a8SJordan K. Hubbard 2843a8617a8SJordan K. Hubbard By twiddling y's last bit it is possible to force y to be 2853a8617a8SJordan K. Hubbard correctly rounded according to the prevailing rounding mode 2863a8617a8SJordan K. Hubbard as follows. Let r and i be copies of the rounding mode and 2873a8617a8SJordan K. Hubbard inexact flag before entering the square root program. Also we 2883a8617a8SJordan K. Hubbard use the expression y+-ulp for the next representable floating 2893a8617a8SJordan K. Hubbard numbers (up and down) of y. Note that y+-ulp = either fixed 2903a8617a8SJordan K. Hubbard point y+-1, or multiply y by nextafter(1,+-inf) in chopped 2913a8617a8SJordan K. Hubbard mode. 2923a8617a8SJordan K. Hubbard 2933a8617a8SJordan K. Hubbard I := FALSE; ... reset INEXACT flag I 2943a8617a8SJordan K. Hubbard R := RZ; ... set rounding mode to round-toward-zero 2953a8617a8SJordan K. Hubbard z := x/y; ... chopped quotient, possibly inexact 2963a8617a8SJordan K. Hubbard If(not I) then { ... if the quotient is exact 2973a8617a8SJordan K. Hubbard if(z=y) { 2983a8617a8SJordan K. Hubbard I := i; ... restore inexact flag 2993a8617a8SJordan K. Hubbard R := r; ... restore rounded mode 3003a8617a8SJordan K. Hubbard return sqrt(x):=y. 3013a8617a8SJordan K. Hubbard } else { 3023a8617a8SJordan K. Hubbard z := z - ulp; ... special rounding 3033a8617a8SJordan K. Hubbard } 3043a8617a8SJordan K. Hubbard } 3053a8617a8SJordan K. Hubbard i := TRUE; ... sqrt(x) is inexact 3063a8617a8SJordan K. Hubbard If (r=RN) then z=z+ulp ... rounded-to-nearest 3073a8617a8SJordan K. Hubbard If (r=RP) then { ... round-toward-+inf 3083a8617a8SJordan K. Hubbard y = y+ulp; z=z+ulp; 3093a8617a8SJordan K. Hubbard } 3103a8617a8SJordan K. Hubbard y := y+z; ... chopped sum 3113a8617a8SJordan K. Hubbard y0:=y0-0x00100000; ... y := y/2 is correctly rounded. 3123a8617a8SJordan K. Hubbard I := i; ... restore inexact flag 3133a8617a8SJordan K. Hubbard R := r; ... restore rounded mode 3143a8617a8SJordan K. Hubbard return sqrt(x):=y. 3153a8617a8SJordan K. Hubbard 3163a8617a8SJordan K. Hubbard (4) Special cases 3173a8617a8SJordan K. Hubbard 3183a8617a8SJordan K. Hubbard Square root of +inf, +-0, or NaN is itself; 3193a8617a8SJordan K. Hubbard Square root of a negative number is NaN with invalid signal. 3203a8617a8SJordan K. Hubbard 3213a8617a8SJordan K. Hubbard 3223a8617a8SJordan K. Hubbard B. sqrt(x) by Reciproot Iteration 3233a8617a8SJordan K. Hubbard 3243a8617a8SJordan K. Hubbard (1) Initial approximation 3253a8617a8SJordan K. Hubbard 3263a8617a8SJordan K. Hubbard Let x0 and x1 be the leading and the trailing 32-bit words of 3273a8617a8SJordan K. Hubbard a floating point number x (in IEEE double format) respectively 3283a8617a8SJordan K. Hubbard (see section A). By performing shifs and subtracts on x0 and y0, 3293a8617a8SJordan K. Hubbard we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. 3303a8617a8SJordan K. Hubbard 3313a8617a8SJordan K. Hubbard k := 0x5fe80000 - (x0>>1); 3323a8617a8SJordan K. Hubbard y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 3333a8617a8SJordan K. Hubbard 3343a8617a8SJordan K. Hubbard Here k is a 32-bit integer and T2[] is an integer array 3353a8617a8SJordan K. Hubbard containing correction terms. Now magically the floating 3363a8617a8SJordan K. Hubbard value of y (y's leading 32-bit word is y0, the value of 3373a8617a8SJordan K. Hubbard its trailing word y1 is set to zero) approximates 1/sqrt(x) 3383a8617a8SJordan K. Hubbard to almost 7.8-bit. 3393a8617a8SJordan K. Hubbard 3403a8617a8SJordan K. Hubbard Value of T2: 3413a8617a8SJordan K. Hubbard static int T2[64]= { 3423a8617a8SJordan K. Hubbard 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 3433a8617a8SJordan K. Hubbard 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 3443a8617a8SJordan K. Hubbard 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 3453a8617a8SJordan K. Hubbard 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 3463a8617a8SJordan K. Hubbard 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 3473a8617a8SJordan K. Hubbard 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 3483a8617a8SJordan K. Hubbard 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 3493a8617a8SJordan K. Hubbard 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 3503a8617a8SJordan K. Hubbard 3513a8617a8SJordan K. Hubbard (2) Iterative refinement 3523a8617a8SJordan K. Hubbard 3533a8617a8SJordan K. Hubbard Apply Reciproot iteration three times to y and multiply the 3543a8617a8SJordan K. Hubbard result by x to get an approximation z that matches sqrt(x) 3553a8617a8SJordan K. Hubbard to about 1 ulp. To be exact, we will have 3563a8617a8SJordan K. Hubbard -1ulp < sqrt(x)-z<1.0625ulp. 3573a8617a8SJordan K. Hubbard 3583a8617a8SJordan K. Hubbard ... set rounding mode to Round-to-nearest 3593a8617a8SJordan K. Hubbard y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 3603a8617a8SJordan K. Hubbard y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 3613a8617a8SJordan K. Hubbard ... special arrangement for better accuracy 3623a8617a8SJordan K. Hubbard z := x*y ... 29 bits to sqrt(x), with z*y<1 3633a8617a8SJordan K. Hubbard z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) 3643a8617a8SJordan K. Hubbard 3653a8617a8SJordan K. Hubbard Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that 3663a8617a8SJordan K. Hubbard (a) the term z*y in the final iteration is always less than 1; 3673a8617a8SJordan K. Hubbard (b) the error in the final result is biased upward so that 3683a8617a8SJordan K. Hubbard -1 ulp < sqrt(x) - z < 1.0625 ulp 3693a8617a8SJordan K. Hubbard instead of |sqrt(x)-z|<1.03125ulp. 3703a8617a8SJordan K. Hubbard 3713a8617a8SJordan K. Hubbard (3) Final adjustment 3723a8617a8SJordan K. Hubbard 3733a8617a8SJordan K. Hubbard By twiddling y's last bit it is possible to force y to be 3743a8617a8SJordan K. Hubbard correctly rounded according to the prevailing rounding mode 3753a8617a8SJordan K. Hubbard as follows. Let r and i be copies of the rounding mode and 3763a8617a8SJordan K. Hubbard inexact flag before entering the square root program. Also we 3773a8617a8SJordan K. Hubbard use the expression y+-ulp for the next representable floating 3783a8617a8SJordan K. Hubbard numbers (up and down) of y. Note that y+-ulp = either fixed 3793a8617a8SJordan K. Hubbard point y+-1, or multiply y by nextafter(1,+-inf) in chopped 3803a8617a8SJordan K. Hubbard mode. 3813a8617a8SJordan K. Hubbard 3823a8617a8SJordan K. Hubbard R := RZ; ... set rounding mode to round-toward-zero 3833a8617a8SJordan K. Hubbard switch(r) { 3843a8617a8SJordan K. Hubbard case RN: ... round-to-nearest 3853a8617a8SJordan K. Hubbard if(x<= z*(z-ulp)...chopped) z = z - ulp; else 3863a8617a8SJordan K. Hubbard if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 3873a8617a8SJordan K. Hubbard break; 3883a8617a8SJordan K. Hubbard case RZ:case RM: ... round-to-zero or round-to--inf 3893a8617a8SJordan K. Hubbard R:=RP; ... reset rounding mod to round-to-+inf 3903a8617a8SJordan K. Hubbard if(x<z*z ... rounded up) z = z - ulp; else 3913a8617a8SJordan K. Hubbard if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 3923a8617a8SJordan K. Hubbard break; 3933a8617a8SJordan K. Hubbard case RP: ... round-to-+inf 3943a8617a8SJordan K. Hubbard if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 3953a8617a8SJordan K. Hubbard if(x>z*z ...chopped) z = z+ulp; 3963a8617a8SJordan K. Hubbard break; 3973a8617a8SJordan K. Hubbard } 3983a8617a8SJordan K. Hubbard 3993a8617a8SJordan K. Hubbard Remark 3. The above comparisons can be done in fixed point. For 4003a8617a8SJordan K. Hubbard example, to compare x and w=z*z chopped, it suffices to compare 4013a8617a8SJordan K. Hubbard x1 and w1 (the trailing parts of x and w), regarding them as 4023a8617a8SJordan K. Hubbard two's complement integers. 4033a8617a8SJordan K. Hubbard 4043a8617a8SJordan K. Hubbard ...Is z an exact square root? 4053a8617a8SJordan K. Hubbard To determine whether z is an exact square root of x, let z1 be the 4063a8617a8SJordan K. Hubbard trailing part of z, and also let x0 and x1 be the leading and 4073a8617a8SJordan K. Hubbard trailing parts of x. 4083a8617a8SJordan K. Hubbard 4093a8617a8SJordan K. Hubbard If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 4103a8617a8SJordan K. Hubbard I := 1; ... Raise Inexact flag: z is not exact 4113a8617a8SJordan K. Hubbard else { 4123a8617a8SJordan K. Hubbard j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 4133a8617a8SJordan K. Hubbard k := z1 >> 26; ... get z's 25-th and 26-th 4143a8617a8SJordan K. Hubbard fraction bits 4153a8617a8SJordan K. Hubbard I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 4163a8617a8SJordan K. Hubbard } 4173a8617a8SJordan K. Hubbard R:= r ... restore rounded mode 4183a8617a8SJordan K. Hubbard return sqrt(x):=z. 4193a8617a8SJordan K. Hubbard 4203a8617a8SJordan K. Hubbard If multiplication is cheaper then the foregoing red tape, the 4213a8617a8SJordan K. Hubbard Inexact flag can be evaluated by 4223a8617a8SJordan K. Hubbard 4233a8617a8SJordan K. Hubbard I := i; 4243a8617a8SJordan K. Hubbard I := (z*z!=x) or I. 4253a8617a8SJordan K. Hubbard 4263a8617a8SJordan K. Hubbard Note that z*z can overwrite I; this value must be sensed if it is 4273a8617a8SJordan K. Hubbard True. 4283a8617a8SJordan K. Hubbard 4293a8617a8SJordan K. Hubbard Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 4303a8617a8SJordan K. Hubbard zero. 4313a8617a8SJordan K. Hubbard 4323a8617a8SJordan K. Hubbard -------------------- 4333a8617a8SJordan K. Hubbard z1: | f2 | 4343a8617a8SJordan K. Hubbard -------------------- 4353a8617a8SJordan K. Hubbard bit 31 bit 0 4363a8617a8SJordan K. Hubbard 4373a8617a8SJordan K. Hubbard Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 4383a8617a8SJordan K. Hubbard or even of logb(x) have the following relations: 4393a8617a8SJordan K. Hubbard 4403a8617a8SJordan K. Hubbard ------------------------------------------------- 4413a8617a8SJordan K. Hubbard bit 27,26 of z1 bit 1,0 of x1 logb(x) 4423a8617a8SJordan K. Hubbard ------------------------------------------------- 4433a8617a8SJordan K. Hubbard 00 00 odd and even 4443a8617a8SJordan K. Hubbard 01 01 even 4453a8617a8SJordan K. Hubbard 10 10 odd 4463a8617a8SJordan K. Hubbard 10 00 even 4473a8617a8SJordan K. Hubbard 11 01 even 4483a8617a8SJordan K. Hubbard ------------------------------------------------- 4493a8617a8SJordan K. Hubbard 4503a8617a8SJordan K. Hubbard (4) Special cases (see (4) of Section A). 4513a8617a8SJordan K. Hubbard 4523a8617a8SJordan K. Hubbard */ 4533a8617a8SJordan K. Hubbard 454