13f708241SDavid Schultz 23f708241SDavid Schultz /* @(#)e_sqrt.c 1.3 95/01/18 */ 33a8617a8SJordan K. Hubbard /* 43a8617a8SJordan K. Hubbard * ==================================================== 53a8617a8SJordan K. Hubbard * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 63a8617a8SJordan K. Hubbard * 73f708241SDavid Schultz * Developed at SunSoft, a Sun Microsystems, Inc. business. 83a8617a8SJordan K. Hubbard * Permission to use, copy, modify, and distribute this 93a8617a8SJordan K. Hubbard * software is freely granted, provided that this notice 103a8617a8SJordan K. Hubbard * is preserved. 113a8617a8SJordan K. Hubbard * ==================================================== 123a8617a8SJordan K. Hubbard */ 133a8617a8SJordan K. Hubbard 14c6a4447bSDavid Schultz #include <sys/cdefs.h> 15c6a4447bSDavid Schultz __FBSDID("$FreeBSD$"); 163a8617a8SJordan K. Hubbard 173a8617a8SJordan K. Hubbard /* __ieee754_sqrt(x) 183a8617a8SJordan K. Hubbard * Return correctly rounded sqrt. 193a8617a8SJordan K. Hubbard * ------------------------------------------ 203a8617a8SJordan K. Hubbard * | Use the hardware sqrt if you have one | 213a8617a8SJordan K. Hubbard * ------------------------------------------ 223a8617a8SJordan K. Hubbard * Method: 233a8617a8SJordan K. Hubbard * Bit by bit method using integer arithmetic. (Slow, but portable) 243a8617a8SJordan K. Hubbard * 1. Normalization 253a8617a8SJordan K. Hubbard * Scale x to y in [1,4) with even powers of 2: 263a8617a8SJordan K. Hubbard * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 273a8617a8SJordan K. Hubbard * sqrt(x) = 2^k * sqrt(y) 283a8617a8SJordan K. Hubbard * 2. Bit by bit computation 293a8617a8SJordan K. Hubbard * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 303a8617a8SJordan K. Hubbard * i 0 313a8617a8SJordan K. Hubbard * i+1 2 323a8617a8SJordan K. Hubbard * s = 2*q , and y = 2 * ( y - q ). (1) 333a8617a8SJordan K. Hubbard * i i i i 343a8617a8SJordan K. Hubbard * 353a8617a8SJordan K. Hubbard * To compute q from q , one checks whether 363a8617a8SJordan K. Hubbard * i+1 i 373a8617a8SJordan K. Hubbard * 383a8617a8SJordan K. Hubbard * -(i+1) 2 393a8617a8SJordan K. Hubbard * (q + 2 ) <= y. (2) 403a8617a8SJordan K. Hubbard * i 413a8617a8SJordan K. Hubbard * -(i+1) 423a8617a8SJordan K. Hubbard * If (2) is false, then q = q ; otherwise q = q + 2 . 433a8617a8SJordan K. Hubbard * i+1 i i+1 i 443a8617a8SJordan K. Hubbard * 453a8617a8SJordan K. Hubbard * With some algebric manipulation, it is not difficult to see 463a8617a8SJordan K. Hubbard * that (2) is equivalent to 473a8617a8SJordan K. Hubbard * -(i+1) 483a8617a8SJordan K. Hubbard * s + 2 <= y (3) 493a8617a8SJordan K. Hubbard * i i 503a8617a8SJordan K. Hubbard * 513a8617a8SJordan K. Hubbard * The advantage of (3) is that s and y can be computed by 523a8617a8SJordan K. Hubbard * i i 533a8617a8SJordan K. Hubbard * the following recurrence formula: 543a8617a8SJordan K. Hubbard * if (3) is false 553a8617a8SJordan K. Hubbard * 563a8617a8SJordan K. Hubbard * s = s , y = y ; (4) 573a8617a8SJordan K. Hubbard * i+1 i i+1 i 583a8617a8SJordan K. Hubbard * 593a8617a8SJordan K. Hubbard * otherwise, 603a8617a8SJordan K. Hubbard * -i -(i+1) 613a8617a8SJordan K. Hubbard * s = s + 2 , y = y - s - 2 (5) 623a8617a8SJordan K. Hubbard * i+1 i i+1 i i 633a8617a8SJordan K. Hubbard * 643a8617a8SJordan K. Hubbard * One may easily use induction to prove (4) and (5). 653a8617a8SJordan K. Hubbard * Note. Since the left hand side of (3) contain only i+2 bits, 663a8617a8SJordan K. Hubbard * it does not necessary to do a full (53-bit) comparison 673a8617a8SJordan K. Hubbard * in (3). 683a8617a8SJordan K. Hubbard * 3. Final rounding 693a8617a8SJordan K. Hubbard * After generating the 53 bits result, we compute one more bit. 703a8617a8SJordan K. Hubbard * Together with the remainder, we can decide whether the 713a8617a8SJordan K. Hubbard * result is exact, bigger than 1/2ulp, or less than 1/2ulp 723a8617a8SJordan K. Hubbard * (it will never equal to 1/2ulp). 733a8617a8SJordan K. Hubbard * The rounding mode can be detected by checking whether 743a8617a8SJordan K. Hubbard * huge + tiny is equal to huge, and whether huge - tiny is 753a8617a8SJordan K. Hubbard * equal to huge for some floating point number "huge" and "tiny". 763a8617a8SJordan K. Hubbard * 773a8617a8SJordan K. Hubbard * Special cases: 783a8617a8SJordan K. Hubbard * sqrt(+-0) = +-0 ... exact 793a8617a8SJordan K. Hubbard * sqrt(inf) = inf 803a8617a8SJordan K. Hubbard * sqrt(-ve) = NaN ... with invalid signal 813a8617a8SJordan K. Hubbard * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 823a8617a8SJordan K. Hubbard * 833a8617a8SJordan K. Hubbard * Other methods : see the appended file at the end of the program below. 843a8617a8SJordan K. Hubbard *--------------- 853a8617a8SJordan K. Hubbard */ 863a8617a8SJordan K. Hubbard 87c6a4447bSDavid Schultz #include <float.h> 88c6a4447bSDavid Schultz 893a8617a8SJordan K. Hubbard #include "math.h" 903a8617a8SJordan K. Hubbard #include "math_private.h" 913a8617a8SJordan K. Hubbard 923a8617a8SJordan K. Hubbard static const double one = 1.0, tiny=1.0e-300; 933a8617a8SJordan K. Hubbard 9459b19ff1SAlfred Perlstein double 953819e840SPeter Wemm __ieee754_sqrt(double x) 963a8617a8SJordan K. Hubbard { 973a8617a8SJordan K. Hubbard double z; 983a8617a8SJordan K. Hubbard int32_t sign = (int)0x80000000; 993a8617a8SJordan K. Hubbard int32_t ix0,s0,q,m,t,i; 1003a8617a8SJordan K. Hubbard u_int32_t r,t1,s1,ix1,q1; 1013a8617a8SJordan K. Hubbard 1023a8617a8SJordan K. Hubbard EXTRACT_WORDS(ix0,ix1,x); 1033a8617a8SJordan K. Hubbard 1043a8617a8SJordan K. Hubbard /* take care of Inf and NaN */ 1053a8617a8SJordan K. Hubbard if((ix0&0x7ff00000)==0x7ff00000) { 1063a8617a8SJordan K. Hubbard return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf 1073a8617a8SJordan K. Hubbard sqrt(-inf)=sNaN */ 1083a8617a8SJordan K. Hubbard } 1093a8617a8SJordan K. Hubbard /* take care of zero */ 1103a8617a8SJordan K. Hubbard if(ix0<=0) { 1113a8617a8SJordan K. Hubbard if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ 1123a8617a8SJordan K. Hubbard else if(ix0<0) 1133a8617a8SJordan K. Hubbard return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ 1143a8617a8SJordan K. Hubbard } 1153a8617a8SJordan K. Hubbard /* normalize x */ 1163a8617a8SJordan K. Hubbard m = (ix0>>20); 1173a8617a8SJordan K. Hubbard if(m==0) { /* subnormal x */ 1183a8617a8SJordan K. Hubbard while(ix0==0) { 1193a8617a8SJordan K. Hubbard m -= 21; 1203a8617a8SJordan K. Hubbard ix0 |= (ix1>>11); ix1 <<= 21; 1213a8617a8SJordan K. Hubbard } 1223a8617a8SJordan K. Hubbard for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; 1233a8617a8SJordan K. Hubbard m -= i-1; 1243a8617a8SJordan K. Hubbard ix0 |= (ix1>>(32-i)); 1253a8617a8SJordan K. Hubbard ix1 <<= i; 1263a8617a8SJordan K. Hubbard } 1273a8617a8SJordan K. Hubbard m -= 1023; /* unbias exponent */ 1283a8617a8SJordan K. Hubbard ix0 = (ix0&0x000fffff)|0x00100000; 1293a8617a8SJordan K. Hubbard if(m&1){ /* odd m, double x to make it even */ 1303a8617a8SJordan K. Hubbard ix0 += ix0 + ((ix1&sign)>>31); 1313a8617a8SJordan K. Hubbard ix1 += ix1; 1323a8617a8SJordan K. Hubbard } 1333a8617a8SJordan K. Hubbard m >>= 1; /* m = [m/2] */ 1343a8617a8SJordan K. Hubbard 1353a8617a8SJordan K. Hubbard /* generate sqrt(x) bit by bit */ 1363a8617a8SJordan K. Hubbard ix0 += ix0 + ((ix1&sign)>>31); 1373a8617a8SJordan K. Hubbard ix1 += ix1; 1383a8617a8SJordan K. Hubbard q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 1393a8617a8SJordan K. Hubbard r = 0x00200000; /* r = moving bit from right to left */ 1403a8617a8SJordan K. Hubbard 1413a8617a8SJordan K. Hubbard while(r!=0) { 1423a8617a8SJordan K. Hubbard t = s0+r; 1433a8617a8SJordan K. Hubbard if(t<=ix0) { 1443a8617a8SJordan K. Hubbard s0 = t+r; 1453a8617a8SJordan K. Hubbard ix0 -= t; 1463a8617a8SJordan K. Hubbard q += r; 1473a8617a8SJordan K. Hubbard } 1483a8617a8SJordan K. Hubbard ix0 += ix0 + ((ix1&sign)>>31); 1493a8617a8SJordan K. Hubbard ix1 += ix1; 1503a8617a8SJordan K. Hubbard r>>=1; 1513a8617a8SJordan K. Hubbard } 1523a8617a8SJordan K. Hubbard 1533a8617a8SJordan K. Hubbard r = sign; 1543a8617a8SJordan K. Hubbard while(r!=0) { 1553a8617a8SJordan K. Hubbard t1 = s1+r; 1563a8617a8SJordan K. Hubbard t = s0; 1573a8617a8SJordan K. Hubbard if((t<ix0)||((t==ix0)&&(t1<=ix1))) { 1583a8617a8SJordan K. Hubbard s1 = t1+r; 1593a8617a8SJordan K. Hubbard if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; 1603a8617a8SJordan K. Hubbard ix0 -= t; 1613a8617a8SJordan K. Hubbard if (ix1 < t1) ix0 -= 1; 1623a8617a8SJordan K. Hubbard ix1 -= t1; 1633a8617a8SJordan K. Hubbard q1 += r; 1643a8617a8SJordan K. Hubbard } 1653a8617a8SJordan K. Hubbard ix0 += ix0 + ((ix1&sign)>>31); 1663a8617a8SJordan K. Hubbard ix1 += ix1; 1673a8617a8SJordan K. Hubbard r>>=1; 1683a8617a8SJordan K. Hubbard } 1693a8617a8SJordan K. Hubbard 1703a8617a8SJordan K. Hubbard /* use floating add to find out rounding direction */ 1713a8617a8SJordan K. Hubbard if((ix0|ix1)!=0) { 1723a8617a8SJordan K. Hubbard z = one-tiny; /* trigger inexact flag */ 1733a8617a8SJordan K. Hubbard if (z>=one) { 1743a8617a8SJordan K. Hubbard z = one+tiny; 1753a8617a8SJordan K. Hubbard if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;} 1763a8617a8SJordan K. Hubbard else if (z>one) { 1773a8617a8SJordan K. Hubbard if (q1==(u_int32_t)0xfffffffe) q+=1; 1783a8617a8SJordan K. Hubbard q1+=2; 1793a8617a8SJordan K. Hubbard } else 1803a8617a8SJordan K. Hubbard q1 += (q1&1); 1813a8617a8SJordan K. Hubbard } 1823a8617a8SJordan K. Hubbard } 1833a8617a8SJordan K. Hubbard ix0 = (q>>1)+0x3fe00000; 1843a8617a8SJordan K. Hubbard ix1 = q1>>1; 1853a8617a8SJordan K. Hubbard if ((q&1)==1) ix1 |= sign; 1863a8617a8SJordan K. Hubbard ix0 += (m <<20); 1873a8617a8SJordan K. Hubbard INSERT_WORDS(z,ix0,ix1); 1883a8617a8SJordan K. Hubbard return z; 1893a8617a8SJordan K. Hubbard } 1903a8617a8SJordan K. Hubbard 191c6a4447bSDavid Schultz #if (LDBL_MANT_DIG == 53) 192c6a4447bSDavid Schultz __weak_reference(sqrt, sqrtl); 193c6a4447bSDavid Schultz #endif 194c6a4447bSDavid Schultz 1953a8617a8SJordan K. Hubbard /* 1963a8617a8SJordan K. Hubbard Other methods (use floating-point arithmetic) 1973a8617a8SJordan K. Hubbard ------------- 1983a8617a8SJordan K. Hubbard (This is a copy of a drafted paper by Prof W. Kahan 1993a8617a8SJordan K. Hubbard and K.C. Ng, written in May, 1986) 2003a8617a8SJordan K. Hubbard 2013a8617a8SJordan K. Hubbard Two algorithms are given here to implement sqrt(x) 2023a8617a8SJordan K. Hubbard (IEEE double precision arithmetic) in software. 2033a8617a8SJordan K. Hubbard Both supply sqrt(x) correctly rounded. The first algorithm (in 2043a8617a8SJordan K. Hubbard Section A) uses newton iterations and involves four divisions. 2053a8617a8SJordan K. Hubbard The second one uses reciproot iterations to avoid division, but 2063a8617a8SJordan K. Hubbard requires more multiplications. Both algorithms need the ability 2073a8617a8SJordan K. Hubbard to chop results of arithmetic operations instead of round them, 2083a8617a8SJordan K. Hubbard and the INEXACT flag to indicate when an arithmetic operation 2093a8617a8SJordan K. Hubbard is executed exactly with no roundoff error, all part of the 2103a8617a8SJordan K. Hubbard standard (IEEE 754-1985). The ability to perform shift, add, 2113a8617a8SJordan K. Hubbard subtract and logical AND operations upon 32-bit words is needed 2123a8617a8SJordan K. Hubbard too, though not part of the standard. 2133a8617a8SJordan K. Hubbard 2143a8617a8SJordan K. Hubbard A. sqrt(x) by Newton Iteration 2153a8617a8SJordan K. Hubbard 2163a8617a8SJordan K. Hubbard (1) Initial approximation 2173a8617a8SJordan K. Hubbard 2183a8617a8SJordan K. Hubbard Let x0 and x1 be the leading and the trailing 32-bit words of 2193a8617a8SJordan K. Hubbard a floating point number x (in IEEE double format) respectively 2203a8617a8SJordan K. Hubbard 2213a8617a8SJordan K. Hubbard 1 11 52 ...widths 2223a8617a8SJordan K. Hubbard ------------------------------------------------------ 2233a8617a8SJordan K. Hubbard x: |s| e | f | 2243a8617a8SJordan K. Hubbard ------------------------------------------------------ 2253a8617a8SJordan K. Hubbard msb lsb msb lsb ...order 2263a8617a8SJordan K. Hubbard 2273a8617a8SJordan K. Hubbard 2283a8617a8SJordan K. Hubbard ------------------------ ------------------------ 2293a8617a8SJordan K. Hubbard x0: |s| e | f1 | x1: | f2 | 2303a8617a8SJordan K. Hubbard ------------------------ ------------------------ 2313a8617a8SJordan K. Hubbard 2323a8617a8SJordan K. Hubbard By performing shifts and subtracts on x0 and x1 (both regarded 2333a8617a8SJordan K. Hubbard as integers), we obtain an 8-bit approximation of sqrt(x) as 2343a8617a8SJordan K. Hubbard follows. 2353a8617a8SJordan K. Hubbard 2363a8617a8SJordan K. Hubbard k := (x0>>1) + 0x1ff80000; 2373a8617a8SJordan K. Hubbard y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 2383a8617a8SJordan K. Hubbard Here k is a 32-bit integer and T1[] is an integer array containing 2393a8617a8SJordan K. Hubbard correction terms. Now magically the floating value of y (y's 2403a8617a8SJordan K. Hubbard leading 32-bit word is y0, the value of its trailing word is 0) 2413a8617a8SJordan K. Hubbard approximates sqrt(x) to almost 8-bit. 2423a8617a8SJordan K. Hubbard 2433a8617a8SJordan K. Hubbard Value of T1: 2443a8617a8SJordan K. Hubbard static int T1[32]= { 2453a8617a8SJordan K. Hubbard 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 2463a8617a8SJordan K. Hubbard 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 2473a8617a8SJordan K. Hubbard 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 2483a8617a8SJordan K. Hubbard 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 2493a8617a8SJordan K. Hubbard 2503a8617a8SJordan K. Hubbard (2) Iterative refinement 2513a8617a8SJordan K. Hubbard 2523a8617a8SJordan K. Hubbard Apply Heron's rule three times to y, we have y approximates 2533a8617a8SJordan K. Hubbard sqrt(x) to within 1 ulp (Unit in the Last Place): 2543a8617a8SJordan K. Hubbard 2553a8617a8SJordan K. Hubbard y := (y+x/y)/2 ... almost 17 sig. bits 2563a8617a8SJordan K. Hubbard y := (y+x/y)/2 ... almost 35 sig. bits 2573a8617a8SJordan K. Hubbard y := y-(y-x/y)/2 ... within 1 ulp 2583a8617a8SJordan K. Hubbard 2593a8617a8SJordan K. Hubbard 2603a8617a8SJordan K. Hubbard Remark 1. 2613a8617a8SJordan K. Hubbard Another way to improve y to within 1 ulp is: 2623a8617a8SJordan K. Hubbard 2633a8617a8SJordan K. Hubbard y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 2643a8617a8SJordan K. Hubbard y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) 2653a8617a8SJordan K. Hubbard 2663a8617a8SJordan K. Hubbard 2 2673a8617a8SJordan K. Hubbard (x-y )*y 2683a8617a8SJordan K. Hubbard y := y + 2* ---------- ...within 1 ulp 2693a8617a8SJordan K. Hubbard 2 2703a8617a8SJordan K. Hubbard 3y + x 2713a8617a8SJordan K. Hubbard 2723a8617a8SJordan K. Hubbard 2733a8617a8SJordan K. Hubbard This formula has one division fewer than the one above; however, 2743a8617a8SJordan K. Hubbard it requires more multiplications and additions. Also x must be 2753a8617a8SJordan K. Hubbard scaled in advance to avoid spurious overflow in evaluating the 2763a8617a8SJordan K. Hubbard expression 3y*y+x. Hence it is not recommended uless division 2773a8617a8SJordan K. Hubbard is slow. If division is very slow, then one should use the 2783a8617a8SJordan K. Hubbard reciproot algorithm given in section B. 2793a8617a8SJordan K. Hubbard 2803a8617a8SJordan K. Hubbard (3) Final adjustment 2813a8617a8SJordan K. Hubbard 2823a8617a8SJordan K. Hubbard By twiddling y's last bit it is possible to force y to be 2833a8617a8SJordan K. Hubbard correctly rounded according to the prevailing rounding mode 2843a8617a8SJordan K. Hubbard as follows. Let r and i be copies of the rounding mode and 2853a8617a8SJordan K. Hubbard inexact flag before entering the square root program. Also we 2863a8617a8SJordan K. Hubbard use the expression y+-ulp for the next representable floating 2873a8617a8SJordan K. Hubbard numbers (up and down) of y. Note that y+-ulp = either fixed 2883a8617a8SJordan K. Hubbard point y+-1, or multiply y by nextafter(1,+-inf) in chopped 2893a8617a8SJordan K. Hubbard mode. 2903a8617a8SJordan K. Hubbard 2913a8617a8SJordan K. Hubbard I := FALSE; ... reset INEXACT flag I 2923a8617a8SJordan K. Hubbard R := RZ; ... set rounding mode to round-toward-zero 2933a8617a8SJordan K. Hubbard z := x/y; ... chopped quotient, possibly inexact 2943a8617a8SJordan K. Hubbard If(not I) then { ... if the quotient is exact 2953a8617a8SJordan K. Hubbard if(z=y) { 2963a8617a8SJordan K. Hubbard I := i; ... restore inexact flag 2973a8617a8SJordan K. Hubbard R := r; ... restore rounded mode 2983a8617a8SJordan K. Hubbard return sqrt(x):=y. 2993a8617a8SJordan K. Hubbard } else { 3003a8617a8SJordan K. Hubbard z := z - ulp; ... special rounding 3013a8617a8SJordan K. Hubbard } 3023a8617a8SJordan K. Hubbard } 3033a8617a8SJordan K. Hubbard i := TRUE; ... sqrt(x) is inexact 3043a8617a8SJordan K. Hubbard If (r=RN) then z=z+ulp ... rounded-to-nearest 3053a8617a8SJordan K. Hubbard If (r=RP) then { ... round-toward-+inf 3063a8617a8SJordan K. Hubbard y = y+ulp; z=z+ulp; 3073a8617a8SJordan K. Hubbard } 3083a8617a8SJordan K. Hubbard y := y+z; ... chopped sum 3093a8617a8SJordan K. Hubbard y0:=y0-0x00100000; ... y := y/2 is correctly rounded. 3103a8617a8SJordan K. Hubbard I := i; ... restore inexact flag 3113a8617a8SJordan K. Hubbard R := r; ... restore rounded mode 3123a8617a8SJordan K. Hubbard return sqrt(x):=y. 3133a8617a8SJordan K. Hubbard 3143a8617a8SJordan K. Hubbard (4) Special cases 3153a8617a8SJordan K. Hubbard 3163a8617a8SJordan K. Hubbard Square root of +inf, +-0, or NaN is itself; 3173a8617a8SJordan K. Hubbard Square root of a negative number is NaN with invalid signal. 3183a8617a8SJordan K. Hubbard 3193a8617a8SJordan K. Hubbard 3203a8617a8SJordan K. Hubbard B. sqrt(x) by Reciproot Iteration 3213a8617a8SJordan K. Hubbard 3223a8617a8SJordan K. Hubbard (1) Initial approximation 3233a8617a8SJordan K. Hubbard 3243a8617a8SJordan K. Hubbard Let x0 and x1 be the leading and the trailing 32-bit words of 3253a8617a8SJordan K. Hubbard a floating point number x (in IEEE double format) respectively 3263a8617a8SJordan K. Hubbard (see section A). By performing shifs and subtracts on x0 and y0, 3273a8617a8SJordan K. Hubbard we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. 3283a8617a8SJordan K. Hubbard 3293a8617a8SJordan K. Hubbard k := 0x5fe80000 - (x0>>1); 3303a8617a8SJordan K. Hubbard y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 3313a8617a8SJordan K. Hubbard 3323a8617a8SJordan K. Hubbard Here k is a 32-bit integer and T2[] is an integer array 3333a8617a8SJordan K. Hubbard containing correction terms. Now magically the floating 3343a8617a8SJordan K. Hubbard value of y (y's leading 32-bit word is y0, the value of 3353a8617a8SJordan K. Hubbard its trailing word y1 is set to zero) approximates 1/sqrt(x) 3363a8617a8SJordan K. Hubbard to almost 7.8-bit. 3373a8617a8SJordan K. Hubbard 3383a8617a8SJordan K. Hubbard Value of T2: 3393a8617a8SJordan K. Hubbard static int T2[64]= { 3403a8617a8SJordan K. Hubbard 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 3413a8617a8SJordan K. Hubbard 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 3423a8617a8SJordan K. Hubbard 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 3433a8617a8SJordan K. Hubbard 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 3443a8617a8SJordan K. Hubbard 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 3453a8617a8SJordan K. Hubbard 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 3463a8617a8SJordan K. Hubbard 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 3473a8617a8SJordan K. Hubbard 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 3483a8617a8SJordan K. Hubbard 3493a8617a8SJordan K. Hubbard (2) Iterative refinement 3503a8617a8SJordan K. Hubbard 3513a8617a8SJordan K. Hubbard Apply Reciproot iteration three times to y and multiply the 3523a8617a8SJordan K. Hubbard result by x to get an approximation z that matches sqrt(x) 3533a8617a8SJordan K. Hubbard to about 1 ulp. To be exact, we will have 3543a8617a8SJordan K. Hubbard -1ulp < sqrt(x)-z<1.0625ulp. 3553a8617a8SJordan K. Hubbard 3563a8617a8SJordan K. Hubbard ... set rounding mode to Round-to-nearest 3573a8617a8SJordan K. Hubbard y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 3583a8617a8SJordan K. Hubbard y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 3593a8617a8SJordan K. Hubbard ... special arrangement for better accuracy 3603a8617a8SJordan K. Hubbard z := x*y ... 29 bits to sqrt(x), with z*y<1 3613a8617a8SJordan K. Hubbard z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) 3623a8617a8SJordan K. Hubbard 3633a8617a8SJordan K. Hubbard Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that 3643a8617a8SJordan K. Hubbard (a) the term z*y in the final iteration is always less than 1; 3653a8617a8SJordan K. Hubbard (b) the error in the final result is biased upward so that 3663a8617a8SJordan K. Hubbard -1 ulp < sqrt(x) - z < 1.0625 ulp 3673a8617a8SJordan K. Hubbard instead of |sqrt(x)-z|<1.03125ulp. 3683a8617a8SJordan K. Hubbard 3693a8617a8SJordan K. Hubbard (3) Final adjustment 3703a8617a8SJordan K. Hubbard 3713a8617a8SJordan K. Hubbard By twiddling y's last bit it is possible to force y to be 3723a8617a8SJordan K. Hubbard correctly rounded according to the prevailing rounding mode 3733a8617a8SJordan K. Hubbard as follows. Let r and i be copies of the rounding mode and 3743a8617a8SJordan K. Hubbard inexact flag before entering the square root program. Also we 3753a8617a8SJordan K. Hubbard use the expression y+-ulp for the next representable floating 3763a8617a8SJordan K. Hubbard numbers (up and down) of y. Note that y+-ulp = either fixed 3773a8617a8SJordan K. Hubbard point y+-1, or multiply y by nextafter(1,+-inf) in chopped 3783a8617a8SJordan K. Hubbard mode. 3793a8617a8SJordan K. Hubbard 3803a8617a8SJordan K. Hubbard R := RZ; ... set rounding mode to round-toward-zero 3813a8617a8SJordan K. Hubbard switch(r) { 3823a8617a8SJordan K. Hubbard case RN: ... round-to-nearest 3833a8617a8SJordan K. Hubbard if(x<= z*(z-ulp)...chopped) z = z - ulp; else 3843a8617a8SJordan K. Hubbard if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 3853a8617a8SJordan K. Hubbard break; 3863a8617a8SJordan K. Hubbard case RZ:case RM: ... round-to-zero or round-to--inf 3873a8617a8SJordan K. Hubbard R:=RP; ... reset rounding mod to round-to-+inf 3883a8617a8SJordan K. Hubbard if(x<z*z ... rounded up) z = z - ulp; else 3893a8617a8SJordan K. Hubbard if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 3903a8617a8SJordan K. Hubbard break; 3913a8617a8SJordan K. Hubbard case RP: ... round-to-+inf 3923a8617a8SJordan K. Hubbard if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 3933a8617a8SJordan K. Hubbard if(x>z*z ...chopped) z = z+ulp; 3943a8617a8SJordan K. Hubbard break; 3953a8617a8SJordan K. Hubbard } 3963a8617a8SJordan K. Hubbard 3973a8617a8SJordan K. Hubbard Remark 3. The above comparisons can be done in fixed point. For 3983a8617a8SJordan K. Hubbard example, to compare x and w=z*z chopped, it suffices to compare 3993a8617a8SJordan K. Hubbard x1 and w1 (the trailing parts of x and w), regarding them as 4003a8617a8SJordan K. Hubbard two's complement integers. 4013a8617a8SJordan K. Hubbard 4023a8617a8SJordan K. Hubbard ...Is z an exact square root? 4033a8617a8SJordan K. Hubbard To determine whether z is an exact square root of x, let z1 be the 4043a8617a8SJordan K. Hubbard trailing part of z, and also let x0 and x1 be the leading and 4053a8617a8SJordan K. Hubbard trailing parts of x. 4063a8617a8SJordan K. Hubbard 4073a8617a8SJordan K. Hubbard If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 4083a8617a8SJordan K. Hubbard I := 1; ... Raise Inexact flag: z is not exact 4093a8617a8SJordan K. Hubbard else { 4103a8617a8SJordan K. Hubbard j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 4113a8617a8SJordan K. Hubbard k := z1 >> 26; ... get z's 25-th and 26-th 4123a8617a8SJordan K. Hubbard fraction bits 4133a8617a8SJordan K. Hubbard I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 4143a8617a8SJordan K. Hubbard } 4153a8617a8SJordan K. Hubbard R:= r ... restore rounded mode 4163a8617a8SJordan K. Hubbard return sqrt(x):=z. 4173a8617a8SJordan K. Hubbard 4183a8617a8SJordan K. Hubbard If multiplication is cheaper then the foregoing red tape, the 4193a8617a8SJordan K. Hubbard Inexact flag can be evaluated by 4203a8617a8SJordan K. Hubbard 4213a8617a8SJordan K. Hubbard I := i; 4223a8617a8SJordan K. Hubbard I := (z*z!=x) or I. 4233a8617a8SJordan K. Hubbard 4243a8617a8SJordan K. Hubbard Note that z*z can overwrite I; this value must be sensed if it is 4253a8617a8SJordan K. Hubbard True. 4263a8617a8SJordan K. Hubbard 4273a8617a8SJordan K. Hubbard Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 4283a8617a8SJordan K. Hubbard zero. 4293a8617a8SJordan K. Hubbard 4303a8617a8SJordan K. Hubbard -------------------- 4313a8617a8SJordan K. Hubbard z1: | f2 | 4323a8617a8SJordan K. Hubbard -------------------- 4333a8617a8SJordan K. Hubbard bit 31 bit 0 4343a8617a8SJordan K. Hubbard 4353a8617a8SJordan K. Hubbard Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 4363a8617a8SJordan K. Hubbard or even of logb(x) have the following relations: 4373a8617a8SJordan K. Hubbard 4383a8617a8SJordan K. Hubbard ------------------------------------------------- 4393a8617a8SJordan K. Hubbard bit 27,26 of z1 bit 1,0 of x1 logb(x) 4403a8617a8SJordan K. Hubbard ------------------------------------------------- 4413a8617a8SJordan K. Hubbard 00 00 odd and even 4423a8617a8SJordan K. Hubbard 01 01 even 4433a8617a8SJordan K. Hubbard 10 10 odd 4443a8617a8SJordan K. Hubbard 10 00 even 4453a8617a8SJordan K. Hubbard 11 01 even 4463a8617a8SJordan K. Hubbard ------------------------------------------------- 4473a8617a8SJordan K. Hubbard 4483a8617a8SJordan K. Hubbard (4) Special cases (see (4) of Section A). 4493a8617a8SJordan K. Hubbard 4503a8617a8SJordan K. Hubbard */ 4513a8617a8SJordan K. Hubbard 452