1 2 /* @(#)e_sqrt.c 1.3 95/01/18 */ 3 /* 4 * ==================================================== 5 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 6 * 7 * Developed at SunSoft, a Sun Microsystems, Inc. business. 8 * Permission to use, copy, modify, and distribute this 9 * software is freely granted, provided that this notice 10 * is preserved. 11 * ==================================================== 12 */ 13 14 #ifndef lint 15 static char rcsid[] = "$FreeBSD$"; 16 #endif 17 18 /* __ieee754_sqrt(x) 19 * Return correctly rounded sqrt. 20 * ------------------------------------------ 21 * | Use the hardware sqrt if you have one | 22 * ------------------------------------------ 23 * Method: 24 * Bit by bit method using integer arithmetic. (Slow, but portable) 25 * 1. Normalization 26 * Scale x to y in [1,4) with even powers of 2: 27 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 28 * sqrt(x) = 2^k * sqrt(y) 29 * 2. Bit by bit computation 30 * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 31 * i 0 32 * i+1 2 33 * s = 2*q , and y = 2 * ( y - q ). (1) 34 * i i i i 35 * 36 * To compute q from q , one checks whether 37 * i+1 i 38 * 39 * -(i+1) 2 40 * (q + 2 ) <= y. (2) 41 * i 42 * -(i+1) 43 * If (2) is false, then q = q ; otherwise q = q + 2 . 44 * i+1 i i+1 i 45 * 46 * With some algebric manipulation, it is not difficult to see 47 * that (2) is equivalent to 48 * -(i+1) 49 * s + 2 <= y (3) 50 * i i 51 * 52 * The advantage of (3) is that s and y can be computed by 53 * i i 54 * the following recurrence formula: 55 * if (3) is false 56 * 57 * s = s , y = y ; (4) 58 * i+1 i i+1 i 59 * 60 * otherwise, 61 * -i -(i+1) 62 * s = s + 2 , y = y - s - 2 (5) 63 * i+1 i i+1 i i 64 * 65 * One may easily use induction to prove (4) and (5). 66 * Note. Since the left hand side of (3) contain only i+2 bits, 67 * it does not necessary to do a full (53-bit) comparison 68 * in (3). 69 * 3. Final rounding 70 * After generating the 53 bits result, we compute one more bit. 71 * Together with the remainder, we can decide whether the 72 * result is exact, bigger than 1/2ulp, or less than 1/2ulp 73 * (it will never equal to 1/2ulp). 74 * The rounding mode can be detected by checking whether 75 * huge + tiny is equal to huge, and whether huge - tiny is 76 * equal to huge for some floating point number "huge" and "tiny". 77 * 78 * Special cases: 79 * sqrt(+-0) = +-0 ... exact 80 * sqrt(inf) = inf 81 * sqrt(-ve) = NaN ... with invalid signal 82 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 83 * 84 * Other methods : see the appended file at the end of the program below. 85 *--------------- 86 */ 87 88 #include "math.h" 89 #include "math_private.h" 90 91 static const double one = 1.0, tiny=1.0e-300; 92 93 double 94 __ieee754_sqrt(double x) 95 { 96 double z; 97 int32_t sign = (int)0x80000000; 98 int32_t ix0,s0,q,m,t,i; 99 u_int32_t r,t1,s1,ix1,q1; 100 101 EXTRACT_WORDS(ix0,ix1,x); 102 103 /* take care of Inf and NaN */ 104 if((ix0&0x7ff00000)==0x7ff00000) { 105 return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf 106 sqrt(-inf)=sNaN */ 107 } 108 /* take care of zero */ 109 if(ix0<=0) { 110 if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ 111 else if(ix0<0) 112 return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ 113 } 114 /* normalize x */ 115 m = (ix0>>20); 116 if(m==0) { /* subnormal x */ 117 while(ix0==0) { 118 m -= 21; 119 ix0 |= (ix1>>11); ix1 <<= 21; 120 } 121 for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; 122 m -= i-1; 123 ix0 |= (ix1>>(32-i)); 124 ix1 <<= i; 125 } 126 m -= 1023; /* unbias exponent */ 127 ix0 = (ix0&0x000fffff)|0x00100000; 128 if(m&1){ /* odd m, double x to make it even */ 129 ix0 += ix0 + ((ix1&sign)>>31); 130 ix1 += ix1; 131 } 132 m >>= 1; /* m = [m/2] */ 133 134 /* generate sqrt(x) bit by bit */ 135 ix0 += ix0 + ((ix1&sign)>>31); 136 ix1 += ix1; 137 q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 138 r = 0x00200000; /* r = moving bit from right to left */ 139 140 while(r!=0) { 141 t = s0+r; 142 if(t<=ix0) { 143 s0 = t+r; 144 ix0 -= t; 145 q += r; 146 } 147 ix0 += ix0 + ((ix1&sign)>>31); 148 ix1 += ix1; 149 r>>=1; 150 } 151 152 r = sign; 153 while(r!=0) { 154 t1 = s1+r; 155 t = s0; 156 if((t<ix0)||((t==ix0)&&(t1<=ix1))) { 157 s1 = t1+r; 158 if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; 159 ix0 -= t; 160 if (ix1 < t1) ix0 -= 1; 161 ix1 -= t1; 162 q1 += r; 163 } 164 ix0 += ix0 + ((ix1&sign)>>31); 165 ix1 += ix1; 166 r>>=1; 167 } 168 169 /* use floating add to find out rounding direction */ 170 if((ix0|ix1)!=0) { 171 z = one-tiny; /* trigger inexact flag */ 172 if (z>=one) { 173 z = one+tiny; 174 if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;} 175 else if (z>one) { 176 if (q1==(u_int32_t)0xfffffffe) q+=1; 177 q1+=2; 178 } else 179 q1 += (q1&1); 180 } 181 } 182 ix0 = (q>>1)+0x3fe00000; 183 ix1 = q1>>1; 184 if ((q&1)==1) ix1 |= sign; 185 ix0 += (m <<20); 186 INSERT_WORDS(z,ix0,ix1); 187 return z; 188 } 189 190 /* 191 Other methods (use floating-point arithmetic) 192 ------------- 193 (This is a copy of a drafted paper by Prof W. Kahan 194 and K.C. Ng, written in May, 1986) 195 196 Two algorithms are given here to implement sqrt(x) 197 (IEEE double precision arithmetic) in software. 198 Both supply sqrt(x) correctly rounded. The first algorithm (in 199 Section A) uses newton iterations and involves four divisions. 200 The second one uses reciproot iterations to avoid division, but 201 requires more multiplications. Both algorithms need the ability 202 to chop results of arithmetic operations instead of round them, 203 and the INEXACT flag to indicate when an arithmetic operation 204 is executed exactly with no roundoff error, all part of the 205 standard (IEEE 754-1985). The ability to perform shift, add, 206 subtract and logical AND operations upon 32-bit words is needed 207 too, though not part of the standard. 208 209 A. sqrt(x) by Newton Iteration 210 211 (1) Initial approximation 212 213 Let x0 and x1 be the leading and the trailing 32-bit words of 214 a floating point number x (in IEEE double format) respectively 215 216 1 11 52 ...widths 217 ------------------------------------------------------ 218 x: |s| e | f | 219 ------------------------------------------------------ 220 msb lsb msb lsb ...order 221 222 223 ------------------------ ------------------------ 224 x0: |s| e | f1 | x1: | f2 | 225 ------------------------ ------------------------ 226 227 By performing shifts and subtracts on x0 and x1 (both regarded 228 as integers), we obtain an 8-bit approximation of sqrt(x) as 229 follows. 230 231 k := (x0>>1) + 0x1ff80000; 232 y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 233 Here k is a 32-bit integer and T1[] is an integer array containing 234 correction terms. Now magically the floating value of y (y's 235 leading 32-bit word is y0, the value of its trailing word is 0) 236 approximates sqrt(x) to almost 8-bit. 237 238 Value of T1: 239 static int T1[32]= { 240 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 241 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 242 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 243 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 244 245 (2) Iterative refinement 246 247 Apply Heron's rule three times to y, we have y approximates 248 sqrt(x) to within 1 ulp (Unit in the Last Place): 249 250 y := (y+x/y)/2 ... almost 17 sig. bits 251 y := (y+x/y)/2 ... almost 35 sig. bits 252 y := y-(y-x/y)/2 ... within 1 ulp 253 254 255 Remark 1. 256 Another way to improve y to within 1 ulp is: 257 258 y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 259 y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) 260 261 2 262 (x-y )*y 263 y := y + 2* ---------- ...within 1 ulp 264 2 265 3y + x 266 267 268 This formula has one division fewer than the one above; however, 269 it requires more multiplications and additions. Also x must be 270 scaled in advance to avoid spurious overflow in evaluating the 271 expression 3y*y+x. Hence it is not recommended uless division 272 is slow. If division is very slow, then one should use the 273 reciproot algorithm given in section B. 274 275 (3) Final adjustment 276 277 By twiddling y's last bit it is possible to force y to be 278 correctly rounded according to the prevailing rounding mode 279 as follows. Let r and i be copies of the rounding mode and 280 inexact flag before entering the square root program. Also we 281 use the expression y+-ulp for the next representable floating 282 numbers (up and down) of y. Note that y+-ulp = either fixed 283 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 284 mode. 285 286 I := FALSE; ... reset INEXACT flag I 287 R := RZ; ... set rounding mode to round-toward-zero 288 z := x/y; ... chopped quotient, possibly inexact 289 If(not I) then { ... if the quotient is exact 290 if(z=y) { 291 I := i; ... restore inexact flag 292 R := r; ... restore rounded mode 293 return sqrt(x):=y. 294 } else { 295 z := z - ulp; ... special rounding 296 } 297 } 298 i := TRUE; ... sqrt(x) is inexact 299 If (r=RN) then z=z+ulp ... rounded-to-nearest 300 If (r=RP) then { ... round-toward-+inf 301 y = y+ulp; z=z+ulp; 302 } 303 y := y+z; ... chopped sum 304 y0:=y0-0x00100000; ... y := y/2 is correctly rounded. 305 I := i; ... restore inexact flag 306 R := r; ... restore rounded mode 307 return sqrt(x):=y. 308 309 (4) Special cases 310 311 Square root of +inf, +-0, or NaN is itself; 312 Square root of a negative number is NaN with invalid signal. 313 314 315 B. sqrt(x) by Reciproot Iteration 316 317 (1) Initial approximation 318 319 Let x0 and x1 be the leading and the trailing 32-bit words of 320 a floating point number x (in IEEE double format) respectively 321 (see section A). By performing shifs and subtracts on x0 and y0, 322 we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. 323 324 k := 0x5fe80000 - (x0>>1); 325 y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 326 327 Here k is a 32-bit integer and T2[] is an integer array 328 containing correction terms. Now magically the floating 329 value of y (y's leading 32-bit word is y0, the value of 330 its trailing word y1 is set to zero) approximates 1/sqrt(x) 331 to almost 7.8-bit. 332 333 Value of T2: 334 static int T2[64]= { 335 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 336 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 337 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 338 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 339 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 340 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 341 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 342 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 343 344 (2) Iterative refinement 345 346 Apply Reciproot iteration three times to y and multiply the 347 result by x to get an approximation z that matches sqrt(x) 348 to about 1 ulp. To be exact, we will have 349 -1ulp < sqrt(x)-z<1.0625ulp. 350 351 ... set rounding mode to Round-to-nearest 352 y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 353 y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 354 ... special arrangement for better accuracy 355 z := x*y ... 29 bits to sqrt(x), with z*y<1 356 z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) 357 358 Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that 359 (a) the term z*y in the final iteration is always less than 1; 360 (b) the error in the final result is biased upward so that 361 -1 ulp < sqrt(x) - z < 1.0625 ulp 362 instead of |sqrt(x)-z|<1.03125ulp. 363 364 (3) Final adjustment 365 366 By twiddling y's last bit it is possible to force y to be 367 correctly rounded according to the prevailing rounding mode 368 as follows. Let r and i be copies of the rounding mode and 369 inexact flag before entering the square root program. Also we 370 use the expression y+-ulp for the next representable floating 371 numbers (up and down) of y. Note that y+-ulp = either fixed 372 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 373 mode. 374 375 R := RZ; ... set rounding mode to round-toward-zero 376 switch(r) { 377 case RN: ... round-to-nearest 378 if(x<= z*(z-ulp)...chopped) z = z - ulp; else 379 if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 380 break; 381 case RZ:case RM: ... round-to-zero or round-to--inf 382 R:=RP; ... reset rounding mod to round-to-+inf 383 if(x<z*z ... rounded up) z = z - ulp; else 384 if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 385 break; 386 case RP: ... round-to-+inf 387 if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 388 if(x>z*z ...chopped) z = z+ulp; 389 break; 390 } 391 392 Remark 3. The above comparisons can be done in fixed point. For 393 example, to compare x and w=z*z chopped, it suffices to compare 394 x1 and w1 (the trailing parts of x and w), regarding them as 395 two's complement integers. 396 397 ...Is z an exact square root? 398 To determine whether z is an exact square root of x, let z1 be the 399 trailing part of z, and also let x0 and x1 be the leading and 400 trailing parts of x. 401 402 If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 403 I := 1; ... Raise Inexact flag: z is not exact 404 else { 405 j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 406 k := z1 >> 26; ... get z's 25-th and 26-th 407 fraction bits 408 I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 409 } 410 R:= r ... restore rounded mode 411 return sqrt(x):=z. 412 413 If multiplication is cheaper then the foregoing red tape, the 414 Inexact flag can be evaluated by 415 416 I := i; 417 I := (z*z!=x) or I. 418 419 Note that z*z can overwrite I; this value must be sensed if it is 420 True. 421 422 Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 423 zero. 424 425 -------------------- 426 z1: | f2 | 427 -------------------- 428 bit 31 bit 0 429 430 Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 431 or even of logb(x) have the following relations: 432 433 ------------------------------------------------- 434 bit 27,26 of z1 bit 1,0 of x1 logb(x) 435 ------------------------------------------------- 436 00 00 odd and even 437 01 01 even 438 10 10 odd 439 10 00 even 440 11 01 even 441 ------------------------------------------------- 442 443 (4) Special cases (see (4) of Section A). 444 445 */ 446 447