Lines Matching +full:2 +full:- +full:bit
3 /*-
4 * SPDX-License-Identifier: BSD-3-Clause
10 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
23 * 2. Redistributions in binary form must reproduce the above copyright
60 * x = mant * 2 (where 1 <= mant < 2 and exp is an integer)
65 * exp-1
66 * x = (2 * mant) * 2 (where 2 <= 2 * mant < 4)
71 * exp/2
72 * sqrt(x) = sqrt(mant) * 2
77 * (exp-1)/2
78 * sqrt(x) = sqrt(2 * mant) * 2
82 * 1 <= mant < 2
86 * sqrt(1) <= sqrt(mant) < sqrt(2)
90 * 2 <= 2*mant < 4
94 * sqrt(2) <= sqrt(2*mant) < sqrt(4)
98 * sqrt(1) <= sqrt(n * mant) < sqrt(4), n = 1 or 2
102 * 1 <= sqrt(n * mant) < 2, n = 1 or 2.
105 * point number. The exponent of sqrt(x) is either exp/2 or (exp-1)/2
107 * of a fixed-point number in the range [1..4).
114 * for k = NBITS-1 to 0 step -1 do -- for each digit in the answer...
115 * x *= 2 -- multiply by radix, for next digit
116 * if x >= 2q + 2^k then -- if adding 2^k does not
117 * x -= 2q + 2^k -- exceed the correct root,
118 * q += 2^k -- add 2^k and adjust x
121 * sqrt = q / 2^(NBITS/2) -- (and any remainder is in x)
124 * zero bit at the top of x. Doing so means that q is not going to acquire
125 * a 1 bit in the first trip around the loop (since x0 < 2^NBITS). If the
126 * final value in x is not needed, or can be off by a factor of 2, this is
127 * equivalant to moving the `x *= 2' step to the bottom of the loop:
129 * for k = NBITS-1 to 0 step -1 do if ... fi; x *= 2; done
131 * and the result q will then be sqrt(x0) * 2^floor(NBITS / 2).
135 * If we insert a loop invariant y = 2q, we can then rewrite this using
139 * for (k = NBITS; --k >= 0;) {
141 * x *= 2;
145 * x -= t;
150 * x *= 2;
156 * for the scale factor to be 2**0 or 1, so that sqrt(x0) == q.
160 * bit at a time, from the top down, and is not used itself in the loop
161 * (we use 2q as held in y instead). This means we can build our answer
162 * in an integer, one word at a time, which saves a bit of work. Also,
163 * since 1 << k is always a `new' bit in q, 1 << k and 1 << (k+1) are
165 * a full-blown multiword add.
169 * and therefore the square root in q will be in [1..2), but what about x,
172 * We know that y = 2q at the beginning of each loop. (The relation only
173 * fails temporarily while y and q are being updated.) Since q < 2, y < 4.
174 * The sum in t can, in our case, be as much as y+(1<<1) = y+2 < 6, and.
175 * Furthermore, we can prove with a bit of work that x never exceeds y by
176 * more than 2, so that even after doubling, 0 <= x < 8. (This is left as
182 * In fact, we want even one more bit (for a carry, to avoid compares), or
189 struct fpn *x = &fe->fe_f1; in fpu_sqrt()
190 u_int bit, q, tt; in fpu_sqrt() local
202 * sqrt(-0) = -0 in fpu_sqrt()
203 * sqrt(x < 0) = NaN (including sqrt(-Inf)) in fpu_sqrt()
206 * Then all that remains are numbers with mantissas in [1..2). in fpu_sqrt()
212 fe->fe_cx |= FPSCR_VXSNAN; in fpu_sqrt()
217 fe->fe_cx |= FPSCR_ZX; in fpu_sqrt()
218 x->fp_class = FPC_INF; in fpu_sqrt()
222 if (x->fp_sign) { in fpu_sqrt()
223 fe->fe_cx |= FPSCR_VXSQRT; in fpu_sqrt()
255 x0 = x->fp_mant[0]; in fpu_sqrt()
256 x1 = x->fp_mant[1]; in fpu_sqrt()
257 x2 = x->fp_mant[2]; in fpu_sqrt()
258 x3 = x->fp_mant[3]; in fpu_sqrt()
259 e = x->fp_exp; in fpu_sqrt()
260 if (e & 1) /* exponent is odd; use sqrt(2mant) */ in fpu_sqrt()
263 x->fp_exp = e >> 1; /* calculates (e&1 ? (e-1)/2 : e/2 */ in fpu_sqrt()
268 * set the top bit in q, so we can do that manually and start in fpu_sqrt()
269 * the loop at the next bit down instead. We must be sure to in fpu_sqrt()
272 * We do this one mantissa-word at a time, as noted above, to in fpu_sqrt()
273 * save work. To avoid `(1U << 31) << 1', we also do the top bit in fpu_sqrt()
274 * outside of each per-word loop. in fpu_sqrt()
276 * The calculation `t = y + bit' breaks down into `t0 = y0, ..., in fpu_sqrt()
277 * t3 = y3, t? |= bit' for the appropriate word. Since the bit in fpu_sqrt()
285 bit = FP_1; in fpu_sqrt()
287 /* if (x >= (t0 = y0 | bit)) { */ /* always true */ in fpu_sqrt()
288 q = bit; in fpu_sqrt()
289 x0 -= bit; in fpu_sqrt()
290 y0 = bit << 1; in fpu_sqrt()
293 while ((bit >>= 1) != 0) { /* for remaining bits in q0 */ in fpu_sqrt()
295 t0 = y0 | bit; /* t = y + bit */ in fpu_sqrt()
297 x0 -= t0; /* x -= t */ in fpu_sqrt()
298 q |= bit; /* q += bit */ in fpu_sqrt()
299 y0 |= bit << 1; /* y += bit << 1 */ in fpu_sqrt()
303 x->fp_mant[0] = q; in fpu_sqrt()
311 bit = 1 << 31; in fpu_sqrt()
313 t1 = bit; in fpu_sqrt()
315 FPU_SUBC(d0, x0, t0); /* d = x - t */ in fpu_sqrt()
317 x0 = d0, x1 = d1; /* x -= t */ in fpu_sqrt()
318 q = bit; /* q += bit */ in fpu_sqrt()
319 y0 |= 1; /* y += bit << 1 */ in fpu_sqrt()
322 while ((bit >>= 1) != 0) { /* for remaining bits in q1 */ in fpu_sqrt()
324 t1 = y1 | bit; in fpu_sqrt()
329 q |= bit; in fpu_sqrt()
330 y1 |= bit << 1; in fpu_sqrt()
334 x->fp_mant[1] = q; in fpu_sqrt()
342 bit = 1 << 31; in fpu_sqrt()
344 t2 = bit; in fpu_sqrt()
350 q = bit; in fpu_sqrt()
354 while ((bit >>= 1) != 0) { in fpu_sqrt()
356 t2 = y2 | bit; in fpu_sqrt()
362 q |= bit; in fpu_sqrt()
363 y2 |= bit << 1; in fpu_sqrt()
367 x->fp_mant[2] = q; in fpu_sqrt()
375 bit = 1 << 31; in fpu_sqrt()
377 t3 = bit; in fpu_sqrt()
384 q = bit; in fpu_sqrt()
388 while ((bit >>= 1) != 0) { in fpu_sqrt()
390 t3 = y3 | bit; in fpu_sqrt()
397 q |= bit; in fpu_sqrt()
398 y3 |= bit << 1; in fpu_sqrt()
402 x->fp_mant[3] = q; in fpu_sqrt()
408 x->fp_sticky = x0 | x1 | x2 | x3; in fpu_sqrt()