fpu_sqrt.c - OpenGrok cross reference for /freebsd/sys/powerpc/fpu/fpu

Lines Matching +full:2 +full:- +full:bit
3 /*-
4  * SPDX-License-Identifier: BSD-3-Clause
10  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
23  * 2. Redistributions in binary form must reproduce the above copyright
60  *	x = mant * 2		(where 1 <= mant < 2 and exp is an integer)
65  *			  exp-1
66  *	x = (2 * mant) * 2	(where 2 <= 2 * mant < 4)
71  *				exp/2
72  *	sqrt(x) = sqrt(mant) * 2
77  *				    (exp-1)/2
78  *	sqrt(x) = sqrt(2 * mant) * 2
82  *	1 <= mant < 2
86  *	sqrt(1) <= sqrt(mant) < sqrt(2)
90  *	2 <= 2*mant < 4
94  *	sqrt(2) <= sqrt(2*mant) < sqrt(4)
98  *	sqrt(1) <= sqrt(n * mant) < sqrt(4),	n = 1 or 2
102  *	1 <= sqrt(n * mant) < 2,		n = 1 or 2.
105  * point number.  The exponent of sqrt(x) is either exp/2 or (exp-1)/2
107  * of a fixed-point number in the range [1..4).
114  *	for k = NBITS-1 to 0 step -1 do -- for each digit in the answer...
115  *		x *= 2			-- multiply by radix, for next digit
116  *		if x >= 2q + 2^k then	-- if adding 2^k does not
117  *			x -= 2q + 2^k	-- exceed the correct root,
118  *			q += 2^k	-- add 2^k and adjust x
121  *	sqrt = q / 2^(NBITS/2)		-- (and any remainder is in x)
124  * zero bit at the top of x.  Doing so means that q is not going to acquire
125  * a 1 bit in the first trip around the loop (since x0 < 2^NBITS).  If the
126  * final value in x is not needed, or can be off by a factor of 2, this is
127  * equivalant to moving the `x *= 2' step to the bottom of the loop:
129  *	for k = NBITS-1 to 0 step -1 do if ... fi; x *= 2; done
131  * and the result q will then be sqrt(x0) * 2^floor(NBITS / 2).
135  * If we insert a loop invariant y = 2q, we can then rewrite this using
139  *	for (k = NBITS; --k >= 0;) {
141  *		x *= 2;
145  *			x -= t;
150  *		x *= 2;
156  * for the scale factor to be 2**0 or 1, so that sqrt(x0) == q.
160  * bit at a time, from the top down, and is not used itself in the loop
161  * (we use 2q as held in y instead).  This means we can build our answer
162  * in an integer, one word at a time, which saves a bit of work.  Also,
163  * since 1 << k is always a `new' bit in q, 1 << k and 1 << (k+1) are
165  * a full-blown multiword add.
169  * and therefore the square root in q will be in [1..2), but what about x,
172  * We know that y = 2q at the beginning of each loop.  (The relation only
173  * fails temporarily while y and q are being updated.)  Since q < 2, y < 4.
174  * The sum in t can, in our case, be as much as y+(1<<1) = y+2 < 6, and.
175  * Furthermore, we can prove with a bit of work that x never exceeds y by
176  * more than 2, so that even after doubling, 0 <= x < 8.  (This is left as
182  * In fact, we want even one more bit (for a carry, to avoid compares), or
189 	struct fpn *x = &fe->fe_f1;  in fpu_sqrt()
190 	u_int bit, q, tt;  in fpu_sqrt()  local
202 	 *	sqrt(-0) = -0  in fpu_sqrt()
203 	 *	sqrt(x < 0) = NaN	(including sqrt(-Inf))  in fpu_sqrt()
206 	 * Then all that remains are numbers with mantissas in [1..2).  in fpu_sqrt()
212 		fe->fe_cx |= FPSCR_VXSNAN;  in fpu_sqrt()
217 		fe->fe_cx |= FPSCR_ZX;  in fpu_sqrt()
218 		x->fp_class = FPC_INF;  in fpu_sqrt()
222 	if (x->fp_sign) {  in fpu_sqrt()
223 		fe->fe_cx |= FPSCR_VXSQRT;  in fpu_sqrt()
255 	x0 = x->fp_mant[0];  in fpu_sqrt()
256 	x1 = x->fp_mant[1];  in fpu_sqrt()
257 	x2 = x->fp_mant[2];  in fpu_sqrt()
258 	x3 = x->fp_mant[3];  in fpu_sqrt()
259 	e = x->fp_exp;  in fpu_sqrt()
260 	if (e & 1)		/* exponent is odd; use sqrt(2mant) */  in fpu_sqrt()
263 	x->fp_exp = e >> 1;	/* calculates (e&1 ? (e-1)/2 : e/2 */  in fpu_sqrt()
268 	 * set the top bit in q, so we can do that manually and start  in fpu_sqrt()
269 	 * the loop at the next bit down instead.  We must be sure to  in fpu_sqrt()
272 	 * We do this one mantissa-word at a time, as noted above, to  in fpu_sqrt()
273 	 * save work.  To avoid `(1U << 31) << 1', we also do the top bit  in fpu_sqrt()
274 	 * outside of each per-word loop.  in fpu_sqrt()
276 	 * The calculation `t = y + bit' breaks down into `t0 = y0, ...,  in fpu_sqrt()
277 	 * t3 = y3, t? |= bit' for the appropriate word.  Since the bit  in fpu_sqrt()
285 	bit = FP_1;  in fpu_sqrt()
287 	/* if (x >= (t0 = y0 | bit)) { */	/* always true */  in fpu_sqrt()
288 		q = bit;  in fpu_sqrt()
289 		x0 -= bit;  in fpu_sqrt()
290 		y0 = bit << 1;  in fpu_sqrt()
293 	while ((bit >>= 1) != 0) {	/* for remaining bits in q0 */  in fpu_sqrt()
295 		t0 = y0 | bit;		/* t = y + bit */  in fpu_sqrt()
297 			x0 -= t0;	/*	x -= t */  in fpu_sqrt()
298 			q |= bit;	/*	q += bit */  in fpu_sqrt()
299 			y0 |= bit << 1;	/*	y += bit << 1 */  in fpu_sqrt()
303 	x->fp_mant[0] = q;  in fpu_sqrt()
311 	bit = 1 << 31;  in fpu_sqrt()
313 	t1 = bit;  in fpu_sqrt()
315 	FPU_SUBC(d0, x0, t0);		/* d = x - t */  in fpu_sqrt()
317 		x0 = d0, x1 = d1;	/*	x -= t */  in fpu_sqrt()
318 		q = bit;		/*	q += bit */  in fpu_sqrt()
319 		y0 |= 1;		/*	y += bit << 1 */  in fpu_sqrt()
322 	while ((bit >>= 1) != 0) {	/* for remaining bits in q1 */  in fpu_sqrt()
324 		t1 = y1 | bit;  in fpu_sqrt()
329 			q |= bit;  in fpu_sqrt()
330 			y1 |= bit << 1;  in fpu_sqrt()
334 	x->fp_mant[1] = q;  in fpu_sqrt()
342 	bit = 1 << 31;  in fpu_sqrt()
344 	t2 = bit;  in fpu_sqrt()
350 		q = bit;  in fpu_sqrt()
354 	while ((bit >>= 1) != 0) {  in fpu_sqrt()
356 		t2 = y2 | bit;  in fpu_sqrt()
362 			q |= bit;  in fpu_sqrt()
363 			y2 |= bit << 1;  in fpu_sqrt()
367 	x->fp_mant[2] = q;  in fpu_sqrt()
375 	bit = 1 << 31;  in fpu_sqrt()
377 	t3 = bit;  in fpu_sqrt()
384 		q = bit;  in fpu_sqrt()
388 	while ((bit >>= 1) != 0) {  in fpu_sqrt()
390 		t3 = y3 | bit;  in fpu_sqrt()
397 			q |= bit;  in fpu_sqrt()
398 			y3 |= bit << 1;  in fpu_sqrt()
402 	x->fp_mant[3] = q;  in fpu_sqrt()
408 	x->fp_sticky = x0 | x1 | x2 | x3;  in fpu_sqrt()