xref: /illumos-gate/usr/src/lib/libc/sparc/fp/_Q_div.c (revision 628e3cbed6489fa1db545d8524a06cd6535af456)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "quad.h"
30 
31 static const double C[] = {
32 	0.0,
33 	1.0,
34 	68719476736.0,
35 	402653184.0,
36 	24.0,
37 	16.0,
38 	3.66210937500000000000e-04,
39 	1.52587890625000000000e-05,
40 	1.43051147460937500000e-06,
41 	5.96046447753906250000e-08,
42 	3.72529029846191406250e-09,
43 	2.18278728425502777100e-11,
44 	8.52651282912120223045e-14,
45 	3.55271367880050092936e-15,
46 	1.30104260698260532081e-18,
47 	8.67361737988403547206e-19,
48 	2.16840434497100886801e-19,
49 	3.17637355220362627151e-22,
50 	7.75481824268463445192e-26,
51 	4.62223186652936604733e-33,
52 	9.62964972193617926528e-35,
53 	4.70197740328915003187e-38,
54 	2.75506488473973634680e-40,
55 };
56 
57 #define	zero		C[0]
58 #define	one		C[1]
59 #define	two36		C[2]
60 #define	three2p27	C[3]
61 #define	three2p3	C[4]
62 #define	two4		C[5]
63 #define	three2m13	C[6]
64 #define	twom16		C[7]
65 #define	three2m21	C[8]
66 #define	twom24		C[9]
67 #define	twom28		C[10]
68 #define	three2m37	C[11]
69 #define	three2m45	C[12]
70 #define	twom48		C[13]
71 #define	three2m61	C[14]
72 #define	twom60		C[15]
73 #define	twom62		C[16]
74 #define	three2m73	C[17]
75 #define	three2m85	C[18]
76 #define	three2m109	C[19]
77 #define	twom113		C[20]
78 #define	twom124		C[21]
79 #define	three2m133	C[22]
80 
81 static const unsigned int
82 	fsr_re = 0x00000000u,
83 	fsr_rn = 0xc0000000u;
84 
85 #ifdef __sparcv9
86 
87 /*
88  * _Qp_div(pz, x, y) sets *pz = *x / *y.
89  */
90 void
91 _Qp_div(union longdouble *pz, const union longdouble *x,
92 	const union longdouble *y)
93 
94 #else
95 
96 /*
97  * _Q_div(x, y) returns *x / *y.
98  */
99 union longdouble
100 _Q_div(const union longdouble *x, const union longdouble *y)
101 
102 #endif /* __sparcv9 */
103 
104 {
105 	union longdouble	z;
106 	union xdouble		u;
107 	double			c, d, ry, xx[4], yy[5], zz[5];
108 	unsigned int		xm, ym, fsr, lx, ly, wx[3], wy[3];
109 	unsigned int		msw, frac2, frac3, frac4, rm;
110 	int			ibit, ex, ey, ez, sign;
111 
112 	xm = x->l.msw & 0x7fffffff;
113 	ym = y->l.msw & 0x7fffffff;
114 	sign = (x->l.msw ^ y->l.msw) & ~0x7fffffff;
115 
116 	__quad_getfsrp(&fsr);
117 
118 	/* handle nan and inf cases */
119 	if (xm >= 0x7fff0000 || ym >= 0x7fff0000) {
120 		/* handle nan cases according to V9 app. B */
121 		if (QUAD_ISNAN(*y)) {
122 			if (!(y->l.msw & 0x8000)) {
123 				/* snan, signal invalid */
124 				if (fsr & FSR_NVM) {
125 					__quad_fdivq(x, y, &Z);
126 				} else {
127 					Z = *y;
128 					Z.l.msw |= 0x8000;
129 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
130 					    FSR_NVC;
131 					__quad_setfsrp(&fsr);
132 				}
133 				QUAD_RETURN(Z);
134 			} else if (QUAD_ISNAN(*x) && !(x->l.msw & 0x8000)) {
135 				/* snan, signal invalid */
136 				if (fsr & FSR_NVM) {
137 					__quad_fdivq(x, y, &Z);
138 				} else {
139 					Z = *x;
140 					Z.l.msw |= 0x8000;
141 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
142 					    FSR_NVC;
143 					__quad_setfsrp(&fsr);
144 				}
145 				QUAD_RETURN(Z);
146 			}
147 			Z = *y;
148 			QUAD_RETURN(Z);
149 		}
150 		if (QUAD_ISNAN(*x)) {
151 			if (!(x->l.msw & 0x8000)) {
152 				/* snan, signal invalid */
153 				if (fsr & FSR_NVM) {
154 					__quad_fdivq(x, y, &Z);
155 				} else {
156 					Z = *x;
157 					Z.l.msw |= 0x8000;
158 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
159 					    FSR_NVC;
160 					__quad_setfsrp(&fsr);
161 				}
162 				QUAD_RETURN(Z);
163 			}
164 			Z = *x;
165 			QUAD_RETURN(Z);
166 		}
167 		if (xm == 0x7fff0000) {
168 			/* x is inf */
169 			if (ym == 0x7fff0000) {
170 				/* inf / inf, signal invalid */
171 				if (fsr & FSR_NVM) {
172 					__quad_fdivq(x, y, &Z);
173 				} else {
174 					Z.l.msw = 0x7fffffff;
175 					Z.l.frac2 = Z.l.frac3 =
176 					    Z.l.frac4 = 0xffffffff;
177 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
178 					    FSR_NVC;
179 					__quad_setfsrp(&fsr);
180 				}
181 				QUAD_RETURN(Z);
182 			}
183 			/* inf / finite, return inf */
184 			Z.l.msw = sign | 0x7fff0000;
185 			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
186 			QUAD_RETURN(Z);
187 		}
188 		/* y is inf */
189 		Z.l.msw = sign;
190 		Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
191 		QUAD_RETURN(Z);
192 	}
193 
194 	/* handle zero cases */
195 	if (xm == 0 || ym == 0) {
196 		if (QUAD_ISZERO(*x)) {
197 			if (QUAD_ISZERO(*y)) {
198 				/* zero / zero, signal invalid */
199 				if (fsr & FSR_NVM) {
200 					__quad_fdivq(x, y, &Z);
201 				} else {
202 					Z.l.msw = 0x7fffffff;
203 					Z.l.frac2 = Z.l.frac3 =
204 					    Z.l.frac4 = 0xffffffff;
205 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
206 					    FSR_NVC;
207 					__quad_setfsrp(&fsr);
208 				}
209 				QUAD_RETURN(Z);
210 			}
211 			/* zero / nonzero, return zero */
212 			Z.l.msw = sign;
213 			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
214 			QUAD_RETURN(Z);
215 		}
216 		if (QUAD_ISZERO(*y)) {
217 			/* nonzero / zero, signal zero divide */
218 			if (fsr & FSR_DZM) {
219 				__quad_fdivq(x, y, &Z);
220 			} else {
221 				Z.l.msw = sign | 0x7fff0000;
222 				Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
223 				fsr = (fsr & ~FSR_CEXC) | FSR_DZA | FSR_DZC;
224 				__quad_setfsrp(&fsr);
225 			}
226 			QUAD_RETURN(Z);
227 		}
228 	}
229 
230 	/* now x and y are finite, nonzero */
231 	__quad_setfsrp(&fsr_re);
232 
233 	/* get their normalized significands and exponents */
234 	ex = (int)(xm >> 16);
235 	lx = xm & 0xffff;
236 	if (ex) {
237 		lx |= 0x10000;
238 		wx[0] = x->l.frac2;
239 		wx[1] = x->l.frac3;
240 		wx[2] = x->l.frac4;
241 	} else {
242 		if (lx | (x->l.frac2 & 0xfffe0000)) {
243 			wx[0] = x->l.frac2;
244 			wx[1] = x->l.frac3;
245 			wx[2] = x->l.frac4;
246 			ex = 1;
247 		} else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
248 			lx = x->l.frac2;
249 			wx[0] = x->l.frac3;
250 			wx[1] = x->l.frac4;
251 			wx[2] = 0;
252 			ex = -31;
253 		} else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
254 			lx = x->l.frac3;
255 			wx[0] = x->l.frac4;
256 			wx[1] = wx[2] = 0;
257 			ex = -63;
258 		} else {
259 			lx = x->l.frac4;
260 			wx[0] = wx[1] = wx[2] = 0;
261 			ex = -95;
262 		}
263 		while ((lx & 0x10000) == 0) {
264 			lx = (lx << 1) | (wx[0] >> 31);
265 			wx[0] = (wx[0] << 1) | (wx[1] >> 31);
266 			wx[1] = (wx[1] << 1) | (wx[2] >> 31);
267 			wx[2] <<= 1;
268 			ex--;
269 		}
270 	}
271 	ez = ex;
272 
273 	ey = (int)(ym >> 16);
274 	ly = ym & 0xffff;
275 	if (ey) {
276 		ly |= 0x10000;
277 		wy[0] = y->l.frac2;
278 		wy[1] = y->l.frac3;
279 		wy[2] = y->l.frac4;
280 	} else {
281 		if (ly | (y->l.frac2 & 0xfffe0000)) {
282 			wy[0] = y->l.frac2;
283 			wy[1] = y->l.frac3;
284 			wy[2] = y->l.frac4;
285 			ey = 1;
286 		} else if (y->l.frac2 | (y->l.frac3 & 0xfffe0000)) {
287 			ly = y->l.frac2;
288 			wy[0] = y->l.frac3;
289 			wy[1] = y->l.frac4;
290 			wy[2] = 0;
291 			ey = -31;
292 		} else if (y->l.frac3 | (y->l.frac4 & 0xfffe0000)) {
293 			ly = y->l.frac3;
294 			wy[0] = y->l.frac4;
295 			wy[1] = wy[2] = 0;
296 			ey = -63;
297 		} else {
298 			ly = y->l.frac4;
299 			wy[0] = wy[1] = wy[2] = 0;
300 			ey = -95;
301 		}
302 		while ((ly & 0x10000) == 0) {
303 			ly = (ly << 1) | (wy[0] >> 31);
304 			wy[0] = (wy[0] << 1) | (wy[1] >> 31);
305 			wy[1] = (wy[1] << 1) | (wy[2] >> 31);
306 			wy[2] <<= 1;
307 			ey--;
308 		}
309 	}
310 	ez -= ey - 0x3fff;
311 
312 	/* extract the significands into doubles */
313 	c = twom16;
314 	xx[0] = (double)((int)lx) * c;
315 	yy[0] = (double)((int)ly) * c;
316 
317 	c *= twom24;
318 	xx[0] += (double)((int)(wx[0] >> 8)) * c;
319 	yy[1] = (double)((int)(wy[0] >> 8)) * c;
320 
321 	c *= twom24;
322 	xx[1] = (double)((int)(((wx[0] << 16) |
323 	    (wx[1] >> 16)) & 0xffffff)) * c;
324 	yy[2] = (double)((int)(((wy[0] << 16) |
325 	    (wy[1] >> 16)) & 0xffffff)) * c;
326 
327 	c *= twom24;
328 	xx[2] = (double)((int)(((wx[1] << 8) |
329 	    (wx[2] >> 24)) & 0xffffff)) * c;
330 	yy[3] = (double)((int)(((wy[1] << 8) |
331 	    (wy[2] >> 24)) & 0xffffff)) * c;
332 
333 	c *= twom24;
334 	xx[3] = (double)((int)(wx[2] & 0xffffff)) * c;
335 	yy[4] = (double)((int)(wy[2] & 0xffffff)) * c;
336 
337 	/* approximate the reciprocal of y */
338 	ry = one / ((yy[0] + yy[1]) + yy[2]);
339 
340 	/* compute the first five "digits" of the quotient */
341 	zz[0] = (ry * (xx[0] + xx[1]) + three2p27) - three2p27;
342 	xx[0] = ((xx[0] - zz[0] * yy[0]) - zz[0] * yy[1]) + xx[1];
343 	d = zz[0] * yy[2];
344 	c = (d + three2m13) - three2m13;
345 	xx[0] -= c;
346 	xx[1] = xx[2] - (d - c);
347 	d = zz[0] * yy[3];
348 	c = (d + three2m37) - three2m37;
349 	xx[1] -= c;
350 	xx[2] = xx[3] - (d - c);
351 	d = zz[0] * yy[4];
352 	c = (d + three2m61) - three2m61;
353 	xx[2] -= c;
354 	xx[3] = c - d;
355 
356 	zz[1] = (ry * (xx[0] + xx[1]) + three2p3) - three2p3;
357 	xx[0] = ((xx[0] - zz[1] * yy[0]) - zz[1] * yy[1]) + xx[1];
358 	d = zz[1] * yy[2];
359 	c = (d + three2m37) - three2m37;
360 	xx[0] -= c;
361 	xx[1] = xx[2] - (d - c);
362 	d = zz[1] * yy[3];
363 	c = (d + three2m61) - three2m61;
364 	xx[1] -= c;
365 	xx[2] = xx[3] - (d - c);
366 	d = zz[1] * yy[4];
367 	c = (d + three2m85) - three2m85;
368 	xx[2] -= c;
369 	xx[3] = c - d;
370 
371 	zz[2] = (ry * (xx[0] + xx[1]) + three2m21) - three2m21;
372 	xx[0] = ((xx[0] - zz[2] * yy[0]) - zz[2] * yy[1]) + xx[1];
373 	d = zz[2] * yy[2];
374 	c = (d + three2m61) - three2m61;
375 	xx[0] -= c;
376 	xx[1] = xx[2] - (d - c);
377 	d = zz[2] * yy[3];
378 	c = (d + three2m85) - three2m85;
379 	xx[1] -= c;
380 	xx[2] = xx[3] - (d - c);
381 	d = zz[2] * yy[4];
382 	c = (d + three2m109) - three2m109;
383 	xx[2] -= c;
384 	xx[3] = c - d;
385 
386 	zz[3] = (ry * (xx[0] + xx[1]) + three2m45) - three2m45;
387 	xx[0] = ((xx[0] - zz[3] * yy[0]) - zz[3] * yy[1]) + xx[1];
388 	d = zz[3] * yy[2];
389 	c = (d + three2m85) - three2m85;
390 	xx[0] -= c;
391 	xx[1] = xx[2] - (d - c);
392 	d = zz[3] * yy[3];
393 	c = (d + three2m109) - three2m109;
394 	xx[1] -= c;
395 	xx[2] = xx[3] - (d - c);
396 	d = zz[3] * yy[4];
397 	c = (d + three2m133) - three2m133;
398 	xx[2] -= c;
399 	xx[3] = c - d;
400 
401 	zz[4] = (ry * (xx[0] + xx[1]) + three2m73) - three2m73;
402 
403 	/* reduce to three doubles, making sure zz[1] is positive */
404 	zz[0] += zz[1] - twom48;
405 	zz[1] = twom48 + zz[2] + zz[3];
406 	zz[2] = zz[4];
407 
408 	/* if the third term might lie on a rounding boundary, perturb it */
409 	if (zz[2] == (twom62 + zz[2]) - twom62) {
410 		/* here we just need to get the sign of the remainder */
411 		c = (((((xx[0] - zz[4] * yy[0]) - zz[4] * yy[1]) + xx[1]) +
412 		    (xx[2] - zz[4] * yy[2])) + (xx[3] - zz[4] * yy[3]))
413 		    - zz[4] * yy[4];
414 		if (c < zero)
415 			zz[2] -= twom124;
416 		else if (c > zero)
417 			zz[2] += twom124;
418 	}
419 
420 	/*
421 	 * propagate carries/borrows, using round-to-negative-infinity mode
422 	 * to make all terms nonnegative (note that we can't encounter a
423 	 * borrow so large that the roundoff is unrepresentable because
424 	 * we took care to make zz[1] positive above)
425 	 */
426 	__quad_setfsrp(&fsr_rn);
427 	c = zz[1] + zz[2];
428 	zz[2] += (zz[1] - c);
429 	zz[1] = c;
430 	c = zz[0] + zz[1];
431 	zz[1] += (zz[0] - c);
432 	zz[0] = c;
433 
434 	/* check for borrow */
435 	if (c < one) {
436 		/* postnormalize */
437 		zz[0] += zz[0];
438 		zz[1] += zz[1];
439 		zz[2] += zz[2];
440 		ez--;
441 	}
442 
443 	/* if exponent > 0 strip off integer bit, else denormalize */
444 	if (ez > 0) {
445 		ibit = 1;
446 		zz[0] -= one;
447 	} else {
448 		ibit = 0;
449 		if (ez > -128)
450 			u.l.hi = (unsigned int)(0x3fe + ez) << 20;
451 		else
452 			u.l.hi = 0x37e00000;
453 		u.l.lo = 0;
454 		zz[0] *= u.d;
455 		zz[1] *= u.d;
456 		zz[2] *= u.d;
457 		ez = 0;
458 	}
459 
460 	/* the first 48 bits of fraction come from zz[0] */
461 	u.d = d = two36 + zz[0];
462 	msw = u.l.lo;
463 	zz[0] -= (d - two36);
464 
465 	u.d = d = two4 + zz[0];
466 	frac2 = u.l.lo;
467 	zz[0] -= (d - two4);
468 
469 	/* the next 32 come from zz[0] and zz[1] */
470 	u.d = d = twom28 + (zz[0] + zz[1]);
471 	frac3 = u.l.lo;
472 	zz[0] -= (d - twom28);
473 
474 	/* condense the remaining fraction; errors here won't matter */
475 	c = zz[0] + zz[1];
476 	zz[1] = ((zz[0] - c) + zz[1]) + zz[2];
477 	zz[0] = c;
478 
479 	/* get the last word of fraction */
480 	u.d = d = twom60 + (zz[0] + zz[1]);
481 	frac4 = u.l.lo;
482 	zz[0] -= (d - twom60);
483 
484 	/* keep track of what's left for rounding; note that the error */
485 	/* in computing c will be non-negative due to rounding mode */
486 	c = zz[0] + zz[1];
487 
488 	/* get the rounding mode, fudging directed rounding modes */
489 	/* as though the result were positive */
490 	rm = fsr >> 30;
491 	if (sign)
492 		rm ^= (rm >> 1);
493 
494 	/* round and raise exceptions */
495 	fsr &= ~FSR_CEXC;
496 	if (c != zero) {
497 		fsr |= FSR_NXC;
498 
499 		/* decide whether to round the fraction up */
500 		if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 ||
501 		    (c == twom113 && ((frac4 & 1) || (c - zz[0] !=
502 		    zz[1])))))) {
503 			/* round up and renormalize if necessary */
504 			if (++frac4 == 0)
505 				if (++frac3 == 0)
506 					if (++frac2 == 0)
507 						if (++msw == 0x10000) {
508 							msw = 0;
509 							ez++;
510 						}
511 		}
512 	}
513 
514 	/* check for under/overflow */
515 	if (ez >= 0x7fff) {
516 		if (rm == FSR_RN || rm == FSR_RP) {
517 			z.l.msw = sign | 0x7fff0000;
518 			z.l.frac2 = z.l.frac3 = z.l.frac4 = 0;
519 		} else {
520 			z.l.msw = sign | 0x7ffeffff;
521 			z.l.frac2 = z.l.frac3 = z.l.frac4 = 0xffffffff;
522 		}
523 		fsr |= FSR_OFC | FSR_NXC;
524 	} else {
525 		z.l.msw = sign | (ez << 16) | msw;
526 		z.l.frac2 = frac2;
527 		z.l.frac3 = frac3;
528 		z.l.frac4 = frac4;
529 
530 		/* !ibit => exact result was tiny before rounding, */
531 		/* t nonzero => result delivered is inexact */
532 		if (!ibit) {
533 			if (c != zero)
534 				fsr |= FSR_UFC | FSR_NXC;
535 			else if (fsr & FSR_UFM)
536 				fsr |= FSR_UFC;
537 		}
538 	}
539 
540 	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
541 		__quad_setfsrp(&fsr);
542 		__quad_fdivq(x, y, &Z);
543 	} else {
544 		Z = z;
545 		fsr |= (fsr & 0x1f) << 5;
546 		__quad_setfsrp(&fsr);
547 	}
548 	QUAD_RETURN(Z);
549 }
550