xref: /illumos-gate/usr/src/lib/libc/sparc/fp/_Q_div.c (revision 1da57d551424de5a9d469760be7c4b4d4f10a755)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include "quad.h"
28 
29 static const double C[] = {
30 	0.0,
31 	1.0,
32 	68719476736.0,
33 	402653184.0,
34 	24.0,
35 	16.0,
36 	3.66210937500000000000e-04,
37 	1.52587890625000000000e-05,
38 	1.43051147460937500000e-06,
39 	5.96046447753906250000e-08,
40 	3.72529029846191406250e-09,
41 	2.18278728425502777100e-11,
42 	8.52651282912120223045e-14,
43 	3.55271367880050092936e-15,
44 	1.30104260698260532081e-18,
45 	8.67361737988403547206e-19,
46 	2.16840434497100886801e-19,
47 	3.17637355220362627151e-22,
48 	7.75481824268463445192e-26,
49 	4.62223186652936604733e-33,
50 	9.62964972193617926528e-35,
51 	4.70197740328915003187e-38,
52 	2.75506488473973634680e-40,
53 };
54 
55 #define	zero		C[0]
56 #define	one		C[1]
57 #define	two36		C[2]
58 #define	three2p27	C[3]
59 #define	three2p3	C[4]
60 #define	two4		C[5]
61 #define	three2m13	C[6]
62 #define	twom16		C[7]
63 #define	three2m21	C[8]
64 #define	twom24		C[9]
65 #define	twom28		C[10]
66 #define	three2m37	C[11]
67 #define	three2m45	C[12]
68 #define	twom48		C[13]
69 #define	three2m61	C[14]
70 #define	twom60		C[15]
71 #define	twom62		C[16]
72 #define	three2m73	C[17]
73 #define	three2m85	C[18]
74 #define	three2m109	C[19]
75 #define	twom113		C[20]
76 #define	twom124		C[21]
77 #define	three2m133	C[22]
78 
79 static const unsigned int
80 	fsr_re = 0x00000000u,
81 	fsr_rn = 0xc0000000u;
82 
83 #ifdef __sparcv9
84 
85 /*
86  * _Qp_div(pz, x, y) sets *pz = *x / *y.
87  */
88 void
_Qp_div(union longdouble * pz,const union longdouble * x,const union longdouble * y)89 _Qp_div(union longdouble *pz, const union longdouble *x,
90 	const union longdouble *y)
91 
92 #else
93 
94 /*
95  * _Q_div(x, y) returns *x / *y.
96  */
97 union longdouble
98 _Q_div(const union longdouble *x, const union longdouble *y)
99 
100 #endif /* __sparcv9 */
101 
102 {
103 	union longdouble	z;
104 	union xdouble		u;
105 	double			c, d, ry, xx[4], yy[5], zz[5];
106 	unsigned int		xm, ym, fsr, lx, ly, wx[3], wy[3];
107 	unsigned int		msw, frac2, frac3, frac4, rm;
108 	int			ibit, ex, ey, ez, sign;
109 
110 	xm = x->l.msw & 0x7fffffff;
111 	ym = y->l.msw & 0x7fffffff;
112 	sign = (x->l.msw ^ y->l.msw) & ~0x7fffffff;
113 
114 	__quad_getfsrp(&fsr);
115 
116 	/* handle nan and inf cases */
117 	if (xm >= 0x7fff0000 || ym >= 0x7fff0000) {
118 		/* handle nan cases according to V9 app. B */
119 		if (QUAD_ISNAN(*y)) {
120 			if (!(y->l.msw & 0x8000)) {
121 				/* snan, signal invalid */
122 				if (fsr & FSR_NVM) {
123 					__quad_fdivq(x, y, &Z);
124 				} else {
125 					Z = *y;
126 					Z.l.msw |= 0x8000;
127 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
128 					    FSR_NVC;
129 					__quad_setfsrp(&fsr);
130 				}
131 				QUAD_RETURN(Z);
132 			} else if (QUAD_ISNAN(*x) && !(x->l.msw & 0x8000)) {
133 				/* snan, signal invalid */
134 				if (fsr & FSR_NVM) {
135 					__quad_fdivq(x, y, &Z);
136 				} else {
137 					Z = *x;
138 					Z.l.msw |= 0x8000;
139 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
140 					    FSR_NVC;
141 					__quad_setfsrp(&fsr);
142 				}
143 				QUAD_RETURN(Z);
144 			}
145 			Z = *y;
146 			QUAD_RETURN(Z);
147 		}
148 		if (QUAD_ISNAN(*x)) {
149 			if (!(x->l.msw & 0x8000)) {
150 				/* snan, signal invalid */
151 				if (fsr & FSR_NVM) {
152 					__quad_fdivq(x, y, &Z);
153 				} else {
154 					Z = *x;
155 					Z.l.msw |= 0x8000;
156 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
157 					    FSR_NVC;
158 					__quad_setfsrp(&fsr);
159 				}
160 				QUAD_RETURN(Z);
161 			}
162 			Z = *x;
163 			QUAD_RETURN(Z);
164 		}
165 		if (xm == 0x7fff0000) {
166 			/* x is inf */
167 			if (ym == 0x7fff0000) {
168 				/* inf / inf, signal invalid */
169 				if (fsr & FSR_NVM) {
170 					__quad_fdivq(x, y, &Z);
171 				} else {
172 					Z.l.msw = 0x7fffffff;
173 					Z.l.frac2 = Z.l.frac3 =
174 					    Z.l.frac4 = 0xffffffff;
175 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
176 					    FSR_NVC;
177 					__quad_setfsrp(&fsr);
178 				}
179 				QUAD_RETURN(Z);
180 			}
181 			/* inf / finite, return inf */
182 			Z.l.msw = sign | 0x7fff0000;
183 			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
184 			QUAD_RETURN(Z);
185 		}
186 		/* y is inf */
187 		Z.l.msw = sign;
188 		Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
189 		QUAD_RETURN(Z);
190 	}
191 
192 	/* handle zero cases */
193 	if (xm == 0 || ym == 0) {
194 		if (QUAD_ISZERO(*x)) {
195 			if (QUAD_ISZERO(*y)) {
196 				/* zero / zero, signal invalid */
197 				if (fsr & FSR_NVM) {
198 					__quad_fdivq(x, y, &Z);
199 				} else {
200 					Z.l.msw = 0x7fffffff;
201 					Z.l.frac2 = Z.l.frac3 =
202 					    Z.l.frac4 = 0xffffffff;
203 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
204 					    FSR_NVC;
205 					__quad_setfsrp(&fsr);
206 				}
207 				QUAD_RETURN(Z);
208 			}
209 			/* zero / nonzero, return zero */
210 			Z.l.msw = sign;
211 			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
212 			QUAD_RETURN(Z);
213 		}
214 		if (QUAD_ISZERO(*y)) {
215 			/* nonzero / zero, signal zero divide */
216 			if (fsr & FSR_DZM) {
217 				__quad_fdivq(x, y, &Z);
218 			} else {
219 				Z.l.msw = sign | 0x7fff0000;
220 				Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
221 				fsr = (fsr & ~FSR_CEXC) | FSR_DZA | FSR_DZC;
222 				__quad_setfsrp(&fsr);
223 			}
224 			QUAD_RETURN(Z);
225 		}
226 	}
227 
228 	/* now x and y are finite, nonzero */
229 	__quad_setfsrp(&fsr_re);
230 
231 	/* get their normalized significands and exponents */
232 	ex = (int)(xm >> 16);
233 	lx = xm & 0xffff;
234 	if (ex) {
235 		lx |= 0x10000;
236 		wx[0] = x->l.frac2;
237 		wx[1] = x->l.frac3;
238 		wx[2] = x->l.frac4;
239 	} else {
240 		if (lx | (x->l.frac2 & 0xfffe0000)) {
241 			wx[0] = x->l.frac2;
242 			wx[1] = x->l.frac3;
243 			wx[2] = x->l.frac4;
244 			ex = 1;
245 		} else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
246 			lx = x->l.frac2;
247 			wx[0] = x->l.frac3;
248 			wx[1] = x->l.frac4;
249 			wx[2] = 0;
250 			ex = -31;
251 		} else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
252 			lx = x->l.frac3;
253 			wx[0] = x->l.frac4;
254 			wx[1] = wx[2] = 0;
255 			ex = -63;
256 		} else {
257 			lx = x->l.frac4;
258 			wx[0] = wx[1] = wx[2] = 0;
259 			ex = -95;
260 		}
261 		while ((lx & 0x10000) == 0) {
262 			lx = (lx << 1) | (wx[0] >> 31);
263 			wx[0] = (wx[0] << 1) | (wx[1] >> 31);
264 			wx[1] = (wx[1] << 1) | (wx[2] >> 31);
265 			wx[2] <<= 1;
266 			ex--;
267 		}
268 	}
269 	ez = ex;
270 
271 	ey = (int)(ym >> 16);
272 	ly = ym & 0xffff;
273 	if (ey) {
274 		ly |= 0x10000;
275 		wy[0] = y->l.frac2;
276 		wy[1] = y->l.frac3;
277 		wy[2] = y->l.frac4;
278 	} else {
279 		if (ly | (y->l.frac2 & 0xfffe0000)) {
280 			wy[0] = y->l.frac2;
281 			wy[1] = y->l.frac3;
282 			wy[2] = y->l.frac4;
283 			ey = 1;
284 		} else if (y->l.frac2 | (y->l.frac3 & 0xfffe0000)) {
285 			ly = y->l.frac2;
286 			wy[0] = y->l.frac3;
287 			wy[1] = y->l.frac4;
288 			wy[2] = 0;
289 			ey = -31;
290 		} else if (y->l.frac3 | (y->l.frac4 & 0xfffe0000)) {
291 			ly = y->l.frac3;
292 			wy[0] = y->l.frac4;
293 			wy[1] = wy[2] = 0;
294 			ey = -63;
295 		} else {
296 			ly = y->l.frac4;
297 			wy[0] = wy[1] = wy[2] = 0;
298 			ey = -95;
299 		}
300 		while ((ly & 0x10000) == 0) {
301 			ly = (ly << 1) | (wy[0] >> 31);
302 			wy[0] = (wy[0] << 1) | (wy[1] >> 31);
303 			wy[1] = (wy[1] << 1) | (wy[2] >> 31);
304 			wy[2] <<= 1;
305 			ey--;
306 		}
307 	}
308 	ez -= ey - 0x3fff;
309 
310 	/* extract the significands into doubles */
311 	c = twom16;
312 	xx[0] = (double)((int)lx) * c;
313 	yy[0] = (double)((int)ly) * c;
314 
315 	c *= twom24;
316 	xx[0] += (double)((int)(wx[0] >> 8)) * c;
317 	yy[1] = (double)((int)(wy[0] >> 8)) * c;
318 
319 	c *= twom24;
320 	xx[1] = (double)((int)(((wx[0] << 16) |
321 	    (wx[1] >> 16)) & 0xffffff)) * c;
322 	yy[2] = (double)((int)(((wy[0] << 16) |
323 	    (wy[1] >> 16)) & 0xffffff)) * c;
324 
325 	c *= twom24;
326 	xx[2] = (double)((int)(((wx[1] << 8) |
327 	    (wx[2] >> 24)) & 0xffffff)) * c;
328 	yy[3] = (double)((int)(((wy[1] << 8) |
329 	    (wy[2] >> 24)) & 0xffffff)) * c;
330 
331 	c *= twom24;
332 	xx[3] = (double)((int)(wx[2] & 0xffffff)) * c;
333 	yy[4] = (double)((int)(wy[2] & 0xffffff)) * c;
334 
335 	/* approximate the reciprocal of y */
336 	ry = one / ((yy[0] + yy[1]) + yy[2]);
337 
338 	/* compute the first five "digits" of the quotient */
339 	zz[0] = (ry * (xx[0] + xx[1]) + three2p27) - three2p27;
340 	xx[0] = ((xx[0] - zz[0] * yy[0]) - zz[0] * yy[1]) + xx[1];
341 	d = zz[0] * yy[2];
342 	c = (d + three2m13) - three2m13;
343 	xx[0] -= c;
344 	xx[1] = xx[2] - (d - c);
345 	d = zz[0] * yy[3];
346 	c = (d + three2m37) - three2m37;
347 	xx[1] -= c;
348 	xx[2] = xx[3] - (d - c);
349 	d = zz[0] * yy[4];
350 	c = (d + three2m61) - three2m61;
351 	xx[2] -= c;
352 	xx[3] = c - d;
353 
354 	zz[1] = (ry * (xx[0] + xx[1]) + three2p3) - three2p3;
355 	xx[0] = ((xx[0] - zz[1] * yy[0]) - zz[1] * yy[1]) + xx[1];
356 	d = zz[1] * yy[2];
357 	c = (d + three2m37) - three2m37;
358 	xx[0] -= c;
359 	xx[1] = xx[2] - (d - c);
360 	d = zz[1] * yy[3];
361 	c = (d + three2m61) - three2m61;
362 	xx[1] -= c;
363 	xx[2] = xx[3] - (d - c);
364 	d = zz[1] * yy[4];
365 	c = (d + three2m85) - three2m85;
366 	xx[2] -= c;
367 	xx[3] = c - d;
368 
369 	zz[2] = (ry * (xx[0] + xx[1]) + three2m21) - three2m21;
370 	xx[0] = ((xx[0] - zz[2] * yy[0]) - zz[2] * yy[1]) + xx[1];
371 	d = zz[2] * yy[2];
372 	c = (d + three2m61) - three2m61;
373 	xx[0] -= c;
374 	xx[1] = xx[2] - (d - c);
375 	d = zz[2] * yy[3];
376 	c = (d + three2m85) - three2m85;
377 	xx[1] -= c;
378 	xx[2] = xx[3] - (d - c);
379 	d = zz[2] * yy[4];
380 	c = (d + three2m109) - three2m109;
381 	xx[2] -= c;
382 	xx[3] = c - d;
383 
384 	zz[3] = (ry * (xx[0] + xx[1]) + three2m45) - three2m45;
385 	xx[0] = ((xx[0] - zz[3] * yy[0]) - zz[3] * yy[1]) + xx[1];
386 	d = zz[3] * yy[2];
387 	c = (d + three2m85) - three2m85;
388 	xx[0] -= c;
389 	xx[1] = xx[2] - (d - c);
390 	d = zz[3] * yy[3];
391 	c = (d + three2m109) - three2m109;
392 	xx[1] -= c;
393 	xx[2] = xx[3] - (d - c);
394 	d = zz[3] * yy[4];
395 	c = (d + three2m133) - three2m133;
396 	xx[2] -= c;
397 	xx[3] = c - d;
398 
399 	zz[4] = (ry * (xx[0] + xx[1]) + three2m73) - three2m73;
400 
401 	/* reduce to three doubles, making sure zz[1] is positive */
402 	zz[0] += zz[1] - twom48;
403 	zz[1] = twom48 + zz[2] + zz[3];
404 	zz[2] = zz[4];
405 
406 	/* if the third term might lie on a rounding boundary, perturb it */
407 	if (zz[2] == (twom62 + zz[2]) - twom62) {
408 		/* here we just need to get the sign of the remainder */
409 		c = (((((xx[0] - zz[4] * yy[0]) - zz[4] * yy[1]) + xx[1]) +
410 		    (xx[2] - zz[4] * yy[2])) + (xx[3] - zz[4] * yy[3]))
411 		    - zz[4] * yy[4];
412 		if (c < zero)
413 			zz[2] -= twom124;
414 		else if (c > zero)
415 			zz[2] += twom124;
416 	}
417 
418 	/*
419 	 * propagate carries/borrows, using round-to-negative-infinity mode
420 	 * to make all terms nonnegative (note that we can't encounter a
421 	 * borrow so large that the roundoff is unrepresentable because
422 	 * we took care to make zz[1] positive above)
423 	 */
424 	__quad_setfsrp(&fsr_rn);
425 	c = zz[1] + zz[2];
426 	zz[2] += (zz[1] - c);
427 	zz[1] = c;
428 	c = zz[0] + zz[1];
429 	zz[1] += (zz[0] - c);
430 	zz[0] = c;
431 
432 	/* check for borrow */
433 	if (c < one) {
434 		/* postnormalize */
435 		zz[0] += zz[0];
436 		zz[1] += zz[1];
437 		zz[2] += zz[2];
438 		ez--;
439 	}
440 
441 	/* if exponent > 0 strip off integer bit, else denormalize */
442 	if (ez > 0) {
443 		ibit = 1;
444 		zz[0] -= one;
445 	} else {
446 		ibit = 0;
447 		if (ez > -128)
448 			u.l.hi = (unsigned int)(0x3fe + ez) << 20;
449 		else
450 			u.l.hi = 0x37e00000;
451 		u.l.lo = 0;
452 		zz[0] *= u.d;
453 		zz[1] *= u.d;
454 		zz[2] *= u.d;
455 		ez = 0;
456 	}
457 
458 	/* the first 48 bits of fraction come from zz[0] */
459 	u.d = d = two36 + zz[0];
460 	msw = u.l.lo;
461 	zz[0] -= (d - two36);
462 
463 	u.d = d = two4 + zz[0];
464 	frac2 = u.l.lo;
465 	zz[0] -= (d - two4);
466 
467 	/* the next 32 come from zz[0] and zz[1] */
468 	u.d = d = twom28 + (zz[0] + zz[1]);
469 	frac3 = u.l.lo;
470 	zz[0] -= (d - twom28);
471 
472 	/* condense the remaining fraction; errors here won't matter */
473 	c = zz[0] + zz[1];
474 	zz[1] = ((zz[0] - c) + zz[1]) + zz[2];
475 	zz[0] = c;
476 
477 	/* get the last word of fraction */
478 	u.d = d = twom60 + (zz[0] + zz[1]);
479 	frac4 = u.l.lo;
480 	zz[0] -= (d - twom60);
481 
482 	/* keep track of what's left for rounding; note that the error */
483 	/* in computing c will be non-negative due to rounding mode */
484 	c = zz[0] + zz[1];
485 
486 	/* get the rounding mode, fudging directed rounding modes */
487 	/* as though the result were positive */
488 	rm = fsr >> 30;
489 	if (sign)
490 		rm ^= (rm >> 1);
491 
492 	/* round and raise exceptions */
493 	fsr &= ~FSR_CEXC;
494 	if (c != zero) {
495 		fsr |= FSR_NXC;
496 
497 		/* decide whether to round the fraction up */
498 		if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 ||
499 		    (c == twom113 && ((frac4 & 1) || (c - zz[0] !=
500 		    zz[1])))))) {
501 			/* round up and renormalize if necessary */
502 			if (++frac4 == 0)
503 				if (++frac3 == 0)
504 					if (++frac2 == 0)
505 						if (++msw == 0x10000) {
506 							msw = 0;
507 							ez++;
508 						}
509 		}
510 	}
511 
512 	/* check for under/overflow */
513 	if (ez >= 0x7fff) {
514 		if (rm == FSR_RN || rm == FSR_RP) {
515 			z.l.msw = sign | 0x7fff0000;
516 			z.l.frac2 = z.l.frac3 = z.l.frac4 = 0;
517 		} else {
518 			z.l.msw = sign | 0x7ffeffff;
519 			z.l.frac2 = z.l.frac3 = z.l.frac4 = 0xffffffff;
520 		}
521 		fsr |= FSR_OFC | FSR_NXC;
522 	} else {
523 		z.l.msw = sign | (ez << 16) | msw;
524 		z.l.frac2 = frac2;
525 		z.l.frac3 = frac3;
526 		z.l.frac4 = frac4;
527 
528 		/* !ibit => exact result was tiny before rounding, */
529 		/* t nonzero => result delivered is inexact */
530 		if (!ibit) {
531 			if (c != zero)
532 				fsr |= FSR_UFC | FSR_NXC;
533 			else if (fsr & FSR_UFM)
534 				fsr |= FSR_UFC;
535 		}
536 	}
537 
538 	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
539 		__quad_setfsrp(&fsr);
540 		__quad_fdivq(x, y, &Z);
541 	} else {
542 		Z = z;
543 		fsr |= (fsr & 0x1f) << 5;
544 		__quad_setfsrp(&fsr);
545 	}
546 	QUAD_RETURN(Z);
547 }
548