xref: /illumos-gate/usr/src/lib/libc/sparc/fp/_Q_mul.c (revision 201ceb75ab95f9bf1f42ea1dc9ab363b43ba47cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include "quad.h"
28 
29 static const double C[] = {
30 	0.0,
31 	0.5,
32 	1.0,
33 	2.0,
34 	68719476736.0,
35 	1048576.0,
36 	16.0,
37 	1.52587890625000000000e-05,
38 	5.96046447753906250000e-08,
39 	3.72529029846191406250e-09,
40 	8.67361737988403547206e-19,
41 	2.16840434497100886801e-19,
42 	1.32348898008484427979e-23,
43 	9.62964972193617926528e-35,
44 	4.70197740328915003187e-38
45 };
46 
47 #define	zero	C[0]
48 #define	half	C[1]
49 #define	one	C[2]
50 #define	two	C[3]
51 #define	two36	C[4]
52 #define	two20	C[5]
53 #define	two4	C[6]
54 #define	twom16	C[7]
55 #define	twom24	C[8]
56 #define	twom28	C[9]
57 #define	twom60	C[10]
58 #define	twom62	C[11]
59 #define	twom76	C[12]
60 #define	twom113	C[13]
61 #define	twom124	C[14]
62 
63 static const unsigned fsr_rn = 0xc0000000u;
64 
65 #ifdef __sparcv9
66 
67 /*
68  * _Qp_mul(pz, x, y) sets *pz = *x * *y.
69  */
70 void
71 _Qp_mul(union longdouble *pz, const union longdouble *x,
72 	const union longdouble *y)
73 
74 #else
75 
76 /*
77  * _Q_mul(x, y) returns *x * *y.
78  */
79 union longdouble
80 _Q_mul(const union longdouble *x, const union longdouble *y)
81 
82 #endif	/* __sparcv9 */
83 
84 {
85 	union longdouble	z;
86 	union xdouble		u;
87 	double			xx[5], yy[5], c, d, zz[9];
88 	unsigned int		xm, ym, fsr, lx, ly, wx[3], wy[3];
89 	unsigned int		msw, frac2, frac3, frac4, rm;
90 	int			ibit, ex, ey, ez, sign;
91 
92 	xm = x->l.msw & 0x7fffffff;
93 	ym = y->l.msw & 0x7fffffff;
94 	sign = (x->l.msw ^ y->l.msw) & ~0x7fffffff;
95 
96 	__quad_getfsrp(&fsr);
97 
98 	/* handle nan and inf cases */
99 	if (xm >= 0x7fff0000 || ym >= 0x7fff0000) {
100 		/* handle nan cases according to V9 app. B */
101 		if (QUAD_ISNAN(*y)) {
102 			if (!(y->l.msw & 0x8000)) {
103 				/* snan, signal invalid */
104 				if (fsr & FSR_NVM) {
105 					__quad_fmulq(x, y, &Z);
106 				} else {
107 					Z = *y;
108 					Z.l.msw |= 0x8000;
109 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
110 					    FSR_NVC;
111 					__quad_setfsrp(&fsr);
112 				}
113 				QUAD_RETURN(Z);
114 			} else if (QUAD_ISNAN(*x) && !(x->l.msw & 0x8000)) {
115 				/* snan, signal invalid */
116 				if (fsr & FSR_NVM) {
117 					__quad_fmulq(x, y, &Z);
118 				} else {
119 					Z = *x;
120 					Z.l.msw |= 0x8000;
121 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
122 					    FSR_NVC;
123 					__quad_setfsrp(&fsr);
124 				}
125 				QUAD_RETURN(Z);
126 			}
127 			Z = *y;
128 			QUAD_RETURN(Z);
129 		}
130 		if (QUAD_ISNAN(*x)) {
131 			if (!(x->l.msw & 0x8000)) {
132 				/* snan, signal invalid */
133 				if (fsr & FSR_NVM) {
134 					__quad_fmulq(x, y, &Z);
135 				} else {
136 					Z = *x;
137 					Z.l.msw |= 0x8000;
138 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
139 					    FSR_NVC;
140 					__quad_setfsrp(&fsr);
141 				}
142 				QUAD_RETURN(Z);
143 			}
144 			Z = *x;
145 			QUAD_RETURN(Z);
146 		}
147 		if (xm == 0x7fff0000) {
148 			/* x is inf */
149 			if (QUAD_ISZERO(*y)) {
150 				/* zero * inf, signal invalid */
151 				if (fsr & FSR_NVM) {
152 					__quad_fmulq(x, y, &Z);
153 				} else {
154 					Z.l.msw = 0x7fffffff;
155 					Z.l.frac2 = Z.l.frac3 =
156 					    Z.l.frac4 = 0xffffffff;
157 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
158 					    FSR_NVC;
159 					__quad_setfsrp(&fsr);
160 				}
161 				QUAD_RETURN(Z);
162 			}
163 			/* inf * finite, return inf */
164 			Z.l.msw = sign | 0x7fff0000;
165 			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
166 			QUAD_RETURN(Z);
167 		}
168 		/* y is inf */
169 		if (QUAD_ISZERO(*x)) {
170 			/* zero * inf, signal invalid */
171 			if (fsr & FSR_NVM) {
172 				__quad_fmulq(x, y, &Z);
173 			} else {
174 				Z.l.msw = 0x7fffffff;
175 				Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
176 				fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
177 				__quad_setfsrp(&fsr);
178 			}
179 			QUAD_RETURN(Z);
180 		}
181 		/* inf * finite, return inf */
182 		Z.l.msw = sign | 0x7fff0000;
183 		Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
184 		QUAD_RETURN(Z);
185 	}
186 
187 	/* handle zero cases */
188 	if (xm == 0 || ym == 0) {
189 		if (QUAD_ISZERO(*x) || QUAD_ISZERO(*y)) {
190 			Z.l.msw = sign;
191 			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
192 			QUAD_RETURN(Z);
193 		}
194 	}
195 
196 	/* now x and y are finite, nonzero */
197 	__quad_setfsrp(&fsr_rn);
198 
199 	/* get their normalized significands and exponents */
200 	ex = (int)(xm >> 16);
201 	lx = xm & 0xffff;
202 	if (ex) {
203 		lx |= 0x10000;
204 		wx[0] = x->l.frac2;
205 		wx[1] = x->l.frac3;
206 		wx[2] = x->l.frac4;
207 	} else {
208 		if (lx | (x->l.frac2 & 0xfffe0000)) {
209 			wx[0] = x->l.frac2;
210 			wx[1] = x->l.frac3;
211 			wx[2] = x->l.frac4;
212 			ex = 1;
213 		} else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
214 			lx = x->l.frac2;
215 			wx[0] = x->l.frac3;
216 			wx[1] = x->l.frac4;
217 			wx[2] = 0;
218 			ex = -31;
219 		} else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
220 			lx = x->l.frac3;
221 			wx[0] = x->l.frac4;
222 			wx[1] = wx[2] = 0;
223 			ex = -63;
224 		} else {
225 			lx = x->l.frac4;
226 			wx[0] = wx[1] = wx[2] = 0;
227 			ex = -95;
228 		}
229 		while ((lx & 0x10000) == 0) {
230 			lx = (lx << 1) | (wx[0] >> 31);
231 			wx[0] = (wx[0] << 1) | (wx[1] >> 31);
232 			wx[1] = (wx[1] << 1) | (wx[2] >> 31);
233 			wx[2] <<= 1;
234 			ex--;
235 		}
236 	}
237 	ez = ex - 0x3fff;
238 
239 	ey = (int)(ym >> 16);
240 	ly = ym & 0xffff;
241 	if (ey) {
242 		ly |= 0x10000;
243 		wy[0] = y->l.frac2;
244 		wy[1] = y->l.frac3;
245 		wy[2] = y->l.frac4;
246 	} else {
247 		if (ly | (y->l.frac2 & 0xfffe0000)) {
248 			wy[0] = y->l.frac2;
249 			wy[1] = y->l.frac3;
250 			wy[2] = y->l.frac4;
251 			ey = 1;
252 		} else if (y->l.frac2 | (y->l.frac3 & 0xfffe0000)) {
253 			ly = y->l.frac2;
254 			wy[0] = y->l.frac3;
255 			wy[1] = y->l.frac4;
256 			wy[2] = 0;
257 			ey = -31;
258 		} else if (y->l.frac3 | (y->l.frac4 & 0xfffe0000)) {
259 			ly = y->l.frac3;
260 			wy[0] = y->l.frac4;
261 			wy[1] = wy[2] = 0;
262 			ey = -63;
263 		} else {
264 			ly = y->l.frac4;
265 			wy[0] = wy[1] = wy[2] = 0;
266 			ey = -95;
267 		}
268 		while ((ly & 0x10000) == 0) {
269 			ly = (ly << 1) | (wy[0] >> 31);
270 			wy[0] = (wy[0] << 1) | (wy[1] >> 31);
271 			wy[1] = (wy[1] << 1) | (wy[2] >> 31);
272 			wy[2] <<= 1;
273 			ey--;
274 		}
275 	}
276 	ez += ey;
277 
278 	/* extract the significand into five doubles */
279 	c = twom16;
280 	xx[0] = (double)((int)lx) * c;
281 	yy[0] = (double)((int)ly) * c;
282 
283 	c *= twom24;
284 	xx[1] = (double)((int)(wx[0] >> 8)) * c;
285 	yy[1] = (double)((int)(wy[0] >> 8)) * c;
286 
287 	c *= twom24;
288 	xx[2] = (double)((int)(((wx[0] << 16) | (wx[1] >> 16)) &
289 	    0xffffff)) * c;
290 	yy[2] = (double)((int)(((wy[0] << 16) | (wy[1] >> 16)) &
291 	    0xffffff)) * c;
292 
293 	c *= twom24;
294 	xx[3] = (double)((int)(((wx[1] << 8) | (wx[2] >> 24)) &
295 	    0xffffff)) * c;
296 	yy[3] = (double)((int)(((wy[1] << 8) | (wy[2] >> 24)) &
297 	    0xffffff)) * c;
298 
299 	c *= twom24;
300 	xx[4] = (double)((int)(wx[2] & 0xffffff)) * c;
301 	yy[4] = (double)((int)(wy[2] & 0xffffff)) * c;
302 
303 	/* form the "digits" of the product */
304 	zz[0] = xx[0] * yy[0];
305 	zz[1] = xx[0] * yy[1] + xx[1] * yy[0];
306 	zz[2] = xx[0] * yy[2] + xx[1] * yy[1] + xx[2] * yy[0];
307 	zz[3] = xx[0] * yy[3] + xx[1] * yy[2] + xx[2] * yy[1] +
308 	    xx[3] * yy[0];
309 	zz[4] = xx[0] * yy[4] + xx[1] * yy[3] + xx[2] * yy[2] +
310 	    xx[3] * yy[1] + xx[4] * yy[0];
311 	zz[5] = xx[1] * yy[4] + xx[2] * yy[3] + xx[3] * yy[2] +
312 	    xx[4] * yy[1];
313 	zz[6] = xx[2] * yy[4] + xx[3] * yy[3] + xx[4] * yy[2];
314 	zz[7] = xx[3] * yy[4] + xx[4] * yy[3];
315 	zz[8] = xx[4] * yy[4];
316 
317 	/* collect the first few terms */
318 	c = (zz[1] + two20) - two20;
319 	zz[0] += c;
320 	zz[1] = zz[2] + (zz[1] - c);
321 	c = (zz[3] + twom28) - twom28;
322 	zz[1] += c;
323 	zz[2] = zz[4] + (zz[3] - c);
324 
325 	/* propagate carries into the third term */
326 	d = zz[6] + (zz[7] + zz[8]);
327 	zz[2] += zz[5] + d;
328 
329 	/* if the third term might lie on a rounding boundary, perturb it */
330 	/* as need be */
331 	if (zz[2] == (twom62 + zz[2]) - twom62)
332 	{
333 		c = (zz[5] + twom76) - twom76;
334 		if ((zz[5] - c) + d != zero)
335 			zz[2] += twom124;
336 	}
337 
338 	/* propagate carries to the leading term */
339 	c = zz[1] + zz[2];
340 	zz[2] = zz[2] + (zz[1] - c);
341 	zz[1] = c;
342 	c = zz[0] + zz[1];
343 	zz[1] = zz[1] + (zz[0] - c);
344 	zz[0] = c;
345 
346 	/* check for carry out */
347 	if (c >= two) {
348 		/* postnormalize */
349 		zz[0] *= half;
350 		zz[1] *= half;
351 		zz[2] *= half;
352 		ez++;
353 	}
354 
355 	/* if exponent > 0 strip off integer bit, else denormalize */
356 	if (ez > 0) {
357 		ibit = 1;
358 		zz[0] -= one;
359 	} else {
360 		ibit = 0;
361 		if (ez > -128)
362 			u.l.hi = (unsigned)(0x3fe + ez) << 20;
363 		else
364 			u.l.hi = 0x37e00000;
365 		u.l.lo = 0;
366 		zz[0] *= u.d;
367 		zz[1] *= u.d;
368 		zz[2] *= u.d;
369 		ez = 0;
370 	}
371 
372 	/* the first 48 bits of fraction come from zz[0] */
373 	u.d = d = two36 + zz[0];
374 	msw = u.l.lo;
375 	zz[0] -= (d - two36);
376 
377 	u.d = d = two4 + zz[0];
378 	frac2 = u.l.lo;
379 	zz[0] -= (d - two4);
380 
381 	/* the next 32 come from zz[0] and zz[1] */
382 	u.d = d = twom28 + (zz[0] + zz[1]);
383 	frac3 = u.l.lo;
384 	zz[0] -= (d - twom28);
385 
386 	/* condense the remaining fraction; errors here won't matter */
387 	c = zz[0] + zz[1];
388 	zz[1] = ((zz[0] - c) + zz[1]) + zz[2];
389 	zz[0] = c;
390 
391 	/* get the last word of fraction */
392 	u.d = d = twom60 + (zz[0] + zz[1]);
393 	frac4 = u.l.lo;
394 	zz[0] -= (d - twom60);
395 
396 	/* keep track of what's left for rounding; note that the error */
397 	/* in computing c will be non-negative due to rounding mode */
398 	c = zz[0] + zz[1];
399 
400 	/* get the rounding mode, fudging directed rounding modes */
401 	/* as though the result were positive */
402 	rm = fsr >> 30;
403 	if (sign)
404 		rm ^= (rm >> 1);
405 
406 	/* round and raise exceptions */
407 	fsr &= ~FSR_CEXC;
408 	if (c != zero) {
409 		fsr |= FSR_NXC;
410 
411 		/* decide whether to round the fraction up */
412 		if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 ||
413 		    (c == twom113 && ((frac4 & 1) || (c - zz[0] != zz[1])))))) {
414 			/* round up and renormalize if necessary */
415 			if (++frac4 == 0)
416 				if (++frac3 == 0)
417 					if (++frac2 == 0)
418 						if (++msw == 0x10000) {
419 							msw = 0;
420 							ez++;
421 						}
422 		}
423 	}
424 
425 	/* check for under/overflow */
426 	if (ez >= 0x7fff) {
427 		if (rm == FSR_RN || rm == FSR_RP) {
428 			z.l.msw = sign | 0x7fff0000;
429 			z.l.frac2 = z.l.frac3 = z.l.frac4 = 0;
430 		} else {
431 			z.l.msw = sign | 0x7ffeffff;
432 			z.l.frac2 = z.l.frac3 = z.l.frac4 = 0xffffffff;
433 		}
434 		fsr |= FSR_OFC | FSR_NXC;
435 	} else {
436 		z.l.msw = sign | (ez << 16) | msw;
437 		z.l.frac2 = frac2;
438 		z.l.frac3 = frac3;
439 		z.l.frac4 = frac4;
440 
441 		/* !ibit => exact result was tiny before rounding, */
442 		/* t nonzero => result delivered is inexact */
443 		if (!ibit) {
444 			if (c != zero)
445 				fsr |= FSR_UFC | FSR_NXC;
446 			else if (fsr & FSR_UFM)
447 				fsr |= FSR_UFC;
448 		}
449 	}
450 
451 	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
452 		__quad_setfsrp(&fsr);
453 		__quad_fmulq(x, y, &Z);
454 	} else {
455 		Z = z;
456 		fsr |= (fsr & 0x1f) << 5;
457 		__quad_setfsrp(&fsr);
458 	}
459 	QUAD_RETURN(Z);
460 }
461