xref: /illumos-gate/usr/src/uts/sparc/fpu/mul.c (revision 9b9d39d2a32ff806d2431dbcc50968ef1e6d46b2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1988 by Sun Microsystems, Inc.
24  */
25 
26 #ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS-4.1 1.8 88/12/06 */
27 
28 #include <sys/fpu/fpu_simulator.h>
29 #include <sys/fpu/globals.h>
30 
31 void
32 _fp_mul(pfpsd, px, py, pz)
33 	fp_simd_type	*pfpsd;
34 	unpacked	*px, *py, *pz;
35 
36 {
37 	unpacked	*pt;
38 	unsigned	acc[4];		/* Product accumulator. */
39 	unsigned	j, y, *x, s, r, c;
40 
41 	if ((int) px->fpclass <= (int) py->fpclass) {
42 		pt = px;
43 		px = py;
44 		py = pt;
45 	}
46 	/* Now class(x) >= class(y).  */
47 
48 	*pz = *px;
49 	if (pz->fpclass < fp_quiet)
50 		pz->sign = px->sign ^ py->sign;
51 
52 	switch (px->fpclass) {
53 	case fp_quiet:
54 	case fp_signaling:
55 	case fp_zero:
56 		return;
57 	case fp_infinity:
58 		if (py->fpclass == fp_zero) {
59 			fpu_error_nan(pfpsd, pz);
60 			pz->fpclass = fp_quiet;
61 		}
62 		return;
63 	case fp_normal:
64 		if (py->fpclass == fp_zero) {
65 			pz->fpclass = fp_zero;
66 			return;
67 		}
68 	}
69 
70 	/* Now x and y are both normal or subnormal. */
71 
72 	x = px->significand;	/* save typing */
73 
74 	/* intialize acc to zero */
75 	s = r = acc[0] = acc[1] = acc[2] = acc[3] = 0;
76 
77 	y = py->significand[3];		/* py->significand[3] * x */
78 	if (y != 0) {
79 	    j = 1;
80 	    do {
81 		s |= r;		/* shift acc right one bit */
82 		r  = acc[3]&1;
83 		acc[3] = ((acc[2]&1)<<31)|(acc[3]>>1);
84 		acc[2] = ((acc[1]&1)<<31)|(acc[2]>>1);
85 		acc[1] = ((acc[0]&1)<<31)|(acc[1]>>1);
86 		acc[0] = (acc[0]>>1);
87 		if (j&y) {		/* bit i of y != 0, add x to acc */
88 			c = 0;
89 			c = fpu_add3wc(&acc[3], acc[3], x[3], c);
90 			c = fpu_add3wc(&acc[2], acc[2], x[2], c);
91 			c = fpu_add3wc(&acc[1], acc[1], x[1], c);
92 			c = fpu_add3wc(&acc[0], acc[0], x[0], c);
93 		}
94 		j += j;
95 	    } while (j != 0);
96 	}
97 
98 	y = py->significand[2];		/* py->significand[2] * x */
99 	if (y != 0) {
100 	    j = 1;
101 	    do {
102 		s |= r;		/* shift acc right one bit */
103 		r  = acc[3]&1;
104 		acc[3] = ((acc[2]&1)<<31)|(acc[3]>>1);
105 		acc[2] = ((acc[1]&1)<<31)|(acc[2]>>1);
106 		acc[1] = ((acc[0]&1)<<31)|(acc[1]>>1);
107 		acc[0] = (acc[0]>>1);
108 		if (j&y) {		/* bit i of y != 0, add x to acc */
109 			c = 0;
110 			c = fpu_add3wc(&acc[3], acc[3], x[3], c);
111 			c = fpu_add3wc(&acc[2], acc[2], x[2], c);
112 			c = fpu_add3wc(&acc[1], acc[1], x[1], c);
113 			c = fpu_add3wc(&acc[0], acc[0], x[0], c);
114 		}
115 		j += j;
116 	    } while (j != 0);
117 	} else {
118 		s |= r|(acc[3]&0x7fffffff);
119 		r  = (acc[3]&0x80000000)>>31;
120 		acc[3] = acc[2]; acc[2] = acc[1]; acc[1] = acc[0]; acc[0] = 0;
121 	}
122 
123 	y = py->significand[1];		/* py->significand[1] * x */
124 	if (y != 0) {
125 	    j = 1;
126 	    do {
127 		s |= r;		/* shift acc right one bit */
128 		r  = acc[3]&1;
129 		acc[3] = ((acc[2]&1)<<31)|(acc[3]>>1);
130 		acc[2] = ((acc[1]&1)<<31)|(acc[2]>>1);
131 		acc[1] = ((acc[0]&1)<<31)|(acc[1]>>1);
132 		acc[0] = (acc[0]>>1);
133 		if (j&y) {		/* bit i of y != 0, add x to acc */
134 			c = 0;
135 			c = fpu_add3wc(&acc[3], acc[3], x[3], c);
136 			c = fpu_add3wc(&acc[2], acc[2], x[2], c);
137 			c = fpu_add3wc(&acc[1], acc[1], x[1], c);
138 			c = fpu_add3wc(&acc[0], acc[0], x[0], c);
139 		}
140 		j += j;
141 	    } while (j != 0);
142 	} else {
143 		s |= r|(acc[3]&0x7fffffff);
144 		r  = (acc[3]&0x80000000)>>31;
145 		acc[3] = acc[2]; acc[2] = acc[1]; acc[1] = acc[0]; acc[0] = 0;
146 	}
147 
148 					/* py->significand[0] * x */
149 	y = py->significand[0];		/* y is of form 0x0001???? */
150 	j = 1;
151 	do {
152 		s |= r;		/* shift acc right one bit */
153 		r  = acc[3]&1;
154 		acc[3] = ((acc[2]&1)<<31)|(acc[3]>>1);
155 		acc[2] = ((acc[1]&1)<<31)|(acc[2]>>1);
156 		acc[1] = ((acc[0]&1)<<31)|(acc[1]>>1);
157 		acc[0] = (acc[0]>>1);
158 		if (j&y) {		/* bit i of y != 0, add x to acc */
159 			c = 0;
160 			c = fpu_add3wc(&acc[3], acc[3], x[3], c);
161 			c = fpu_add3wc(&acc[2], acc[2], x[2], c);
162 			c = fpu_add3wc(&acc[1], acc[1], x[1], c);
163 			c = fpu_add3wc(&acc[0], acc[0], x[0], c);
164 		}
165 		j += j;
166 	} while (j <= y);
167 
168 	if (acc[0] >= 0x20000) {	/* right shift one bit to normalize */
169 		pz->exponent = px->exponent + py->exponent + 1;
170 		pz->sticky = s|r;
171 		pz->rounded = acc[3]&1;
172 		pz->significand[3] = ((acc[2]&1)<<31)|(acc[3]>>1);
173 		pz->significand[2] = ((acc[1]&1)<<31)|(acc[2]>>1);
174 		pz->significand[1] = ((acc[0]&1)<<31)|(acc[1]>>1);
175 		pz->significand[0] = (acc[0]>>1);
176 	} else {
177 		pz->exponent = px->exponent + py->exponent;
178 		pz->sticky = s;
179 		pz->rounded = r;
180 		pz->significand[3] = acc[3];
181 		pz->significand[2] = acc[2];
182 		pz->significand[1] = acc[1];
183 		pz->significand[0] = acc[0];
184 	}
185 }
186