xref: /linux/arch/mips/math-emu/sp_maddf.c (revision f3a8b6645dc2e60d11f20c1c23afd964ff4e55ae)
1 /*
2  * IEEE754 floating point arithmetic
3  * single precision: MADDF.f (Fused Multiply Add)
4  * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
5  *
6  * MIPS floating point support
7  * Copyright (C) 2015 Imagination Technologies, Ltd.
8  * Author: Markos Chandras <markos.chandras@imgtec.com>
9  *
10  *  This program is free software; you can distribute it and/or modify it
11  *  under the terms of the GNU General Public License as published by the
12  *  Free Software Foundation; version 2 of the License.
13  */
14 
15 #include "ieee754sp.h"
16 
17 enum maddf_flags {
18 	maddf_negate_product	= 1 << 0,
19 };
20 
21 static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
22 				 union ieee754sp y, enum maddf_flags flags)
23 {
24 	int re;
25 	int rs;
26 	unsigned rm;
27 	unsigned short lxm;
28 	unsigned short hxm;
29 	unsigned short lym;
30 	unsigned short hym;
31 	unsigned lrm;
32 	unsigned hrm;
33 	unsigned t;
34 	unsigned at;
35 	int s;
36 
37 	COMPXSP;
38 	COMPYSP;
39 	COMPZSP;
40 
41 	EXPLODEXSP;
42 	EXPLODEYSP;
43 	EXPLODEZSP;
44 
45 	FLUSHXSP;
46 	FLUSHYSP;
47 	FLUSHZSP;
48 
49 	ieee754_clearcx();
50 
51 	switch (zc) {
52 	case IEEE754_CLASS_SNAN:
53 		ieee754_setcx(IEEE754_INVALID_OPERATION);
54 		return ieee754sp_nanxcpt(z);
55 	case IEEE754_CLASS_DNORM:
56 		SPDNORMZ;
57 	/* QNAN is handled separately below */
58 	}
59 
60 	switch (CLPAIR(xc, yc)) {
61 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
62 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
63 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
64 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
65 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
66 		return ieee754sp_nanxcpt(y);
67 
68 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
69 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
70 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
71 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
72 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
73 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
74 		return ieee754sp_nanxcpt(x);
75 
76 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
77 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
78 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
79 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
80 		return y;
81 
82 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
83 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
84 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
85 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
86 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
87 		return x;
88 
89 	/*
90 	 * Infinity handling
91 	 */
92 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
93 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
94 		if (zc == IEEE754_CLASS_QNAN)
95 			return z;
96 		ieee754_setcx(IEEE754_INVALID_OPERATION);
97 		return ieee754sp_indef();
98 
99 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
100 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
101 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
102 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
103 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
104 		if (zc == IEEE754_CLASS_QNAN)
105 			return z;
106 		return ieee754sp_inf(xs ^ ys);
107 
108 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
109 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
110 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
111 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
112 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
113 		if (zc == IEEE754_CLASS_INF)
114 			return ieee754sp_inf(zs);
115 		/* Multiplication is 0 so just return z */
116 		return z;
117 
118 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
119 		SPDNORMX;
120 
121 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
122 		if (zc == IEEE754_CLASS_QNAN)
123 			return z;
124 		else if (zc == IEEE754_CLASS_INF)
125 			return ieee754sp_inf(zs);
126 		SPDNORMY;
127 		break;
128 
129 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130 		if (zc == IEEE754_CLASS_QNAN)
131 			return z;
132 		else if (zc == IEEE754_CLASS_INF)
133 			return ieee754sp_inf(zs);
134 		SPDNORMX;
135 		break;
136 
137 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
138 		if (zc == IEEE754_CLASS_QNAN)
139 			return z;
140 		else if (zc == IEEE754_CLASS_INF)
141 			return ieee754sp_inf(zs);
142 		/* fall through to real computations */
143 	}
144 
145 	/* Finally get to do some computation */
146 
147 	/*
148 	 * Do the multiplication bit first
149 	 *
150 	 * rm = xm * ym, re = xe + ye basically
151 	 *
152 	 * At this point xm and ym should have been normalized.
153 	 */
154 
155 	/* rm = xm * ym, re = xe+ye basically */
156 	assert(xm & SP_HIDDEN_BIT);
157 	assert(ym & SP_HIDDEN_BIT);
158 
159 	re = xe + ye;
160 	rs = xs ^ ys;
161 	if (flags & maddf_negate_product)
162 		rs ^= 1;
163 
164 	/* shunt to top of word */
165 	xm <<= 32 - (SP_FBITS + 1);
166 	ym <<= 32 - (SP_FBITS + 1);
167 
168 	/*
169 	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
170 	 */
171 	lxm = xm & 0xffff;
172 	hxm = xm >> 16;
173 	lym = ym & 0xffff;
174 	hym = ym >> 16;
175 
176 	lrm = lxm * lym;	/* 16 * 16 => 32 */
177 	hrm = hxm * hym;	/* 16 * 16 => 32 */
178 
179 	t = lxm * hym; /* 16 * 16 => 32 */
180 	at = lrm + (t << 16);
181 	hrm += at < lrm;
182 	lrm = at;
183 	hrm = hrm + (t >> 16);
184 
185 	t = hxm * lym; /* 16 * 16 => 32 */
186 	at = lrm + (t << 16);
187 	hrm += at < lrm;
188 	lrm = at;
189 	hrm = hrm + (t >> 16);
190 
191 	rm = hrm | (lrm != 0);
192 
193 	/*
194 	 * Sticky shift down to normal rounding precision.
195 	 */
196 	if ((int) rm < 0) {
197 		rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
198 		    ((rm << (SP_FBITS + 1 + 3)) != 0);
199 		re++;
200 	} else {
201 		rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
202 		     ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
203 	}
204 	assert(rm & (SP_HIDDEN_BIT << 3));
205 
206 	/* And now the addition */
207 
208 	assert(zm & SP_HIDDEN_BIT);
209 
210 	/*
211 	 * Provide guard,round and stick bit space.
212 	 */
213 	zm <<= 3;
214 
215 	if (ze > re) {
216 		/*
217 		 * Have to shift r fraction right to align.
218 		 */
219 		s = ze - re;
220 		rm = XSPSRS(rm, s);
221 		re += s;
222 	} else if (re > ze) {
223 		/*
224 		 * Have to shift z fraction right to align.
225 		 */
226 		s = re - ze;
227 		zm = XSPSRS(zm, s);
228 		ze += s;
229 	}
230 	assert(ze == re);
231 	assert(ze <= SP_EMAX);
232 
233 	if (zs == rs) {
234 		/*
235 		 * Generate 28 bit result of adding two 27 bit numbers
236 		 * leaving result in zm, zs and ze.
237 		 */
238 		zm = zm + rm;
239 
240 		if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
241 			zm = XSPSRS1(zm);
242 			ze++;
243 		}
244 	} else {
245 		if (zm >= rm) {
246 			zm = zm - rm;
247 		} else {
248 			zm = rm - zm;
249 			zs = rs;
250 		}
251 		if (zm == 0)
252 			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
253 
254 		/*
255 		 * Normalize in extended single precision
256 		 */
257 		while ((zm >> (SP_MBITS + 3)) == 0) {
258 			zm <<= 1;
259 			ze--;
260 		}
261 
262 	}
263 	return ieee754sp_format(zs, ze, zm);
264 }
265 
266 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
267 				union ieee754sp y)
268 {
269 	return _sp_maddf(z, x, y, 0);
270 }
271 
272 union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
273 				union ieee754sp y)
274 {
275 	return _sp_maddf(z, x, y, maddf_negate_product);
276 }
277