xref: /linux/arch/mips/math-emu/dp_maddf.c (revision b85d45947951d23cb22d90caecf4c1eb81342c96)
1 /*
2  * IEEE754 floating point arithmetic
3  * double precision: MADDF.f (Fused Multiply Add)
4  * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
5  *
6  * MIPS floating point support
7  * Copyright (C) 2015 Imagination Technologies, Ltd.
8  * Author: Markos Chandras <markos.chandras@imgtec.com>
9  *
10  *  This program is free software; you can distribute it and/or modify it
11  *  under the terms of the GNU General Public License as published by the
12  *  Free Software Foundation; version 2 of the License.
13  */
14 
15 #include "ieee754dp.h"
16 
17 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
18 				union ieee754dp y)
19 {
20 	int re;
21 	int rs;
22 	u64 rm;
23 	unsigned lxm;
24 	unsigned hxm;
25 	unsigned lym;
26 	unsigned hym;
27 	u64 lrm;
28 	u64 hrm;
29 	u64 t;
30 	u64 at;
31 	int s;
32 
33 	COMPXDP;
34 	COMPYDP;
35 
36 	u64 zm; int ze; int zs __maybe_unused; int zc;
37 
38 	EXPLODEXDP;
39 	EXPLODEYDP;
40 	EXPLODEDP(z, zc, zs, ze, zm)
41 
42 	FLUSHXDP;
43 	FLUSHYDP;
44 	FLUSHDP(z, zc, zs, ze, zm);
45 
46 	ieee754_clearcx();
47 
48 	switch (zc) {
49 	case IEEE754_CLASS_SNAN:
50 		ieee754_setcx(IEEE754_INVALID_OPERATION);
51 		return ieee754dp_nanxcpt(z);
52 	case IEEE754_CLASS_DNORM:
53 		DPDNORMx(zm, ze);
54 	/* QNAN is handled separately below */
55 	}
56 
57 	switch (CLPAIR(xc, yc)) {
58 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
59 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
60 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
61 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
62 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
63 		return ieee754dp_nanxcpt(y);
64 
65 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
66 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
67 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
68 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
69 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
70 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
71 		return ieee754dp_nanxcpt(x);
72 
73 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
74 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
75 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
76 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
77 		return y;
78 
79 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
80 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
81 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
82 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
83 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
84 		return x;
85 
86 
87 	/*
88 	 * Infinity handling
89 	 */
90 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
91 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
92 		if (zc == IEEE754_CLASS_QNAN)
93 			return z;
94 		ieee754_setcx(IEEE754_INVALID_OPERATION);
95 		return ieee754dp_indef();
96 
97 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
98 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
99 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
100 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
101 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
102 		if (zc == IEEE754_CLASS_QNAN)
103 			return z;
104 		return ieee754dp_inf(xs ^ ys);
105 
106 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
107 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
108 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
109 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
110 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
111 		if (zc == IEEE754_CLASS_INF)
112 			return ieee754dp_inf(zs);
113 		/* Multiplication is 0 so just return z */
114 		return z;
115 
116 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
117 		DPDNORMX;
118 
119 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
120 		if (zc == IEEE754_CLASS_QNAN)
121 			return z;
122 		else if (zc == IEEE754_CLASS_INF)
123 			return ieee754dp_inf(zs);
124 		DPDNORMY;
125 		break;
126 
127 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
128 		if (zc == IEEE754_CLASS_QNAN)
129 			return z;
130 		else if (zc == IEEE754_CLASS_INF)
131 			return ieee754dp_inf(zs);
132 		DPDNORMX;
133 		break;
134 
135 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
136 		if (zc == IEEE754_CLASS_QNAN)
137 			return z;
138 		else if (zc == IEEE754_CLASS_INF)
139 			return ieee754dp_inf(zs);
140 		/* fall through to real computations */
141 	}
142 
143 	/* Finally get to do some computation */
144 
145 	/*
146 	 * Do the multiplication bit first
147 	 *
148 	 * rm = xm * ym, re = xe + ye basically
149 	 *
150 	 * At this point xm and ym should have been normalized.
151 	 */
152 	assert(xm & DP_HIDDEN_BIT);
153 	assert(ym & DP_HIDDEN_BIT);
154 
155 	re = xe + ye;
156 	rs = xs ^ ys;
157 
158 	/* shunt to top of word */
159 	xm <<= 64 - (DP_FBITS + 1);
160 	ym <<= 64 - (DP_FBITS + 1);
161 
162 	/*
163 	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
164 	 */
165 
166 	/* 32 * 32 => 64 */
167 #define DPXMULT(x, y)	((u64)(x) * (u64)y)
168 
169 	lxm = xm;
170 	hxm = xm >> 32;
171 	lym = ym;
172 	hym = ym >> 32;
173 
174 	lrm = DPXMULT(lxm, lym);
175 	hrm = DPXMULT(hxm, hym);
176 
177 	t = DPXMULT(lxm, hym);
178 
179 	at = lrm + (t << 32);
180 	hrm += at < lrm;
181 	lrm = at;
182 
183 	hrm = hrm + (t >> 32);
184 
185 	t = DPXMULT(hxm, lym);
186 
187 	at = lrm + (t << 32);
188 	hrm += at < lrm;
189 	lrm = at;
190 
191 	hrm = hrm + (t >> 32);
192 
193 	rm = hrm | (lrm != 0);
194 
195 	/*
196 	 * Sticky shift down to normal rounding precision.
197 	 */
198 	if ((s64) rm < 0) {
199 		rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
200 		     ((rm << (DP_FBITS + 1 + 3)) != 0);
201 			re++;
202 	} else {
203 		rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
204 		     ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
205 	}
206 	assert(rm & (DP_HIDDEN_BIT << 3));
207 
208 	/* And now the addition */
209 	assert(zm & DP_HIDDEN_BIT);
210 
211 	/*
212 	 * Provide guard,round and stick bit space.
213 	 */
214 	zm <<= 3;
215 
216 	if (ze > re) {
217 		/*
218 		 * Have to shift y fraction right to align.
219 		 */
220 		s = ze - re;
221 		rm = XDPSRS(rm, s);
222 		re += s;
223 	} else if (re > ze) {
224 		/*
225 		 * Have to shift x fraction right to align.
226 		 */
227 		s = re - ze;
228 		zm = XDPSRS(zm, s);
229 		ze += s;
230 	}
231 	assert(ze == re);
232 	assert(ze <= DP_EMAX);
233 
234 	if (zs == rs) {
235 		/*
236 		 * Generate 28 bit result of adding two 27 bit numbers
237 		 * leaving result in xm, xs and xe.
238 		 */
239 		zm = zm + rm;
240 
241 		if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
242 			zm = XDPSRS1(zm);
243 			ze++;
244 		}
245 	} else {
246 		if (zm >= rm) {
247 			zm = zm - rm;
248 		} else {
249 			zm = rm - zm;
250 			zs = rs;
251 		}
252 		if (zm == 0)
253 			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
254 
255 		/*
256 		 * Normalize to rounding precision.
257 		 */
258 		while ((zm >> (DP_FBITS + 3)) == 0) {
259 			zm <<= 1;
260 			ze--;
261 		}
262 	}
263 
264 	return ieee754dp_format(zs, ze, zm);
265 }
266