1 /* 2 * IEEE754 floating point arithmetic 3 * double precision: MADDF.f (Fused Multiply Add) 4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 5 * 6 * MIPS floating point support 7 * Copyright (C) 2015 Imagination Technologies, Ltd. 8 * Author: Markos Chandras <markos.chandras@imgtec.com> 9 * 10 * This program is free software; you can distribute it and/or modify it 11 * under the terms of the GNU General Public License as published by the 12 * Free Software Foundation; version 2 of the License. 13 */ 14 15 #include "ieee754dp.h" 16 17 enum maddf_flags { 18 maddf_negate_product = 1 << 0, 19 }; 20 21 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, 22 union ieee754dp y, enum maddf_flags flags) 23 { 24 int re; 25 int rs; 26 u64 rm; 27 unsigned lxm; 28 unsigned hxm; 29 unsigned lym; 30 unsigned hym; 31 u64 lrm; 32 u64 hrm; 33 u64 t; 34 u64 at; 35 int s; 36 37 COMPXDP; 38 COMPYDP; 39 COMPZDP; 40 41 EXPLODEXDP; 42 EXPLODEYDP; 43 EXPLODEZDP; 44 45 FLUSHXDP; 46 FLUSHYDP; 47 FLUSHZDP; 48 49 ieee754_clearcx(); 50 51 switch (zc) { 52 case IEEE754_CLASS_SNAN: 53 ieee754_setcx(IEEE754_INVALID_OPERATION); 54 return ieee754dp_nanxcpt(z); 55 case IEEE754_CLASS_DNORM: 56 DPDNORMZ; 57 /* QNAN is handled separately below */ 58 } 59 60 switch (CLPAIR(xc, yc)) { 61 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): 62 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): 63 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): 64 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): 65 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): 66 return ieee754dp_nanxcpt(y); 67 68 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): 69 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): 70 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): 71 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): 72 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): 73 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): 74 return ieee754dp_nanxcpt(x); 75 76 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): 77 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): 78 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): 79 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): 80 return y; 81 82 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): 83 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): 84 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): 85 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): 86 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): 87 return x; 88 89 90 /* 91 * Infinity handling 92 */ 93 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 94 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 95 if (zc == IEEE754_CLASS_QNAN) 96 return z; 97 ieee754_setcx(IEEE754_INVALID_OPERATION); 98 return ieee754dp_indef(); 99 100 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 101 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 102 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 103 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 104 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 105 if (zc == IEEE754_CLASS_QNAN) 106 return z; 107 return ieee754dp_inf(xs ^ ys); 108 109 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 110 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 111 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 112 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 113 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 114 if (zc == IEEE754_CLASS_INF) 115 return ieee754dp_inf(zs); 116 /* Multiplication is 0 so just return z */ 117 return z; 118 119 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 120 DPDNORMX; 121 122 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 123 if (zc == IEEE754_CLASS_QNAN) 124 return z; 125 else if (zc == IEEE754_CLASS_INF) 126 return ieee754dp_inf(zs); 127 DPDNORMY; 128 break; 129 130 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 131 if (zc == IEEE754_CLASS_QNAN) 132 return z; 133 else if (zc == IEEE754_CLASS_INF) 134 return ieee754dp_inf(zs); 135 DPDNORMX; 136 break; 137 138 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 139 if (zc == IEEE754_CLASS_QNAN) 140 return z; 141 else if (zc == IEEE754_CLASS_INF) 142 return ieee754dp_inf(zs); 143 /* fall through to real computations */ 144 } 145 146 /* Finally get to do some computation */ 147 148 /* 149 * Do the multiplication bit first 150 * 151 * rm = xm * ym, re = xe + ye basically 152 * 153 * At this point xm and ym should have been normalized. 154 */ 155 assert(xm & DP_HIDDEN_BIT); 156 assert(ym & DP_HIDDEN_BIT); 157 158 re = xe + ye; 159 rs = xs ^ ys; 160 if (flags & maddf_negate_product) 161 rs ^= 1; 162 163 /* shunt to top of word */ 164 xm <<= 64 - (DP_FBITS + 1); 165 ym <<= 64 - (DP_FBITS + 1); 166 167 /* 168 * Multiply 64 bits xm, ym to give high 64 bits rm with stickness. 169 */ 170 171 /* 32 * 32 => 64 */ 172 #define DPXMULT(x, y) ((u64)(x) * (u64)y) 173 174 lxm = xm; 175 hxm = xm >> 32; 176 lym = ym; 177 hym = ym >> 32; 178 179 lrm = DPXMULT(lxm, lym); 180 hrm = DPXMULT(hxm, hym); 181 182 t = DPXMULT(lxm, hym); 183 184 at = lrm + (t << 32); 185 hrm += at < lrm; 186 lrm = at; 187 188 hrm = hrm + (t >> 32); 189 190 t = DPXMULT(hxm, lym); 191 192 at = lrm + (t << 32); 193 hrm += at < lrm; 194 lrm = at; 195 196 hrm = hrm + (t >> 32); 197 198 rm = hrm | (lrm != 0); 199 200 /* 201 * Sticky shift down to normal rounding precision. 202 */ 203 if ((s64) rm < 0) { 204 rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | 205 ((rm << (DP_FBITS + 1 + 3)) != 0); 206 re++; 207 } else { 208 rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | 209 ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); 210 } 211 assert(rm & (DP_HIDDEN_BIT << 3)); 212 213 /* And now the addition */ 214 assert(zm & DP_HIDDEN_BIT); 215 216 /* 217 * Provide guard,round and stick bit space. 218 */ 219 zm <<= 3; 220 221 if (ze > re) { 222 /* 223 * Have to shift y fraction right to align. 224 */ 225 s = ze - re; 226 rm = XDPSRS(rm, s); 227 re += s; 228 } else if (re > ze) { 229 /* 230 * Have to shift x fraction right to align. 231 */ 232 s = re - ze; 233 zm = XDPSRS(zm, s); 234 ze += s; 235 } 236 assert(ze == re); 237 assert(ze <= DP_EMAX); 238 239 if (zs == rs) { 240 /* 241 * Generate 28 bit result of adding two 27 bit numbers 242 * leaving result in xm, xs and xe. 243 */ 244 zm = zm + rm; 245 246 if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ 247 zm = XDPSRS1(zm); 248 ze++; 249 } 250 } else { 251 if (zm >= rm) { 252 zm = zm - rm; 253 } else { 254 zm = rm - zm; 255 zs = rs; 256 } 257 if (zm == 0) 258 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 259 260 /* 261 * Normalize to rounding precision. 262 */ 263 while ((zm >> (DP_FBITS + 3)) == 0) { 264 zm <<= 1; 265 ze--; 266 } 267 } 268 269 return ieee754dp_format(zs, ze, zm); 270 } 271 272 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, 273 union ieee754dp y) 274 { 275 return _dp_maddf(z, x, y, 0); 276 } 277 278 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, 279 union ieee754dp y) 280 { 281 return _dp_maddf(z, x, y, maddf_negate_product); 282 } 283