1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Routines to emulate some Altivec/VMX instructions, specifically 4 * those that can trap when given denormalized operands in Java mode. 5 */ 6 #include <linux/kernel.h> 7 #include <linux/errno.h> 8 #include <linux/sched.h> 9 #include <asm/ptrace.h> 10 #include <asm/processor.h> 11 #include <asm/switch_to.h> 12 #include <linux/uaccess.h> 13 14 /* Functions in vector.S */ 15 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); 16 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); 17 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); 18 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); 19 extern void vrefp(vector128 *dst, vector128 *src); 20 extern void vrsqrtefp(vector128 *dst, vector128 *src); 21 extern void vexptep(vector128 *dst, vector128 *src); 22 23 static unsigned int exp2s[8] = { 24 0x800000, 25 0x8b95c2, 26 0x9837f0, 27 0xa5fed7, 28 0xb504f3, 29 0xc5672a, 30 0xd744fd, 31 0xeac0c7 32 }; 33 34 /* 35 * Computes an estimate of 2^x. The `s' argument is the 32-bit 36 * single-precision floating-point representation of x. 37 */ 38 static unsigned int eexp2(unsigned int s) 39 { 40 int exp, pwr; 41 unsigned int mant, frac; 42 43 /* extract exponent field from input */ 44 exp = ((s >> 23) & 0xff) - 127; 45 if (exp > 7) { 46 /* check for NaN input */ 47 if (exp == 128 && (s & 0x7fffff) != 0) 48 return s | 0x400000; /* return QNaN */ 49 /* 2^-big = 0, 2^+big = +Inf */ 50 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ 51 } 52 if (exp < -23) 53 return 0x3f800000; /* 1.0 */ 54 55 /* convert to fixed point integer in 9.23 representation */ 56 pwr = (s & 0x7fffff) | 0x800000; 57 if (exp > 0) 58 pwr <<= exp; 59 else 60 pwr >>= -exp; 61 if (s & 0x80000000) 62 pwr = -pwr; 63 64 /* extract integer part, which becomes exponent part of result */ 65 exp = (pwr >> 23) + 126; 66 if (exp >= 254) 67 return 0x7f800000; 68 if (exp < -23) 69 return 0; 70 71 /* table lookup on top 3 bits of fraction to get mantissa */ 72 mant = exp2s[(pwr >> 20) & 7]; 73 74 /* linear interpolation using remaining 20 bits of fraction */ 75 asm("mulhwu %0,%1,%2" : "=r" (frac) 76 : "r" (pwr << 12), "r" (0x172b83ff)); 77 asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); 78 mant += frac; 79 80 if (exp >= 0) 81 return mant + (exp << 23); 82 83 /* denormalized result */ 84 exp = -exp; 85 mant += 1 << (exp - 1); 86 return mant >> exp; 87 } 88 89 /* 90 * Computes an estimate of log_2(x). The `s' argument is the 32-bit 91 * single-precision floating-point representation of x. 92 */ 93 static unsigned int elog2(unsigned int s) 94 { 95 int exp, mant, lz, frac; 96 97 exp = s & 0x7f800000; 98 mant = s & 0x7fffff; 99 if (exp == 0x7f800000) { /* Inf or NaN */ 100 if (mant != 0) 101 s |= 0x400000; /* turn NaN into QNaN */ 102 return s; 103 } 104 if ((exp | mant) == 0) /* +0 or -0 */ 105 return 0xff800000; /* return -Inf */ 106 107 if (exp == 0) { 108 /* denormalized */ 109 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); 110 mant <<= lz - 8; 111 exp = (-118 - lz) << 23; 112 } else { 113 mant |= 0x800000; 114 exp -= 127 << 23; 115 } 116 117 if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ 118 exp |= 0x400000; /* 0.5 * 2^23 */ 119 asm("mulhwu %0,%1,%2" : "=r" (mant) 120 : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ 121 } 122 if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ 123 exp |= 0x200000; /* 0.25 * 2^23 */ 124 asm("mulhwu %0,%1,%2" : "=r" (mant) 125 : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ 126 } 127 if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ 128 exp |= 0x100000; /* 0.125 * 2^23 */ 129 asm("mulhwu %0,%1,%2" : "=r" (mant) 130 : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ 131 } 132 if (mant > 0x800000) { /* 1.0 * 2^23 */ 133 /* calculate (mant - 1) * 1.381097463 */ 134 /* 1.381097463 == 0.125 / (2^0.125 - 1) */ 135 asm("mulhwu %0,%1,%2" : "=r" (frac) 136 : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); 137 exp += frac; 138 } 139 s = exp & 0x80000000; 140 if (exp != 0) { 141 if (s) 142 exp = -exp; 143 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); 144 lz = 8 - lz; 145 if (lz > 0) 146 exp >>= lz; 147 else if (lz < 0) 148 exp <<= -lz; 149 s += ((lz + 126) << 23) + exp; 150 } 151 return s; 152 } 153 154 #define VSCR_SAT 1 155 156 static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) 157 { 158 int exp, mant; 159 160 exp = (x >> 23) & 0xff; 161 mant = x & 0x7fffff; 162 if (exp == 255 && mant != 0) 163 return 0; /* NaN -> 0 */ 164 exp = exp - 127 + scale; 165 if (exp < 0) 166 return 0; /* round towards zero */ 167 if (exp >= 31) { 168 /* saturate, unless the result would be -2^31 */ 169 if (x + (scale << 23) != 0xcf000000) 170 *vscrp |= VSCR_SAT; 171 return (x & 0x80000000)? 0x80000000: 0x7fffffff; 172 } 173 mant |= 0x800000; 174 mant = (mant << 7) >> (30 - exp); 175 return (x & 0x80000000)? -mant: mant; 176 } 177 178 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) 179 { 180 int exp; 181 unsigned int mant; 182 183 exp = (x >> 23) & 0xff; 184 mant = x & 0x7fffff; 185 if (exp == 255 && mant != 0) 186 return 0; /* NaN -> 0 */ 187 exp = exp - 127 + scale; 188 if (exp < 0) 189 return 0; /* round towards zero */ 190 if (x & 0x80000000) { 191 /* negative => saturate to 0 */ 192 *vscrp |= VSCR_SAT; 193 return 0; 194 } 195 if (exp >= 32) { 196 /* saturate */ 197 *vscrp |= VSCR_SAT; 198 return 0xffffffff; 199 } 200 mant |= 0x800000; 201 mant = (mant << 8) >> (31 - exp); 202 return mant; 203 } 204 205 /* Round to floating integer, towards 0 */ 206 static unsigned int rfiz(unsigned int x) 207 { 208 int exp; 209 210 exp = ((x >> 23) & 0xff) - 127; 211 if (exp == 128 && (x & 0x7fffff) != 0) 212 return x | 0x400000; /* NaN -> make it a QNaN */ 213 if (exp >= 23) 214 return x; /* it's an integer already (or Inf) */ 215 if (exp < 0) 216 return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ 217 return x & ~(0x7fffff >> exp); 218 } 219 220 /* Round to floating integer, towards +/- Inf */ 221 static unsigned int rfii(unsigned int x) 222 { 223 int exp, mask; 224 225 exp = ((x >> 23) & 0xff) - 127; 226 if (exp == 128 && (x & 0x7fffff) != 0) 227 return x | 0x400000; /* NaN -> make it a QNaN */ 228 if (exp >= 23) 229 return x; /* it's an integer already (or Inf) */ 230 if ((x & 0x7fffffff) == 0) 231 return x; /* +/-0 -> +/-0 */ 232 if (exp < 0) 233 /* 0 < |x| < 1.0 rounds to +/- 1.0 */ 234 return (x & 0x80000000) | 0x3f800000; 235 mask = 0x7fffff >> exp; 236 /* mantissa overflows into exponent - that's OK, 237 it can't overflow into the sign bit */ 238 return (x + mask) & ~mask; 239 } 240 241 /* Round to floating integer, to nearest */ 242 static unsigned int rfin(unsigned int x) 243 { 244 int exp, half; 245 246 exp = ((x >> 23) & 0xff) - 127; 247 if (exp == 128 && (x & 0x7fffff) != 0) 248 return x | 0x400000; /* NaN -> make it a QNaN */ 249 if (exp >= 23) 250 return x; /* it's an integer already (or Inf) */ 251 if (exp < -1) 252 return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ 253 if (exp == -1) 254 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ 255 return (x & 0x80000000) | 0x3f800000; 256 half = 0x400000 >> exp; 257 /* add 0.5 to the magnitude and chop off the fraction bits */ 258 return (x + half) & ~(0x7fffff >> exp); 259 } 260 261 int emulate_altivec(struct pt_regs *regs) 262 { 263 unsigned int instr, i; 264 unsigned int va, vb, vc, vd; 265 vector128 *vrs; 266 267 if (get_user(instr, (unsigned int __user *) regs->nip)) 268 return -EFAULT; 269 if ((instr >> 26) != 4) 270 return -EINVAL; /* not an altivec instruction */ 271 vd = (instr >> 21) & 0x1f; 272 va = (instr >> 16) & 0x1f; 273 vb = (instr >> 11) & 0x1f; 274 vc = (instr >> 6) & 0x1f; 275 276 vrs = current->thread.vr_state.vr; 277 switch (instr & 0x3f) { 278 case 10: 279 switch (vc) { 280 case 0: /* vaddfp */ 281 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); 282 break; 283 case 1: /* vsubfp */ 284 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); 285 break; 286 case 4: /* vrefp */ 287 vrefp(&vrs[vd], &vrs[vb]); 288 break; 289 case 5: /* vrsqrtefp */ 290 vrsqrtefp(&vrs[vd], &vrs[vb]); 291 break; 292 case 6: /* vexptefp */ 293 for (i = 0; i < 4; ++i) 294 vrs[vd].u[i] = eexp2(vrs[vb].u[i]); 295 break; 296 case 7: /* vlogefp */ 297 for (i = 0; i < 4; ++i) 298 vrs[vd].u[i] = elog2(vrs[vb].u[i]); 299 break; 300 case 8: /* vrfin */ 301 for (i = 0; i < 4; ++i) 302 vrs[vd].u[i] = rfin(vrs[vb].u[i]); 303 break; 304 case 9: /* vrfiz */ 305 for (i = 0; i < 4; ++i) 306 vrs[vd].u[i] = rfiz(vrs[vb].u[i]); 307 break; 308 case 10: /* vrfip */ 309 for (i = 0; i < 4; ++i) { 310 u32 x = vrs[vb].u[i]; 311 x = (x & 0x80000000)? rfiz(x): rfii(x); 312 vrs[vd].u[i] = x; 313 } 314 break; 315 case 11: /* vrfim */ 316 for (i = 0; i < 4; ++i) { 317 u32 x = vrs[vb].u[i]; 318 x = (x & 0x80000000)? rfii(x): rfiz(x); 319 vrs[vd].u[i] = x; 320 } 321 break; 322 case 14: /* vctuxs */ 323 for (i = 0; i < 4; ++i) 324 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, 325 ¤t->thread.vr_state.vscr.u[3]); 326 break; 327 case 15: /* vctsxs */ 328 for (i = 0; i < 4; ++i) 329 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, 330 ¤t->thread.vr_state.vscr.u[3]); 331 break; 332 default: 333 return -EINVAL; 334 } 335 break; 336 case 46: /* vmaddfp */ 337 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); 338 break; 339 case 47: /* vnmsubfp */ 340 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); 341 break; 342 default: 343 return -EINVAL; 344 } 345 346 return 0; 347 } 348