xref: /linux/arch/powerpc/kernel/vecemu.c (revision 03ab8e6297acd1bc0eedaa050e2a1635c576fd11)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Routines to emulate some Altivec/VMX instructions, specifically
4  * those that can trap when given denormalized operands in Java mode.
5  */
6 #include <linux/kernel.h>
7 #include <linux/errno.h>
8 #include <linux/sched.h>
9 #include <asm/ptrace.h>
10 #include <asm/processor.h>
11 #include <asm/switch_to.h>
12 #include <linux/uaccess.h>
13 #include <asm/inst.h>
14 
15 /* Functions in vector.S */
16 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
17 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
18 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
19 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
20 extern void vrefp(vector128 *dst, vector128 *src);
21 extern void vrsqrtefp(vector128 *dst, vector128 *src);
22 extern void vexptep(vector128 *dst, vector128 *src);
23 
/*
 * Lookup table for eexp2(): entry i is 2^(i/8) scaled by 2^23, i.e. the
 * 24-bit significand (with the leading 1 at bit 23) of 2^(i/8), i = 0..7.
 */
static const unsigned int exp2s[8] = {
	0x800000,
	0x8b95c2,
	0x9837f0,
	0xa5fed7,
	0xb504f3,
	0xc5672a,
	0xd744fd,
	0xeac0c7
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x, and the return
 * value is the single-precision bit pattern of the estimate.
 *
 * Special cases handled explicitly:
 *   NaN                      -> the same NaN with the quiet bit set
 *   x >= 256.0 or +Inf       -> +Inf
 *   x <= -256.0 or -Inf      -> +0
 *   |x| < 2^-23              -> 1.0
 * Results too small for a normalized number are returned denormalized.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;
	unsigned long long prod;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/*
	 * convert to fixed point integer in 9.23 representation;
	 * exp is in [-23, 7] here so the magnitude fits in 31 bits
	 */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * extract integer part, which becomes exponent part of result
	 * (relies on >> of a negative int being an arithmetic shift, so
	 * that the integer part is floor(x))
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * linear interpolation using remaining 20 bits of fraction:
	 * take the high 32 bits of two 32x32 unsigned products.  The
	 * shift is done on the unsigned value because shifting a negative
	 * or overflowing signed int left is undefined behaviour.
	 * 0x172b83ff is approximately (2^(1/8) - 1) * 2^32.
	 */
	prod = (unsigned long long)((unsigned int)pwr << 12) * 0x172b83ffu;
	frac = (unsigned int)(prod >> 32);
	prod = (unsigned long long)frac * mant;
	frac = (unsigned int)(prod >> 32);
	mant += frac;

	/* mant still carries the implicit 1 at bit 23, hence the 126 bias */
	if (exp >= 0)
		return mant + (exp << 23);

	/* denormalized result: round to nearest while shifting right */
	exp = -exp;
	mant += 1 << (exp - 1);
	return mant >> exp;
}
89 
/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x, and the return
 * value is the single-precision bit pattern of the estimate.
 *
 * Special cases:
 *   NaN              -> the same NaN with the quiet bit set
 *   +Inf             -> +Inf
 *   +0 or -0         -> -Inf
 *   -Inf, any x < 0  -> default QNaN (0x7fc00000), since the logarithm
 *                       of a negative number is not defined
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;
	unsigned long long prod;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		/* +Inf stays +Inf, -Inf has no logarithm */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)		/* negative and non-zero */
		return 0x7fc00000;	/* return QNaN */

	/*
	 * From here on `exp' holds the result as a signed fixed-point
	 * number with 23 fraction bits, and `mant' the significand with
	 * its leading 1 at bit 23.
	 */
	if (exp == 0) {
		/* denormalized; mant is non-zero so the count is defined */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;
		/* multiply rather than left-shift a negative value */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * Peel off the top three fraction bits of the result: whenever
	 * mant is at least 2^k, record k and scale mant by 2^-k using the
	 * high 32 bits of a 32x32 unsigned product.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		prod = (unsigned long long)(unsigned int)mant * 0xb504f334u;
		mant = (int)(prod >> 32);		/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		prod = (unsigned long long)(unsigned int)mant * 0xd744fccbu;
		mant = (int)(prod >> 32);		/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		prod = (unsigned long long)(unsigned int)mant * 0xeac0c6e8u;
		mant = (int)(prod >> 32);		/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		prod = (unsigned long long)((unsigned int)(mant - 0x800000) << 1)
			* 0xb0c7cd3au;
		frac = (int)(prod >> 32);
		exp += frac;
	}

	/* convert the fixed-point result back to sign/exponent/mantissa */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		/* normalize so that the leading 1 sits at bit 23 */
		lz = 8 - __builtin_clz((unsigned int)exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* bit 23 of exp carries into the exponent field: 126 + 1 */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
154 
#define VSCR_SAT	1

/*
 * Convert the single-precision value whose bit pattern is `x',
 * multiplied by 2^scale, to a signed 32-bit integer, truncating
 * towards zero.  NaNs convert to 0.  Values that do not fit are
 * clamped to 0x7fffffff or 0x80000000 according to their sign, and
 * VSCR_SAT is set in *vscrp unless the scaled value is exactly -2^31.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	const int fraction = x & 0x7fffff;
	int e = (x >> 23) & 0xff;
	int magnitude;

	/* NaN -> 0 */
	if (e == 255 && fraction != 0)
		return 0;

	/* unbiased exponent of the scaled value */
	e += scale - 127;

	/* magnitude below 1.0: rounds towards zero */
	if (e < 0)
		return 0;

	if (e >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return negative ? 0x80000000 : 0x7fffffff;
	}

	/* put the leading 1 at bit 30, then shift down to the units place */
	magnitude = ((fraction | 0x800000) << 7) >> (30 - e);
	return negative ? -magnitude : magnitude;
}
178 
ctuxs(unsigned int x,int scale,unsigned int * vscrp)179 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
180 {
181 	int exp;
182 	unsigned int mant;
183 
184 	exp = (x >> 23) & 0xff;
185 	mant = x & 0x7fffff;
186 	if (exp == 255 && mant != 0)
187 		return 0;		/* NaN -> 0 */
188 	exp = exp - 127 + scale;
189 	if (exp < 0)
190 		return 0;		/* round towards zero */
191 	if (x & 0x80000000) {
192 		/* negative => saturate to 0 */
193 		*vscrp |= VSCR_SAT;
194 		return 0;
195 	}
196 	if (exp >= 32) {
197 		/* saturate */
198 		*vscrp |= VSCR_SAT;
199 		return 0xffffffff;
200 	}
201 	mant |= 0x800000;
202 	mant = (mant << 8) >> (31 - exp);
203 	return mant;
204 }
205 
/*
 * Round to floating integer, towards 0.
 * Takes and returns single-precision bit patterns; NaNs come back
 * with the quiet bit set.
 */
static unsigned int rfiz(unsigned int x)
{
	const int e = ((x >> 23) & 0xff) - 127;	/* unbiased exponent */

	/* NaN -> make it a QNaN */
	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;

	/* |x| < 1.0 truncates to a zero of the same sign */
	if (e < 0)
		return x & 0x80000000;

	/* no fraction bits left: already an integer (or Inf) */
	if (e >= 23)
		return x;

	/* clear the mantissa bits that lie below the units place */
	return x & ~(0x7fffff >> e);
}
220 
/*
 * Round to floating integer, towards +/- Inf (i.e. away from zero).
 * Takes and returns single-precision bit patterns; NaNs come back
 * with the quiet bit set.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const int e = ((x >> 23) & 0xff) - 127;	/* unbiased exponent */
	int below_units;

	/* NaN -> make it a QNaN */
	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;

	/* already an integer (or Inf), or +/-0 which stays +/-0 */
	if (e >= 23 || (x & 0x7fffffff) == 0)
		return x;

	/* 0 < |x| < 1.0 rounds to +/- 1.0 */
	if (e < 0)
		return sign | 0x3f800000;

	/*
	 * Bump the magnitude past the next integer unless it already is
	 * one, then drop the fraction.  A carry out of the mantissa goes
	 * into the exponent, which is fine; it cannot reach the sign bit.
	 */
	below_units = 0x7fffff >> e;
	return (x + below_units) & ~below_units;
}
241 
/*
 * Round to floating integer, to nearest, with exact halves going to
 * the even neighbour (IEEE round-to-nearest-even).
 * Takes and returns single-precision bit patterns; NaNs come back
 * with the quiet bit set.
 */
static unsigned int rfin(unsigned int x)
{
	unsigned int half, frac_mask, r;
	int exp;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1: even is 0 */
		if ((x & 0x7fffff) == 0)
			return x & 0x80000000;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	}

	half = 0x400000 >> exp;		/* bit worth 0.5 */
	frac_mask = 0x7fffff >> exp;	/* all bits below the units place */

	/* add 0.5 to the magnitude and chop off the fraction bits */
	r = (x + half) & ~frac_mask;

	/*
	 * On an exact tie the step above rounded away from zero.  Clear
	 * the units bit so that an odd result drops back to the even
	 * integer just below it; an even result already has that bit
	 * clear.  When the addition carried into the exponent (including
	 * the only exp == 0 tie, 1.5 -> 2.0) the result is a power of two
	 * and the cleared bit is already zero.
	 */
	if ((x & frac_mask) == half)
		r &= ~(frac_mask + 1);

	return r;
}
261 
emulate_altivec(struct pt_regs * regs)262 int emulate_altivec(struct pt_regs *regs)
263 {
264 	ppc_inst_t instr;
265 	unsigned int i, word;
266 	unsigned int va, vb, vc, vd;
267 	vector128 *vrs;
268 
269 	if (get_user_instr(instr, (void __user *)regs->nip))
270 		return -EFAULT;
271 
272 	word = ppc_inst_val(instr);
273 	if (ppc_inst_primary_opcode(instr) != 4)
274 		return -EINVAL;		/* not an altivec instruction */
275 	vd = (word >> 21) & 0x1f;
276 	va = (word >> 16) & 0x1f;
277 	vb = (word >> 11) & 0x1f;
278 	vc = (word >> 6) & 0x1f;
279 
280 	vrs = current->thread.vr_state.vr;
281 	switch (word & 0x3f) {
282 	case 10:
283 		switch (vc) {
284 		case 0:	/* vaddfp */
285 			vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
286 			break;
287 		case 1:	/* vsubfp */
288 			vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
289 			break;
290 		case 4:	/* vrefp */
291 			vrefp(&vrs[vd], &vrs[vb]);
292 			break;
293 		case 5:	/* vrsqrtefp */
294 			vrsqrtefp(&vrs[vd], &vrs[vb]);
295 			break;
296 		case 6:	/* vexptefp */
297 			for (i = 0; i < 4; ++i)
298 				vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
299 			break;
300 		case 7:	/* vlogefp */
301 			for (i = 0; i < 4; ++i)
302 				vrs[vd].u[i] = elog2(vrs[vb].u[i]);
303 			break;
304 		case 8:		/* vrfin */
305 			for (i = 0; i < 4; ++i)
306 				vrs[vd].u[i] = rfin(vrs[vb].u[i]);
307 			break;
308 		case 9:		/* vrfiz */
309 			for (i = 0; i < 4; ++i)
310 				vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
311 			break;
312 		case 10:	/* vrfip */
313 			for (i = 0; i < 4; ++i) {
314 				u32 x = vrs[vb].u[i];
315 				x = (x & 0x80000000)? rfiz(x): rfii(x);
316 				vrs[vd].u[i] = x;
317 			}
318 			break;
319 		case 11:	/* vrfim */
320 			for (i = 0; i < 4; ++i) {
321 				u32 x = vrs[vb].u[i];
322 				x = (x & 0x80000000)? rfii(x): rfiz(x);
323 				vrs[vd].u[i] = x;
324 			}
325 			break;
326 		case 14:	/* vctuxs */
327 			for (i = 0; i < 4; ++i)
328 				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
329 					&current->thread.vr_state.vscr.u[3]);
330 			break;
331 		case 15:	/* vctsxs */
332 			for (i = 0; i < 4; ++i)
333 				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
334 					&current->thread.vr_state.vscr.u[3]);
335 			break;
336 		default:
337 			return -EINVAL;
338 		}
339 		break;
340 	case 46:	/* vmaddfp */
341 		vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
342 		break;
343 	case 47:	/* vnmsubfp */
344 		vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
345 		break;
346 	default:
347 		return -EINVAL;
348 	}
349 
350 	return 0;
351 }
352