xref: /linux/arch/powerpc/kernel/vecemu.c (revision 4be5e8648b0c287aefc6ac3f3a0b12c696054f43)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Routines to emulate some Altivec/VMX instructions, specifically
4  * those that can trap when given denormalized operands in Java mode.
5  */
6 #include <linux/kernel.h>
7 #include <linux/errno.h>
8 #include <linux/sched.h>
9 #include <asm/ptrace.h>
10 #include <asm/processor.h>
11 #include <asm/switch_to.h>
12 #include <linux/uaccess.h>
13 
14 /* Functions in vector.S */
15 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
16 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
17 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
18 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
19 extern void vrefp(vector128 *dst, vector128 *src);
20 extern void vrsqrtefp(vector128 *dst, vector128 *src);
21 extern void vexptep(vector128 *dst, vector128 *src);
22 
/*
 * Mantissas of 2^(i/8) for i = 0..7, scaled by 2^23 (so the implicit
 * leading 1 of a single-precision mantissa is bit 23).  eexp2() looks up
 * the entry selected by the top 3 fraction bits of its argument.
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^(0/8) */
	0x8b95c2,	/* 2^(1/8) */
	0x9837f0,	/* 2^(2/8) */
	0xa5fed7,	/* 2^(3/8) */
	0xb504f3,	/* 2^(4/8) */
	0xc5672a,	/* 2^(5/8) */
	0xd744fd,	/* 2^(6/8) */
	0xeac0c7	/* 2^(7/8) */
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision representation of the estimate.
 *
 * Special cases:
 *   NaN                  -> the same NaN with the quiet bit set
 *   x >= 256 (or +Inf)   -> +Inf
 *   x <= -256 (or -Inf)  -> +0
 *   |x| < 2^-23          -> 1.0
 * Results too small for a normalized number are returned denormalized
 * (rounded), or as +0 once they underflow completely.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/*
	 * Convert to fixed point integer in 9.23 representation.
	 * exp <= 7 here, so the 24-bit mantissa still fits in 31 bits
	 * after the left shift.
	 */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * Extract integer part, which becomes exponent part of result.
	 * The arithmetic right shift yields floor(x), so the remaining
	 * low 23 bits are always a non-negative fraction.  The bias is
	 * 126 rather than 127 because the mantissa added below still
	 * carries its implicit leading 1 in bit 23.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * Linear interpolation using remaining 20 bits of fraction:
	 * frac = mant * fraction * (2^(1/8) - 1), where 0x172b83ff is
	 * (2^(1/8) - 1) * 2^32.  Each step keeps the high 32 bits of a
	 * 32x32 unsigned product.  The shift is done on an unsigned copy
	 * of pwr because left-shifting a negative int is undefined.
	 */
	frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12) *
			       0x172b83ffu) >> 32);
	frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
	mant += frac;

	if (exp >= 0)
		return mant + ((unsigned int)exp << 23);

	/* denormalized result: shift the mantissa down, rounding to nearest */
	exp = -exp;
	mant += 1u << (exp - 1);
	return mant >> exp;
}
88 
/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision representation of the estimate.
 *
 * Special cases:
 *   NaN             -> the same NaN with the quiet bit set
 *   +Inf            -> +Inf
 *   +0 or -0        -> -Inf
 *   x < 0 or -Inf   -> QNaN (0x7fc00000); the logarithm is undefined
 * Denormalized inputs are normalized first and handled like any
 * other positive value.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, lz;
	unsigned int mant, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		/* log2(+Inf) = +Inf, log2(-Inf) is invalid */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if (exp == 0 && mant == 0)	/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)
		return 0x7fc00000;	/* log of a negative number: QNaN */

	/*
	 * From here on `exp' is the result so far as a signed 9.23
	 * fixed-point number and `mant' is the input mantissa scaled
	 * by 2^23 (1.0 == 0x800000).
	 */
	if (exp == 0) {
		/* denormalized: mant != 0, so it has at least 9 leading zeros */
		lz = __builtin_clz(mant);
		mant <<= lz - 8;
		/* multiply rather than shift: the value is negative */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * Peel off 1/2, 1/4 and 1/8 from the logarithm of the mantissa:
	 * whenever mant >= 2^k, record k in the result and multiply mant
	 * by 2^-k, keeping the high 32 bits of the 32x32 product.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = (unsigned int)(((unsigned long long)mant *
				       0xb504f334u) >> 32); /* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = (unsigned int)(((unsigned long long)mant *
				       0xd744fccbu) >> 32); /* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = (unsigned int)(((unsigned long long)mant *
				       0xeac0c6e8u) >> 32); /* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = (unsigned int)(((unsigned long long)
				       ((mant - 0x800000) << 1) *
				       0xb0c7cd3au) >> 32);
		exp += (int)frac;
	}

	/* convert the signed 9.23 fixed-point result back to floating point */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		/* lz becomes the binary exponent of the magnitude */
		lz = 8 - __builtin_clz(exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/*
		 * The leading 1 now sits in bit 23 and carries into the
		 * exponent field, hence a bias of 126 instead of 127.
		 */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
153 
/* Flag OR-ed into the word at *vscrp when a conversion below saturates */
#define VSCR_SAT	1

/*
 * Convert a single-precision value to a signed 32-bit integer.
 *
 * x is the bit pattern of the value, which is multiplied by 2^scale
 * and then truncated towards zero.  NaNs convert to 0.  Results that
 * do not fit are clamped to 0x7fffffff / 0x80000000 and VSCR_SAT is
 * set in *vscrp; a result of exactly -2^31 fits and does not set it.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	int e = (x >> 23) & 0xff;
	int m = x & 0x7fffff;

	if (e == 255 && m != 0)
		return 0;		/* NaN -> 0 */

	/* unbiased exponent of the scaled value */
	e += scale - 127;
	if (e < 0)
		return 0;		/* magnitude below 1: truncates to 0 */

	if (e > 30) {
		/* out of range, except for exactly -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return negative ? 0x80000000 : 0x7fffffff;
	}

	/* put the leading 1 in bit 30, then shift the fraction bits out */
	m = ((m | 0x800000) << 7) >> (30 - e);
	return negative ? -m : m;
}

/*
 * Convert a single-precision value to an unsigned 32-bit integer.
 *
 * x is the bit pattern of the value, which is multiplied by 2^scale
 * and then truncated towards zero.  NaNs convert to 0.  Negative
 * values of magnitude 1 or more clamp to 0 and values of 2^32 or more
 * clamp to 0xffffffff; both cases set VSCR_SAT in *vscrp.
 */
static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	unsigned int m = x & 0x7fffff;
	int e = (x >> 23) & 0xff;

	if (e == 255 && m != 0)
		return 0;		/* NaN -> 0 */

	e += scale - 127;
	if (e < 0)
		return 0;		/* magnitude below 1: truncates to 0 */

	if (negative || e >= 32) {
		/* not representable as an unsigned word: saturate */
		*vscrp |= VSCR_SAT;
		return negative ? 0 : 0xffffffff;
	}

	/* put the leading 1 in bit 31, then shift the fraction bits out */
	return ((m | 0x800000) << 8) >> (31 - e);
}
204 
/*
 * Round to floating integer, towards 0 (truncate).
 *
 * x and the return value are single-precision bit patterns.  NaNs are
 * returned with the quiet bit set; the sign of the input is preserved,
 * including when the result is zero.
 */
static unsigned int rfiz(unsigned int x)
{
	const unsigned int fraction = x & 0x7fffff;
	const int e = (int)((x >> 23) & 0xff) - 127;

	if (e == 128 && fraction != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (e >= 23)
		return x;		/* no fraction bits left (or Inf) */
	if (e < 0)
		return x & 0x80000000;	/* |x| < 1.0 -> signed zero */

	/* drop the mantissa bits that lie below the binary point */
	return x & ~(0x7fffffu >> e);
}
219 
/*
 * Round to floating integer, towards +/- Inf (away from zero).
 *
 * x and the return value are single-precision bit patterns.  NaNs are
 * returned with the quiet bit set, zeros keep their sign, and any
 * other value of magnitude below 1.0 becomes +/- 1.0.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const unsigned int magnitude = x & 0x7fffffff;
	const int e = (int)(magnitude >> 23) - 127;
	unsigned int below_point;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (e >= 23 || magnitude == 0)
		return x;		/* already integral, Inf, or +/-0 */
	if (e < 0)
		return sign | 0x3f800000;	/* 0 < |x| < 1.0 -> +/- 1.0 */

	/*
	 * Adding all-ones below the binary point bumps the integer part
	 * whenever any fraction bit is set.  A carry out of the mantissa
	 * increments the exponent, which is the right answer, and it
	 * cannot reach the sign bit because e < 23 here.
	 */
	below_point = 0x7fffffu >> e;
	return (x + below_point) & ~below_point;
}
240 
/*
 * Round to floating integer, to nearest, with ties going to the even
 * integer (0.5 -> 0, 1.5 -> 2, 2.5 -> 2).
 *
 * x and the return value are single-precision bit patterns.  NaNs are
 * returned with the quiet bit set; the sign of the input is preserved,
 * including when the result is zero.
 */
static unsigned int rfin(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	unsigned int fmask, unit, half, frac, res;
	int exp;

	exp = (int)((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return sign;		/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1; 0 is the even one */
		if ((x & 0x7fffff) == 0)
			return sign;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return sign | 0x3f800000;
	}

	fmask = 0x7fffffu >> exp;	/* mantissa bits below the binary point */
	unit = fmask + 1;		/* weight of 1.0 in the mantissa */
	half = unit >> 1;		/* weight of 0.5 */
	frac = x & fmask;
	res = x & ~fmask;		/* truncated magnitude */

	/*
	 * Round up when the fraction is above one half, or exactly one
	 * half and the integer part is odd.  For exp == 0 the integer
	 * part is the implicit leading 1, which is odd.  A carry out of
	 * the mantissa increments the exponent, which is the right result.
	 */
	if (frac > half || (frac == half && (exp == 0 || (res & unit))))
		res += unit;
	return res;
}
260 
261 int emulate_altivec(struct pt_regs *regs)
262 {
263 	unsigned int instr, i;
264 	unsigned int va, vb, vc, vd;
265 	vector128 *vrs;
266 
267 	if (get_user(instr, (unsigned int __user *) regs->nip))
268 		return -EFAULT;
269 	if ((instr >> 26) != 4)
270 		return -EINVAL;		/* not an altivec instruction */
271 	vd = (instr >> 21) & 0x1f;
272 	va = (instr >> 16) & 0x1f;
273 	vb = (instr >> 11) & 0x1f;
274 	vc = (instr >> 6) & 0x1f;
275 
276 	vrs = current->thread.vr_state.vr;
277 	switch (instr & 0x3f) {
278 	case 10:
279 		switch (vc) {
280 		case 0:	/* vaddfp */
281 			vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
282 			break;
283 		case 1:	/* vsubfp */
284 			vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
285 			break;
286 		case 4:	/* vrefp */
287 			vrefp(&vrs[vd], &vrs[vb]);
288 			break;
289 		case 5:	/* vrsqrtefp */
290 			vrsqrtefp(&vrs[vd], &vrs[vb]);
291 			break;
292 		case 6:	/* vexptefp */
293 			for (i = 0; i < 4; ++i)
294 				vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
295 			break;
296 		case 7:	/* vlogefp */
297 			for (i = 0; i < 4; ++i)
298 				vrs[vd].u[i] = elog2(vrs[vb].u[i]);
299 			break;
300 		case 8:		/* vrfin */
301 			for (i = 0; i < 4; ++i)
302 				vrs[vd].u[i] = rfin(vrs[vb].u[i]);
303 			break;
304 		case 9:		/* vrfiz */
305 			for (i = 0; i < 4; ++i)
306 				vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
307 			break;
308 		case 10:	/* vrfip */
309 			for (i = 0; i < 4; ++i) {
310 				u32 x = vrs[vb].u[i];
311 				x = (x & 0x80000000)? rfiz(x): rfii(x);
312 				vrs[vd].u[i] = x;
313 			}
314 			break;
315 		case 11:	/* vrfim */
316 			for (i = 0; i < 4; ++i) {
317 				u32 x = vrs[vb].u[i];
318 				x = (x & 0x80000000)? rfii(x): rfiz(x);
319 				vrs[vd].u[i] = x;
320 			}
321 			break;
322 		case 14:	/* vctuxs */
323 			for (i = 0; i < 4; ++i)
324 				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
325 					&current->thread.vr_state.vscr.u[3]);
326 			break;
327 		case 15:	/* vctsxs */
328 			for (i = 0; i < 4; ++i)
329 				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
330 					&current->thread.vr_state.vscr.u[3]);
331 			break;
332 		default:
333 			return -EINVAL;
334 		}
335 		break;
336 	case 46:	/* vmaddfp */
337 		vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
338 		break;
339 	case 47:	/* vnmsubfp */
340 		vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
341 		break;
342 	default:
343 		return -EINVAL;
344 	}
345 
346 	return 0;
347 }
348