xref: /linux/tools/arch/x86/lib/insn.c (revision 8b83369ddcb3fb9cab5c1088987ce477565bb630)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * x86 instruction analysis
4  *
5  * Copyright (C) IBM Corporation, 2002, 2004, 2009
6  */
7 
8 #include <linux/kernel.h>
9 #ifdef __KERNEL__
10 #include <linux/string.h>
11 #else
12 #include <string.h>
13 #endif
14 #include "../include/asm/inat.h"
15 #include "../include/asm/insn.h"
16 
17 #include "../include/asm/emulate_prefix.h"
18 
/*
 * Convert the little-endian value @r, read from the instruction stream as
 * type @t, to CPU byte order.  The conversion helper is picked from
 * sizeof(@t); single bytes are passed through untouched and any width other
 * than 1, 2 or 4 reaches BUILD_BUG().
 */
#define leXX_to_cpu(t, r)						\
({									\
	__typeof__(t) converted;					\
	switch (sizeof(t)) {						\
	case 1:								\
		converted = r;						\
		break;							\
	case 2:								\
		converted = le16_to_cpu(r);				\
		break;							\
	case 4:								\
		converted = le32_to_cpu(r);				\
		break;							\
	default:							\
		BUILD_BUG();						\
		break;							\
	}								\
	converted;							\
})
31 
/*
 * Bounds-checked readers for the instruction byte stream.
 *
 * All of them take the value type @t and the decoder state @insn.  The
 * checked variants (get_next(), peek_nbyte_next(), peek_next()) jump to a
 * label named err_out, which the calling function must provide, when the
 * requested bytes would run past @insn->end_kaddr.
 */

/* Verify that sizeof(t) bytes, starting n bytes ahead, are within the buffer */
#define validate_next(t, insn, n)	\
	((insn)->next_byte + sizeof(t) + (n) <= (insn)->end_kaddr)

/*
 * Unchecked read of a @t at next_byte, advancing next_byte past it.
 * The bytes are copied out with memcpy() because next_byte has no alignment
 * guarantee; dereferencing it as a wider type would be a misaligned access.
 */
#define __get_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(t)); (insn)->next_byte += sizeof(t); leXX_to_cpu(t, r); })

/* Unchecked read of a @t located n bytes after next_byte; does not advance */
#define __peek_nbyte_next(t, insn, n)	\
	({ t r; memcpy(&r, (insn)->next_byte + (n), sizeof(t)); leXX_to_cpu(t, r); })

/* Checked variant of __get_next(): goto err_out if the read would overrun */
#define get_next(t, insn)	\
	({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })

/* Checked variant of __peek_nbyte_next(): goto err_out if the read would overrun */
#define peek_nbyte_next(t, insn, n)	\
	({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })

/* Checked peek at the value directly at next_byte */
#define peek_next(t, insn)	peek_nbyte_next(t, insn, 0)
49 
50 /**
51  * insn_init() - initialize struct insn
52  * @insn:	&struct insn to be initialized
53  * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
54  * @x86_64:	!0 for 64-bit kernel or 64-bit app
55  */
56 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
57 {
58 	/*
59 	 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
60 	 * even if the input buffer is long enough to hold them.
61 	 */
62 	if (buf_len > MAX_INSN_SIZE)
63 		buf_len = MAX_INSN_SIZE;
64 
65 	memset(insn, 0, sizeof(*insn));
66 	insn->kaddr = kaddr;
67 	insn->end_kaddr = kaddr + buf_len;
68 	insn->next_byte = kaddr;
69 	insn->x86_64 = x86_64 ? 1 : 0;
70 	insn->opnd_bytes = 4;
71 	if (x86_64)
72 		insn->addr_bytes = 8;
73 	else
74 		insn->addr_bytes = 4;
75 }
76 
77 static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
78 static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
79 
80 static int __insn_get_emulate_prefix(struct insn *insn,
81 				     const insn_byte_t *prefix, size_t len)
82 {
83 	size_t i;
84 
85 	for (i = 0; i < len; i++) {
86 		if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
87 			goto err_out;
88 	}
89 
90 	insn->emulate_prefix_size = len;
91 	insn->next_byte += len;
92 
93 	return 1;
94 
95 err_out:
96 	return 0;
97 }
98 
99 static void insn_get_emulate_prefix(struct insn *insn)
100 {
101 	if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
102 		return;
103 
104 	__insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
105 }
106 
107 /**
108  * insn_get_prefixes - scan x86 instruction prefix bytes
109  * @insn:	&struct insn containing instruction
110  *
111  * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
112  * to point to the (first) opcode.  No effect if @insn->prefixes.got
113  * is already set.
114  */
115 void insn_get_prefixes(struct insn *insn)
116 {
117 	struct insn_field *prefixes = &insn->prefixes;
118 	insn_attr_t attr;
119 	insn_byte_t b, lb;
120 	int i, nb;
121 
122 	if (prefixes->got)
123 		return;
124 
125 	insn_get_emulate_prefix(insn);
126 
127 	nb = 0;
128 	lb = 0;
129 	b = peek_next(insn_byte_t, insn);
130 	attr = inat_get_opcode_attribute(b);
131 	while (inat_is_legacy_prefix(attr)) {
132 		/* Skip if same prefix */
133 		for (i = 0; i < nb; i++)
134 			if (prefixes->bytes[i] == b)
135 				goto found;
136 		if (nb == 4)
137 			/* Invalid instruction */
138 			break;
139 		prefixes->bytes[nb++] = b;
140 		if (inat_is_address_size_prefix(attr)) {
141 			/* address size switches 2/4 or 4/8 */
142 			if (insn->x86_64)
143 				insn->addr_bytes ^= 12;
144 			else
145 				insn->addr_bytes ^= 6;
146 		} else if (inat_is_operand_size_prefix(attr)) {
147 			/* oprand size switches 2/4 */
148 			insn->opnd_bytes ^= 6;
149 		}
150 found:
151 		prefixes->nbytes++;
152 		insn->next_byte++;
153 		lb = b;
154 		b = peek_next(insn_byte_t, insn);
155 		attr = inat_get_opcode_attribute(b);
156 	}
157 	/* Set the last prefix */
158 	if (lb && lb != insn->prefixes.bytes[3]) {
159 		if (unlikely(insn->prefixes.bytes[3])) {
160 			/* Swap the last prefix */
161 			b = insn->prefixes.bytes[3];
162 			for (i = 0; i < nb; i++)
163 				if (prefixes->bytes[i] == lb)
164 					insn_set_byte(prefixes, i, b);
165 		}
166 		insn_set_byte(&insn->prefixes, 3, lb);
167 	}
168 
169 	/* Decode REX prefix */
170 	if (insn->x86_64) {
171 		b = peek_next(insn_byte_t, insn);
172 		attr = inat_get_opcode_attribute(b);
173 		if (inat_is_rex_prefix(attr)) {
174 			insn_field_set(&insn->rex_prefix, b, 1);
175 			insn->next_byte++;
176 			if (X86_REX_W(b))
177 				/* REX.W overrides opnd_size */
178 				insn->opnd_bytes = 8;
179 		}
180 	}
181 	insn->rex_prefix.got = 1;
182 
183 	/* Decode VEX prefix */
184 	b = peek_next(insn_byte_t, insn);
185 	attr = inat_get_opcode_attribute(b);
186 	if (inat_is_vex_prefix(attr)) {
187 		insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
188 		if (!insn->x86_64) {
189 			/*
190 			 * In 32-bits mode, if the [7:6] bits (mod bits of
191 			 * ModRM) on the second byte are not 11b, it is
192 			 * LDS or LES or BOUND.
193 			 */
194 			if (X86_MODRM_MOD(b2) != 3)
195 				goto vex_end;
196 		}
197 		insn_set_byte(&insn->vex_prefix, 0, b);
198 		insn_set_byte(&insn->vex_prefix, 1, b2);
199 		if (inat_is_evex_prefix(attr)) {
200 			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
201 			insn_set_byte(&insn->vex_prefix, 2, b2);
202 			b2 = peek_nbyte_next(insn_byte_t, insn, 3);
203 			insn_set_byte(&insn->vex_prefix, 3, b2);
204 			insn->vex_prefix.nbytes = 4;
205 			insn->next_byte += 4;
206 			if (insn->x86_64 && X86_VEX_W(b2))
207 				/* VEX.W overrides opnd_size */
208 				insn->opnd_bytes = 8;
209 		} else if (inat_is_vex3_prefix(attr)) {
210 			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
211 			insn_set_byte(&insn->vex_prefix, 2, b2);
212 			insn->vex_prefix.nbytes = 3;
213 			insn->next_byte += 3;
214 			if (insn->x86_64 && X86_VEX_W(b2))
215 				/* VEX.W overrides opnd_size */
216 				insn->opnd_bytes = 8;
217 		} else {
218 			/*
219 			 * For VEX2, fake VEX3-like byte#2.
220 			 * Makes it easier to decode vex.W, vex.vvvv,
221 			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
222 			 */
223 			insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f);
224 			insn->vex_prefix.nbytes = 2;
225 			insn->next_byte += 2;
226 		}
227 	}
228 vex_end:
229 	insn->vex_prefix.got = 1;
230 
231 	prefixes->got = 1;
232 
233 err_out:
234 	return;
235 }
236 
237 /**
238  * insn_get_opcode - collect opcode(s)
239  * @insn:	&struct insn containing instruction
240  *
241  * Populates @insn->opcode, updates @insn->next_byte to point past the
242  * opcode byte(s), and set @insn->attr (except for groups).
243  * If necessary, first collects any preceding (prefix) bytes.
244  * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
245  * is already 1.
246  */
247 void insn_get_opcode(struct insn *insn)
248 {
249 	struct insn_field *opcode = &insn->opcode;
250 	insn_byte_t op;
251 	int pfx_id;
252 	if (opcode->got)
253 		return;
254 	if (!insn->prefixes.got)
255 		insn_get_prefixes(insn);
256 
257 	/* Get first opcode */
258 	op = get_next(insn_byte_t, insn);
259 	insn_set_byte(opcode, 0, op);
260 	opcode->nbytes = 1;
261 
262 	/* Check if there is VEX prefix or not */
263 	if (insn_is_avx(insn)) {
264 		insn_byte_t m, p;
265 		m = insn_vex_m_bits(insn);
266 		p = insn_vex_p_bits(insn);
267 		insn->attr = inat_get_avx_attribute(op, m, p);
268 		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
269 		    (!inat_accept_vex(insn->attr) &&
270 		     !inat_is_group(insn->attr)))
271 			insn->attr = 0;	/* This instruction is bad */
272 		goto end;	/* VEX has only 1 byte for opcode */
273 	}
274 
275 	insn->attr = inat_get_opcode_attribute(op);
276 	while (inat_is_escape(insn->attr)) {
277 		/* Get escaped opcode */
278 		op = get_next(insn_byte_t, insn);
279 		opcode->bytes[opcode->nbytes++] = op;
280 		pfx_id = insn_last_prefix_id(insn);
281 		insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
282 	}
283 	if (inat_must_vex(insn->attr))
284 		insn->attr = 0;	/* This instruction is bad */
285 end:
286 	opcode->got = 1;
287 
288 err_out:
289 	return;
290 }
291 
292 /**
293  * insn_get_modrm - collect ModRM byte, if any
294  * @insn:	&struct insn containing instruction
295  *
296  * Populates @insn->modrm and updates @insn->next_byte to point past the
297  * ModRM byte, if any.  If necessary, first collects the preceding bytes
298  * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
299  */
300 void insn_get_modrm(struct insn *insn)
301 {
302 	struct insn_field *modrm = &insn->modrm;
303 	insn_byte_t pfx_id, mod;
304 	if (modrm->got)
305 		return;
306 	if (!insn->opcode.got)
307 		insn_get_opcode(insn);
308 
309 	if (inat_has_modrm(insn->attr)) {
310 		mod = get_next(insn_byte_t, insn);
311 		insn_field_set(modrm, mod, 1);
312 		if (inat_is_group(insn->attr)) {
313 			pfx_id = insn_last_prefix_id(insn);
314 			insn->attr = inat_get_group_attribute(mod, pfx_id,
315 							      insn->attr);
316 			if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
317 				insn->attr = 0;	/* This is bad */
318 		}
319 	}
320 
321 	if (insn->x86_64 && inat_is_force64(insn->attr))
322 		insn->opnd_bytes = 8;
323 	modrm->got = 1;
324 
325 err_out:
326 	return;
327 }
328 
329 
330 /**
331  * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
332  * @insn:	&struct insn containing instruction
333  *
334  * If necessary, first collects the instruction up to and including the
335  * ModRM byte.  No effect if @insn->x86_64 is 0.
336  */
337 int insn_rip_relative(struct insn *insn)
338 {
339 	struct insn_field *modrm = &insn->modrm;
340 
341 	if (!insn->x86_64)
342 		return 0;
343 	if (!modrm->got)
344 		insn_get_modrm(insn);
345 	/*
346 	 * For rip-relative instructions, the mod field (top 2 bits)
347 	 * is zero and the r/m field (bottom 3 bits) is 0x5.
348 	 */
349 	return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5);
350 }
351 
352 /**
353  * insn_get_sib() - Get the SIB byte of instruction
354  * @insn:	&struct insn containing instruction
355  *
356  * If necessary, first collects the instruction up to and including the
357  * ModRM byte.
358  */
359 void insn_get_sib(struct insn *insn)
360 {
361 	insn_byte_t modrm;
362 
363 	if (insn->sib.got)
364 		return;
365 	if (!insn->modrm.got)
366 		insn_get_modrm(insn);
367 	if (insn->modrm.nbytes) {
368 		modrm = insn->modrm.bytes[0];
369 		if (insn->addr_bytes != 2 &&
370 		    X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
371 			insn_field_set(&insn->sib,
372 				       get_next(insn_byte_t, insn), 1);
373 		}
374 	}
375 	insn->sib.got = 1;
376 
377 err_out:
378 	return;
379 }
380 
381 
382 /**
383  * insn_get_displacement() - Get the displacement of instruction
384  * @insn:	&struct insn containing instruction
385  *
386  * If necessary, first collects the instruction up to and including the
387  * SIB byte.
388  * Displacement value is sign-expanded.
389  */
390 void insn_get_displacement(struct insn *insn)
391 {
392 	insn_byte_t mod, rm, base;
393 
394 	if (insn->displacement.got)
395 		return;
396 	if (!insn->sib.got)
397 		insn_get_sib(insn);
398 	if (insn->modrm.nbytes) {
399 		/*
400 		 * Interpreting the modrm byte:
401 		 * mod = 00 - no displacement fields (exceptions below)
402 		 * mod = 01 - 1-byte displacement field
403 		 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
404 		 * 	address size = 2 (0x67 prefix in 32-bit mode)
405 		 * mod = 11 - no memory operand
406 		 *
407 		 * If address size = 2...
408 		 * mod = 00, r/m = 110 - displacement field is 2 bytes
409 		 *
410 		 * If address size != 2...
411 		 * mod != 11, r/m = 100 - SIB byte exists
412 		 * mod = 00, SIB base = 101 - displacement field is 4 bytes
413 		 * mod = 00, r/m = 101 - rip-relative addressing, displacement
414 		 * 	field is 4 bytes
415 		 */
416 		mod = X86_MODRM_MOD(insn->modrm.value);
417 		rm = X86_MODRM_RM(insn->modrm.value);
418 		base = X86_SIB_BASE(insn->sib.value);
419 		if (mod == 3)
420 			goto out;
421 		if (mod == 1) {
422 			insn_field_set(&insn->displacement,
423 				       get_next(signed char, insn), 1);
424 		} else if (insn->addr_bytes == 2) {
425 			if ((mod == 0 && rm == 6) || mod == 2) {
426 				insn_field_set(&insn->displacement,
427 					       get_next(short, insn), 2);
428 			}
429 		} else {
430 			if ((mod == 0 && rm == 5) || mod == 2 ||
431 			    (mod == 0 && base == 5)) {
432 				insn_field_set(&insn->displacement,
433 					       get_next(int, insn), 4);
434 			}
435 		}
436 	}
437 out:
438 	insn->displacement.got = 1;
439 
440 err_out:
441 	return;
442 }
443 
444 /* Decode moffset16/32/64. Return 0 if failed */
445 static int __get_moffset(struct insn *insn)
446 {
447 	switch (insn->addr_bytes) {
448 	case 2:
449 		insn_field_set(&insn->moffset1, get_next(short, insn), 2);
450 		break;
451 	case 4:
452 		insn_field_set(&insn->moffset1, get_next(int, insn), 4);
453 		break;
454 	case 8:
455 		insn_field_set(&insn->moffset1, get_next(int, insn), 4);
456 		insn_field_set(&insn->moffset2, get_next(int, insn), 4);
457 		break;
458 	default:	/* opnd_bytes must be modified manually */
459 		goto err_out;
460 	}
461 	insn->moffset1.got = insn->moffset2.got = 1;
462 
463 	return 1;
464 
465 err_out:
466 	return 0;
467 }
468 
469 /* Decode imm v32(Iz). Return 0 if failed */
470 static int __get_immv32(struct insn *insn)
471 {
472 	switch (insn->opnd_bytes) {
473 	case 2:
474 		insn_field_set(&insn->immediate, get_next(short, insn), 2);
475 		break;
476 	case 4:
477 	case 8:
478 		insn_field_set(&insn->immediate, get_next(int, insn), 4);
479 		break;
480 	default:	/* opnd_bytes must be modified manually */
481 		goto err_out;
482 	}
483 
484 	return 1;
485 
486 err_out:
487 	return 0;
488 }
489 
490 /* Decode imm v64(Iv/Ov), Return 0 if failed */
491 static int __get_immv(struct insn *insn)
492 {
493 	switch (insn->opnd_bytes) {
494 	case 2:
495 		insn_field_set(&insn->immediate1, get_next(short, insn), 2);
496 		break;
497 	case 4:
498 		insn_field_set(&insn->immediate1, get_next(int, insn), 4);
499 		insn->immediate1.nbytes = 4;
500 		break;
501 	case 8:
502 		insn_field_set(&insn->immediate1, get_next(int, insn), 4);
503 		insn_field_set(&insn->immediate2, get_next(int, insn), 4);
504 		break;
505 	default:	/* opnd_bytes must be modified manually */
506 		goto err_out;
507 	}
508 	insn->immediate1.got = insn->immediate2.got = 1;
509 
510 	return 1;
511 err_out:
512 	return 0;
513 }
514 
515 /* Decode ptr16:16/32(Ap) */
516 static int __get_immptr(struct insn *insn)
517 {
518 	switch (insn->opnd_bytes) {
519 	case 2:
520 		insn_field_set(&insn->immediate1, get_next(short, insn), 2);
521 		break;
522 	case 4:
523 		insn_field_set(&insn->immediate1, get_next(int, insn), 4);
524 		break;
525 	case 8:
526 		/* ptr16:64 is not exist (no segment) */
527 		return 0;
528 	default:	/* opnd_bytes must be modified manually */
529 		goto err_out;
530 	}
531 	insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2);
532 	insn->immediate1.got = insn->immediate2.got = 1;
533 
534 	return 1;
535 err_out:
536 	return 0;
537 }
538 
539 /**
540  * insn_get_immediate() - Get the immediates of instruction
541  * @insn:	&struct insn containing instruction
542  *
543  * If necessary, first collects the instruction up to and including the
544  * displacement bytes.
545  * Basically, most of immediates are sign-expanded. Unsigned-value can be
546  * get by bit masking with ((1 << (nbytes * 8)) - 1)
547  */
548 void insn_get_immediate(struct insn *insn)
549 {
550 	if (insn->immediate.got)
551 		return;
552 	if (!insn->displacement.got)
553 		insn_get_displacement(insn);
554 
555 	if (inat_has_moffset(insn->attr)) {
556 		if (!__get_moffset(insn))
557 			goto err_out;
558 		goto done;
559 	}
560 
561 	if (!inat_has_immediate(insn->attr))
562 		/* no immediates */
563 		goto done;
564 
565 	switch (inat_immediate_size(insn->attr)) {
566 	case INAT_IMM_BYTE:
567 		insn_field_set(&insn->immediate, get_next(signed char, insn), 1);
568 		break;
569 	case INAT_IMM_WORD:
570 		insn_field_set(&insn->immediate, get_next(short, insn), 2);
571 		break;
572 	case INAT_IMM_DWORD:
573 		insn_field_set(&insn->immediate, get_next(int, insn), 4);
574 		break;
575 	case INAT_IMM_QWORD:
576 		insn_field_set(&insn->immediate1, get_next(int, insn), 4);
577 		insn_field_set(&insn->immediate2, get_next(int, insn), 4);
578 		break;
579 	case INAT_IMM_PTR:
580 		if (!__get_immptr(insn))
581 			goto err_out;
582 		break;
583 	case INAT_IMM_VWORD32:
584 		if (!__get_immv32(insn))
585 			goto err_out;
586 		break;
587 	case INAT_IMM_VWORD:
588 		if (!__get_immv(insn))
589 			goto err_out;
590 		break;
591 	default:
592 		/* Here, insn must have an immediate, but failed */
593 		goto err_out;
594 	}
595 	if (inat_has_second_immediate(insn->attr)) {
596 		insn_field_set(&insn->immediate2, get_next(signed char, insn), 1);
597 	}
598 done:
599 	insn->immediate.got = 1;
600 
601 err_out:
602 	return;
603 }
604 
605 /**
606  * insn_get_length() - Get the length of instruction
607  * @insn:	&struct insn containing instruction
608  *
609  * If necessary, first collects the instruction up to and including the
610  * immediates bytes.
611  */
612 void insn_get_length(struct insn *insn)
613 {
614 	if (insn->length)
615 		return;
616 	if (!insn->immediate.got)
617 		insn_get_immediate(insn);
618 	insn->length = (unsigned char)((unsigned long)insn->next_byte
619 				     - (unsigned long)insn->kaddr);
620 }
621