xref: /linux/tools/arch/x86/lib/insn.c (revision 56fb34d86e875dbb0d3e6a81c5d3d035db373031)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * x86 instruction analysis
4  *
5  * Copyright (C) IBM Corporation, 2002, 2004, 2009
6  */
7 
8 #ifdef __KERNEL__
9 #include <linux/string.h>
10 #else
11 #include <string.h>
12 #endif
13 #include "../include/asm/inat.h"
14 #include "../include/asm/insn.h"
15 
/*
 * Bounds-checked helpers for pulling bytes out of the instruction stream.
 *
 * get_next(), peek_nbyte_next() and peek_next() expand to `goto err_out`
 * when the requested bytes would extend past insn->end_kaddr, so every
 * function that uses them must provide an err_out label.
 */

/* Verify sizeof(t) bytes, starting n bytes ahead, can be on the same instruction */
#define validate_next(t, insn, n)	\
	((insn)->next_byte + sizeof(t) + (n) <= (insn)->end_kaddr)

/*
 * Read a value of type t at next_byte and advance past it (no bounds check).
 *
 * The bytes are copied with memcpy() instead of dereferencing a cast
 * pointer: the instruction stream is a plain byte buffer with no alignment
 * guarantee, so *(t *)ptr would be a misaligned, type-punned access for
 * multi-byte t.  The value is taken in host byte order.
 */
#define __get_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(t)); \
	   (insn)->next_byte += sizeof(t); r; })

/* Read a value of type t located n bytes ahead without advancing (no bounds check) */
#define __peek_nbyte_next(t, insn, n)	\
	({ t r; memcpy(&r, (insn)->next_byte + (n), sizeof(t)); r; })

/* Bounds-checked __get_next(); jumps to err_out if the read would overrun */
#define get_next(t, insn)	\
	({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })

/* Bounds-checked __peek_nbyte_next(); jumps to err_out if the read would overrun */
#define peek_nbyte_next(t, insn, n)	\
	({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })

/* Bounds-checked peek at the value directly at next_byte */
#define peek_next(t, insn)	peek_nbyte_next(t, insn, 0)
33 
34 /**
35  * insn_init() - initialize struct insn
36  * @insn:	&struct insn to be initialized
37  * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
38  * @x86_64:	!0 for 64-bit kernel or 64-bit app
39  */
40 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
41 {
42 	/*
43 	 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
44 	 * even if the input buffer is long enough to hold them.
45 	 */
46 	if (buf_len > MAX_INSN_SIZE)
47 		buf_len = MAX_INSN_SIZE;
48 
49 	memset(insn, 0, sizeof(*insn));
50 	insn->kaddr = kaddr;
51 	insn->end_kaddr = kaddr + buf_len;
52 	insn->next_byte = kaddr;
53 	insn->x86_64 = x86_64 ? 1 : 0;
54 	insn->opnd_bytes = 4;
55 	if (x86_64)
56 		insn->addr_bytes = 8;
57 	else
58 		insn->addr_bytes = 4;
59 }
60 
61 /**
62  * insn_get_prefixes - scan x86 instruction prefix bytes
63  * @insn:	&struct insn containing instruction
64  *
65  * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
66  * to point to the (first) opcode.  No effect if @insn->prefixes.got
67  * is already set.
68  */
69 void insn_get_prefixes(struct insn *insn)
70 {
71 	struct insn_field *prefixes = &insn->prefixes;
72 	insn_attr_t attr;
73 	insn_byte_t b, lb;
74 	int i, nb;
75 
76 	if (prefixes->got)
77 		return;
78 
79 	nb = 0;
80 	lb = 0;
81 	b = peek_next(insn_byte_t, insn);
82 	attr = inat_get_opcode_attribute(b);
83 	while (inat_is_legacy_prefix(attr)) {
84 		/* Skip if same prefix */
85 		for (i = 0; i < nb; i++)
86 			if (prefixes->bytes[i] == b)
87 				goto found;
88 		if (nb == 4)
89 			/* Invalid instruction */
90 			break;
91 		prefixes->bytes[nb++] = b;
92 		if (inat_is_address_size_prefix(attr)) {
93 			/* address size switches 2/4 or 4/8 */
94 			if (insn->x86_64)
95 				insn->addr_bytes ^= 12;
96 			else
97 				insn->addr_bytes ^= 6;
98 		} else if (inat_is_operand_size_prefix(attr)) {
99 			/* oprand size switches 2/4 */
100 			insn->opnd_bytes ^= 6;
101 		}
102 found:
103 		prefixes->nbytes++;
104 		insn->next_byte++;
105 		lb = b;
106 		b = peek_next(insn_byte_t, insn);
107 		attr = inat_get_opcode_attribute(b);
108 	}
109 	/* Set the last prefix */
110 	if (lb && lb != insn->prefixes.bytes[3]) {
111 		if (unlikely(insn->prefixes.bytes[3])) {
112 			/* Swap the last prefix */
113 			b = insn->prefixes.bytes[3];
114 			for (i = 0; i < nb; i++)
115 				if (prefixes->bytes[i] == lb)
116 					prefixes->bytes[i] = b;
117 		}
118 		insn->prefixes.bytes[3] = lb;
119 	}
120 
121 	/* Decode REX prefix */
122 	if (insn->x86_64) {
123 		b = peek_next(insn_byte_t, insn);
124 		attr = inat_get_opcode_attribute(b);
125 		if (inat_is_rex_prefix(attr)) {
126 			insn->rex_prefix.value = b;
127 			insn->rex_prefix.nbytes = 1;
128 			insn->next_byte++;
129 			if (X86_REX_W(b))
130 				/* REX.W overrides opnd_size */
131 				insn->opnd_bytes = 8;
132 		}
133 	}
134 	insn->rex_prefix.got = 1;
135 
136 	/* Decode VEX prefix */
137 	b = peek_next(insn_byte_t, insn);
138 	attr = inat_get_opcode_attribute(b);
139 	if (inat_is_vex_prefix(attr)) {
140 		insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
141 		if (!insn->x86_64) {
142 			/*
143 			 * In 32-bits mode, if the [7:6] bits (mod bits of
144 			 * ModRM) on the second byte are not 11b, it is
145 			 * LDS or LES or BOUND.
146 			 */
147 			if (X86_MODRM_MOD(b2) != 3)
148 				goto vex_end;
149 		}
150 		insn->vex_prefix.bytes[0] = b;
151 		insn->vex_prefix.bytes[1] = b2;
152 		if (inat_is_evex_prefix(attr)) {
153 			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
154 			insn->vex_prefix.bytes[2] = b2;
155 			b2 = peek_nbyte_next(insn_byte_t, insn, 3);
156 			insn->vex_prefix.bytes[3] = b2;
157 			insn->vex_prefix.nbytes = 4;
158 			insn->next_byte += 4;
159 			if (insn->x86_64 && X86_VEX_W(b2))
160 				/* VEX.W overrides opnd_size */
161 				insn->opnd_bytes = 8;
162 		} else if (inat_is_vex3_prefix(attr)) {
163 			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
164 			insn->vex_prefix.bytes[2] = b2;
165 			insn->vex_prefix.nbytes = 3;
166 			insn->next_byte += 3;
167 			if (insn->x86_64 && X86_VEX_W(b2))
168 				/* VEX.W overrides opnd_size */
169 				insn->opnd_bytes = 8;
170 		} else {
171 			/*
172 			 * For VEX2, fake VEX3-like byte#2.
173 			 * Makes it easier to decode vex.W, vex.vvvv,
174 			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
175 			 */
176 			insn->vex_prefix.bytes[2] = b2 & 0x7f;
177 			insn->vex_prefix.nbytes = 2;
178 			insn->next_byte += 2;
179 		}
180 	}
181 vex_end:
182 	insn->vex_prefix.got = 1;
183 
184 	prefixes->got = 1;
185 
186 err_out:
187 	return;
188 }
189 
190 /**
191  * insn_get_opcode - collect opcode(s)
192  * @insn:	&struct insn containing instruction
193  *
194  * Populates @insn->opcode, updates @insn->next_byte to point past the
195  * opcode byte(s), and set @insn->attr (except for groups).
196  * If necessary, first collects any preceding (prefix) bytes.
197  * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
198  * is already 1.
199  */
200 void insn_get_opcode(struct insn *insn)
201 {
202 	struct insn_field *opcode = &insn->opcode;
203 	insn_byte_t op;
204 	int pfx_id;
205 	if (opcode->got)
206 		return;
207 	if (!insn->prefixes.got)
208 		insn_get_prefixes(insn);
209 
210 	/* Get first opcode */
211 	op = get_next(insn_byte_t, insn);
212 	opcode->bytes[0] = op;
213 	opcode->nbytes = 1;
214 
215 	/* Check if there is VEX prefix or not */
216 	if (insn_is_avx(insn)) {
217 		insn_byte_t m, p;
218 		m = insn_vex_m_bits(insn);
219 		p = insn_vex_p_bits(insn);
220 		insn->attr = inat_get_avx_attribute(op, m, p);
221 		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
222 		    (!inat_accept_vex(insn->attr) &&
223 		     !inat_is_group(insn->attr)))
224 			insn->attr = 0;	/* This instruction is bad */
225 		goto end;	/* VEX has only 1 byte for opcode */
226 	}
227 
228 	insn->attr = inat_get_opcode_attribute(op);
229 	while (inat_is_escape(insn->attr)) {
230 		/* Get escaped opcode */
231 		op = get_next(insn_byte_t, insn);
232 		opcode->bytes[opcode->nbytes++] = op;
233 		pfx_id = insn_last_prefix_id(insn);
234 		insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
235 	}
236 	if (inat_must_vex(insn->attr))
237 		insn->attr = 0;	/* This instruction is bad */
238 end:
239 	opcode->got = 1;
240 
241 err_out:
242 	return;
243 }
244 
245 /**
246  * insn_get_modrm - collect ModRM byte, if any
247  * @insn:	&struct insn containing instruction
248  *
249  * Populates @insn->modrm and updates @insn->next_byte to point past the
250  * ModRM byte, if any.  If necessary, first collects the preceding bytes
251  * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
252  */
253 void insn_get_modrm(struct insn *insn)
254 {
255 	struct insn_field *modrm = &insn->modrm;
256 	insn_byte_t pfx_id, mod;
257 	if (modrm->got)
258 		return;
259 	if (!insn->opcode.got)
260 		insn_get_opcode(insn);
261 
262 	if (inat_has_modrm(insn->attr)) {
263 		mod = get_next(insn_byte_t, insn);
264 		modrm->value = mod;
265 		modrm->nbytes = 1;
266 		if (inat_is_group(insn->attr)) {
267 			pfx_id = insn_last_prefix_id(insn);
268 			insn->attr = inat_get_group_attribute(mod, pfx_id,
269 							      insn->attr);
270 			if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
271 				insn->attr = 0;	/* This is bad */
272 		}
273 	}
274 
275 	if (insn->x86_64 && inat_is_force64(insn->attr))
276 		insn->opnd_bytes = 8;
277 	modrm->got = 1;
278 
279 err_out:
280 	return;
281 }
282 
283 
284 /**
285  * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
286  * @insn:	&struct insn containing instruction
287  *
288  * If necessary, first collects the instruction up to and including the
289  * ModRM byte.  No effect if @insn->x86_64 is 0.
290  */
291 int insn_rip_relative(struct insn *insn)
292 {
293 	struct insn_field *modrm = &insn->modrm;
294 
295 	if (!insn->x86_64)
296 		return 0;
297 	if (!modrm->got)
298 		insn_get_modrm(insn);
299 	/*
300 	 * For rip-relative instructions, the mod field (top 2 bits)
301 	 * is zero and the r/m field (bottom 3 bits) is 0x5.
302 	 */
303 	return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
304 }
305 
306 /**
307  * insn_get_sib() - Get the SIB byte of instruction
308  * @insn:	&struct insn containing instruction
309  *
310  * If necessary, first collects the instruction up to and including the
311  * ModRM byte.
312  */
313 void insn_get_sib(struct insn *insn)
314 {
315 	insn_byte_t modrm;
316 
317 	if (insn->sib.got)
318 		return;
319 	if (!insn->modrm.got)
320 		insn_get_modrm(insn);
321 	if (insn->modrm.nbytes) {
322 		modrm = (insn_byte_t)insn->modrm.value;
323 		if (insn->addr_bytes != 2 &&
324 		    X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
325 			insn->sib.value = get_next(insn_byte_t, insn);
326 			insn->sib.nbytes = 1;
327 		}
328 	}
329 	insn->sib.got = 1;
330 
331 err_out:
332 	return;
333 }
334 
335 
336 /**
337  * insn_get_displacement() - Get the displacement of instruction
338  * @insn:	&struct insn containing instruction
339  *
340  * If necessary, first collects the instruction up to and including the
341  * SIB byte.
342  * Displacement value is sign-expanded.
343  */
344 void insn_get_displacement(struct insn *insn)
345 {
346 	insn_byte_t mod, rm, base;
347 
348 	if (insn->displacement.got)
349 		return;
350 	if (!insn->sib.got)
351 		insn_get_sib(insn);
352 	if (insn->modrm.nbytes) {
353 		/*
354 		 * Interpreting the modrm byte:
355 		 * mod = 00 - no displacement fields (exceptions below)
356 		 * mod = 01 - 1-byte displacement field
357 		 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
358 		 * 	address size = 2 (0x67 prefix in 32-bit mode)
359 		 * mod = 11 - no memory operand
360 		 *
361 		 * If address size = 2...
362 		 * mod = 00, r/m = 110 - displacement field is 2 bytes
363 		 *
364 		 * If address size != 2...
365 		 * mod != 11, r/m = 100 - SIB byte exists
366 		 * mod = 00, SIB base = 101 - displacement field is 4 bytes
367 		 * mod = 00, r/m = 101 - rip-relative addressing, displacement
368 		 * 	field is 4 bytes
369 		 */
370 		mod = X86_MODRM_MOD(insn->modrm.value);
371 		rm = X86_MODRM_RM(insn->modrm.value);
372 		base = X86_SIB_BASE(insn->sib.value);
373 		if (mod == 3)
374 			goto out;
375 		if (mod == 1) {
376 			insn->displacement.value = get_next(signed char, insn);
377 			insn->displacement.nbytes = 1;
378 		} else if (insn->addr_bytes == 2) {
379 			if ((mod == 0 && rm == 6) || mod == 2) {
380 				insn->displacement.value =
381 					 get_next(short, insn);
382 				insn->displacement.nbytes = 2;
383 			}
384 		} else {
385 			if ((mod == 0 && rm == 5) || mod == 2 ||
386 			    (mod == 0 && base == 5)) {
387 				insn->displacement.value = get_next(int, insn);
388 				insn->displacement.nbytes = 4;
389 			}
390 		}
391 	}
392 out:
393 	insn->displacement.got = 1;
394 
395 err_out:
396 	return;
397 }
398 
399 /* Decode moffset16/32/64. Return 0 if failed */
400 static int __get_moffset(struct insn *insn)
401 {
402 	switch (insn->addr_bytes) {
403 	case 2:
404 		insn->moffset1.value = get_next(short, insn);
405 		insn->moffset1.nbytes = 2;
406 		break;
407 	case 4:
408 		insn->moffset1.value = get_next(int, insn);
409 		insn->moffset1.nbytes = 4;
410 		break;
411 	case 8:
412 		insn->moffset1.value = get_next(int, insn);
413 		insn->moffset1.nbytes = 4;
414 		insn->moffset2.value = get_next(int, insn);
415 		insn->moffset2.nbytes = 4;
416 		break;
417 	default:	/* opnd_bytes must be modified manually */
418 		goto err_out;
419 	}
420 	insn->moffset1.got = insn->moffset2.got = 1;
421 
422 	return 1;
423 
424 err_out:
425 	return 0;
426 }
427 
428 /* Decode imm v32(Iz). Return 0 if failed */
429 static int __get_immv32(struct insn *insn)
430 {
431 	switch (insn->opnd_bytes) {
432 	case 2:
433 		insn->immediate.value = get_next(short, insn);
434 		insn->immediate.nbytes = 2;
435 		break;
436 	case 4:
437 	case 8:
438 		insn->immediate.value = get_next(int, insn);
439 		insn->immediate.nbytes = 4;
440 		break;
441 	default:	/* opnd_bytes must be modified manually */
442 		goto err_out;
443 	}
444 
445 	return 1;
446 
447 err_out:
448 	return 0;
449 }
450 
451 /* Decode imm v64(Iv/Ov), Return 0 if failed */
452 static int __get_immv(struct insn *insn)
453 {
454 	switch (insn->opnd_bytes) {
455 	case 2:
456 		insn->immediate1.value = get_next(short, insn);
457 		insn->immediate1.nbytes = 2;
458 		break;
459 	case 4:
460 		insn->immediate1.value = get_next(int, insn);
461 		insn->immediate1.nbytes = 4;
462 		break;
463 	case 8:
464 		insn->immediate1.value = get_next(int, insn);
465 		insn->immediate1.nbytes = 4;
466 		insn->immediate2.value = get_next(int, insn);
467 		insn->immediate2.nbytes = 4;
468 		break;
469 	default:	/* opnd_bytes must be modified manually */
470 		goto err_out;
471 	}
472 	insn->immediate1.got = insn->immediate2.got = 1;
473 
474 	return 1;
475 err_out:
476 	return 0;
477 }
478 
479 /* Decode ptr16:16/32(Ap) */
480 static int __get_immptr(struct insn *insn)
481 {
482 	switch (insn->opnd_bytes) {
483 	case 2:
484 		insn->immediate1.value = get_next(short, insn);
485 		insn->immediate1.nbytes = 2;
486 		break;
487 	case 4:
488 		insn->immediate1.value = get_next(int, insn);
489 		insn->immediate1.nbytes = 4;
490 		break;
491 	case 8:
492 		/* ptr16:64 is not exist (no segment) */
493 		return 0;
494 	default:	/* opnd_bytes must be modified manually */
495 		goto err_out;
496 	}
497 	insn->immediate2.value = get_next(unsigned short, insn);
498 	insn->immediate2.nbytes = 2;
499 	insn->immediate1.got = insn->immediate2.got = 1;
500 
501 	return 1;
502 err_out:
503 	return 0;
504 }
505 
506 /**
507  * insn_get_immediate() - Get the immediates of instruction
508  * @insn:	&struct insn containing instruction
509  *
510  * If necessary, first collects the instruction up to and including the
511  * displacement bytes.
512  * Basically, most of immediates are sign-expanded. Unsigned-value can be
513  * get by bit masking with ((1 << (nbytes * 8)) - 1)
514  */
515 void insn_get_immediate(struct insn *insn)
516 {
517 	if (insn->immediate.got)
518 		return;
519 	if (!insn->displacement.got)
520 		insn_get_displacement(insn);
521 
522 	if (inat_has_moffset(insn->attr)) {
523 		if (!__get_moffset(insn))
524 			goto err_out;
525 		goto done;
526 	}
527 
528 	if (!inat_has_immediate(insn->attr))
529 		/* no immediates */
530 		goto done;
531 
532 	switch (inat_immediate_size(insn->attr)) {
533 	case INAT_IMM_BYTE:
534 		insn->immediate.value = get_next(signed char, insn);
535 		insn->immediate.nbytes = 1;
536 		break;
537 	case INAT_IMM_WORD:
538 		insn->immediate.value = get_next(short, insn);
539 		insn->immediate.nbytes = 2;
540 		break;
541 	case INAT_IMM_DWORD:
542 		insn->immediate.value = get_next(int, insn);
543 		insn->immediate.nbytes = 4;
544 		break;
545 	case INAT_IMM_QWORD:
546 		insn->immediate1.value = get_next(int, insn);
547 		insn->immediate1.nbytes = 4;
548 		insn->immediate2.value = get_next(int, insn);
549 		insn->immediate2.nbytes = 4;
550 		break;
551 	case INAT_IMM_PTR:
552 		if (!__get_immptr(insn))
553 			goto err_out;
554 		break;
555 	case INAT_IMM_VWORD32:
556 		if (!__get_immv32(insn))
557 			goto err_out;
558 		break;
559 	case INAT_IMM_VWORD:
560 		if (!__get_immv(insn))
561 			goto err_out;
562 		break;
563 	default:
564 		/* Here, insn must have an immediate, but failed */
565 		goto err_out;
566 	}
567 	if (inat_has_second_immediate(insn->attr)) {
568 		insn->immediate2.value = get_next(signed char, insn);
569 		insn->immediate2.nbytes = 1;
570 	}
571 done:
572 	insn->immediate.got = 1;
573 
574 err_out:
575 	return;
576 }
577 
578 /**
579  * insn_get_length() - Get the length of instruction
580  * @insn:	&struct insn containing instruction
581  *
582  * If necessary, first collects the instruction up to and including the
583  * immediates bytes.
584  */
585 void insn_get_length(struct insn *insn)
586 {
587 	if (insn->length)
588 		return;
589 	if (!insn->immediate.got)
590 		insn_get_immediate(insn);
591 	insn->length = (unsigned char)((unsigned long)insn->next_byte
592 				     - (unsigned long)insn->kaddr);
593 }
594