xref: /linux/tools/objtool/arch/x86/decode.c (revision c717993dd76a1049093af5c262e751d901b8da10)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
4   */
5  
6  #include <stdio.h>
7  #include <stdlib.h>
8  
9  #define unlikely(cond) (cond)
10  #include <asm/insn.h>
11  #include "../../../arch/x86/lib/inat.c"
12  #include "../../../arch/x86/lib/insn.c"
13  
14  #define CONFIG_64BIT 1
15  #include <asm/nops.h>
16  
17  #include <asm/orc_types.h>
18  #include <objtool/check.h>
19  #include <objtool/elf.h>
20  #include <objtool/arch.h>
21  #include <objtool/warn.h>
22  #include <objtool/endianness.h>
23  #include <objtool/builtin.h>
24  #include <arch/elf.h>
25  
26  static int is_x86_64(const struct elf *elf)
27  {
28  	switch (elf->ehdr.e_machine) {
29  	case EM_X86_64:
30  		return 1;
31  	case EM_386:
32  		return 0;
33  	default:
34  		WARN("unexpected ELF machine type %d", elf->ehdr.e_machine);
35  		return -1;
36  	}
37  }
38  
39  bool arch_callee_saved_reg(unsigned char reg)
40  {
41  	switch (reg) {
42  	case CFI_BP:
43  	case CFI_BX:
44  	case CFI_R12:
45  	case CFI_R13:
46  	case CFI_R14:
47  	case CFI_R15:
48  		return true;
49  
50  	case CFI_AX:
51  	case CFI_CX:
52  	case CFI_DX:
53  	case CFI_SI:
54  	case CFI_DI:
55  	case CFI_SP:
56  	case CFI_R8:
57  	case CFI_R9:
58  	case CFI_R10:
59  	case CFI_R11:
60  	case CFI_RA:
61  	default:
62  		return false;
63  	}
64  }
65  
/*
 * Convert a relocation addend into a destination offset by adding 4.
 *
 * The addition is done in int and the result is then converted to
 * unsigned long, so a negative sum wraps to a large unsigned value.
 */
unsigned long arch_dest_reloc_offset(int addend)
{
	int dest = addend + 4;

	return (unsigned long)dest;
}
70  
71  unsigned long arch_jump_destination(struct instruction *insn)
72  {
73  	return insn->offset + insn->len + insn->immediate;
74  }
75  
/*
 * ADD_OP(op) { ...initialise *op... }
 *
 * Allocates a zeroed object for @op with calloc(), appends it to the
 * ops_list in scope at the expansion site, then runs the attached
 * statement exactly once.  @op is reset to NULL afterwards.  If the
 * allocation fails, the enclosing function returns -1.
 */
#define ADD_OP(op)							\
	if ((op = calloc(1, sizeof(*op))) == NULL)			\
		return -1;						\
	else								\
		for (list_add_tail(&op->list, ops_list); op != NULL; op = NULL)
80  
/*
 * ModRM/SIB decoding helpers.
 *
 * These macros expand to expressions over the decoder's local variables
 * (modrm_mod, modrm_rm, sib_base, sib_index), so they can only be used
 * where those names are in scope.
 *
 * r/m| AX  CX  DX  BX |  SP |  BP |  SI  DI |
 *    | R8  R9 R10 R11 | R12 | R13 | R14 R15 |
 * Mod+----------------+-----+-----+---------+
 * 00 |    [r/m]       |[SIB]|[IP+]|  [r/m]  |
 * 01 |  [r/m + d8]    |[S+d]|   [r/m + d8]  |
 * 10 |  [r/m + d32]   |[S+D]|   [r/m + d32] |
 * 11 |                   r/ m               |
 */

/* Mod == 3 selects a register operand; anything else is a memory operand. */
#define mod_is_reg()	(modrm_mod == 3)
#define mod_is_mem()	(!mod_is_reg())

/* Mod == 0 with the low three r/m bits equal to BP: the [IP+] column. */
#define is_RIP()   (modrm_mod == 0 && (modrm_rm & 7) == CFI_BP)
/* Memory operand with the low three r/m bits equal to SP: a SIB byte follows. */
#define have_SIB() (mod_is_mem() && (modrm_rm & 7) == CFI_SP)

/*
 * Is @reg the base register of the r/m operand?  With a SIB byte, only the
 * form whose index field equals CFI_SP and whose base is @reg matches.
 */
#define rm_is(reg) (!have_SIB() ? \
		    modrm_rm == (reg) : \
		    (sib_base == (reg) && sib_index == CFI_SP))

#define rm_is_mem(reg)	(!mod_is_reg() && !is_RIP() && rm_is(reg))
#define rm_is_reg(reg)	(modrm_rm == (reg) && mod_is_reg())
105  
106  int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
107  			    unsigned long offset, unsigned int maxlen,
108  			    unsigned int *len, enum insn_type *type,
109  			    unsigned long *immediate,
110  			    struct list_head *ops_list)
111  {
112  	const struct elf *elf = file->elf;
113  	struct insn insn;
114  	int x86_64, ret;
115  	unsigned char op1, op2,
116  		      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
117  		      modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
118  		      sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
119  	struct stack_op *op = NULL;
120  	struct symbol *sym;
121  	u64 imm;
122  
123  	x86_64 = is_x86_64(elf);
124  	if (x86_64 == -1)
125  		return -1;
126  
127  	ret = insn_decode(&insn, sec->data->d_buf + offset, maxlen,
128  			  x86_64 ? INSN_MODE_64 : INSN_MODE_32);
129  	if (ret < 0) {
130  		WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
131  		return -1;
132  	}
133  
134  	*len = insn.length;
135  	*type = INSN_OTHER;
136  
137  	if (insn.vex_prefix.nbytes)
138  		return 0;
139  
140  	op1 = insn.opcode.bytes[0];
141  	op2 = insn.opcode.bytes[1];
142  
143  	if (insn.rex_prefix.nbytes) {
144  		rex = insn.rex_prefix.bytes[0];
145  		rex_w = X86_REX_W(rex) >> 3;
146  		rex_r = X86_REX_R(rex) >> 2;
147  		rex_x = X86_REX_X(rex) >> 1;
148  		rex_b = X86_REX_B(rex);
149  	}
150  
151  	if (insn.modrm.nbytes) {
152  		modrm = insn.modrm.bytes[0];
153  		modrm_mod = X86_MODRM_MOD(modrm);
154  		modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
155  		modrm_rm  = X86_MODRM_RM(modrm)  + 8*rex_b;
156  	}
157  
158  	if (insn.sib.nbytes) {
159  		sib = insn.sib.bytes[0];
160  		/* sib_scale = X86_SIB_SCALE(sib); */
161  		sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
162  		sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
163  	}
164  
165  	switch (op1) {
166  
167  	case 0x1:
168  	case 0x29:
169  		if (rex_w && rm_is_reg(CFI_SP)) {
170  
171  			/* add/sub reg, %rsp */
172  			ADD_OP(op) {
173  				op->src.type = OP_SRC_ADD;
174  				op->src.reg = modrm_reg;
175  				op->dest.type = OP_DEST_REG;
176  				op->dest.reg = CFI_SP;
177  			}
178  		}
179  		break;
180  
181  	case 0x50 ... 0x57:
182  
183  		/* push reg */
184  		ADD_OP(op) {
185  			op->src.type = OP_SRC_REG;
186  			op->src.reg = (op1 & 0x7) + 8*rex_b;
187  			op->dest.type = OP_DEST_PUSH;
188  		}
189  
190  		break;
191  
192  	case 0x58 ... 0x5f:
193  
194  		/* pop reg */
195  		ADD_OP(op) {
196  			op->src.type = OP_SRC_POP;
197  			op->dest.type = OP_DEST_REG;
198  			op->dest.reg = (op1 & 0x7) + 8*rex_b;
199  		}
200  
201  		break;
202  
203  	case 0x68:
204  	case 0x6a:
205  		/* push immediate */
206  		ADD_OP(op) {
207  			op->src.type = OP_SRC_CONST;
208  			op->dest.type = OP_DEST_PUSH;
209  		}
210  		break;
211  
212  	case 0x70 ... 0x7f:
213  		*type = INSN_JUMP_CONDITIONAL;
214  		break;
215  
216  	case 0x80 ... 0x83:
217  		/*
218  		 * 1000 00sw : mod OP r/m : immediate
219  		 *
220  		 * s - sign extend immediate
221  		 * w - imm8 / imm32
222  		 *
223  		 * OP: 000 ADD    100 AND
224  		 *     001 OR     101 SUB
225  		 *     010 ADC    110 XOR
226  		 *     011 SBB    111 CMP
227  		 */
228  
229  		/* 64bit only */
230  		if (!rex_w)
231  			break;
232  
233  		/* %rsp target only */
234  		if (!rm_is_reg(CFI_SP))
235  			break;
236  
237  		imm = insn.immediate.value;
238  		if (op1 & 2) { /* sign extend */
239  			if (op1 & 1) { /* imm32 */
240  				imm <<= 32;
241  				imm = (s64)imm >> 32;
242  			} else { /* imm8 */
243  				imm <<= 56;
244  				imm = (s64)imm >> 56;
245  			}
246  		}
247  
248  		switch (modrm_reg & 7) {
249  		case 5:
250  			imm = -imm;
251  			/* fallthrough */
252  		case 0:
253  			/* add/sub imm, %rsp */
254  			ADD_OP(op) {
255  				op->src.type = OP_SRC_ADD;
256  				op->src.reg = CFI_SP;
257  				op->src.offset = imm;
258  				op->dest.type = OP_DEST_REG;
259  				op->dest.reg = CFI_SP;
260  			}
261  			break;
262  
263  		case 4:
264  			/* and imm, %rsp */
265  			ADD_OP(op) {
266  				op->src.type = OP_SRC_AND;
267  				op->src.reg = CFI_SP;
268  				op->src.offset = insn.immediate.value;
269  				op->dest.type = OP_DEST_REG;
270  				op->dest.reg = CFI_SP;
271  			}
272  			break;
273  
274  		default:
275  			/* WARN ? */
276  			break;
277  		}
278  
279  		break;
280  
281  	case 0x89:
282  		if (!rex_w)
283  			break;
284  
285  		if (modrm_reg == CFI_SP) {
286  
287  			if (mod_is_reg()) {
288  				/* mov %rsp, reg */
289  				ADD_OP(op) {
290  					op->src.type = OP_SRC_REG;
291  					op->src.reg = CFI_SP;
292  					op->dest.type = OP_DEST_REG;
293  					op->dest.reg = modrm_rm;
294  				}
295  				break;
296  
297  			} else {
298  				/* skip RIP relative displacement */
299  				if (is_RIP())
300  					break;
301  
302  				/* skip nontrivial SIB */
303  				if (have_SIB()) {
304  					modrm_rm = sib_base;
305  					if (sib_index != CFI_SP)
306  						break;
307  				}
308  
309  				/* mov %rsp, disp(%reg) */
310  				ADD_OP(op) {
311  					op->src.type = OP_SRC_REG;
312  					op->src.reg = CFI_SP;
313  					op->dest.type = OP_DEST_REG_INDIRECT;
314  					op->dest.reg = modrm_rm;
315  					op->dest.offset = insn.displacement.value;
316  				}
317  				break;
318  			}
319  
320  			break;
321  		}
322  
323  		if (rm_is_reg(CFI_SP)) {
324  
325  			/* mov reg, %rsp */
326  			ADD_OP(op) {
327  				op->src.type = OP_SRC_REG;
328  				op->src.reg = modrm_reg;
329  				op->dest.type = OP_DEST_REG;
330  				op->dest.reg = CFI_SP;
331  			}
332  			break;
333  		}
334  
335  		/* fallthrough */
336  	case 0x88:
337  		if (!rex_w)
338  			break;
339  
340  		if (rm_is_mem(CFI_BP)) {
341  
342  			/* mov reg, disp(%rbp) */
343  			ADD_OP(op) {
344  				op->src.type = OP_SRC_REG;
345  				op->src.reg = modrm_reg;
346  				op->dest.type = OP_DEST_REG_INDIRECT;
347  				op->dest.reg = CFI_BP;
348  				op->dest.offset = insn.displacement.value;
349  			}
350  			break;
351  		}
352  
353  		if (rm_is_mem(CFI_SP)) {
354  
355  			/* mov reg, disp(%rsp) */
356  			ADD_OP(op) {
357  				op->src.type = OP_SRC_REG;
358  				op->src.reg = modrm_reg;
359  				op->dest.type = OP_DEST_REG_INDIRECT;
360  				op->dest.reg = CFI_SP;
361  				op->dest.offset = insn.displacement.value;
362  			}
363  			break;
364  		}
365  
366  		break;
367  
368  	case 0x8b:
369  		if (!rex_w)
370  			break;
371  
372  		if (rm_is_mem(CFI_BP)) {
373  
374  			/* mov disp(%rbp), reg */
375  			ADD_OP(op) {
376  				op->src.type = OP_SRC_REG_INDIRECT;
377  				op->src.reg = CFI_BP;
378  				op->src.offset = insn.displacement.value;
379  				op->dest.type = OP_DEST_REG;
380  				op->dest.reg = modrm_reg;
381  			}
382  			break;
383  		}
384  
385  		if (rm_is_mem(CFI_SP)) {
386  
387  			/* mov disp(%rsp), reg */
388  			ADD_OP(op) {
389  				op->src.type = OP_SRC_REG_INDIRECT;
390  				op->src.reg = CFI_SP;
391  				op->src.offset = insn.displacement.value;
392  				op->dest.type = OP_DEST_REG;
393  				op->dest.reg = modrm_reg;
394  			}
395  			break;
396  		}
397  
398  		break;
399  
400  	case 0x8d:
401  		if (mod_is_reg()) {
402  			WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset);
403  			break;
404  		}
405  
406  		/* skip non 64bit ops */
407  		if (!rex_w)
408  			break;
409  
410  		/* skip RIP relative displacement */
411  		if (is_RIP())
412  			break;
413  
414  		/* skip nontrivial SIB */
415  		if (have_SIB()) {
416  			modrm_rm = sib_base;
417  			if (sib_index != CFI_SP)
418  				break;
419  		}
420  
421  		/* lea disp(%src), %dst */
422  		ADD_OP(op) {
423  			op->src.offset = insn.displacement.value;
424  			if (!op->src.offset) {
425  				/* lea (%src), %dst */
426  				op->src.type = OP_SRC_REG;
427  			} else {
428  				/* lea disp(%src), %dst */
429  				op->src.type = OP_SRC_ADD;
430  			}
431  			op->src.reg = modrm_rm;
432  			op->dest.type = OP_DEST_REG;
433  			op->dest.reg = modrm_reg;
434  		}
435  		break;
436  
437  	case 0x8f:
438  		/* pop to mem */
439  		ADD_OP(op) {
440  			op->src.type = OP_SRC_POP;
441  			op->dest.type = OP_DEST_MEM;
442  		}
443  		break;
444  
445  	case 0x90:
446  		*type = INSN_NOP;
447  		break;
448  
449  	case 0x9c:
450  		/* pushf */
451  		ADD_OP(op) {
452  			op->src.type = OP_SRC_CONST;
453  			op->dest.type = OP_DEST_PUSHF;
454  		}
455  		break;
456  
457  	case 0x9d:
458  		/* popf */
459  		ADD_OP(op) {
460  			op->src.type = OP_SRC_POPF;
461  			op->dest.type = OP_DEST_MEM;
462  		}
463  		break;
464  
465  	case 0x0f:
466  
467  		if (op2 == 0x01) {
468  
469  			if (modrm == 0xca)
470  				*type = INSN_CLAC;
471  			else if (modrm == 0xcb)
472  				*type = INSN_STAC;
473  
474  		} else if (op2 >= 0x80 && op2 <= 0x8f) {
475  
476  			*type = INSN_JUMP_CONDITIONAL;
477  
478  		} else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
479  			   op2 == 0x35) {
480  
481  			/* sysenter, sysret */
482  			*type = INSN_CONTEXT_SWITCH;
483  
484  		} else if (op2 == 0x0b || op2 == 0xb9) {
485  
486  			/* ud2 */
487  			*type = INSN_BUG;
488  
489  		} else if (op2 == 0x0d || op2 == 0x1f) {
490  
491  			/* nopl/nopw */
492  			*type = INSN_NOP;
493  
494  		} else if (op2 == 0xa0 || op2 == 0xa8) {
495  
496  			/* push fs/gs */
497  			ADD_OP(op) {
498  				op->src.type = OP_SRC_CONST;
499  				op->dest.type = OP_DEST_PUSH;
500  			}
501  
502  		} else if (op2 == 0xa1 || op2 == 0xa9) {
503  
504  			/* pop fs/gs */
505  			ADD_OP(op) {
506  				op->src.type = OP_SRC_POP;
507  				op->dest.type = OP_DEST_MEM;
508  			}
509  		}
510  
511  		break;
512  
513  	case 0xc9:
514  		/*
515  		 * leave
516  		 *
517  		 * equivalent to:
518  		 * mov bp, sp
519  		 * pop bp
520  		 */
521  		ADD_OP(op) {
522  			op->src.type = OP_SRC_REG;
523  			op->src.reg = CFI_BP;
524  			op->dest.type = OP_DEST_REG;
525  			op->dest.reg = CFI_SP;
526  		}
527  		ADD_OP(op) {
528  			op->src.type = OP_SRC_POP;
529  			op->dest.type = OP_DEST_REG;
530  			op->dest.reg = CFI_BP;
531  		}
532  		break;
533  
534  	case 0xcc:
535  		/* int3 */
536  		*type = INSN_TRAP;
537  		break;
538  
539  	case 0xe3:
540  		/* jecxz/jrcxz */
541  		*type = INSN_JUMP_CONDITIONAL;
542  		break;
543  
544  	case 0xe9:
545  	case 0xeb:
546  		*type = INSN_JUMP_UNCONDITIONAL;
547  		break;
548  
549  	case 0xc2:
550  	case 0xc3:
551  		*type = INSN_RETURN;
552  		break;
553  
554  	case 0xc7: /* mov imm, r/m */
555  		if (!noinstr)
556  			break;
557  
558  		if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) {
559  			struct reloc *immr, *disp;
560  			struct symbol *func;
561  			int idx;
562  
563  			immr = find_reloc_by_dest(elf, (void *)sec, offset+3);
564  			disp = find_reloc_by_dest(elf, (void *)sec, offset+7);
565  
566  			if (!immr || strcmp(immr->sym->name, "pv_ops"))
567  				break;
568  
569  			idx = (immr->addend + 8) / sizeof(void *);
570  
571  			func = disp->sym;
572  			if (disp->sym->type == STT_SECTION)
573  				func = find_symbol_by_offset(disp->sym->sec, disp->addend);
574  			if (!func) {
575  				WARN("no func for pv_ops[]");
576  				return -1;
577  			}
578  
579  			objtool_pv_add(file, idx, func);
580  		}
581  
582  		break;
583  
584  	case 0xcf: /* iret */
585  		/*
586  		 * Handle sync_core(), which has an IRET to self.
587  		 * All other IRET are in STT_NONE entry code.
588  		 */
589  		sym = find_symbol_containing(sec, offset);
590  		if (sym && sym->type == STT_FUNC) {
591  			ADD_OP(op) {
592  				/* add $40, %rsp */
593  				op->src.type = OP_SRC_ADD;
594  				op->src.reg = CFI_SP;
595  				op->src.offset = 5*8;
596  				op->dest.type = OP_DEST_REG;
597  				op->dest.reg = CFI_SP;
598  			}
599  			break;
600  		}
601  
602  		/* fallthrough */
603  
604  	case 0xca: /* retf */
605  	case 0xcb: /* retf */
606  		*type = INSN_CONTEXT_SWITCH;
607  		break;
608  
609  	case 0xe8:
610  		*type = INSN_CALL;
611  		/*
612  		 * For the impact on the stack, a CALL behaves like
613  		 * a PUSH of an immediate value (the return address).
614  		 */
615  		ADD_OP(op) {
616  			op->src.type = OP_SRC_CONST;
617  			op->dest.type = OP_DEST_PUSH;
618  		}
619  		break;
620  
621  	case 0xfc:
622  		*type = INSN_CLD;
623  		break;
624  
625  	case 0xfd:
626  		*type = INSN_STD;
627  		break;
628  
629  	case 0xff:
630  		if (modrm_reg == 2 || modrm_reg == 3)
631  
632  			*type = INSN_CALL_DYNAMIC;
633  
634  		else if (modrm_reg == 4)
635  
636  			*type = INSN_JUMP_DYNAMIC;
637  
638  		else if (modrm_reg == 5)
639  
640  			/* jmpf */
641  			*type = INSN_CONTEXT_SWITCH;
642  
643  		else if (modrm_reg == 6) {
644  
645  			/* push from mem */
646  			ADD_OP(op) {
647  				op->src.type = OP_SRC_CONST;
648  				op->dest.type = OP_DEST_PUSH;
649  			}
650  		}
651  
652  		break;
653  
654  	default:
655  		break;
656  	}
657  
658  	*immediate = insn.immediate.nbytes ? insn.immediate.value : 0;
659  
660  	return 0;
661  }
662  
663  void arch_initial_func_cfi_state(struct cfi_init_state *state)
664  {
665  	int i;
666  
667  	for (i = 0; i < CFI_NUM_REGS; i++) {
668  		state->regs[i].base = CFI_UNDEFINED;
669  		state->regs[i].offset = 0;
670  	}
671  
672  	/* initial CFA (call frame address) */
673  	state->cfa.base = CFI_SP;
674  	state->cfa.offset = 8;
675  
676  	/* initial RA (return address) */
677  	state->regs[CFI_RA].base = CFI_CFA;
678  	state->regs[CFI_RA].offset = -8;
679  }
680  
681  const char *arch_nop_insn(int len)
682  {
683  	static const char nops[5][5] = {
684  		{ BYTES_NOP1 },
685  		{ BYTES_NOP2 },
686  		{ BYTES_NOP3 },
687  		{ BYTES_NOP4 },
688  		{ BYTES_NOP5 },
689  	};
690  
691  	if (len < 1 || len > 5) {
692  		WARN("invalid NOP size: %d\n", len);
693  		return NULL;
694  	}
695  
696  	return nops[len-1];
697  }
698  
699  #define BYTE_RET	0xC3
700  
701  const char *arch_ret_insn(int len)
702  {
703  	static const char ret[5][5] = {
704  		{ BYTE_RET },
705  		{ BYTE_RET, 0xcc },
706  		{ BYTE_RET, 0xcc, BYTES_NOP1 },
707  		{ BYTE_RET, 0xcc, BYTES_NOP2 },
708  		{ BYTE_RET, 0xcc, BYTES_NOP3 },
709  	};
710  
711  	if (len < 1 || len > 5) {
712  		WARN("invalid RET size: %d\n", len);
713  		return NULL;
714  	}
715  
716  	return ret[len-1];
717  }
718  
719  int arch_decode_hint_reg(u8 sp_reg, int *base)
720  {
721  	switch (sp_reg) {
722  	case ORC_REG_UNDEFINED:
723  		*base = CFI_UNDEFINED;
724  		break;
725  	case ORC_REG_SP:
726  		*base = CFI_SP;
727  		break;
728  	case ORC_REG_BP:
729  		*base = CFI_BP;
730  		break;
731  	case ORC_REG_SP_INDIRECT:
732  		*base = CFI_SP_INDIRECT;
733  		break;
734  	case ORC_REG_R10:
735  		*base = CFI_R10;
736  		break;
737  	case ORC_REG_R13:
738  		*base = CFI_R13;
739  		break;
740  	case ORC_REG_DI:
741  		*base = CFI_DI;
742  		break;
743  	case ORC_REG_DX:
744  		*base = CFI_DX;
745  		break;
746  	default:
747  		return -1;
748  	}
749  
750  	return 0;
751  }
752  
753  bool arch_is_retpoline(struct symbol *sym)
754  {
755  	return !strncmp(sym->name, "__x86_indirect_", 15);
756  }
757