xref: /linux/tools/perf/util/capstone.c (revision a77ecea7ced2fef7cc0a8ad0323542f781ad9788)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "capstone.h"
3 
4 #include <errno.h>
5 #include <inttypes.h>
6 #include <string.h>
7 
8 #include <dlfcn.h>
9 #include <elf.h>
10 #include <fcntl.h>
11 #include <linux/ctype.h>
12 
13 #include <capstone/capstone.h>
14 
15 #include "addr_location.h"
16 #include "annotate.h"
17 #include "debug.h"
18 #include "disasm.h"
19 #include "dso.h"
20 #include "machine.h"
21 #include "map.h"
22 #include "namespaces.h"
23 #include "print_insn.h"
24 #include "symbol.h"
25 #include "thread.h"
26 
27 #ifdef LIBCAPSTONE_DLOPEN
/*
 * Return the dlopen() handle for libcapstone.so, loading the library on the
 * first call. The outcome of that first attempt, NULL included, is cached and
 * returned by every later call, so a failed load is only attempted once.
 */
static void *perf_cs_dll_handle(void)
{
	static bool attempted;
	static void *lib;

	if (attempted)
		return lib;

	attempted = true;
	lib = dlopen("libcapstone.so", RTLD_LAZY);
	if (lib == NULL)
		pr_debug("dlopen failed for libcapstone.so\n");

	return lib;
}
41 #endif
42 
43 static enum cs_err perf_cs_open(enum cs_arch arch, enum cs_mode mode, csh *handle)
44 {
45 #ifndef LIBCAPSTONE_DLOPEN
46 	return cs_open(arch, mode, handle);
47 #else
48 	static bool fn_init;
49 	static enum cs_err (*fn)(enum cs_arch arch, enum cs_mode mode, csh *handle);
50 
51 	if (!fn_init) {
52 		fn = dlsym(perf_cs_dll_handle(), "cs_open");
53 		if (!fn)
54 			pr_debug("dlsym failed for cs_open\n");
55 		fn_init = true;
56 	}
57 	if (!fn)
58 		return CS_ERR_HANDLE;
59 	return fn(arch, mode, handle);
60 #endif
61 }
62 
63 static enum cs_err perf_cs_option(csh handle, enum cs_opt_type type, size_t value)
64 {
65 #ifndef LIBCAPSTONE_DLOPEN
66 	return cs_option(handle, type, value);
67 #else
68 	static bool fn_init;
69 	static enum cs_err (*fn)(csh handle, enum cs_opt_type type, size_t value);
70 
71 	if (!fn_init) {
72 		fn = dlsym(perf_cs_dll_handle(), "cs_option");
73 		if (!fn)
74 			pr_debug("dlsym failed for cs_option\n");
75 		fn_init = true;
76 	}
77 	if (!fn)
78 		return CS_ERR_HANDLE;
79 	return fn(handle, type, value);
80 #endif
81 }
82 
83 static size_t perf_cs_disasm(csh handle, const uint8_t *code, size_t code_size,
84 			uint64_t address, size_t count, struct cs_insn **insn)
85 {
86 #ifndef LIBCAPSTONE_DLOPEN
87 	return cs_disasm(handle, code, code_size, address, count, insn);
88 #else
89 	static bool fn_init;
90 	static enum cs_err (*fn)(csh handle, const uint8_t *code, size_t code_size,
91 				 uint64_t address, size_t count, struct cs_insn **insn);
92 
93 	if (!fn_init) {
94 		fn = dlsym(perf_cs_dll_handle(), "cs_disasm");
95 		if (!fn)
96 			pr_debug("dlsym failed for cs_disasm\n");
97 		fn_init = true;
98 	}
99 	if (!fn)
100 		return CS_ERR_HANDLE;
101 	return fn(handle, code, code_size, address, count, insn);
102 #endif
103 }
104 
/*
 * Release @count instructions in the array @insn.
 *
 * With LIBCAPSTONE_DLOPEN, cs_free is looked up with dlsym() on the first call
 * and the lookup result is cached; nothing is done when the symbol could not
 * be resolved. Otherwise cs_free() is called directly.
 */
static void perf_cs_free(struct cs_insn *insn, size_t count)
{
#ifdef LIBCAPSTONE_DLOPEN
	static bool resolved;
	static void (*free_fn)(struct cs_insn *insn, size_t count);

	if (!resolved) {
		resolved = true;
		free_fn = dlsym(perf_cs_dll_handle(), "cs_free");
		if (free_fn == NULL)
			pr_debug("dlsym failed for cs_free\n");
	}

	if (free_fn != NULL)
		free_fn(insn, count);
#else
	cs_free(insn, count);
#endif
}
124 
125 static enum cs_err perf_cs_close(csh *handle)
126 {
127 #ifndef LIBCAPSTONE_DLOPEN
128 	return cs_close(handle);
129 #else
130 	static bool fn_init;
131 	static enum cs_err (*fn)(csh *handle);
132 
133 	if (!fn_init) {
134 		fn = dlsym(perf_cs_dll_handle(), "cs_close");
135 		if (!fn)
136 			pr_debug("dlsym failed for cs_close\n");
137 		fn_init = true;
138 	}
139 	if (!fn)
140 		return CS_ERR_HANDLE;
141 	return fn(handle);
142 #endif
143 }
144 
145 static bool e_machine_to_capstone(uint16_t e_machine, bool is64, bool is_big_endian,
146 				  enum cs_arch *arch, enum cs_mode *mode)
147 {
148 	*mode = is_big_endian ? CS_MODE_BIG_ENDIAN : CS_MODE_LITTLE_ENDIAN;
149 
150 	switch (e_machine) {
151 	case EM_X86_64:
152 	case EM_386:
153 		*arch = CS_ARCH_X86;
154 		*mode |= is64 ? CS_MODE_64 : CS_MODE_32;
155 		return true;
156 	case EM_AARCH64:
157 		*arch = CS_ARCH_ARM64;
158 		*mode |= CS_MODE_ARM;
159 		return true;
160 	case EM_ARM:
161 		*arch = CS_ARCH_ARM;
162 		*mode |= CS_MODE_ARM | CS_MODE_V8;
163 		return true;
164 	case EM_S390:
165 		*arch = CS_ARCH_SYSZ;
166 		return true;
167 	case EM_MIPS:
168 		*arch = CS_ARCH_MIPS;
169 		*mode |= is64 ? CS_MODE_MIPS64 : CS_MODE_MIPS32;
170 		return true;
171 	case EM_PPC:
172 		*arch = CS_ARCH_PPC;
173 		return true;
174 	case EM_PPC64:
175 		*arch = CS_ARCH_PPC;
176 		*mode |= CS_MODE_64;
177 		return true;
178 	case EM_SPARC:
179 		*arch = CS_ARCH_SPARC;
180 		return true;
181 	case EM_SPARCV9:
182 		*arch = CS_ARCH_SPARC;
183 		*mode |= CS_MODE_V9;
184 		return true;
185 	case EM_RISCV:
186 		*arch = CS_ARCH_RISCV;
187 		*mode |= (is64 ? CS_MODE_RISCV64 : CS_MODE_RISCV32) | CS_MODE_RISCVC;
188 		return true;
189 	default:
190 		return false;
191 	}
192 }
193 
194 static int capstone_init(uint16_t e_machine, csh *cs_handle, bool is64, bool is_big_endian,
195 			 bool disassembler_style)
196 {
197 	enum cs_arch arch;
198 	enum cs_mode mode;
199 
200 	if (!e_machine_to_capstone(e_machine, is64, is_big_endian, &arch, &mode))
201 		return -1;
202 
203 	if (perf_cs_open(arch, mode, cs_handle) != CS_ERR_OK) {
204 		pr_warning_once("cs_open failed\n");
205 		return -1;
206 	}
207 
208 	if (arch == CS_ARCH_X86) {
209 		/*
210 		 * In case of using capstone_init while symbol__disassemble
211 		 * setting CS_OPT_SYNTAX_ATT depends if disassembler_style opts
212 		 * is set via annotation args
213 		 */
214 		if (disassembler_style)
215 			perf_cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
216 		/*
217 		 * Resolving address operands to symbols is implemented
218 		 * on x86 by investigating instruction details.
219 		 */
220 		perf_cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON);
221 	}
222 
223 	return 0;
224 }
225 
226 static size_t print_insn_x86(struct thread *thread, u8 cpumode, struct cs_insn *insn,
227 			     int print_opts, FILE *fp)
228 {
229 	struct addr_location al;
230 	size_t printed = 0;
231 
232 	if (insn->detail && insn->detail->x86.op_count == 1) {
233 		struct cs_x86_op *op = &insn->detail->x86.operands[0];
234 
235 		addr_location__init(&al);
236 		if (op->type == X86_OP_IMM &&
237 		    thread__find_symbol(thread, cpumode, op->imm, &al)) {
238 			printed += fprintf(fp, "%s ", insn[0].mnemonic);
239 			printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
240 			if (print_opts & PRINT_INSN_IMM_HEX)
241 				printed += fprintf(fp, " [%#" PRIx64 "]", op->imm);
242 			addr_location__exit(&al);
243 			return printed;
244 		}
245 		addr_location__exit(&al);
246 	}
247 
248 	printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
249 	return printed;
250 }
251 
252 ssize_t capstone__fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode,
253 				   bool is64bit, const uint8_t *code, size_t code_size, uint64_t ip,
254 				   int *lenp, int print_opts, FILE *fp)
255 {
256 	size_t printed;
257 	struct cs_insn *insn;
258 	csh cs_handle;
259 	size_t count;
260 	bool is_big_endian = false;
261 	uint16_t e_machine = thread__e_machine_endian(thread, machine,
262 						      /*e_flags=*/NULL, &is_big_endian);
263 	int ret;
264 
265 	/* TODO: Try to initiate capstone only once but need a proper place. */
266 	ret = capstone_init(e_machine, &cs_handle, is64bit, is_big_endian,
267 			    /*disassembler_style=*/true);
268 	if (ret < 0)
269 		return ret;
270 
271 	count = perf_cs_disasm(cs_handle, code, code_size, ip, 1, &insn);
272 	if (count > 0) {
273 		if (e_machine == EM_X86_64 || e_machine == EM_386)
274 			printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp);
275 		else
276 			printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
277 		if (lenp)
278 			*lenp = insn->size;
279 		perf_cs_free(insn, count);
280 	} else {
281 		printed = -1;
282 	}
283 
284 	perf_cs_close(&cs_handle);
285 	return printed;
286 }
287 
288 static void print_capstone_detail(struct cs_insn *insn, char *buf, size_t len,
289 				  struct annotate_args *args, u64 addr)
290 {
291 	int i;
292 	struct map *map = args->ms->map;
293 	struct symbol *sym;
294 
295 	/* TODO: support more architectures */
296 	if (!arch__is_x86(args->arch))
297 		return;
298 
299 	if (insn->detail == NULL)
300 		return;
301 
302 	for (i = 0; i < insn->detail->x86.op_count; i++) {
303 		struct cs_x86_op *op = &insn->detail->x86.operands[i];
304 		u64 orig_addr;
305 
306 		if (op->type != X86_OP_MEM)
307 			continue;
308 
309 		/* only print RIP-based global symbols for now */
310 		if (op->mem.base != X86_REG_RIP)
311 			continue;
312 
313 		/* get the target address */
314 		orig_addr = addr + insn->size + op->mem.disp;
315 		addr = map__objdump_2mem(map, orig_addr);
316 
317 		if (dso__kernel(map__dso(map))) {
318 			/*
319 			 * The kernel maps can be split into sections, let's
320 			 * find the map first and the search the symbol.
321 			 */
322 			map = maps__find(map__kmaps(map), addr);
323 			if (map == NULL)
324 				continue;
325 		}
326 
327 		/* convert it to map-relative address for search */
328 		addr = map__map_ip(map, addr);
329 
330 		sym = map__find_symbol(map, addr);
331 		if (sym == NULL)
332 			continue;
333 
334 		if (addr == sym->start) {
335 			scnprintf(buf, len, "\t# %"PRIx64" <%s>",
336 				  orig_addr, sym->name);
337 		} else {
338 			scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">",
339 				  orig_addr, sym->name, addr - sym->start);
340 		}
341 		break;
342 	}
343 }
344 
345 struct find_file_offset_data {
346 	u64 ip;
347 	u64 offset;
348 };
349 
350 /* This will be called for each PHDR in an ELF binary */
351 static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
352 {
353 	struct find_file_offset_data *data = arg;
354 
355 	if (start <= data->ip && data->ip < start + len) {
356 		data->offset = pgoff + data->ip - start;
357 		return 1;
358 	}
359 	return 0;
360 }
361 
362 int symbol__disassemble_capstone(const char *filename, struct symbol *sym,
363 				 struct annotate_args *args)
364 {
365 	struct annotation *notes = symbol__annotation(sym);
366 	struct map *map = args->ms->map;
367 	struct dso *dso = map__dso(map);
368 	u64 start = map__rip_2objdump(map, sym->start);
369 	u64 offset;
370 	int i, count, free_count;
371 	bool is_64bit = false;
372 	bool needs_cs_close = false;
373 	/* Malloc-ed buffer containing instructions read from disk. */
374 	u8 *code_buf = NULL;
375 	/* Pointer to code to be disassembled. */
376 	const u8 *buf;
377 	u64 buf_len;
378 	csh handle;
379 	struct cs_insn *insn = NULL;
380 	char disasm_buf[512];
381 	struct disasm_line *dl;
382 	bool disassembler_style = false;
383 	uint16_t e_machine;
384 	bool is_big_endian = false;
385 
386 	if (args->options->objdump_path)
387 		return -1;
388 
389 	buf = dso__read_symbol(dso, filename, map, sym,
390 			       &code_buf, &buf_len, &is_64bit);
391 	if (buf == NULL)
392 		return errno;
393 
394 	/* add the function address and name */
395 	scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
396 		  start, sym->name);
397 
398 	args->offset = -1;
399 	args->line = disasm_buf;
400 	args->line_nr = 0;
401 	args->fileloc = NULL;
402 	args->ms->sym = sym;
403 
404 	dl = disasm_line__new(args);
405 	if (dl == NULL)
406 		goto err;
407 
408 	annotation_line__add(&dl->al, &notes->src->source);
409 
410 	if (!args->options->disassembler_style ||
411 	    !strcmp(args->options->disassembler_style, "att"))
412 		disassembler_style = true;
413 
414 	e_machine = thread__e_machine_endian(args->ms->thread,
415 					     /*machine=*/NULL,
416 					     /*e_flags=*/NULL, &is_big_endian);
417 	if (capstone_init(e_machine, &handle, is_64bit, is_big_endian, disassembler_style) < 0)
418 		goto err;
419 
420 	needs_cs_close = true;
421 
422 	free_count = count = perf_cs_disasm(handle, buf, buf_len, start, buf_len, &insn);
423 	for (i = 0, offset = 0; i < count; i++) {
424 		int printed;
425 
426 		printed = scnprintf(disasm_buf, sizeof(disasm_buf),
427 				    "       %-7s %s",
428 				    insn[i].mnemonic, insn[i].op_str);
429 		print_capstone_detail(&insn[i], disasm_buf + printed,
430 				      sizeof(disasm_buf) - printed, args,
431 				      start + offset);
432 
433 		args->offset = offset;
434 		args->line = disasm_buf;
435 
436 		dl = disasm_line__new(args);
437 		if (dl == NULL)
438 			goto err;
439 
440 		annotation_line__add(&dl->al, &notes->src->source);
441 
442 		offset += insn[i].size;
443 	}
444 
445 	/* It failed in the middle: probably due to unknown instructions */
446 	if (offset != buf_len) {
447 		struct list_head *list = &notes->src->source;
448 
449 		/* Discard all lines and fallback to objdump */
450 		while (!list_empty(list)) {
451 			dl = list_first_entry(list, struct disasm_line, al.node);
452 
453 			list_del_init(&dl->al.node);
454 			disasm_line__free(dl);
455 		}
456 		count = -1;
457 	}
458 
459 out:
460 	if (needs_cs_close) {
461 		perf_cs_close(&handle);
462 		if (free_count > 0)
463 			perf_cs_free(insn, free_count);
464 	}
465 	free(code_buf);
466 	return count < 0 ? count : 0;
467 
468 err:
469 	if (needs_cs_close) {
470 		struct disasm_line *tmp;
471 
472 		/*
473 		 * It probably failed in the middle of the above loop.
474 		 * Release any resources it might add.
475 		 */
476 		list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
477 			list_del(&dl->al.node);
478 			disasm_line__free(dl);
479 		}
480 	}
481 	count = -1;
482 	goto out;
483 }
484 
485 int symbol__disassemble_capstone_powerpc(const char *filename __maybe_unused,
486 					 struct symbol *sym __maybe_unused,
487 					 struct annotate_args *args __maybe_unused)
488 {
489 	struct annotation *notes = symbol__annotation(sym);
490 	struct map *map = args->ms->map;
491 	struct dso *dso = map__dso(map);
492 	struct nscookie nsc;
493 	u64 start = map__rip_2objdump(map, sym->start);
494 	u64 end = map__rip_2objdump(map, sym->end);
495 	u64 len = end - start;
496 	u64 offset;
497 	int i, fd, count;
498 	bool is_64bit = false;
499 	bool needs_cs_close = false;
500 	u8 *buf = NULL;
501 	struct find_file_offset_data data = {
502 		.ip = start,
503 	};
504 	csh handle;
505 	char disasm_buf[512];
506 	struct disasm_line *dl;
507 	u32 *line;
508 	bool disassembler_style = false;
509 	uint16_t e_machine;
510 	bool is_big_endian = false;
511 
512 	if (args->options->objdump_path)
513 		return -1;
514 
515 	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
516 	fd = open(filename, O_RDONLY);
517 	nsinfo__mountns_exit(&nsc);
518 	if (fd < 0)
519 		return -1;
520 
521 	if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
522 			    &is_64bit) == 0)
523 		goto err;
524 
525 	if (!args->options->disassembler_style ||
526 	    !strcmp(args->options->disassembler_style, "att"))
527 		disassembler_style = true;
528 
529 	e_machine = thread__e_machine_endian(args->ms->thread,
530 					     /*machine=*/NULL,
531 					     /*e_flags=*/NULL, &is_big_endian);
532 	if (capstone_init(e_machine, &handle, is_64bit, is_big_endian, disassembler_style) < 0)
533 		goto err;
534 
535 	needs_cs_close = true;
536 
537 	buf = malloc(len);
538 	if (buf == NULL)
539 		goto err;
540 
541 	count = pread(fd, buf, len, data.offset);
542 	close(fd);
543 	fd = -1;
544 
545 	if ((u64)count != len)
546 		goto err;
547 
548 	line = (u32 *)buf;
549 
550 	/* add the function address and name */
551 	scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
552 		  start, sym->name);
553 
554 	args->offset = -1;
555 	args->line = disasm_buf;
556 	args->line_nr = 0;
557 	args->fileloc = NULL;
558 	args->ms->sym = sym;
559 
560 	dl = disasm_line__new(args);
561 	if (dl == NULL)
562 		goto err;
563 
564 	annotation_line__add(&dl->al, &notes->src->source);
565 
566 	/*
567 	 * TODO: enable disassm for powerpc
568 	 * count = cs_disasm(handle, buf, len, start, len, &insn);
569 	 *
570 	 * For now, only binary code is saved in disassembled line
571 	 * to be used in "type" and "typeoff" sort keys. Each raw code
572 	 * is 32 bit instruction. So use "len/4" to get the number of
573 	 * entries.
574 	 */
575 	count = len/4;
576 
577 	for (i = 0, offset = 0; i < count; i++) {
578 		args->offset = offset;
579 		sprintf(args->line, "%x", line[i]);
580 
581 		dl = disasm_line__new(args);
582 		if (dl == NULL)
583 			break;
584 
585 		annotation_line__add(&dl->al, &notes->src->source);
586 
587 		offset += 4;
588 	}
589 
590 	/* It failed in the middle */
591 	if (offset != len) {
592 		struct list_head *list = &notes->src->source;
593 
594 		/* Discard all lines and fallback to objdump */
595 		while (!list_empty(list)) {
596 			dl = list_first_entry(list, struct disasm_line, al.node);
597 
598 			list_del_init(&dl->al.node);
599 			disasm_line__free(dl);
600 		}
601 		count = -1;
602 	}
603 
604 out:
605 	if (needs_cs_close)
606 		perf_cs_close(&handle);
607 	free(buf);
608 	return count < 0 ? count : 0;
609 
610 err:
611 	if (fd >= 0)
612 		close(fd);
613 	count = -1;
614 	goto out;
615 }
616