xref: /linux/tools/perf/util/capstone.c (revision c7decec2f2d2ab0366567f9e30c0e1418cece43f)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "capstone.h"
3 #include "annotate.h"
4 #include "addr_location.h"
5 #include "debug.h"
6 #include "disasm.h"
7 #include "dso.h"
8 #include "machine.h"
9 #include "map.h"
10 #include "namespaces.h"
11 #include "print_insn.h"
12 #include "symbol.h"
13 #include "thread.h"
14 #include <dlfcn.h>
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <inttypes.h>
18 #include <string.h>
19 
20 #include <capstone/capstone.h>
21 
22 #ifdef LIBCAPSTONE_DLOPEN
/*
 * Return the dlopen() handle for "libcapstone.so".
 *
 * The library is opened on the first call only; the result, including a
 * NULL handle on failure, is cached and handed back on every later call.
 */
static void *perf_cs_dll_handle(void)
{
	static void *cached_handle;
	static bool attempted;

	if (attempted)
		return cached_handle;

	attempted = true;
	cached_handle = dlopen("libcapstone.so", RTLD_LAZY);
	if (cached_handle == NULL)
		pr_debug("dlopen failed for libcapstone.so\n");

	return cached_handle;
}
36 #endif
37 
/*
 * cs_open() wrapper.
 *
 * Without LIBCAPSTONE_DLOPEN this is a direct call. With it, the symbol is
 * looked up via dlsym() on the first call and the pointer is cached; when
 * the lookup fails CS_ERR_HANDLE is returned on every call.
 */
static enum cs_err perf_cs_open(enum cs_arch arch, enum cs_mode mode, csh *handle)
{
#ifdef LIBCAPSTONE_DLOPEN
	static enum cs_err (*cs_open_fn)(enum cs_arch arch, enum cs_mode mode, csh *handle);
	static bool lookup_done;

	if (!lookup_done) {
		cs_open_fn = dlsym(perf_cs_dll_handle(), "cs_open");
		if (cs_open_fn == NULL)
			pr_debug("dlsym failed for cs_open\n");
		lookup_done = true;
	}

	if (cs_open_fn == NULL)
		return CS_ERR_HANDLE;

	return cs_open_fn(arch, mode, handle);
#else
	return cs_open(arch, mode, handle);
#endif
}
57 
/*
 * cs_option() wrapper.
 *
 * Without LIBCAPSTONE_DLOPEN this is a direct call. With it, the symbol is
 * looked up via dlsym() on the first call and the pointer is cached; when
 * the lookup fails CS_ERR_HANDLE is returned on every call.
 */
static enum cs_err perf_cs_option(csh handle, enum cs_opt_type type, size_t value)
{
#ifdef LIBCAPSTONE_DLOPEN
	static enum cs_err (*cs_option_fn)(csh handle, enum cs_opt_type type, size_t value);
	static bool lookup_done;

	if (!lookup_done) {
		cs_option_fn = dlsym(perf_cs_dll_handle(), "cs_option");
		if (cs_option_fn == NULL)
			pr_debug("dlsym failed for cs_option\n");
		lookup_done = true;
	}

	if (cs_option_fn == NULL)
		return CS_ERR_HANDLE;

	return cs_option_fn(handle, type, value);
#else
	return cs_option(handle, type, value);
#endif
}
77 
/*
 * cs_disasm() wrapper.
 *
 * Without LIBCAPSTONE_DLOPEN this is a direct call. With it, the symbol is
 * looked up via dlsym() on the first call and the pointer is cached.
 *
 * Returns the number of instructions stored in *insn. When cs_disasm cannot
 * be resolved, 0 is returned so that callers, which treat any positive
 * value as a count of valid entries in *insn, never touch an unset array.
 */
static size_t perf_cs_disasm(csh handle, const uint8_t *code, size_t code_size,
			uint64_t address, size_t count, struct cs_insn **insn)
{
#ifndef LIBCAPSTONE_DLOPEN
	return cs_disasm(handle, code, code_size, address, count, insn);
#else
	static bool fn_init;
	/* Must mirror cs_disasm()'s prototype: it returns size_t, not cs_err. */
	static size_t (*fn)(csh handle, const uint8_t *code, size_t code_size,
			    uint64_t address, size_t count, struct cs_insn **insn);

	if (!fn_init) {
		fn = dlsym(perf_cs_dll_handle(), "cs_disasm");
		if (!fn)
			pr_debug("dlsym failed for cs_disasm\n");
		fn_init = true;
	}
	if (!fn)
		return 0; /* nothing disassembled; an error code here would look like a count */
	return fn(handle, code, code_size, address, count, insn);
#endif
}
99 
/*
 * cs_free() wrapper.
 *
 * Without LIBCAPSTONE_DLOPEN this is a direct call. With it, the symbol is
 * looked up via dlsym() on the first call and the pointer is cached; when
 * the lookup fails the call does nothing.
 */
static void perf_cs_free(struct cs_insn *insn, size_t count)
{
#ifdef LIBCAPSTONE_DLOPEN
	static void (*cs_free_fn)(struct cs_insn *insn, size_t count);
	static bool lookup_done;

	if (!lookup_done) {
		cs_free_fn = dlsym(perf_cs_dll_handle(), "cs_free");
		if (cs_free_fn == NULL)
			pr_debug("dlsym failed for cs_free\n");
		lookup_done = true;
	}

	if (cs_free_fn != NULL)
		cs_free_fn(insn, count);
#else
	cs_free(insn, count);
#endif
}
119 
/*
 * cs_close() wrapper.
 *
 * Without LIBCAPSTONE_DLOPEN this is a direct call. With it, the symbol is
 * looked up via dlsym() on the first call and the pointer is cached; when
 * the lookup fails CS_ERR_HANDLE is returned on every call.
 */
static enum cs_err perf_cs_close(csh *handle)
{
#ifdef LIBCAPSTONE_DLOPEN
	static enum cs_err (*cs_close_fn)(csh *handle);
	static bool lookup_done;

	if (!lookup_done) {
		cs_close_fn = dlsym(perf_cs_dll_handle(), "cs_close");
		if (cs_close_fn == NULL)
			pr_debug("dlsym failed for cs_close\n");
		lookup_done = true;
	}

	if (cs_close_fn == NULL)
		return CS_ERR_HANDLE;

	return cs_close_fn(handle);
#else
	return cs_close(handle);
#endif
}
139 
capstone_init(struct machine * machine,csh * cs_handle,bool is64,bool disassembler_style)140 static int capstone_init(struct machine *machine, csh *cs_handle, bool is64,
141 			 bool disassembler_style)
142 {
143 	enum cs_arch arch;
144 	enum cs_mode mode;
145 
146 	if (machine__is(machine, "x86_64") && is64) {
147 		arch = CS_ARCH_X86;
148 		mode = CS_MODE_64;
149 	} else if (machine__normalized_is(machine, "x86")) {
150 		arch = CS_ARCH_X86;
151 		mode = CS_MODE_32;
152 	} else if (machine__normalized_is(machine, "arm64")) {
153 		arch = CS_ARCH_ARM64;
154 		mode = CS_MODE_ARM;
155 	} else if (machine__normalized_is(machine, "arm")) {
156 		arch = CS_ARCH_ARM;
157 		mode = CS_MODE_ARM + CS_MODE_V8;
158 	} else if (machine__normalized_is(machine, "s390")) {
159 		arch = CS_ARCH_SYSZ;
160 		mode = CS_MODE_BIG_ENDIAN;
161 	} else {
162 		return -1;
163 	}
164 
165 	if (perf_cs_open(arch, mode, cs_handle) != CS_ERR_OK) {
166 		pr_warning_once("cs_open failed\n");
167 		return -1;
168 	}
169 
170 	if (machine__normalized_is(machine, "x86")) {
171 		/*
172 		 * In case of using capstone_init while symbol__disassemble
173 		 * setting CS_OPT_SYNTAX_ATT depends if disassembler_style opts
174 		 * is set via annotation args
175 		 */
176 		if (disassembler_style)
177 			perf_cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
178 		/*
179 		 * Resolving address operands to symbols is implemented
180 		 * on x86 by investigating instruction details.
181 		 */
182 		perf_cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON);
183 	}
184 
185 	return 0;
186 }
187 
/*
 * Print one x86 instruction to @fp.
 *
 * If the instruction has exactly one operand, that operand is an immediate
 * and thread__find_symbol() resolves it, the output is the mnemonic followed
 * by the symbol (plus the raw immediate in hex when PRINT_INSN_IMM_HEX is
 * set in @print_opts). Otherwise the mnemonic and capstone's operand string
 * are printed.
 *
 * Returns the accumulated return values of the print calls.
 */
static size_t print_insn_x86(struct thread *thread, u8 cpumode, struct cs_insn *insn,
			     int print_opts, FILE *fp)
{
	struct addr_location al;
	struct cs_x86_op *op;
	size_t printed = 0;
	bool resolved;

	if (insn->detail == NULL || insn->detail->x86.op_count != 1)
		goto plain;

	op = &insn->detail->x86.operands[0];

	addr_location__init(&al);
	resolved = op->type == X86_OP_IMM &&
		   thread__find_symbol(thread, cpumode, op->imm, &al);
	if (resolved) {
		printed += fprintf(fp, "%s ", insn->mnemonic);
		printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
		if (print_opts & PRINT_INSN_IMM_HEX)
			printed += fprintf(fp, " [%#" PRIx64 "]", op->imm);
	}
	addr_location__exit(&al);

	if (resolved)
		return printed;

plain:
	printed += fprintf(fp, "%s %s", insn->mnemonic, insn->op_str);
	return printed;
}
213 
214 
/*
 * Disassemble the single instruction at @code (address @ip) and print it
 * to @fp.
 *
 * If @lenp is non-NULL and an instruction was decoded, its size is stored
 * there. Returns the value produced by the print helpers, the negative
 * capstone_init() result if initialization fails, or -1 when capstone
 * decodes nothing.
 */
ssize_t capstone__fprintf_insn_asm(struct machine *machine __maybe_unused,
				   struct thread *thread __maybe_unused,
				   u8 cpumode __maybe_unused, bool is64bit __maybe_unused,
				   const uint8_t *code __maybe_unused,
				   size_t code_size __maybe_unused,
				   uint64_t ip __maybe_unused, int *lenp __maybe_unused,
				   int print_opts __maybe_unused, FILE *fp __maybe_unused)
{
	struct cs_insn *insn;
	size_t nr_insns;
	size_t printed;
	csh cs_handle;
	int err;

	/* TODO: Try to initiate capstone only once but need a proper place. */
	err = capstone_init(machine, &cs_handle, is64bit, true);
	if (err < 0)
		return err;

	nr_insns = perf_cs_disasm(cs_handle, code, code_size, ip, 1, &insn);
	if (nr_insns == 0) {
		printed = -1;
		goto close;
	}

	if (machine__normalized_is(machine, "x86"))
		printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp);
	else
		printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);

	if (lenp != NULL)
		*lenp = insn->size;

	perf_cs_free(insn, nr_insns);

close:
	perf_cs_close(&cs_handle);
	return printed;
}
250 
/*
 * Append a "# <addr> <symbol[+off]>" note to @buf for the first RIP-relative
 * memory operand of @insn whose target resolves to a symbol.
 *
 * @addr is the address of @insn; @buf/@len describe the remaining space in
 * the output line. Nothing is written for non-x86 architectures, when
 * capstone supplied no instruction detail, or when no operand resolves.
 */
static void print_capstone_detail(struct cs_insn *insn, char *buf, size_t len,
				  struct annotate_args *args, u64 addr)
{
	int i;

	/* TODO: support more architectures */
	if (!arch__is_x86(args->arch))
		return;

	if (insn->detail == NULL)
		return;

	for (i = 0; i < insn->detail->x86.op_count; i++) {
		struct cs_x86_op *op = &insn->detail->x86.operands[i];
		/*
		 * Work on per-operand copies: the lookups below rewrite both the
		 * map and the address, and a failed lookup must not leak those
		 * values into the next operand's computation.
		 */
		struct map *map = args->ms->map;
		struct symbol *sym;
		u64 orig_addr;
		u64 target;

		if (op->type != X86_OP_MEM)
			continue;

		/* only print RIP-based global symbols for now */
		if (op->mem.base != X86_REG_RIP)
			continue;

		/* get the target address (relative to the end of the instruction) */
		orig_addr = addr + insn->size + op->mem.disp;
		target = map__objdump_2mem(map, orig_addr);

		if (dso__kernel(map__dso(map))) {
			/*
			 * The kernel maps can be split into sections, let's
			 * find the map first and then search the symbol.
			 *
			 * NOTE(review): if maps__find() hands back a new
			 * reference, it is never dropped here - confirm and
			 * add the matching put if so.
			 */
			map = maps__find(map__kmaps(map), target);
			if (map == NULL)
				continue;
		}

		/* convert it to map-relative address for search */
		target = map__map_ip(map, target);

		sym = map__find_symbol(map, target);
		if (sym == NULL)
			continue;

		if (target == sym->start) {
			scnprintf(buf, len, "\t# %"PRIx64" <%s>",
				  orig_addr, sym->name);
		} else {
			scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">",
				  orig_addr, sym->name, target - sym->start);
		}
		break;
	}
}
307 
308 struct find_file_offset_data {
309 	u64 ip;
310 	u64 offset;
311 };
312 
313 /* This will be called for each PHDR in an ELF binary */
find_file_offset(u64 start,u64 len,u64 pgoff,void * arg)314 static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
315 {
316 	struct find_file_offset_data *data = arg;
317 
318 	if (start <= data->ip && data->ip < start + len) {
319 		data->offset = pgoff + data->ip - start;
320 		return 1;
321 	}
322 	return 0;
323 }
324 
/*
 * Disassemble @sym with capstone and append the result to the symbol's
 * annotation source list: one "<addr> <name>:" header line followed by one
 * line per instruction.
 *
 * Returns 0 on success and -1 when an objdump path was requested, capstone
 * cannot be initialized, a line cannot be allocated, or capstone stops
 * before the end of the symbol. If the symbol's code cannot be read, errno
 * is returned. On every -1 path taken after lines were added, the list is
 * emptied again so that a fallback disassembler starts from a clean state.
 */
int symbol__disassemble_capstone(const char *filename __maybe_unused,
				 struct symbol *sym __maybe_unused,
				 struct annotate_args *args __maybe_unused)
{
	struct annotation *notes = symbol__annotation(sym);
	struct map *map = args->ms->map;
	struct dso *dso = map__dso(map);
	u64 start = map__rip_2objdump(map, sym->start);
	u64 offset;
	int i, count, free_count = 0;
	bool is_64bit = false;
	bool needs_cs_close = false;
	/* Malloc-ed buffer containing instructions read from disk. */
	u8 *code_buf = NULL;
	/* Pointer to code to be disassembled. */
	const u8 *buf;
	u64 buf_len;
	csh handle;
	struct cs_insn *insn = NULL;
	char disasm_buf[512];
	struct disasm_line *dl, *tmp;
	bool disassembler_style = false;

	if (args->options->objdump_path)
		return -1;

	buf = dso__read_symbol(dso, filename, map, sym,
			       &code_buf, &buf_len, &is_64bit);
	if (buf == NULL)
		return errno;

	/* add the function address and name */
	scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
		  start, sym->name);

	args->offset = -1;
	args->line = disasm_buf;
	args->line_nr = 0;
	args->fileloc = NULL;
	args->ms->sym = sym;

	dl = disasm_line__new(args);
	if (dl == NULL)
		goto err;

	annotation_line__add(&dl->al, &notes->src->source);

	/* AT&T syntax is used when no style is given or "att" is requested. */
	if (!args->options->disassembler_style ||
	    !strcmp(args->options->disassembler_style, "att"))
		disassembler_style = true;

	if (capstone_init(maps__machine(thread__maps(args->ms->thread)), &handle, is_64bit,
			  disassembler_style) < 0)
		goto err;

	needs_cs_close = true;

	free_count = count = perf_cs_disasm(handle, buf, buf_len, start, buf_len, &insn);
	for (i = 0, offset = 0; i < count; i++) {
		int printed;

		printed = scnprintf(disasm_buf, sizeof(disasm_buf),
				    "       %-7s %s",
				    insn[i].mnemonic, insn[i].op_str);
		/* Append the optional "# addr <sym>" note after the operands. */
		print_capstone_detail(&insn[i], disasm_buf + printed,
				      sizeof(disasm_buf) - printed, args,
				      start + offset);

		args->offset = offset;
		args->line = disasm_buf;

		dl = disasm_line__new(args);
		if (dl == NULL)
			goto err;

		annotation_line__add(&dl->al, &notes->src->source);

		offset += insn[i].size;
	}

	/* It failed in the middle: probably due to unknown instructions */
	if (offset != buf_len) {
		struct list_head *list = &notes->src->source;

		/* Discard all lines and fallback to objdump */
		while (!list_empty(list)) {
			dl = list_first_entry(list, struct disasm_line, al.node);

			list_del_init(&dl->al.node);
			disasm_line__free(dl);
		}
		count = -1;
	}

out:
	if (needs_cs_close) {
		perf_cs_close(&handle);
		if (free_count > 0)
			perf_cs_free(insn, free_count);
	}
	free(code_buf);
	return count < 0 ? count : 0;

err:
	/*
	 * Release every line added so far. This must also happen when
	 * capstone_init() failed: the header line is already on the list at
	 * that point and would otherwise be left behind.
	 */
	list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
		list_del(&dl->al.node);
		disasm_line__free(dl);
	}
	count = -1;
	goto out;
}
444 
/*
 * powerpc variant of the capstone disassembly entry point.
 *
 * The symbol's code is read straight from @filename and, after a
 * "<addr> <name>:" header line, one line per 32-bit word is added to the
 * symbol's annotation source list holding that word in hex; no real
 * disassembly is done yet (see the TODO below).
 *
 * Returns 0 on success and -1 when an objdump path was requested or any
 * step (open, offset lookup, capstone init, allocation, read, line
 * creation) fails.
 */
int symbol__disassemble_capstone_powerpc(const char *filename __maybe_unused,
					 struct symbol *sym __maybe_unused,
					 struct annotate_args *args __maybe_unused)
{
	struct annotation *notes = symbol__annotation(sym);
	struct map *map = args->ms->map;
	struct dso *dso = map__dso(map);
	struct nscookie nsc;
	u64 start = map__rip_2objdump(map, sym->start);
	u64 end = map__rip_2objdump(map, sym->end);
	u64 len = end - start;
	u64 offset;
	int i, fd, count;
	bool is_64bit = false;
	bool needs_cs_close = false;
	u8 *buf = NULL;
	struct find_file_offset_data lookup = {
		.ip = start,
	};
	csh handle;
	char disasm_buf[512];
	struct disasm_line *dl, *tmp;
	u32 *words;
	bool disassembler_style;

	if (args->options->objdump_path)
		return -1;

	/* Open the file from inside the dso's mount namespace. */
	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
	fd = open(filename, O_RDONLY);
	nsinfo__mountns_exit(&nsc);
	if (fd < 0)
		return -1;

	/* Translate the start address into a file offset. */
	if (!file__read_maps(fd, /*exe=*/true, find_file_offset, &lookup,
			     &is_64bit))
		goto err;

	/* AT&T syntax is used when no style is given or "att" is requested. */
	disassembler_style = !args->options->disassembler_style ||
			     !strcmp(args->options->disassembler_style, "att");

	if (capstone_init(maps__machine(thread__maps(args->ms->thread)), &handle, is_64bit,
			  disassembler_style) < 0)
		goto err;

	needs_cs_close = true;

	buf = malloc(len);
	if (buf == NULL)
		goto err;

	count = pread(fd, buf, len, lookup.offset);
	close(fd);
	fd = -1;

	/* Anything but a complete read is treated as failure. */
	if ((u64)count != len)
		goto err;

	words = (u32 *)buf;

	/* add the function address and name */
	scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
		  start, sym->name);

	args->offset = -1;
	args->line = disasm_buf;
	args->line_nr = 0;
	args->fileloc = NULL;
	args->ms->sym = sym;

	dl = disasm_line__new(args);
	if (dl == NULL)
		goto err;

	annotation_line__add(&dl->al, &notes->src->source);

	/*
	 * TODO: enable disassm for powerpc
	 * count = cs_disasm(handle, buf, len, start, len, &insn);
	 *
	 * For now, only binary code is saved in disassembled line
	 * to be used in "type" and "typeoff" sort keys. Each raw code
	 * is 32 bit instruction. So use "len/4" to get the number of
	 * entries.
	 */
	count = len/4;

	/* offset only advances once a line was added successfully */
	for (i = 0, offset = 0; i < count; i++, offset += 4) {
		args->offset = offset;
		sprintf(args->line, "%x", words[i]);

		dl = disasm_line__new(args);
		if (dl == NULL)
			break;

		annotation_line__add(&dl->al, &notes->src->source);
	}

	/* It failed in the middle */
	if (offset != len) {
		/* Discard all lines and fallback to objdump */
		list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
			list_del_init(&dl->al.node);
			disasm_line__free(dl);
		}
		count = -1;
	}

out:
	if (needs_cs_close)
		perf_cs_close(&handle);
	free(buf);
	return count < 0 ? count : 0;

err:
	if (fd >= 0)
		close(fd);
	count = -1;
	goto out;
}
572