xref: /linux/tools/perf/util/capstone.c (revision ec714e371f22f716a04e6ecb2a24988c92b26911)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "capstone.h"
3 #include "annotate.h"
4 #include "addr_location.h"
5 #include "debug.h"
6 #include "disasm.h"
7 #include "dso.h"
8 #include "machine.h"
9 #include "map.h"
10 #include "namespaces.h"
11 #include "print_insn.h"
12 #include "symbol.h"
13 #include "thread.h"
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <string.h>
17 
18 #ifdef HAVE_LIBCAPSTONE_SUPPORT
19 #include <capstone/capstone.h>
20 #endif
21 
22 #ifdef HAVE_LIBCAPSTONE_SUPPORT
/*
 * Open a capstone handle matching @machine and store it in @cs_handle.
 *
 * Recognised machines are x86 (64-bit mode when the machine is "x86_64"
 * and @is64 is set, 32-bit mode otherwise), arm64, arm and s390.  On x86,
 * @disassembler_style requests AT&T syntax and instruction details are
 * always enabled.
 *
 * Returns 0 on success, -1 when the machine is not recognised or when
 * cs_open() fails.
 */
static int capstone_init(struct machine *machine, csh *cs_handle, bool is64,
			 bool disassembler_style)
{
	cs_arch cs_architecture;
	cs_mode cs_flags;
	cs_err status;

	if (machine__is(machine, "x86_64") && is64) {
		cs_architecture = CS_ARCH_X86;
		cs_flags = CS_MODE_64;
	} else if (machine__normalized_is(machine, "x86")) {
		cs_architecture = CS_ARCH_X86;
		cs_flags = CS_MODE_32;
	} else if (machine__normalized_is(machine, "arm64")) {
		cs_architecture = CS_ARCH_ARM64;
		cs_flags = CS_MODE_ARM;
	} else if (machine__normalized_is(machine, "arm")) {
		cs_architecture = CS_ARCH_ARM;
		cs_flags = CS_MODE_ARM + CS_MODE_V8;
	} else if (machine__normalized_is(machine, "s390")) {
		cs_architecture = CS_ARCH_SYSZ;
		cs_flags = CS_MODE_BIG_ENDIAN;
	} else {
		/* No capstone configuration for this machine. */
		return -1;
	}

	status = cs_open(cs_architecture, cs_flags, cs_handle);
	if (status != CS_ERR_OK) {
		pr_warning_once("cs_open failed\n");
		return -1;
	}

	/* The remaining options only apply to x86. */
	if (!machine__normalized_is(machine, "x86"))
		return 0;

	/*
	 * When capstone_init is used from symbol__disassemble, whether
	 * CS_OPT_SYNTAX_ATT is set depends on the disassembler_style
	 * option passed through the annotation args.
	 */
	if (disassembler_style)
		cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);

	/*
	 * Address operands are resolved to symbols on x86 by looking at
	 * the instruction details, so they have to be turned on.
	 */
	cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON);

	return 0;
}
70 #endif
71 
72 #ifdef HAVE_LIBCAPSTONE_SUPPORT
/*
 * Print one decoded x86 instruction to @fp.
 *
 * When the instruction has exactly one operand, that operand is an
 * immediate, and thread__find_symbol() resolves it for @thread/@cpumode,
 * the output is "<mnemonic> <symbol+offset>", followed by the raw
 * immediate in brackets if PRINT_INSN_IMM_HEX is set in @print_opts.
 * In every other case the plain "<mnemonic> <operands>" text is printed.
 *
 * Returns the accumulated return values of the print calls.
 */
static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn,
			     int print_opts, FILE *fp)
{
	struct addr_location al;
	cs_x86_op *operand;
	size_t nr_chars;

	/* Symbol resolution needs details and a single operand. */
	if (!insn->detail || insn->detail->x86.op_count != 1)
		goto plain;

	operand = &insn->detail->x86.operands[0];

	addr_location__init(&al);
	if (operand->type != X86_OP_IMM ||
	    !thread__find_symbol(thread, cpumode, operand->imm, &al)) {
		addr_location__exit(&al);
		goto plain;
	}

	nr_chars = fprintf(fp, "%s ", insn->mnemonic);
	nr_chars += symbol__fprintf_symname_offs(al.sym, &al, fp);
	if (print_opts & PRINT_INSN_IMM_HEX)
		nr_chars += fprintf(fp, " [%#" PRIx64 "]", operand->imm);
	addr_location__exit(&al);
	return nr_chars;

plain:
	return fprintf(fp, "%s %s", insn->mnemonic, insn->op_str);
}
98 #endif
99 
100 
/*
 * Disassemble the single instruction found at @code (at most @code_size
 * bytes, located at address @ip) and print it to @fp.
 *
 * On x86 the instruction goes through print_insn_x86() so that an
 * immediate operand can be shown as a symbol; other architectures get the
 * plain "<mnemonic> <operands>" text.  When @lenp is not NULL it receives
 * the size of the decoded instruction.
 *
 * Returns the value produced by the print call, the negative value from
 * capstone_init() when it fails, or -1 when nothing could be decoded or
 * when perf is built without capstone support.
 */
ssize_t capstone__fprintf_insn_asm(struct machine *machine __maybe_unused,
				   struct thread *thread __maybe_unused,
				   u8 cpumode __maybe_unused, bool is64bit __maybe_unused,
				   const uint8_t *code __maybe_unused,
				   size_t code_size __maybe_unused,
				   uint64_t ip __maybe_unused, int *lenp __maybe_unused,
				   int print_opts __maybe_unused, FILE *fp __maybe_unused)
{
#ifdef HAVE_LIBCAPSTONE_SUPPORT
	cs_insn *decoded;
	csh cs_handle;
	size_t nr_insn;
	size_t nr_chars;
	int err;

	/* TODO: Try to initiate capstone only once but need a proper place. */
	err = capstone_init(machine, &cs_handle, is64bit, true);
	if (err < 0)
		return err;

	/* Decode at most one instruction. */
	nr_insn = cs_disasm(cs_handle, code, code_size, ip, 1, &decoded);
	if (nr_insn == 0) {
		cs_close(&cs_handle);
		return -1;
	}

	if (machine__normalized_is(machine, "x86"))
		nr_chars = print_insn_x86(thread, cpumode, decoded, print_opts, fp);
	else
		nr_chars = fprintf(fp, "%s %s", decoded->mnemonic, decoded->op_str);

	if (lenp)
		*lenp = decoded->size;

	cs_free(decoded, nr_insn);
	cs_close(&cs_handle);
	return nr_chars;
#else
	return -1;
#endif
}
140 
141 #ifdef HAVE_LIBCAPSTONE_SUPPORT
/*
 * Append a "# <addr> <symbol[+offset]>" comment to @buf (at most @len
 * bytes) for the first RIP-relative memory operand of @insn that resolves
 * to a symbol.
 *
 * @addr is the address of @insn as passed by the caller.  Only x86 is
 * handled; the function does nothing for other architectures or when the
 * instruction carries no details.
 *
 * The target address and the map used for the lookup are kept in
 * per-iteration locals, so an operand that fails to resolve cannot leave
 * a translated address or a NULL map behind for the next operand.
 */
static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
				  struct annotate_args *args, u64 addr)
{
	int i;

	/* TODO: support more architectures */
	if (!arch__is(args->arch, "x86"))
		return;

	if (insn->detail == NULL)
		return;

	for (i = 0; i < insn->detail->x86.op_count; i++) {
		cs_x86_op *op = &insn->detail->x86.operands[i];
		struct map *map = args->ms.map;
		struct symbol *sym;
		u64 orig_addr;
		u64 target;

		if (op->type != X86_OP_MEM)
			continue;

		/* only print RIP-based global symbols for now */
		if (op->mem.base != X86_REG_RIP)
			continue;

		/* get the target address */
		orig_addr = addr + insn->size + op->mem.disp;
		target = map__objdump_2mem(map, orig_addr);

		if (dso__kernel(map__dso(map))) {
			/*
			 * The kernel maps can be split into sections, let's
			 * find the map first and then search the symbol.
			 *
			 * NOTE(review): if maps__find() hands out a reference,
			 * nothing drops it here - confirm against its contract.
			 */
			map = maps__find(map__kmaps(map), target);
			if (map == NULL)
				continue;
		}

		/* convert it to map-relative address for search */
		target = map__map_ip(map, target);

		sym = map__find_symbol(map, target);
		if (sym == NULL)
			continue;

		if (target == sym->start) {
			scnprintf(buf, len, "\t# %"PRIx64" <%s>",
				  orig_addr, sym->name);
		} else {
			scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">",
				  orig_addr, sym->name, target - sym->start);
		}
		break;
	}
}
198 #endif
199 
200 #ifdef HAVE_LIBCAPSTONE_SUPPORT
201 struct find_file_offset_data {
202 	u64 ip;
203 	u64 offset;
204 };
205 
206 /* This will be called for each PHDR in an ELF binary */
find_file_offset(u64 start,u64 len,u64 pgoff,void * arg)207 static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
208 {
209 	struct find_file_offset_data *data = arg;
210 
211 	if (start <= data->ip && data->ip < start + len) {
212 		data->offset = pgoff + data->ip - start;
213 		return 1;
214 	}
215 	return 0;
216 }
217 #endif
218 
/*
 * Disassemble @sym with capstone and queue the result on the symbol's
 * annotation source list: first a "<addr> <name>:" header line, then one
 * line per decoded instruction.
 *
 * The code bytes come from dso__read_symbol().  If any step fails, or if
 * capstone stops before the whole buffer has been decoded, every line on
 * the list is discarded so that the caller can fall back to objdump.
 *
 * Returns 0 on success, -1 on failure (also when an explicit objdump path
 * is configured or perf is built without capstone), or the errno value
 * left by dso__read_symbol() when the code cannot be read.
 */
int symbol__disassemble_capstone(const char *filename __maybe_unused,
				 struct symbol *sym __maybe_unused,
				 struct annotate_args *args __maybe_unused)
{
#ifdef HAVE_LIBCAPSTONE_SUPPORT
	struct annotation *notes = symbol__annotation(sym);
	struct map *map = args->ms.map;
	struct dso *dso = map__dso(map);
	u64 start = map__rip_2objdump(map, sym->start);
	u64 offset;
	int i, count, free_count = 0;
	bool is_64bit = false;
	bool needs_cs_close = false;
	/* Malloc-ed buffer containing instructions read from disk. */
	u8 *code_buf = NULL;
	/* Pointer to code to be disassembled. */
	const u8 *buf;
	u64 buf_len;
	csh handle;
	cs_insn *insn = NULL;
	char disasm_buf[512];
	struct disasm_line *dl, *tmp;
	bool disassembler_style = false;

	/* An explicit objdump path means the user wants objdump. */
	if (args->options->objdump_path)
		return -1;

	buf = dso__read_symbol(dso, filename, map, sym,
			       &code_buf, &buf_len, &is_64bit);
	if (buf == NULL)
		return errno;

	/* add the function address and name */
	scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
		  start, sym->name);

	args->offset = -1;
	args->line = disasm_buf;
	args->line_nr = 0;
	args->fileloc = NULL;
	args->ms.sym = sym;

	dl = disasm_line__new(args);
	if (dl == NULL)
		goto err;

	annotation_line__add(&dl->al, &notes->src->source);

	/* AT&T syntax is used unless another style was requested. */
	if (!args->options->disassembler_style ||
	    !strcmp(args->options->disassembler_style, "att"))
		disassembler_style = true;

	if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
		goto err;

	needs_cs_close = true;

	/* Remember how many instructions to free; count may be reset below. */
	free_count = count = cs_disasm(handle, buf, buf_len, start, buf_len, &insn);
	for (i = 0, offset = 0; i < count; i++) {
		int printed;

		printed = scnprintf(disasm_buf, sizeof(disasm_buf),
				    "       %-7s %s",
				    insn[i].mnemonic, insn[i].op_str);
		print_capstone_detail(&insn[i], disasm_buf + printed,
				      sizeof(disasm_buf) - printed, args,
				      start + offset);

		args->offset = offset;
		args->line = disasm_buf;

		dl = disasm_line__new(args);
		if (dl == NULL)
			goto err;

		annotation_line__add(&dl->al, &notes->src->source);

		offset += insn[i].size;
	}

	/* It failed in the middle: probably due to unknown instructions */
	if (offset != buf_len)
		goto err;

out:
	if (free_count > 0)
		cs_free(insn, free_count);
	if (needs_cs_close)
		cs_close(&handle);
	free(code_buf);
	return count < 0 ? count : 0;

err:
	/*
	 * Discard all lines and fallback to objdump.  This is done on every
	 * failure, including a capstone_init() failure, because the header
	 * line is queued before the capstone handle is opened.
	 */
	list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
		list_del_init(&dl->al.node);
		disasm_line__free(dl);
	}
	count = -1;
	goto out;
#else
	return -1;
#endif
}
341 
/*
 * Queue the raw instruction words of @sym on the symbol's annotation
 * source list: first a "<addr> <name>:" header line, then one line per
 * 32-bit word printed in hex.  No real disassembly is done yet (see the
 * TODO below).
 *
 * The code bytes are read from @filename at the file offset that
 * find_file_offset() computes for the symbol start address.
 *
 * Returns 0 on success and -1 on failure (also when an explicit objdump
 * path is configured or perf is built without capstone).
 *
 * NOTE(review): capstone_init() as visible in this file has no powerpc
 * branch and fails for machines it does not list - confirm this function
 * can get past that call on powerpc.
 */
int symbol__disassemble_capstone_powerpc(const char *filename __maybe_unused,
					 struct symbol *sym __maybe_unused,
					 struct annotate_args *args __maybe_unused)
{
#ifdef HAVE_LIBCAPSTONE_SUPPORT
	struct annotation *notes = symbol__annotation(sym);
	struct map *map = args->ms.map;
	struct dso *dso = map__dso(map);
	struct nscookie nsc;
	u64 start = map__rip_2objdump(map, sym->start);
	u64 end = map__rip_2objdump(map, sym->end);
	u64 len = end - start;
	u64 offset;
	int i, fd, count;
	ssize_t nread;
	bool is_64bit = false;
	bool needs_cs_close = false;
	u8 *buf = NULL;
	struct find_file_offset_data data = {
		.ip = start,
	};
	csh handle;
	char disasm_buf[512];
	struct disasm_line *dl;
	u32 *line;
	bool disassembler_style = false;

	/* An explicit objdump path means the user wants objdump. */
	if (args->options->objdump_path)
		return -1;

	/* Open the file from within the mount namespace of the dso. */
	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
	fd = open(filename, O_RDONLY);
	nsinfo__mountns_exit(&nsc);
	if (fd < 0)
		return -1;

	/* A zero return means no range matched, so data.offset is not set. */
	if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
			    &is_64bit) == 0)
		goto err;

	/* AT&T syntax is used unless another style was requested. */
	if (!args->options->disassembler_style ||
	    !strcmp(args->options->disassembler_style, "att"))
		disassembler_style = true;

	if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
		goto err;

	needs_cs_close = true;

	buf = malloc(len);
	if (buf == NULL)
		goto err;

	/* Keep the full-width result; an int would truncate large lengths. */
	nread = pread(fd, buf, len, data.offset);
	close(fd);
	fd = -1;

	if (nread < 0 || (u64)nread != len)
		goto err;

	line = (u32 *)buf;

	/* add the function address and name */
	scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
		  start, sym->name);

	args->offset = -1;
	args->line = disasm_buf;
	args->line_nr = 0;
	args->fileloc = NULL;
	args->ms.sym = sym;

	dl = disasm_line__new(args);
	if (dl == NULL)
		goto err;

	annotation_line__add(&dl->al, &notes->src->source);

	/*
	 * TODO: enable disassm for powerpc
	 * count = cs_disasm(handle, buf, len, start, len, &insn);
	 *
	 * For now, only binary code is saved in disassembled line
	 * to be used in "type" and "typeoff" sort keys. Each raw code
	 * is 32 bit instruction. So use "len/4" to get the number of
	 * entries.
	 */
	count = len/4;

	for (i = 0, offset = 0; i < count; i++) {
		args->offset = offset;
		/* args->line still points at disasm_buf; write it bounded. */
		scnprintf(disasm_buf, sizeof(disasm_buf), "%x", line[i]);

		dl = disasm_line__new(args);
		if (dl == NULL)
			break;

		annotation_line__add(&dl->al, &notes->src->source);

		offset += 4;
	}

	/* It failed in the middle */
	if (offset != len) {
		struct list_head *list = &notes->src->source;

		/* Discard all lines and fallback to objdump */
		while (!list_empty(list)) {
			dl = list_first_entry(list, struct disasm_line, al.node);

			list_del_init(&dl->al.node);
			disasm_line__free(dl);
		}
		count = -1;
	}

out:
	if (needs_cs_close)
		cs_close(&handle);
	free(buf);
	return count < 0 ? count : 0;

err:
	if (fd >= 0)
		close(fd);
	count = -1;
	goto out;
#else
	return -1;
#endif
}
472