xref: /linux/tools/perf/util/capstone.c (revision 9518e10c2b399cb97eb527f2a67a8d97f11c1910)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "capstone.h"
3 #include "annotate.h"
4 #include "addr_location.h"
5 #include "debug.h"
6 #include "disasm.h"
7 #include "dso.h"
8 #include "machine.h"
9 #include "map.h"
10 #include "namespaces.h"
11 #include "print_insn.h"
12 #include "symbol.h"
13 #include "thread.h"
14 #include <fcntl.h>
15 #include <string.h>
16 
17 #ifdef HAVE_LIBCAPSTONE_SUPPORT
18 #include <capstone/capstone.h>
19 #endif
20 
21 #ifdef HAVE_LIBCAPSTONE_SUPPORT
22 static int capstone_init(struct machine *machine, csh *cs_handle, bool is64,
23 			 bool disassembler_style)
24 {
25 	cs_arch arch;
26 	cs_mode mode;
27 
28 	if (machine__is(machine, "x86_64") && is64) {
29 		arch = CS_ARCH_X86;
30 		mode = CS_MODE_64;
31 	} else if (machine__normalized_is(machine, "x86")) {
32 		arch = CS_ARCH_X86;
33 		mode = CS_MODE_32;
34 	} else if (machine__normalized_is(machine, "arm64")) {
35 		arch = CS_ARCH_ARM64;
36 		mode = CS_MODE_ARM;
37 	} else if (machine__normalized_is(machine, "arm")) {
38 		arch = CS_ARCH_ARM;
39 		mode = CS_MODE_ARM + CS_MODE_V8;
40 	} else if (machine__normalized_is(machine, "s390")) {
41 		arch = CS_ARCH_SYSZ;
42 		mode = CS_MODE_BIG_ENDIAN;
43 	} else {
44 		return -1;
45 	}
46 
47 	if (cs_open(arch, mode, cs_handle) != CS_ERR_OK) {
48 		pr_warning_once("cs_open failed\n");
49 		return -1;
50 	}
51 
52 	if (machine__normalized_is(machine, "x86")) {
53 		/*
54 		 * In case of using capstone_init while symbol__disassemble
55 		 * setting CS_OPT_SYNTAX_ATT depends if disassembler_style opts
56 		 * is set via annotation args
57 		 */
58 		if (disassembler_style)
59 			cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
60 		/*
61 		 * Resolving address operands to symbols is implemented
62 		 * on x86 by investigating instruction details.
63 		 */
64 		cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON);
65 	}
66 
67 	return 0;
68 }
69 #endif
70 
71 #ifdef HAVE_LIBCAPSTONE_SUPPORT
72 static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn,
73 			     int print_opts, FILE *fp)
74 {
75 	struct addr_location al;
76 	size_t printed = 0;
77 
78 	if (insn->detail && insn->detail->x86.op_count == 1) {
79 		cs_x86_op *op = &insn->detail->x86.operands[0];
80 
81 		addr_location__init(&al);
82 		if (op->type == X86_OP_IMM &&
83 		    thread__find_symbol(thread, cpumode, op->imm, &al)) {
84 			printed += fprintf(fp, "%s ", insn[0].mnemonic);
85 			printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
86 			if (print_opts & PRINT_INSN_IMM_HEX)
87 				printed += fprintf(fp, " [%#" PRIx64 "]", op->imm);
88 			addr_location__exit(&al);
89 			return printed;
90 		}
91 		addr_location__exit(&al);
92 	}
93 
94 	printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
95 	return printed;
96 }
97 #endif
98 
99 
100 ssize_t capstone__fprintf_insn_asm(struct machine *machine __maybe_unused,
101 				   struct thread *thread __maybe_unused,
102 				   u8 cpumode __maybe_unused, bool is64bit __maybe_unused,
103 				   const uint8_t *code __maybe_unused,
104 				   size_t code_size __maybe_unused,
105 				   uint64_t ip __maybe_unused, int *lenp __maybe_unused,
106 				   int print_opts __maybe_unused, FILE *fp __maybe_unused)
107 {
108 #ifdef HAVE_LIBCAPSTONE_SUPPORT
109 	size_t printed;
110 	cs_insn *insn;
111 	csh cs_handle;
112 	size_t count;
113 	int ret;
114 
115 	/* TODO: Try to initiate capstone only once but need a proper place. */
116 	ret = capstone_init(machine, &cs_handle, is64bit, true);
117 	if (ret < 0)
118 		return ret;
119 
120 	count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn);
121 	if (count > 0) {
122 		if (machine__normalized_is(machine, "x86"))
123 			printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp);
124 		else
125 			printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
126 		if (lenp)
127 			*lenp = insn->size;
128 		cs_free(insn, count);
129 	} else {
130 		printed = -1;
131 	}
132 
133 	cs_close(&cs_handle);
134 	return printed;
135 #else
136 	return -1;
137 #endif
138 }
139 
140 #ifdef HAVE_LIBCAPSTONE_SUPPORT
141 static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
142 				  struct annotate_args *args, u64 addr)
143 {
144 	int i;
145 	struct map *map = args->ms.map;
146 	struct symbol *sym;
147 
148 	/* TODO: support more architectures */
149 	if (!arch__is(args->arch, "x86"))
150 		return;
151 
152 	if (insn->detail == NULL)
153 		return;
154 
155 	for (i = 0; i < insn->detail->x86.op_count; i++) {
156 		cs_x86_op *op = &insn->detail->x86.operands[i];
157 		u64 orig_addr;
158 
159 		if (op->type != X86_OP_MEM)
160 			continue;
161 
162 		/* only print RIP-based global symbols for now */
163 		if (op->mem.base != X86_REG_RIP)
164 			continue;
165 
166 		/* get the target address */
167 		orig_addr = addr + insn->size + op->mem.disp;
168 		addr = map__objdump_2mem(map, orig_addr);
169 
170 		if (dso__kernel(map__dso(map))) {
171 			/*
172 			 * The kernel maps can be split into sections, let's
173 			 * find the map first and the search the symbol.
174 			 */
175 			map = maps__find(map__kmaps(map), addr);
176 			if (map == NULL)
177 				continue;
178 		}
179 
180 		/* convert it to map-relative address for search */
181 		addr = map__map_ip(map, addr);
182 
183 		sym = map__find_symbol(map, addr);
184 		if (sym == NULL)
185 			continue;
186 
187 		if (addr == sym->start) {
188 			scnprintf(buf, len, "\t# %"PRIx64" <%s>",
189 				  orig_addr, sym->name);
190 		} else {
191 			scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">",
192 				  orig_addr, sym->name, addr - sym->start);
193 		}
194 		break;
195 	}
196 }
197 #endif
198 
199 #ifdef HAVE_LIBCAPSTONE_SUPPORT
200 struct find_file_offset_data {
201 	u64 ip;
202 	u64 offset;
203 };
204 
205 /* This will be called for each PHDR in an ELF binary */
206 static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
207 {
208 	struct find_file_offset_data *data = arg;
209 
210 	if (start <= data->ip && data->ip < start + len) {
211 		data->offset = pgoff + data->ip - start;
212 		return 1;
213 	}
214 	return 0;
215 }
216 #endif
217 
218 int symbol__disassemble_capstone(const char *filename __maybe_unused,
219 				 struct symbol *sym __maybe_unused,
220 				 struct annotate_args *args __maybe_unused)
221 {
222 #ifdef HAVE_LIBCAPSTONE_SUPPORT
223 	struct annotation *notes = symbol__annotation(sym);
224 	struct map *map = args->ms.map;
225 	struct dso *dso = map__dso(map);
226 	u64 start = map__rip_2objdump(map, sym->start);
227 	u64 offset;
228 	int i, count, free_count;
229 	bool is_64bit = false;
230 	bool needs_cs_close = false;
231 	/* Malloc-ed buffer containing instructions read from disk. */
232 	u8 *code_buf = NULL;
233 	/* Pointer to code to be disassembled. */
234 	const u8 *buf;
235 	u64 buf_len;
236 	csh handle;
237 	cs_insn *insn = NULL;
238 	char disasm_buf[512];
239 	struct disasm_line *dl;
240 	bool disassembler_style = false;
241 
242 	if (args->options->objdump_path)
243 		return -1;
244 
245 	buf = dso__read_symbol(dso, filename, map, sym,
246 			       &code_buf, &buf_len, &is_64bit);
247 	if (buf == NULL)
248 		return -1;
249 
250 	/* add the function address and name */
251 	scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
252 		  start, sym->name);
253 
254 	args->offset = -1;
255 	args->line = disasm_buf;
256 	args->line_nr = 0;
257 	args->fileloc = NULL;
258 	args->ms.sym = sym;
259 
260 	dl = disasm_line__new(args);
261 	if (dl == NULL)
262 		goto err;
263 
264 	annotation_line__add(&dl->al, &notes->src->source);
265 
266 	if (!args->options->disassembler_style ||
267 	    !strcmp(args->options->disassembler_style, "att"))
268 		disassembler_style = true;
269 
270 	if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
271 		goto err;
272 
273 	needs_cs_close = true;
274 
275 	free_count = count = cs_disasm(handle, buf, buf_len, start, buf_len, &insn);
276 	for (i = 0, offset = 0; i < count; i++) {
277 		int printed;
278 
279 		printed = scnprintf(disasm_buf, sizeof(disasm_buf),
280 				    "       %-7s %s",
281 				    insn[i].mnemonic, insn[i].op_str);
282 		print_capstone_detail(&insn[i], disasm_buf + printed,
283 				      sizeof(disasm_buf) - printed, args,
284 				      start + offset);
285 
286 		args->offset = offset;
287 		args->line = disasm_buf;
288 
289 		dl = disasm_line__new(args);
290 		if (dl == NULL)
291 			goto err;
292 
293 		annotation_line__add(&dl->al, &notes->src->source);
294 
295 		offset += insn[i].size;
296 	}
297 
298 	/* It failed in the middle: probably due to unknown instructions */
299 	if (offset != buf_len) {
300 		struct list_head *list = &notes->src->source;
301 
302 		/* Discard all lines and fallback to objdump */
303 		while (!list_empty(list)) {
304 			dl = list_first_entry(list, struct disasm_line, al.node);
305 
306 			list_del_init(&dl->al.node);
307 			disasm_line__free(dl);
308 		}
309 		count = -1;
310 	}
311 
312 out:
313 	if (needs_cs_close) {
314 		cs_close(&handle);
315 		if (free_count > 0)
316 			cs_free(insn, free_count);
317 	}
318 	free(code_buf);
319 	return count < 0 ? count : 0;
320 
321 err:
322 	if (needs_cs_close) {
323 		struct disasm_line *tmp;
324 
325 		/*
326 		 * It probably failed in the middle of the above loop.
327 		 * Release any resources it might add.
328 		 */
329 		list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
330 			list_del(&dl->al.node);
331 			disasm_line__free(dl);
332 		}
333 	}
334 	count = -1;
335 	goto out;
336 #else
337 	return -1;
338 #endif
339 }
340 
341 int symbol__disassemble_capstone_powerpc(const char *filename __maybe_unused,
342 					 struct symbol *sym __maybe_unused,
343 					 struct annotate_args *args __maybe_unused)
344 {
345 #ifdef HAVE_LIBCAPSTONE_SUPPORT
346 	struct annotation *notes = symbol__annotation(sym);
347 	struct map *map = args->ms.map;
348 	struct dso *dso = map__dso(map);
349 	struct nscookie nsc;
350 	u64 start = map__rip_2objdump(map, sym->start);
351 	u64 end = map__rip_2objdump(map, sym->end);
352 	u64 len = end - start;
353 	u64 offset;
354 	int i, fd, count;
355 	bool is_64bit = false;
356 	bool needs_cs_close = false;
357 	u8 *buf = NULL;
358 	struct find_file_offset_data data = {
359 		.ip = start,
360 	};
361 	csh handle;
362 	char disasm_buf[512];
363 	struct disasm_line *dl;
364 	u32 *line;
365 	bool disassembler_style = false;
366 
367 	if (args->options->objdump_path)
368 		return -1;
369 
370 	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
371 	fd = open(filename, O_RDONLY);
372 	nsinfo__mountns_exit(&nsc);
373 	if (fd < 0)
374 		return -1;
375 
376 	if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
377 			    &is_64bit) == 0)
378 		goto err;
379 
380 	if (!args->options->disassembler_style ||
381 	    !strcmp(args->options->disassembler_style, "att"))
382 		disassembler_style = true;
383 
384 	if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
385 		goto err;
386 
387 	needs_cs_close = true;
388 
389 	buf = malloc(len);
390 	if (buf == NULL)
391 		goto err;
392 
393 	count = pread(fd, buf, len, data.offset);
394 	close(fd);
395 	fd = -1;
396 
397 	if ((u64)count != len)
398 		goto err;
399 
400 	line = (u32 *)buf;
401 
402 	/* add the function address and name */
403 	scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
404 		  start, sym->name);
405 
406 	args->offset = -1;
407 	args->line = disasm_buf;
408 	args->line_nr = 0;
409 	args->fileloc = NULL;
410 	args->ms.sym = sym;
411 
412 	dl = disasm_line__new(args);
413 	if (dl == NULL)
414 		goto err;
415 
416 	annotation_line__add(&dl->al, &notes->src->source);
417 
418 	/*
419 	 * TODO: enable disassm for powerpc
420 	 * count = cs_disasm(handle, buf, len, start, len, &insn);
421 	 *
422 	 * For now, only binary code is saved in disassembled line
423 	 * to be used in "type" and "typeoff" sort keys. Each raw code
424 	 * is 32 bit instruction. So use "len/4" to get the number of
425 	 * entries.
426 	 */
427 	count = len/4;
428 
429 	for (i = 0, offset = 0; i < count; i++) {
430 		args->offset = offset;
431 		sprintf(args->line, "%x", line[i]);
432 
433 		dl = disasm_line__new(args);
434 		if (dl == NULL)
435 			break;
436 
437 		annotation_line__add(&dl->al, &notes->src->source);
438 
439 		offset += 4;
440 	}
441 
442 	/* It failed in the middle */
443 	if (offset != len) {
444 		struct list_head *list = &notes->src->source;
445 
446 		/* Discard all lines and fallback to objdump */
447 		while (!list_empty(list)) {
448 			dl = list_first_entry(list, struct disasm_line, al.node);
449 
450 			list_del_init(&dl->al.node);
451 			disasm_line__free(dl);
452 		}
453 		count = -1;
454 	}
455 
456 out:
457 	if (needs_cs_close)
458 		cs_close(&handle);
459 	free(buf);
460 	return count < 0 ? count : 0;
461 
462 err:
463 	if (fd >= 0)
464 		close(fd);
465 	count = -1;
466 	goto out;
467 #else
468 	return -1;
469 #endif
470 }
471