1 // SPDX-License-Identifier: GPL-2.0 2 #include "capstone.h" 3 4 #include <errno.h> 5 #include <inttypes.h> 6 #include <string.h> 7 8 #include <dlfcn.h> 9 #include <elf.h> 10 #include <fcntl.h> 11 #include <linux/ctype.h> 12 13 #include <capstone/capstone.h> 14 15 #include "addr_location.h" 16 #include "annotate.h" 17 #include "debug.h" 18 #include "disasm.h" 19 #include "dso.h" 20 #include "machine.h" 21 #include "map.h" 22 #include "namespaces.h" 23 #include "print_insn.h" 24 #include "symbol.h" 25 #include "thread.h" 26 27 #ifdef LIBCAPSTONE_DLOPEN 28 static void *perf_cs_dll_handle(void) 29 { 30 static bool dll_handle_init; 31 static void *dll_handle; 32 33 if (!dll_handle_init) { 34 dll_handle_init = true; 35 dll_handle = dlopen("libcapstone.so", RTLD_LAZY); 36 if (!dll_handle) 37 pr_debug("dlopen failed for libcapstone.so\n"); 38 } 39 return dll_handle; 40 } 41 #endif 42 43 static enum cs_err perf_cs_open(enum cs_arch arch, enum cs_mode mode, csh *handle) 44 { 45 #ifndef LIBCAPSTONE_DLOPEN 46 return cs_open(arch, mode, handle); 47 #else 48 static bool fn_init; 49 static enum cs_err (*fn)(enum cs_arch arch, enum cs_mode mode, csh *handle); 50 51 if (!fn_init) { 52 fn = dlsym(perf_cs_dll_handle(), "cs_open"); 53 if (!fn) 54 pr_debug("dlsym failed for cs_open\n"); 55 fn_init = true; 56 } 57 if (!fn) 58 return CS_ERR_HANDLE; 59 return fn(arch, mode, handle); 60 #endif 61 } 62 63 static enum cs_err perf_cs_option(csh handle, enum cs_opt_type type, size_t value) 64 { 65 #ifndef LIBCAPSTONE_DLOPEN 66 return cs_option(handle, type, value); 67 #else 68 static bool fn_init; 69 static enum cs_err (*fn)(csh handle, enum cs_opt_type type, size_t value); 70 71 if (!fn_init) { 72 fn = dlsym(perf_cs_dll_handle(), "cs_option"); 73 if (!fn) 74 pr_debug("dlsym failed for cs_option\n"); 75 fn_init = true; 76 } 77 if (!fn) 78 return CS_ERR_HANDLE; 79 return fn(handle, type, value); 80 #endif 81 } 82 83 static size_t perf_cs_disasm(csh handle, const uint8_t *code, size_t code_size, 84 uint64_t address, size_t count, struct cs_insn **insn) 85 { 86 #ifndef LIBCAPSTONE_DLOPEN 87 return cs_disasm(handle, code, code_size, address, count, insn); 88 #else 89 static bool fn_init; 90 static enum cs_err (*fn)(csh handle, const uint8_t *code, size_t code_size, 91 uint64_t address, size_t count, struct cs_insn **insn); 92 93 if (!fn_init) { 94 fn = dlsym(perf_cs_dll_handle(), "cs_disasm"); 95 if (!fn) 96 pr_debug("dlsym failed for cs_disasm\n"); 97 fn_init = true; 98 } 99 if (!fn) 100 return CS_ERR_HANDLE; 101 return fn(handle, code, code_size, address, count, insn); 102 #endif 103 } 104 105 static void perf_cs_free(struct cs_insn *insn, size_t count) 106 { 107 #ifndef LIBCAPSTONE_DLOPEN 108 cs_free(insn, count); 109 #else 110 static bool fn_init; 111 static void (*fn)(struct cs_insn *insn, size_t count); 112 113 if (!fn_init) { 114 fn = dlsym(perf_cs_dll_handle(), "cs_free"); 115 if (!fn) 116 pr_debug("dlsym failed for cs_free\n"); 117 fn_init = true; 118 } 119 if (!fn) 120 return; 121 fn(insn, count); 122 #endif 123 } 124 125 static enum cs_err perf_cs_close(csh *handle) 126 { 127 #ifndef LIBCAPSTONE_DLOPEN 128 return cs_close(handle); 129 #else 130 static bool fn_init; 131 static enum cs_err (*fn)(csh *handle); 132 133 if (!fn_init) { 134 fn = dlsym(perf_cs_dll_handle(), "cs_close"); 135 if (!fn) 136 pr_debug("dlsym failed for cs_close\n"); 137 fn_init = true; 138 } 139 if (!fn) 140 return CS_ERR_HANDLE; 141 return fn(handle); 142 #endif 143 } 144 145 static bool e_machine_to_capstone(uint16_t e_machine, bool is64, bool is_big_endian, 146 enum cs_arch *arch, enum cs_mode *mode) 147 { 148 *mode = is_big_endian ? CS_MODE_BIG_ENDIAN : CS_MODE_LITTLE_ENDIAN; 149 150 switch (e_machine) { 151 case EM_X86_64: 152 case EM_386: 153 *arch = CS_ARCH_X86; 154 *mode |= is64 ? CS_MODE_64 : CS_MODE_32; 155 return true; 156 case EM_AARCH64: 157 *arch = CS_ARCH_ARM64; 158 *mode |= CS_MODE_ARM; 159 return true; 160 case EM_ARM: 161 *arch = CS_ARCH_ARM; 162 *mode |= CS_MODE_ARM | CS_MODE_V8; 163 return true; 164 case EM_S390: 165 *arch = CS_ARCH_SYSZ; 166 return true; 167 case EM_MIPS: 168 *arch = CS_ARCH_MIPS; 169 *mode |= is64 ? CS_MODE_MIPS64 : CS_MODE_MIPS32; 170 return true; 171 case EM_PPC: 172 *arch = CS_ARCH_PPC; 173 return true; 174 case EM_PPC64: 175 *arch = CS_ARCH_PPC; 176 *mode |= CS_MODE_64; 177 return true; 178 case EM_SPARC: 179 *arch = CS_ARCH_SPARC; 180 return true; 181 case EM_SPARCV9: 182 *arch = CS_ARCH_SPARC; 183 *mode |= CS_MODE_V9; 184 return true; 185 case EM_RISCV: 186 *arch = CS_ARCH_RISCV; 187 *mode |= (is64 ? CS_MODE_RISCV64 : CS_MODE_RISCV32) | CS_MODE_RISCVC; 188 return true; 189 default: 190 return false; 191 } 192 } 193 194 static int capstone_init(uint16_t e_machine, csh *cs_handle, bool is64, bool is_big_endian, 195 bool disassembler_style) 196 { 197 enum cs_arch arch; 198 enum cs_mode mode; 199 200 if (!e_machine_to_capstone(e_machine, is64, is_big_endian, &arch, &mode)) 201 return -1; 202 203 if (perf_cs_open(arch, mode, cs_handle) != CS_ERR_OK) { 204 pr_warning_once("cs_open failed\n"); 205 return -1; 206 } 207 208 if (arch == CS_ARCH_X86) { 209 /* 210 * In case of using capstone_init while symbol__disassemble 211 * setting CS_OPT_SYNTAX_ATT depends if disassembler_style opts 212 * is set via annotation args 213 */ 214 if (disassembler_style) 215 perf_cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); 216 /* 217 * Resolving address operands to symbols is implemented 218 * on x86 by investigating instruction details. 219 */ 220 perf_cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON); 221 } 222 223 return 0; 224 } 225 226 static size_t print_insn_x86(struct thread *thread, u8 cpumode, struct cs_insn *insn, 227 int print_opts, FILE *fp) 228 { 229 struct addr_location al; 230 size_t printed = 0; 231 232 if (insn->detail && insn->detail->x86.op_count == 1) { 233 struct cs_x86_op *op = &insn->detail->x86.operands[0]; 234 235 addr_location__init(&al); 236 if (op->type == X86_OP_IMM && 237 thread__find_symbol(thread, cpumode, op->imm, &al)) { 238 printed += fprintf(fp, "%s ", insn[0].mnemonic); 239 printed += symbol__fprintf_symname_offs(al.sym, &al, fp); 240 if (print_opts & PRINT_INSN_IMM_HEX) 241 printed += fprintf(fp, " [%#" PRIx64 "]", op->imm); 242 addr_location__exit(&al); 243 return printed; 244 } 245 addr_location__exit(&al); 246 } 247 248 printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); 249 return printed; 250 } 251 252 ssize_t capstone__fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, 253 bool is64bit, const uint8_t *code, size_t code_size, uint64_t ip, 254 int *lenp, int print_opts, FILE *fp) 255 { 256 size_t printed; 257 struct cs_insn *insn; 258 csh cs_handle; 259 size_t count; 260 bool is_big_endian = false; 261 uint16_t e_machine = thread__e_machine_endian(thread, machine, 262 /*e_flags=*/NULL, &is_big_endian); 263 int ret; 264 265 /* TODO: Try to initiate capstone only once but need a proper place. */ 266 ret = capstone_init(e_machine, &cs_handle, is64bit, is_big_endian, 267 /*disassembler_style=*/true); 268 if (ret < 0) 269 return ret; 270 271 count = perf_cs_disasm(cs_handle, code, code_size, ip, 1, &insn); 272 if (count > 0) { 273 if (e_machine == EM_X86_64 || e_machine == EM_386) 274 printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp); 275 else 276 printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); 277 if (lenp) 278 *lenp = insn->size; 279 perf_cs_free(insn, count); 280 } else { 281 printed = -1; 282 } 283 284 perf_cs_close(&cs_handle); 285 return printed; 286 } 287 288 static void print_capstone_detail(struct cs_insn *insn, char *buf, size_t len, 289 struct annotate_args *args, u64 addr) 290 { 291 int i; 292 struct map *map = args->ms->map; 293 struct symbol *sym; 294 295 /* TODO: support more architectures */ 296 if (!arch__is_x86(args->arch)) 297 return; 298 299 if (insn->detail == NULL) 300 return; 301 302 for (i = 0; i < insn->detail->x86.op_count; i++) { 303 struct cs_x86_op *op = &insn->detail->x86.operands[i]; 304 u64 orig_addr; 305 306 if (op->type != X86_OP_MEM) 307 continue; 308 309 /* only print RIP-based global symbols for now */ 310 if (op->mem.base != X86_REG_RIP) 311 continue; 312 313 /* get the target address */ 314 orig_addr = addr + insn->size + op->mem.disp; 315 addr = map__objdump_2mem(map, orig_addr); 316 317 if (dso__kernel(map__dso(map))) { 318 /* 319 * The kernel maps can be split into sections, let's 320 * find the map first and the search the symbol. 321 */ 322 map = maps__find(map__kmaps(map), addr); 323 if (map == NULL) 324 continue; 325 } 326 327 /* convert it to map-relative address for search */ 328 addr = map__map_ip(map, addr); 329 330 sym = map__find_symbol(map, addr); 331 if (sym == NULL) 332 continue; 333 334 if (addr == sym->start) { 335 scnprintf(buf, len, "\t# %"PRIx64" <%s>", 336 orig_addr, sym->name); 337 } else { 338 scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">", 339 orig_addr, sym->name, addr - sym->start); 340 } 341 break; 342 } 343 } 344 345 struct find_file_offset_data { 346 u64 ip; 347 u64 offset; 348 }; 349 350 /* This will be called for each PHDR in an ELF binary */ 351 static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg) 352 { 353 struct find_file_offset_data *data = arg; 354 355 if (start <= data->ip && data->ip < start + len) { 356 data->offset = pgoff + data->ip - start; 357 return 1; 358 } 359 return 0; 360 } 361 362 int symbol__disassemble_capstone(const char *filename, struct symbol *sym, 363 struct annotate_args *args) 364 { 365 struct annotation *notes = symbol__annotation(sym); 366 struct map *map = args->ms->map; 367 struct dso *dso = map__dso(map); 368 u64 start = map__rip_2objdump(map, sym->start); 369 u64 offset; 370 int i, count, free_count; 371 bool is_64bit = false; 372 bool needs_cs_close = false; 373 /* Malloc-ed buffer containing instructions read from disk. */ 374 u8 *code_buf = NULL; 375 /* Pointer to code to be disassembled. */ 376 const u8 *buf; 377 u64 buf_len; 378 csh handle; 379 struct cs_insn *insn = NULL; 380 char disasm_buf[512]; 381 struct disasm_line *dl; 382 bool disassembler_style = false; 383 uint16_t e_machine; 384 bool is_big_endian = false; 385 386 if (args->options->objdump_path) 387 return -1; 388 389 buf = dso__read_symbol(dso, filename, map, sym, 390 &code_buf, &buf_len, &is_64bit); 391 if (buf == NULL) 392 return errno; 393 394 /* add the function address and name */ 395 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 396 start, sym->name); 397 398 args->offset = -1; 399 args->line = disasm_buf; 400 args->line_nr = 0; 401 args->fileloc = NULL; 402 args->ms->sym = sym; 403 404 dl = disasm_line__new(args); 405 if (dl == NULL) 406 goto err; 407 408 annotation_line__add(&dl->al, ¬es->src->source); 409 410 if (!args->options->disassembler_style || 411 !strcmp(args->options->disassembler_style, "att")) 412 disassembler_style = true; 413 414 e_machine = thread__e_machine_endian(args->ms->thread, 415 /*machine=*/NULL, 416 /*e_flags=*/NULL, &is_big_endian); 417 if (capstone_init(e_machine, &handle, is_64bit, is_big_endian, disassembler_style) < 0) 418 goto err; 419 420 needs_cs_close = true; 421 422 free_count = count = perf_cs_disasm(handle, buf, buf_len, start, buf_len, &insn); 423 for (i = 0, offset = 0; i < count; i++) { 424 int printed; 425 426 printed = scnprintf(disasm_buf, sizeof(disasm_buf), 427 " %-7s %s", 428 insn[i].mnemonic, insn[i].op_str); 429 print_capstone_detail(&insn[i], disasm_buf + printed, 430 sizeof(disasm_buf) - printed, args, 431 start + offset); 432 433 args->offset = offset; 434 args->line = disasm_buf; 435 436 dl = disasm_line__new(args); 437 if (dl == NULL) 438 goto err; 439 440 annotation_line__add(&dl->al, ¬es->src->source); 441 442 offset += insn[i].size; 443 } 444 445 /* It failed in the middle: probably due to unknown instructions */ 446 if (offset != buf_len) { 447 struct list_head *list = ¬es->src->source; 448 449 /* Discard all lines and fallback to objdump */ 450 while (!list_empty(list)) { 451 dl = list_first_entry(list, struct disasm_line, al.node); 452 453 list_del_init(&dl->al.node); 454 disasm_line__free(dl); 455 } 456 count = -1; 457 } 458 459 out: 460 if (needs_cs_close) { 461 perf_cs_close(&handle); 462 if (free_count > 0) 463 perf_cs_free(insn, free_count); 464 } 465 free(code_buf); 466 return count < 0 ? count : 0; 467 468 err: 469 if (needs_cs_close) { 470 struct disasm_line *tmp; 471 472 /* 473 * It probably failed in the middle of the above loop. 474 * Release any resources it might add. 475 */ 476 list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { 477 list_del(&dl->al.node); 478 disasm_line__free(dl); 479 } 480 } 481 count = -1; 482 goto out; 483 } 484 485 int symbol__disassemble_capstone_powerpc(const char *filename __maybe_unused, 486 struct symbol *sym __maybe_unused, 487 struct annotate_args *args __maybe_unused) 488 { 489 struct annotation *notes = symbol__annotation(sym); 490 struct map *map = args->ms->map; 491 struct dso *dso = map__dso(map); 492 struct nscookie nsc; 493 u64 start = map__rip_2objdump(map, sym->start); 494 u64 end = map__rip_2objdump(map, sym->end); 495 u64 len = end - start; 496 u64 offset; 497 int i, fd, count; 498 bool is_64bit = false; 499 bool needs_cs_close = false; 500 u8 *buf = NULL; 501 struct find_file_offset_data data = { 502 .ip = start, 503 }; 504 csh handle; 505 char disasm_buf[512]; 506 struct disasm_line *dl; 507 u32 *line; 508 bool disassembler_style = false; 509 uint16_t e_machine; 510 bool is_big_endian = false; 511 512 if (args->options->objdump_path) 513 return -1; 514 515 nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); 516 fd = open(filename, O_RDONLY); 517 nsinfo__mountns_exit(&nsc); 518 if (fd < 0) 519 return -1; 520 521 if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, 522 &is_64bit) == 0) 523 goto err; 524 525 if (!args->options->disassembler_style || 526 !strcmp(args->options->disassembler_style, "att")) 527 disassembler_style = true; 528 529 e_machine = thread__e_machine_endian(args->ms->thread, 530 /*machine=*/NULL, 531 /*e_flags=*/NULL, &is_big_endian); 532 if (capstone_init(e_machine, &handle, is_64bit, is_big_endian, disassembler_style) < 0) 533 goto err; 534 535 needs_cs_close = true; 536 537 buf = malloc(len); 538 if (buf == NULL) 539 goto err; 540 541 count = pread(fd, buf, len, data.offset); 542 close(fd); 543 fd = -1; 544 545 if ((u64)count != len) 546 goto err; 547 548 line = (u32 *)buf; 549 550 /* add the function address and name */ 551 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 552 start, sym->name); 553 554 args->offset = -1; 555 args->line = disasm_buf; 556 args->line_nr = 0; 557 args->fileloc = NULL; 558 args->ms->sym = sym; 559 560 dl = disasm_line__new(args); 561 if (dl == NULL) 562 goto err; 563 564 annotation_line__add(&dl->al, ¬es->src->source); 565 566 /* 567 * TODO: enable disassm for powerpc 568 * count = cs_disasm(handle, buf, len, start, len, &insn); 569 * 570 * For now, only binary code is saved in disassembled line 571 * to be used in "type" and "typeoff" sort keys. Each raw code 572 * is 32 bit instruction. So use "len/4" to get the number of 573 * entries. 574 */ 575 count = len/4; 576 577 for (i = 0, offset = 0; i < count; i++) { 578 args->offset = offset; 579 sprintf(args->line, "%x", line[i]); 580 581 dl = disasm_line__new(args); 582 if (dl == NULL) 583 break; 584 585 annotation_line__add(&dl->al, ¬es->src->source); 586 587 offset += 4; 588 } 589 590 /* It failed in the middle */ 591 if (offset != len) { 592 struct list_head *list = ¬es->src->source; 593 594 /* Discard all lines and fallback to objdump */ 595 while (!list_empty(list)) { 596 dl = list_first_entry(list, struct disasm_line, al.node); 597 598 list_del_init(&dl->al.node); 599 disasm_line__free(dl); 600 } 601 count = -1; 602 } 603 604 out: 605 if (needs_cs_close) 606 perf_cs_close(&handle); 607 free(buf); 608 return count < 0 ? count : 0; 609 610 err: 611 if (fd >= 0) 612 close(fd); 613 count = -1; 614 goto out; 615 } 616