1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <ctype.h> 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <inttypes.h> 6 #include <libgen.h> 7 #include <regex.h> 8 #include <stdlib.h> 9 #include <unistd.h> 10 11 #include <linux/string.h> 12 #include <subcmd/run-command.h> 13 14 #include "annotate.h" 15 #include "annotate-data.h" 16 #include "build-id.h" 17 #include "debug.h" 18 #include "disasm.h" 19 #include "disasm_bpf.h" 20 #include "dso.h" 21 #include "env.h" 22 #include "evsel.h" 23 #include "map.h" 24 #include "maps.h" 25 #include "namespaces.h" 26 #include "srcline.h" 27 #include "symbol.h" 28 #include "util.h" 29 30 static regex_t file_lineno; 31 32 /* These can be referred from the arch-dependent code */ 33 static struct ins_ops call_ops; 34 static struct ins_ops dec_ops; 35 static struct ins_ops jump_ops; 36 static struct ins_ops mov_ops; 37 static struct ins_ops nop_ops; 38 static struct ins_ops lock_ops; 39 static struct ins_ops ret_ops; 40 static struct ins_ops load_store_ops; 41 static struct ins_ops arithmetic_ops; 42 43 static int jump__scnprintf(struct ins *ins, char *bf, size_t size, 44 struct ins_operands *ops, int max_ins_name); 45 static int call__scnprintf(struct ins *ins, char *bf, size_t size, 46 struct ins_operands *ops, int max_ins_name); 47 48 static void ins__sort(struct arch *arch); 49 static int disasm_line__parse(char *line, const char **namep, char **rawp); 50 static int disasm_line__parse_powerpc(struct disasm_line *dl); 51 52 static __attribute__((constructor)) void symbol__init_regexpr(void) 53 { 54 regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); 55 } 56 57 static int arch__grow_instructions(struct arch *arch) 58 { 59 struct ins *new_instructions; 60 size_t new_nr_allocated; 61 62 if (arch->nr_instructions_allocated == 0 && arch->instructions) 63 goto grow_from_non_allocated_table; 64 65 new_nr_allocated = arch->nr_instructions_allocated + 128; 66 new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins)); 67 if (new_instructions == NULL) 68 return -1; 69 70 out_update_instructions: 71 arch->instructions = new_instructions; 72 arch->nr_instructions_allocated = new_nr_allocated; 73 return 0; 74 75 grow_from_non_allocated_table: 76 new_nr_allocated = arch->nr_instructions + 128; 77 new_instructions = calloc(new_nr_allocated, sizeof(struct ins)); 78 if (new_instructions == NULL) 79 return -1; 80 81 memcpy(new_instructions, arch->instructions, arch->nr_instructions); 82 goto out_update_instructions; 83 } 84 85 static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops) 86 { 87 struct ins *ins; 88 89 if (arch->nr_instructions == arch->nr_instructions_allocated && 90 arch__grow_instructions(arch)) 91 return -1; 92 93 ins = &arch->instructions[arch->nr_instructions]; 94 ins->name = strdup(name); 95 if (!ins->name) 96 return -1; 97 98 ins->ops = ops; 99 arch->nr_instructions++; 100 101 ins__sort(arch); 102 return 0; 103 } 104 105 #include "arch/arc/annotate/instructions.c" 106 #include "arch/arm/annotate/instructions.c" 107 #include "arch/arm64/annotate/instructions.c" 108 #include "arch/csky/annotate/instructions.c" 109 #include "arch/loongarch/annotate/instructions.c" 110 #include "arch/mips/annotate/instructions.c" 111 #include "arch/x86/annotate/instructions.c" 112 #include "arch/powerpc/annotate/instructions.c" 113 #include "arch/riscv64/annotate/instructions.c" 114 #include "arch/s390/annotate/instructions.c" 115 #include "arch/sparc/annotate/instructions.c" 116 117 static struct arch architectures[] = { 118 { 119 .name = "arc", 120 .init = arc__annotate_init, 121 }, 122 { 123 .name = "arm", 124 .init = arm__annotate_init, 125 }, 126 { 127 .name = "arm64", 128 .init = arm64__annotate_init, 129 }, 130 { 131 .name = "csky", 132 .init = csky__annotate_init, 133 }, 134 { 135 .name = "mips", 136 .init = mips__annotate_init, 137 .objdump = { 138 .comment_char = '#', 139 }, 140 }, 141 { 142 .name = "x86", 143 .init = x86__annotate_init, 144 .instructions = x86__instructions, 145 .nr_instructions = ARRAY_SIZE(x86__instructions), 146 .insn_suffix = "bwlq", 147 .objdump = { 148 .comment_char = '#', 149 .register_char = '%', 150 .memory_ref_char = '(', 151 .imm_char = '$', 152 }, 153 #ifdef HAVE_DWARF_SUPPORT 154 .update_insn_state = update_insn_state_x86, 155 #endif 156 }, 157 { 158 .name = "powerpc", 159 .init = powerpc__annotate_init, 160 #ifdef HAVE_DWARF_SUPPORT 161 .update_insn_state = update_insn_state_powerpc, 162 #endif 163 }, 164 { 165 .name = "riscv64", 166 .init = riscv64__annotate_init, 167 }, 168 { 169 .name = "s390", 170 .init = s390__annotate_init, 171 .objdump = { 172 .comment_char = '#', 173 }, 174 }, 175 { 176 .name = "sparc", 177 .init = sparc__annotate_init, 178 .objdump = { 179 .comment_char = '#', 180 }, 181 }, 182 { 183 .name = "loongarch", 184 .init = loongarch__annotate_init, 185 .objdump = { 186 .comment_char = '#', 187 }, 188 }, 189 }; 190 191 static int arch__key_cmp(const void *name, const void *archp) 192 { 193 const struct arch *arch = archp; 194 195 return strcmp(name, arch->name); 196 } 197 198 static int arch__cmp(const void *a, const void *b) 199 { 200 const struct arch *aa = a; 201 const struct arch *ab = b; 202 203 return strcmp(aa->name, ab->name); 204 } 205 206 static void arch__sort(void) 207 { 208 const int nmemb = ARRAY_SIZE(architectures); 209 210 qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); 211 } 212 213 struct arch *arch__find(const char *name) 214 { 215 const int nmemb = ARRAY_SIZE(architectures); 216 static bool sorted; 217 218 if (!sorted) { 219 arch__sort(); 220 sorted = true; 221 } 222 223 return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); 224 } 225 226 bool arch__is(struct arch *arch, const char *name) 227 { 228 return !strcmp(arch->name, name); 229 } 230 231 static void ins_ops__delete(struct ins_operands *ops) 232 { 233 if (ops == NULL) 234 return; 235 zfree(&ops->source.raw); 236 zfree(&ops->source.name); 237 zfree(&ops->target.raw); 238 zfree(&ops->target.name); 239 } 240 241 static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, 242 struct ins_operands *ops, int max_ins_name) 243 { 244 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); 245 } 246 247 int ins__scnprintf(struct ins *ins, char *bf, size_t size, 248 struct ins_operands *ops, int max_ins_name) 249 { 250 if (ins->ops->scnprintf) 251 return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); 252 253 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 254 } 255 256 bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) 257 { 258 if (!arch || !arch->ins_is_fused) 259 return false; 260 261 return arch->ins_is_fused(arch, ins1, ins2); 262 } 263 264 static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 265 struct disasm_line *dl __maybe_unused) 266 { 267 char *endptr, *tok, *name; 268 struct map *map = ms->map; 269 struct addr_map_symbol target = { 270 .ms = { .map = map, }, 271 }; 272 273 ops->target.addr = strtoull(ops->raw, &endptr, 16); 274 275 name = strchr(endptr, '<'); 276 if (name == NULL) 277 goto indirect_call; 278 279 name++; 280 281 if (arch->objdump.skip_functions_char && 282 strchr(name, arch->objdump.skip_functions_char)) 283 return -1; 284 285 tok = strchr(name, '>'); 286 if (tok == NULL) 287 return -1; 288 289 *tok = '\0'; 290 ops->target.name = strdup(name); 291 *tok = '>'; 292 293 if (ops->target.name == NULL) 294 return -1; 295 find_target: 296 target.addr = map__objdump_2mem(map, ops->target.addr); 297 298 if (maps__find_ams(ms->maps, &target) == 0 && 299 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 300 ops->target.sym = target.ms.sym; 301 302 return 0; 303 304 indirect_call: 305 tok = strchr(endptr, '*'); 306 if (tok != NULL) { 307 endptr++; 308 309 /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). 310 * Do not parse such instruction. */ 311 if (strstr(endptr, "(%r") == NULL) 312 ops->target.addr = strtoull(endptr, NULL, 16); 313 } 314 goto find_target; 315 } 316 317 static int call__scnprintf(struct ins *ins, char *bf, size_t size, 318 struct ins_operands *ops, int max_ins_name) 319 { 320 if (ops->target.sym) 321 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 322 323 if (ops->target.addr == 0) 324 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 325 326 if (ops->target.name) 327 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name); 328 329 return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr); 330 } 331 332 static struct ins_ops call_ops = { 333 .parse = call__parse, 334 .scnprintf = call__scnprintf, 335 }; 336 337 bool ins__is_call(const struct ins *ins) 338 { 339 return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops; 340 } 341 342 /* 343 * Prevents from matching commas in the comment section, e.g.: 344 * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast 345 * 346 * and skip comma as part of function arguments, e.g.: 347 * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> 348 */ 349 static inline const char *validate_comma(const char *c, struct ins_operands *ops) 350 { 351 if (ops->jump.raw_comment && c > ops->jump.raw_comment) 352 return NULL; 353 354 if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) 355 return NULL; 356 357 return c; 358 } 359 360 static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 361 struct disasm_line *dl __maybe_unused) 362 { 363 struct map *map = ms->map; 364 struct symbol *sym = ms->sym; 365 struct addr_map_symbol target = { 366 .ms = { .map = map, }, 367 }; 368 const char *c = strchr(ops->raw, ','); 369 u64 start, end; 370 371 ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); 372 ops->jump.raw_func_start = strchr(ops->raw, '<'); 373 374 c = validate_comma(c, ops); 375 376 /* 377 * Examples of lines to parse for the _cpp_lex_token@@Base 378 * function: 379 * 380 * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> 381 * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> 382 * 383 * The first is a jump to an offset inside the same function, 384 * the second is to another function, i.e. that 0xa72 is an 385 * offset in the cpp_named_operator2name@@base function. 386 */ 387 /* 388 * skip over possible up to 2 operands to get to address, e.g.: 389 * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> 390 */ 391 if (c++ != NULL) { 392 ops->target.addr = strtoull(c, NULL, 16); 393 if (!ops->target.addr) { 394 c = strchr(c, ','); 395 c = validate_comma(c, ops); 396 if (c++ != NULL) 397 ops->target.addr = strtoull(c, NULL, 16); 398 } 399 } else { 400 ops->target.addr = strtoull(ops->raw, NULL, 16); 401 } 402 403 target.addr = map__objdump_2mem(map, ops->target.addr); 404 start = map__unmap_ip(map, sym->start); 405 end = map__unmap_ip(map, sym->end); 406 407 ops->target.outside = target.addr < start || target.addr > end; 408 409 /* 410 * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): 411 412 cpp_named_operator2name@@Base+0xa72 413 414 * Point to a place that is after the cpp_named_operator2name 415 * boundaries, i.e. in the ELF symbol table for cc1 416 * cpp_named_operator2name is marked as being 32-bytes long, but it in 417 * fact is much larger than that, so we seem to need a symbols__find() 418 * routine that looks for >= current->start and < next_symbol->start, 419 * possibly just for C++ objects? 420 * 421 * For now lets just make some progress by marking jumps to outside the 422 * current function as call like. 423 * 424 * Actual navigation will come next, with further understanding of how 425 * the symbol searching and disassembly should be done. 426 */ 427 if (maps__find_ams(ms->maps, &target) == 0 && 428 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 429 ops->target.sym = target.ms.sym; 430 431 if (!ops->target.outside) { 432 ops->target.offset = target.addr - start; 433 ops->target.offset_avail = true; 434 } else { 435 ops->target.offset_avail = false; 436 } 437 438 return 0; 439 } 440 441 static int jump__scnprintf(struct ins *ins, char *bf, size_t size, 442 struct ins_operands *ops, int max_ins_name) 443 { 444 const char *c; 445 446 if (!ops->target.addr || ops->target.offset < 0) 447 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 448 449 if (ops->target.outside && ops->target.sym != NULL) 450 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 451 452 c = strchr(ops->raw, ','); 453 c = validate_comma(c, ops); 454 455 if (c != NULL) { 456 const char *c2 = strchr(c + 1, ','); 457 458 c2 = validate_comma(c2, ops); 459 /* check for 3-op insn */ 460 if (c2 != NULL) 461 c = c2; 462 c++; 463 464 /* mirror arch objdump's space-after-comma style */ 465 if (*c == ' ') 466 c++; 467 } 468 469 return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, 470 ins->name, c ? c - ops->raw : 0, ops->raw, 471 ops->target.offset); 472 } 473 474 static void jump__delete(struct ins_operands *ops __maybe_unused) 475 { 476 /* 477 * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the 478 * raw string, don't free them. 479 */ 480 } 481 482 static struct ins_ops jump_ops = { 483 .free = jump__delete, 484 .parse = jump__parse, 485 .scnprintf = jump__scnprintf, 486 }; 487 488 bool ins__is_jump(const struct ins *ins) 489 { 490 return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops; 491 } 492 493 static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) 494 { 495 char *endptr, *name, *t; 496 497 if (strstr(raw, "(%rip)") == NULL) 498 return 0; 499 500 *addrp = strtoull(comment, &endptr, 16); 501 if (endptr == comment) 502 return 0; 503 name = strchr(endptr, '<'); 504 if (name == NULL) 505 return -1; 506 507 name++; 508 509 t = strchr(name, '>'); 510 if (t == NULL) 511 return 0; 512 513 *t = '\0'; 514 *namep = strdup(name); 515 *t = '>'; 516 517 return 0; 518 } 519 520 static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 521 struct disasm_line *dl __maybe_unused) 522 { 523 ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); 524 if (ops->locked.ops == NULL) 525 return 0; 526 527 if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) 528 goto out_free_ops; 529 530 ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0); 531 532 if (ops->locked.ins.ops == NULL) 533 goto out_free_ops; 534 535 if (ops->locked.ins.ops->parse && 536 ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0) 537 goto out_free_ops; 538 539 return 0; 540 541 out_free_ops: 542 zfree(&ops->locked.ops); 543 return 0; 544 } 545 546 static int lock__scnprintf(struct ins *ins, char *bf, size_t size, 547 struct ins_operands *ops, int max_ins_name) 548 { 549 int printed; 550 551 if (ops->locked.ins.ops == NULL) 552 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 553 554 printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); 555 return printed + ins__scnprintf(&ops->locked.ins, bf + printed, 556 size - printed, ops->locked.ops, max_ins_name); 557 } 558 559 static void lock__delete(struct ins_operands *ops) 560 { 561 struct ins *ins = &ops->locked.ins; 562 563 if (ins->ops && ins->ops->free) 564 ins->ops->free(ops->locked.ops); 565 else 566 ins_ops__delete(ops->locked.ops); 567 568 zfree(&ops->locked.ops); 569 zfree(&ops->locked.ins.name); 570 zfree(&ops->target.raw); 571 zfree(&ops->target.name); 572 } 573 574 static struct ins_ops lock_ops = { 575 .free = lock__delete, 576 .parse = lock__parse, 577 .scnprintf = lock__scnprintf, 578 }; 579 580 /* 581 * Check if the operand has more than one registers like x86 SIB addressing: 582 * 0x1234(%rax, %rbx, 8) 583 * 584 * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check 585 * the input string after 'memory_ref_char' if exists. 586 */ 587 static bool check_multi_regs(struct arch *arch, const char *op) 588 { 589 int count = 0; 590 591 if (arch->objdump.register_char == 0) 592 return false; 593 594 if (arch->objdump.memory_ref_char) { 595 op = strchr(op, arch->objdump.memory_ref_char); 596 if (op == NULL) 597 return false; 598 } 599 600 while ((op = strchr(op, arch->objdump.register_char)) != NULL) { 601 count++; 602 op++; 603 } 604 605 return count > 1; 606 } 607 608 static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, 609 struct disasm_line *dl __maybe_unused) 610 { 611 char *s = strchr(ops->raw, ','), *target, *comment, prev; 612 613 if (s == NULL) 614 return -1; 615 616 *s = '\0'; 617 618 /* 619 * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) 620 * then it needs to have the closing parenthesis. 621 */ 622 if (strchr(ops->raw, '(')) { 623 *s = ','; 624 s = strchr(ops->raw, ')'); 625 if (s == NULL || s[1] != ',') 626 return -1; 627 *++s = '\0'; 628 } 629 630 ops->source.raw = strdup(ops->raw); 631 *s = ','; 632 633 if (ops->source.raw == NULL) 634 return -1; 635 636 ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); 637 638 target = skip_spaces(++s); 639 comment = strchr(s, arch->objdump.comment_char); 640 641 if (comment != NULL) 642 s = comment - 1; 643 else 644 s = strchr(s, '\0') - 1; 645 646 while (s > target && isspace(s[0])) 647 --s; 648 s++; 649 prev = *s; 650 *s = '\0'; 651 652 ops->target.raw = strdup(target); 653 *s = prev; 654 655 if (ops->target.raw == NULL) 656 goto out_free_source; 657 658 ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); 659 660 if (comment == NULL) 661 return 0; 662 663 comment = skip_spaces(comment); 664 comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); 665 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 666 667 return 0; 668 669 out_free_source: 670 zfree(&ops->source.raw); 671 return -1; 672 } 673 674 static int mov__scnprintf(struct ins *ins, char *bf, size_t size, 675 struct ins_operands *ops, int max_ins_name) 676 { 677 return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, 678 ops->source.name ?: ops->source.raw, 679 ops->target.name ?: ops->target.raw); 680 } 681 682 static struct ins_ops mov_ops = { 683 .parse = mov__parse, 684 .scnprintf = mov__scnprintf, 685 }; 686 687 #define PPC_22_30(R) (((R) >> 1) & 0x1ff) 688 #define MINUS_EXT_XO_FORM 234 689 #define SUB_EXT_XO_FORM 232 690 #define ADD_ZERO_EXT_XO_FORM 202 691 #define SUB_ZERO_EXT_XO_FORM 200 692 693 static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size, 694 struct ins_operands *ops, int max_ins_name) 695 { 696 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 697 ops->raw); 698 } 699 700 /* 701 * Sets the fields: multi_regs and "mem_ref". 702 * "mem_ref" is set for ops->source which is later used to 703 * fill the objdump->memory_ref-char field. This ops is currently 704 * used by powerpc and since binary instruction code is used to 705 * extract opcode, regs and offset, no other parsing is needed here. 706 * 707 * Dont set multi regs for 4 cases since it has only one operand 708 * for source: 709 * - Add to Minus One Extended XO-form ( Ex: addme, addmeo ) 710 * - Subtract From Minus One Extended XO-form ( Ex: subfme ) 711 * - Add to Zero Extended XO-form ( Ex: addze, addzeo ) 712 * - Subtract From Zero Extended XO-form ( Ex: subfze ) 713 */ 714 static int arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, 715 struct map_symbol *ms __maybe_unused, struct disasm_line *dl) 716 { 717 int opcode = PPC_OP(dl->raw.raw_insn); 718 719 ops->source.mem_ref = false; 720 if (opcode == 31) { 721 if ((opcode != MINUS_EXT_XO_FORM) && (opcode != SUB_EXT_XO_FORM) \ 722 && (opcode != ADD_ZERO_EXT_XO_FORM) && (opcode != SUB_ZERO_EXT_XO_FORM)) 723 ops->source.multi_regs = true; 724 } 725 726 ops->target.mem_ref = false; 727 ops->target.multi_regs = false; 728 729 return 0; 730 } 731 732 static struct ins_ops arithmetic_ops = { 733 .parse = arithmetic__parse, 734 .scnprintf = arithmetic__scnprintf, 735 }; 736 737 static int load_store__scnprintf(struct ins *ins, char *bf, size_t size, 738 struct ins_operands *ops, int max_ins_name) 739 { 740 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 741 ops->raw); 742 } 743 744 /* 745 * Sets the fields: multi_regs and "mem_ref". 746 * "mem_ref" is set for ops->source which is later used to 747 * fill the objdump->memory_ref-char field. This ops is currently 748 * used by powerpc and since binary instruction code is used to 749 * extract opcode, regs and offset, no other parsing is needed here 750 */ 751 static int load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, 752 struct map_symbol *ms __maybe_unused, struct disasm_line *dl __maybe_unused) 753 { 754 ops->source.mem_ref = true; 755 ops->source.multi_regs = false; 756 /* opcode 31 is of X form */ 757 if (PPC_OP(dl->raw.raw_insn) == 31) 758 ops->source.multi_regs = true; 759 760 ops->target.mem_ref = false; 761 ops->target.multi_regs = false; 762 763 return 0; 764 } 765 766 static struct ins_ops load_store_ops = { 767 .parse = load_store__parse, 768 .scnprintf = load_store__scnprintf, 769 }; 770 771 static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, 772 struct disasm_line *dl __maybe_unused) 773 { 774 char *target, *comment, *s, prev; 775 776 target = s = ops->raw; 777 778 while (s[0] != '\0' && !isspace(s[0])) 779 ++s; 780 prev = *s; 781 *s = '\0'; 782 783 ops->target.raw = strdup(target); 784 *s = prev; 785 786 if (ops->target.raw == NULL) 787 return -1; 788 789 comment = strchr(s, arch->objdump.comment_char); 790 if (comment == NULL) 791 return 0; 792 793 comment = skip_spaces(comment); 794 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 795 796 return 0; 797 } 798 799 static int dec__scnprintf(struct ins *ins, char *bf, size_t size, 800 struct ins_operands *ops, int max_ins_name) 801 { 802 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 803 ops->target.name ?: ops->target.raw); 804 } 805 806 static struct ins_ops dec_ops = { 807 .parse = dec__parse, 808 .scnprintf = dec__scnprintf, 809 }; 810 811 static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, 812 struct ins_operands *ops __maybe_unused, int max_ins_name) 813 { 814 return scnprintf(bf, size, "%-*s", max_ins_name, "nop"); 815 } 816 817 static struct ins_ops nop_ops = { 818 .scnprintf = nop__scnprintf, 819 }; 820 821 static struct ins_ops ret_ops = { 822 .scnprintf = ins__raw_scnprintf, 823 }; 824 825 bool ins__is_nop(const struct ins *ins) 826 { 827 return ins->ops == &nop_ops; 828 } 829 830 bool ins__is_ret(const struct ins *ins) 831 { 832 return ins->ops == &ret_ops; 833 } 834 835 bool ins__is_lock(const struct ins *ins) 836 { 837 return ins->ops == &lock_ops; 838 } 839 840 static int ins__key_cmp(const void *name, const void *insp) 841 { 842 const struct ins *ins = insp; 843 844 return strcmp(name, ins->name); 845 } 846 847 static int ins__cmp(const void *a, const void *b) 848 { 849 const struct ins *ia = a; 850 const struct ins *ib = b; 851 852 return strcmp(ia->name, ib->name); 853 } 854 855 static void ins__sort(struct arch *arch) 856 { 857 const int nmemb = arch->nr_instructions; 858 859 qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); 860 } 861 862 static struct ins_ops *__ins__find(struct arch *arch, const char *name, struct disasm_line *dl) 863 { 864 struct ins *ins; 865 const int nmemb = arch->nr_instructions; 866 867 if (arch__is(arch, "powerpc")) { 868 /* 869 * For powerpc, identify the instruction ops 870 * from the opcode using raw_insn. 871 */ 872 struct ins_ops *ops; 873 874 ops = check_ppc_insn(dl); 875 if (ops) 876 return ops; 877 } 878 879 if (!arch->sorted_instructions) { 880 ins__sort(arch); 881 arch->sorted_instructions = true; 882 } 883 884 ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 885 if (ins) 886 return ins->ops; 887 888 if (arch->insn_suffix) { 889 char tmp[32]; 890 char suffix; 891 size_t len = strlen(name); 892 893 if (len == 0 || len >= sizeof(tmp)) 894 return NULL; 895 896 suffix = name[len - 1]; 897 if (strchr(arch->insn_suffix, suffix) == NULL) 898 return NULL; 899 900 strcpy(tmp, name); 901 tmp[len - 1] = '\0'; /* remove the suffix and check again */ 902 903 ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 904 } 905 return ins ? ins->ops : NULL; 906 } 907 908 struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl) 909 { 910 struct ins_ops *ops = __ins__find(arch, name, dl); 911 912 if (!ops && arch->associate_instruction_ops) 913 ops = arch->associate_instruction_ops(arch, name); 914 915 return ops; 916 } 917 918 static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms) 919 { 920 dl->ins.ops = ins__find(arch, dl->ins.name, dl); 921 922 if (!dl->ins.ops) 923 return; 924 925 if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0) 926 dl->ins.ops = NULL; 927 } 928 929 static int disasm_line__parse(char *line, const char **namep, char **rawp) 930 { 931 char tmp, *name = skip_spaces(line); 932 933 if (name[0] == '\0') 934 return -1; 935 936 *rawp = name + 1; 937 938 while ((*rawp)[0] != '\0' && !isspace((*rawp)[0])) 939 ++*rawp; 940 941 tmp = (*rawp)[0]; 942 (*rawp)[0] = '\0'; 943 *namep = strdup(name); 944 945 if (*namep == NULL) 946 goto out; 947 948 (*rawp)[0] = tmp; 949 *rawp = strim(*rawp); 950 951 return 0; 952 953 out: 954 return -1; 955 } 956 957 /* 958 * Parses the result captured from symbol__disassemble_* 959 * Example, line read from DSO file in powerpc: 960 * line: 38 01 81 e8 961 * opcode: fetched from arch specific get_opcode_insn 962 * rawp_insn: e8810138 963 * 964 * rawp_insn is used later to extract the reg/offset fields 965 */ 966 #define PPC_OP(op) (((op) >> 26) & 0x3F) 967 #define RAW_BYTES 11 968 969 static int disasm_line__parse_powerpc(struct disasm_line *dl) 970 { 971 char *line = dl->al.line; 972 const char **namep = &dl->ins.name; 973 char **rawp = &dl->ops.raw; 974 char *tmp_raw_insn, *name_raw_insn = skip_spaces(line); 975 char *name = skip_spaces(name_raw_insn + RAW_BYTES); 976 int objdump = 0; 977 978 if (strlen(line) > RAW_BYTES) 979 objdump = 1; 980 981 if (name_raw_insn[0] == '\0') 982 return -1; 983 984 if (objdump) { 985 disasm_line__parse(name, namep, rawp); 986 } else 987 *namep = ""; 988 989 tmp_raw_insn = strndup(name_raw_insn, 11); 990 if (tmp_raw_insn == NULL) 991 return -1; 992 993 remove_spaces(tmp_raw_insn); 994 995 sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn); 996 if (objdump) 997 dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn); 998 999 return 0; 1000 } 1001 1002 static void annotation_line__init(struct annotation_line *al, 1003 struct annotate_args *args, 1004 int nr) 1005 { 1006 al->offset = args->offset; 1007 al->line = strdup(args->line); 1008 al->line_nr = args->line_nr; 1009 al->fileloc = args->fileloc; 1010 al->data_nr = nr; 1011 } 1012 1013 static void annotation_line__exit(struct annotation_line *al) 1014 { 1015 zfree_srcline(&al->path); 1016 zfree(&al->line); 1017 zfree(&al->cycles); 1018 zfree(&al->br_cntr); 1019 } 1020 1021 static size_t disasm_line_size(int nr) 1022 { 1023 struct annotation_line *al; 1024 1025 return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); 1026 } 1027 1028 /* 1029 * Allocating the disasm annotation line data with 1030 * following structure: 1031 * 1032 * ------------------------------------------- 1033 * struct disasm_line | struct annotation_line 1034 * ------------------------------------------- 1035 * 1036 * We have 'struct annotation_line' member as last member 1037 * of 'struct disasm_line' to have an easy access. 1038 */ 1039 struct disasm_line *disasm_line__new(struct annotate_args *args) 1040 { 1041 struct disasm_line *dl = NULL; 1042 struct annotation *notes = symbol__annotation(args->ms.sym); 1043 int nr = notes->src->nr_events; 1044 1045 dl = zalloc(disasm_line_size(nr)); 1046 if (!dl) 1047 return NULL; 1048 1049 annotation_line__init(&dl->al, args, nr); 1050 if (dl->al.line == NULL) 1051 goto out_delete; 1052 1053 if (args->offset != -1) { 1054 if (arch__is(args->arch, "powerpc")) { 1055 if (disasm_line__parse_powerpc(dl) < 0) 1056 goto out_free_line; 1057 } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) 1058 goto out_free_line; 1059 1060 disasm_line__init_ins(dl, args->arch, &args->ms); 1061 } 1062 1063 return dl; 1064 1065 out_free_line: 1066 zfree(&dl->al.line); 1067 out_delete: 1068 free(dl); 1069 return NULL; 1070 } 1071 1072 void disasm_line__free(struct disasm_line *dl) 1073 { 1074 if (dl->ins.ops && dl->ins.ops->free) 1075 dl->ins.ops->free(&dl->ops); 1076 else 1077 ins_ops__delete(&dl->ops); 1078 zfree(&dl->ins.name); 1079 annotation_line__exit(&dl->al); 1080 free(dl); 1081 } 1082 1083 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) 1084 { 1085 if (raw || !dl->ins.ops) 1086 return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw); 1087 1088 return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); 1089 } 1090 1091 /* 1092 * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) 1093 * which looks like following 1094 * 1095 * 0000000000415500 <_init>: 1096 * 415500: sub $0x8,%rsp 1097 * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8> 1098 * 41550b: test %rax,%rax 1099 * 41550e: je 415515 <_init+0x15> 1100 * 415510: callq 416e70 <__gmon_start__@plt> 1101 * 415515: add $0x8,%rsp 1102 * 415519: retq 1103 * 1104 * it will be parsed and saved into struct disasm_line as 1105 * <offset> <name> <ops.raw> 1106 * 1107 * The offset will be a relative offset from the start of the symbol and -1 1108 * means that it's not a disassembly line so should be treated differently. 1109 * The ops.raw part will be parsed further according to type of the instruction. 1110 */ 1111 static int symbol__parse_objdump_line(struct symbol *sym, 1112 struct annotate_args *args, 1113 char *parsed_line, int *line_nr, char **fileloc) 1114 { 1115 struct map *map = args->ms.map; 1116 struct annotation *notes = symbol__annotation(sym); 1117 struct disasm_line *dl; 1118 char *tmp; 1119 s64 line_ip, offset = -1; 1120 regmatch_t match[2]; 1121 1122 /* /filename:linenr ? Save line number and ignore. */ 1123 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { 1124 *line_nr = atoi(parsed_line + match[1].rm_so); 1125 free(*fileloc); 1126 *fileloc = strdup(parsed_line); 1127 return 0; 1128 } 1129 1130 /* Process hex address followed by ':'. */ 1131 line_ip = strtoull(parsed_line, &tmp, 16); 1132 if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { 1133 u64 start = map__rip_2objdump(map, sym->start), 1134 end = map__rip_2objdump(map, sym->end); 1135 1136 offset = line_ip - start; 1137 if ((u64)line_ip < start || (u64)line_ip >= end) 1138 offset = -1; 1139 else 1140 parsed_line = tmp + 1; 1141 } 1142 1143 args->offset = offset; 1144 args->line = parsed_line; 1145 args->line_nr = *line_nr; 1146 args->fileloc = *fileloc; 1147 args->ms.sym = sym; 1148 1149 dl = disasm_line__new(args); 1150 (*line_nr)++; 1151 1152 if (dl == NULL) 1153 return -1; 1154 1155 if (!disasm_line__has_local_offset(dl)) { 1156 dl->ops.target.offset = dl->ops.target.addr - 1157 map__rip_2objdump(map, sym->start); 1158 dl->ops.target.offset_avail = true; 1159 } 1160 1161 /* kcore has no symbols, so add the call target symbol */ 1162 if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { 1163 struct addr_map_symbol target = { 1164 .addr = dl->ops.target.addr, 1165 .ms = { .map = map, }, 1166 }; 1167 1168 if (!maps__find_ams(args->ms.maps, &target) && 1169 target.ms.sym->start == target.al_addr) 1170 dl->ops.target.sym = target.ms.sym; 1171 } 1172 1173 annotation_line__add(&dl->al, ¬es->src->source); 1174 return 0; 1175 } 1176 1177 static void delete_last_nop(struct symbol *sym) 1178 { 1179 struct annotation *notes = symbol__annotation(sym); 1180 struct list_head *list = ¬es->src->source; 1181 struct disasm_line *dl; 1182 1183 while (!list_empty(list)) { 1184 dl = list_entry(list->prev, struct disasm_line, al.node); 1185 1186 if (dl->ins.ops) { 1187 if (!ins__is_nop(&dl->ins)) 1188 return; 1189 } else { 1190 if (!strstr(dl->al.line, " nop ") && 1191 !strstr(dl->al.line, " nopl ") && 1192 !strstr(dl->al.line, " nopw ")) 1193 return; 1194 } 1195 1196 list_del_init(&dl->al.node); 1197 disasm_line__free(dl); 1198 } 1199 } 1200 1201 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen) 1202 { 1203 struct dso *dso = map__dso(ms->map); 1204 1205 BUG_ON(buflen == 0); 1206 1207 if (errnum >= 0) { 1208 str_error_r(errnum, buf, buflen); 1209 return 0; 1210 } 1211 1212 switch (errnum) { 1213 case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { 1214 char bf[SBUILD_ID_SIZE + 15] = " with build id "; 1215 char *build_id_msg = NULL; 1216 1217 if (dso__has_build_id(dso)) { 1218 build_id__sprintf(dso__bid(dso), bf + 15); 1219 build_id_msg = bf; 1220 } 1221 scnprintf(buf, buflen, 1222 "No vmlinux file%s\nwas found in the path.\n\n" 1223 "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" 1224 "Please use:\n\n" 1225 " perf buildid-cache -vu vmlinux\n\n" 1226 "or:\n\n" 1227 " --vmlinux vmlinux\n", build_id_msg ?: ""); 1228 } 1229 break; 1230 case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: 1231 scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); 1232 break; 1233 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: 1234 scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); 1235 break; 1236 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: 1237 scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); 1238 break; 1239 case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: 1240 scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso)); 1241 break; 1242 case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: 1243 scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", 1244 dso__long_name(dso)); 1245 break; 1246 default: 1247 scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); 1248 break; 1249 } 1250 1251 return 0; 1252 } 1253 1254 static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size) 1255 { 1256 char linkname[PATH_MAX]; 1257 char *build_id_filename; 1258 char *build_id_path = NULL; 1259 char *pos; 1260 int len; 1261 1262 if (dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS && 1263 !dso__is_kcore(dso)) 1264 return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; 1265 1266 build_id_filename = dso__build_id_filename(dso, NULL, 0, false); 1267 if (build_id_filename) { 1268 __symbol__join_symfs(filename, filename_size, build_id_filename); 1269 free(build_id_filename); 1270 } else { 1271 if (dso__has_build_id(dso)) 1272 return ENOMEM; 1273 goto fallback; 1274 } 1275 1276 build_id_path = strdup(filename); 1277 if (!build_id_path) 1278 return ENOMEM; 1279 1280 /* 1281 * old style build-id cache has name of XX/XXXXXXX.. while 1282 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. 1283 * extract the build-id part of dirname in the new style only. 1284 */ 1285 pos = strrchr(build_id_path, '/'); 1286 if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) 1287 dirname(build_id_path); 1288 1289 if (dso__is_kcore(dso)) 1290 goto fallback; 1291 1292 len = readlink(build_id_path, linkname, sizeof(linkname) - 1); 1293 if (len < 0) 1294 goto fallback; 1295 1296 linkname[len] = '\0'; 1297 if (strstr(linkname, DSO__NAME_KALLSYMS) || 1298 access(filename, R_OK)) { 1299 fallback: 1300 /* 1301 * If we don't have build-ids or the build-id file isn't in the 1302 * cache, or is just a kallsyms file, well, lets hope that this 1303 * DSO is the same as when 'perf record' ran. 1304 */ 1305 if (dso__kernel(dso) && dso__long_name(dso)[0] == '/') 1306 snprintf(filename, filename_size, "%s", dso__long_name(dso)); 1307 else 1308 __symbol__join_symfs(filename, filename_size, dso__long_name(dso)); 1309 1310 mutex_lock(dso__lock(dso)); 1311 if (access(filename, R_OK) && errno == ENOENT && dso__nsinfo(dso)) { 1312 char *new_name = dso__filename_with_chroot(dso, filename); 1313 if (new_name) { 1314 strlcpy(filename, new_name, filename_size); 1315 free(new_name); 1316 } 1317 } 1318 mutex_unlock(dso__lock(dso)); 1319 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1320 dso__set_binary_type(dso, DSO_BINARY_TYPE__BUILD_ID_CACHE); 1321 } 1322 1323 free(build_id_path); 1324 return 0; 1325 } 1326 1327 #ifdef HAVE_LIBCAPSTONE_SUPPORT 1328 #include <capstone/capstone.h> 1329 1330 int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style); 1331 1332 static int open_capstone_handle(struct annotate_args *args, bool is_64bit, 1333 csh *handle) 1334 { 1335 struct annotation_options *opt = args->options; 1336 cs_mode mode = is_64bit ? CS_MODE_64 : CS_MODE_32; 1337 1338 /* TODO: support more architectures */ 1339 if (!arch__is(args->arch, "x86")) 1340 return -1; 1341 1342 if (cs_open(CS_ARCH_X86, mode, handle) != CS_ERR_OK) 1343 return -1; 1344 1345 if (!opt->disassembler_style || 1346 !strcmp(opt->disassembler_style, "att")) 1347 cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); 1348 1349 /* 1350 * Resolving address operands to symbols is implemented 1351 * on x86 by investigating instruction details. 1352 */ 1353 cs_option(*handle, CS_OPT_DETAIL, CS_OPT_ON); 1354 1355 return 0; 1356 } 1357 1358 struct find_file_offset_data { 1359 u64 ip; 1360 u64 offset; 1361 }; 1362 1363 /* This will be called for each PHDR in an ELF binary */ 1364 static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg) 1365 { 1366 struct find_file_offset_data *data = arg; 1367 1368 if (start <= data->ip && data->ip < start + len) { 1369 data->offset = pgoff + data->ip - start; 1370 return 1; 1371 } 1372 return 0; 1373 } 1374 1375 static void print_capstone_detail(cs_insn *insn, char *buf, size_t len, 1376 struct annotate_args *args, u64 addr) 1377 { 1378 int i; 1379 struct map *map = args->ms.map; 1380 struct symbol *sym; 1381 1382 /* TODO: support more architectures */ 1383 if (!arch__is(args->arch, "x86")) 1384 return; 1385 1386 if (insn->detail == NULL) 1387 return; 1388 1389 for (i = 0; i < insn->detail->x86.op_count; i++) { 1390 cs_x86_op *op = &insn->detail->x86.operands[i]; 1391 u64 orig_addr; 1392 1393 if (op->type != X86_OP_MEM) 1394 continue; 1395 1396 /* only print RIP-based global symbols for now */ 1397 if (op->mem.base != X86_REG_RIP) 1398 continue; 1399 1400 /* get the target address */ 1401 orig_addr = addr + insn->size + op->mem.disp; 1402 addr = map__objdump_2mem(map, orig_addr); 1403 1404 if (dso__kernel(map__dso(map))) { 1405 /* 1406 * The kernel maps can be splitted into sections, 1407 * let's find the map first and the search the symbol. 1408 */ 1409 map = maps__find(map__kmaps(map), addr); 1410 if (map == NULL) 1411 continue; 1412 } 1413 1414 /* convert it to map-relative address for search */ 1415 addr = map__map_ip(map, addr); 1416 1417 sym = map__find_symbol(map, addr); 1418 if (sym == NULL) 1419 continue; 1420 1421 if (addr == sym->start) { 1422 scnprintf(buf, len, "\t# %"PRIx64" <%s>", 1423 orig_addr, sym->name); 1424 } else { 1425 scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">", 1426 orig_addr, sym->name, addr - sym->start); 1427 } 1428 break; 1429 } 1430 } 1431 1432 static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym, 1433 struct annotate_args *args) 1434 { 1435 struct annotation *notes = symbol__annotation(sym); 1436 struct map *map = args->ms.map; 1437 struct dso *dso = map__dso(map); 1438 struct nscookie nsc; 1439 u64 start = map__rip_2objdump(map, sym->start); 1440 u64 end = map__rip_2objdump(map, sym->end); 1441 u64 len = end - start; 1442 u64 offset; 1443 int i, fd, count; 1444 bool is_64bit = false; 1445 bool needs_cs_close = false; 1446 u8 *buf = NULL; 1447 struct find_file_offset_data data = { 1448 .ip = start, 1449 }; 1450 csh handle; 1451 char disasm_buf[512]; 1452 struct disasm_line *dl; 1453 u32 *line; 1454 bool disassembler_style = false; 1455 1456 if (args->options->objdump_path) 1457 return -1; 1458 1459 nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); 1460 fd = open(filename, O_RDONLY); 1461 nsinfo__mountns_exit(&nsc); 1462 if (fd < 0) 1463 return -1; 1464 1465 if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, 1466 &is_64bit) == 0) 1467 goto err; 1468 1469 if (!args->options->disassembler_style || 1470 !strcmp(args->options->disassembler_style, "att")) 1471 disassembler_style = true; 1472 1473 if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0) 1474 goto err; 1475 1476 needs_cs_close = true; 1477 1478 buf = malloc(len); 1479 if (buf == NULL) 1480 goto err; 1481 1482 count = pread(fd, buf, len, data.offset); 1483 close(fd); 1484 fd = -1; 1485 1486 if ((u64)count != len) 1487 goto err; 1488 1489 line = (u32 *)buf; 1490 1491 /* add the function address and name */ 1492 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1493 start, sym->name); 1494 1495 args->offset = -1; 1496 args->line = disasm_buf; 1497 args->line_nr = 0; 1498 args->fileloc = NULL; 1499 args->ms.sym = sym; 1500 1501 dl = disasm_line__new(args); 1502 if (dl == NULL) 1503 goto err; 1504 1505 annotation_line__add(&dl->al, ¬es->src->source); 1506 1507 /* 1508 * TODO: enable disassm for powerpc 1509 * count = cs_disasm(handle, buf, len, start, len, &insn); 1510 * 1511 * For now, only binary code is saved in disassembled line 1512 * to be used in "type" and "typeoff" sort keys. Each raw code 1513 * is 32 bit instruction. So use "len/4" to get the number of 1514 * entries. 1515 */ 1516 count = len/4; 1517 1518 for (i = 0, offset = 0; i < count; i++) { 1519 args->offset = offset; 1520 sprintf(args->line, "%x", line[i]); 1521 1522 dl = disasm_line__new(args); 1523 if (dl == NULL) 1524 goto err; 1525 1526 annotation_line__add(&dl->al, ¬es->src->source); 1527 1528 offset += 4; 1529 } 1530 1531 /* It failed in the middle */ 1532 if (offset != len) { 1533 struct list_head *list = ¬es->src->source; 1534 1535 /* Discard all lines and fallback to objdump */ 1536 while (!list_empty(list)) { 1537 dl = list_first_entry(list, struct disasm_line, al.node); 1538 1539 list_del_init(&dl->al.node); 1540 disasm_line__free(dl); 1541 } 1542 count = -1; 1543 } 1544 1545 out: 1546 if (needs_cs_close) 1547 cs_close(&handle); 1548 free(buf); 1549 return count < 0 ? count : 0; 1550 1551 err: 1552 if (fd >= 0) 1553 close(fd); 1554 if (needs_cs_close) { 1555 struct disasm_line *tmp; 1556 1557 /* 1558 * It probably failed in the middle of the above loop. 1559 * Release any resources it might add. 1560 */ 1561 list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { 1562 list_del(&dl->al.node); 1563 free(dl); 1564 } 1565 } 1566 count = -1; 1567 goto out; 1568 } 1569 1570 static int symbol__disassemble_capstone(char *filename, struct symbol *sym, 1571 struct annotate_args *args) 1572 { 1573 struct annotation *notes = symbol__annotation(sym); 1574 struct map *map = args->ms.map; 1575 struct dso *dso = map__dso(map); 1576 struct nscookie nsc; 1577 u64 start = map__rip_2objdump(map, sym->start); 1578 u64 end = map__rip_2objdump(map, sym->end); 1579 u64 len = end - start; 1580 u64 offset; 1581 int i, fd, count; 1582 bool is_64bit = false; 1583 bool needs_cs_close = false; 1584 u8 *buf = NULL; 1585 struct find_file_offset_data data = { 1586 .ip = start, 1587 }; 1588 csh handle; 1589 cs_insn *insn; 1590 char disasm_buf[512]; 1591 struct disasm_line *dl; 1592 1593 if (args->options->objdump_path) 1594 return -1; 1595 1596 nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); 1597 fd = open(filename, O_RDONLY); 1598 nsinfo__mountns_exit(&nsc); 1599 if (fd < 0) 1600 return -1; 1601 1602 if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, 1603 &is_64bit) == 0) 1604 goto err; 1605 1606 if (open_capstone_handle(args, is_64bit, &handle) < 0) 1607 goto err; 1608 1609 needs_cs_close = true; 1610 1611 buf = malloc(len); 1612 if (buf == NULL) 1613 goto err; 1614 1615 count = pread(fd, buf, len, data.offset); 1616 close(fd); 1617 fd = -1; 1618 1619 if ((u64)count != len) 1620 goto err; 1621 1622 /* add the function address and name */ 1623 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1624 start, sym->name); 1625 1626 args->offset = -1; 1627 args->line = disasm_buf; 1628 args->line_nr = 0; 1629 args->fileloc = NULL; 1630 args->ms.sym = sym; 1631 1632 dl = disasm_line__new(args); 1633 if (dl == NULL) 1634 goto err; 1635 1636 annotation_line__add(&dl->al, ¬es->src->source); 1637 1638 count = cs_disasm(handle, buf, len, start, len, &insn); 1639 for (i = 0, offset = 0; i < count; i++) { 1640 int printed; 1641 1642 printed = scnprintf(disasm_buf, sizeof(disasm_buf), 1643 " %-7s %s", 1644 insn[i].mnemonic, insn[i].op_str); 1645 print_capstone_detail(&insn[i], disasm_buf + printed, 1646 sizeof(disasm_buf) - printed, args, 1647 start + offset); 1648 1649 args->offset = offset; 1650 args->line = disasm_buf; 1651 1652 dl = disasm_line__new(args); 1653 if (dl == NULL) 1654 goto err; 1655 1656 annotation_line__add(&dl->al, ¬es->src->source); 1657 1658 offset += insn[i].size; 1659 } 1660 1661 /* It failed in the middle: probably due to unknown instructions */ 1662 if (offset != len) { 1663 struct list_head *list = ¬es->src->source; 1664 1665 /* Discard all lines and fallback to objdump */ 1666 while (!list_empty(list)) { 1667 dl = list_first_entry(list, struct disasm_line, al.node); 1668 1669 list_del_init(&dl->al.node); 1670 disasm_line__free(dl); 1671 } 1672 count = -1; 1673 } 1674 1675 out: 1676 if (needs_cs_close) 1677 cs_close(&handle); 1678 free(buf); 1679 return count < 0 ? count : 0; 1680 1681 err: 1682 if (fd >= 0) 1683 close(fd); 1684 if (needs_cs_close) { 1685 struct disasm_line *tmp; 1686 1687 /* 1688 * It probably failed in the middle of the above loop. 1689 * Release any resources it might add. 1690 */ 1691 list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { 1692 list_del(&dl->al.node); 1693 free(dl); 1694 } 1695 } 1696 count = -1; 1697 goto out; 1698 } 1699 #endif 1700 1701 static int symbol__disassemble_raw(char *filename, struct symbol *sym, 1702 struct annotate_args *args) 1703 { 1704 struct annotation *notes = symbol__annotation(sym); 1705 struct map *map = args->ms.map; 1706 struct dso *dso = map__dso(map); 1707 u64 start = map__rip_2objdump(map, sym->start); 1708 u64 end = map__rip_2objdump(map, sym->end); 1709 u64 len = end - start; 1710 u64 offset; 1711 int i, count; 1712 u8 *buf = NULL; 1713 char disasm_buf[512]; 1714 struct disasm_line *dl; 1715 u32 *line; 1716 1717 /* Return if objdump is specified explicitly */ 1718 if (args->options->objdump_path) 1719 return -1; 1720 1721 pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename); 1722 1723 buf = malloc(len); 1724 if (buf == NULL) 1725 goto err; 1726 1727 count = dso__data_read_offset(dso, NULL, sym->start, buf, len); 1728 1729 line = (u32 *)buf; 1730 1731 if ((u64)count != len) 1732 goto err; 1733 1734 /* add the function address and name */ 1735 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1736 start, sym->name); 1737 1738 args->offset = -1; 1739 args->line = disasm_buf; 1740 args->line_nr = 0; 1741 args->fileloc = NULL; 1742 args->ms.sym = sym; 1743 1744 dl = disasm_line__new(args); 1745 if (dl == NULL) 1746 goto err; 1747 1748 annotation_line__add(&dl->al, ¬es->src->source); 1749 1750 /* Each raw instruction is 4 byte */ 1751 count = len/4; 1752 1753 for (i = 0, offset = 0; i < count; i++) { 1754 args->offset = offset; 1755 sprintf(args->line, "%x", line[i]); 1756 dl = disasm_line__new(args); 1757 if (dl == NULL) 1758 goto err; 1759 1760 annotation_line__add(&dl->al, ¬es->src->source); 1761 offset += 4; 1762 } 1763 1764 /* It failed in the middle */ 1765 if (offset != len) { 1766 struct list_head *list = ¬es->src->source; 1767 1768 /* Discard all lines and fallback to objdump */ 1769 while (!list_empty(list)) { 1770 dl = list_first_entry(list, struct disasm_line, al.node); 1771 1772 list_del_init(&dl->al.node); 1773 disasm_line__free(dl); 1774 } 1775 count = -1; 1776 } 1777 1778 out: 1779 free(buf); 1780 return count < 0 ? count : 0; 1781 1782 err: 1783 count = -1; 1784 goto out; 1785 } 1786 /* 1787 * Possibly create a new version of line with tabs expanded. Returns the 1788 * existing or new line, storage is updated if a new line is allocated. If 1789 * allocation fails then NULL is returned. 1790 */ 1791 static char *expand_tabs(char *line, char **storage, size_t *storage_len) 1792 { 1793 size_t i, src, dst, len, new_storage_len, num_tabs; 1794 char *new_line; 1795 size_t line_len = strlen(line); 1796 1797 for (num_tabs = 0, i = 0; i < line_len; i++) 1798 if (line[i] == '\t') 1799 num_tabs++; 1800 1801 if (num_tabs == 0) 1802 return line; 1803 1804 /* 1805 * Space for the line and '\0', less the leading and trailing 1806 * spaces. Each tab may introduce 7 additional spaces. 1807 */ 1808 new_storage_len = line_len + 1 + (num_tabs * 7); 1809 1810 new_line = malloc(new_storage_len); 1811 if (new_line == NULL) { 1812 pr_err("Failure allocating memory for tab expansion\n"); 1813 return NULL; 1814 } 1815 1816 /* 1817 * Copy regions starting at src and expand tabs. If there are two 1818 * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces 1819 * are inserted. 1820 */ 1821 for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { 1822 if (line[i] == '\t') { 1823 len = i - src; 1824 memcpy(&new_line[dst], &line[src], len); 1825 dst += len; 1826 new_line[dst++] = ' '; 1827 while (dst % 8 != 0) 1828 new_line[dst++] = ' '; 1829 src = i + 1; 1830 num_tabs--; 1831 } 1832 } 1833 1834 /* Expand the last region. */ 1835 len = line_len - src; 1836 memcpy(&new_line[dst], &line[src], len); 1837 dst += len; 1838 new_line[dst] = '\0'; 1839 1840 free(*storage); 1841 *storage = new_line; 1842 *storage_len = new_storage_len; 1843 return new_line; 1844 } 1845 1846 int symbol__disassemble(struct symbol *sym, struct annotate_args *args) 1847 { 1848 struct annotation_options *opts = &annotate_opts; 1849 struct map *map = args->ms.map; 1850 struct dso *dso = map__dso(map); 1851 char *command; 1852 FILE *file; 1853 char symfs_filename[PATH_MAX]; 1854 struct kcore_extract kce; 1855 bool delete_extract = false; 1856 bool decomp = false; 1857 int lineno = 0; 1858 char *fileloc = NULL; 1859 int nline; 1860 char *line; 1861 size_t line_len; 1862 const char *objdump_argv[] = { 1863 "/bin/sh", 1864 "-c", 1865 NULL, /* Will be the objdump command to run. */ 1866 "--", 1867 NULL, /* Will be the symfs path. */ 1868 NULL, 1869 }; 1870 struct child_process objdump_process; 1871 int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); 1872 1873 if (err) 1874 return err; 1875 1876 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, 1877 symfs_filename, sym->name, map__unmap_ip(map, sym->start), 1878 map__unmap_ip(map, sym->end)); 1879 1880 pr_debug("annotating [%p] %30s : [%p] %30s\n", 1881 dso, dso__long_name(dso), sym, sym->name); 1882 1883 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { 1884 return symbol__disassemble_bpf(sym, args); 1885 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { 1886 return symbol__disassemble_bpf_image(sym, args); 1887 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1888 return -1; 1889 } else if (dso__is_kcore(dso)) { 1890 kce.kcore_filename = symfs_filename; 1891 kce.addr = map__rip_2objdump(map, sym->start); 1892 kce.offs = sym->start; 1893 kce.len = sym->end - sym->start; 1894 if (!kcore_extract__create(&kce)) { 1895 delete_extract = true; 1896 strlcpy(symfs_filename, kce.extract_filename, 1897 sizeof(symfs_filename)); 1898 } 1899 } else if (dso__needs_decompress(dso)) { 1900 char tmp[KMOD_DECOMP_LEN]; 1901 1902 if (dso__decompress_kmodule_path(dso, symfs_filename, 1903 tmp, sizeof(tmp)) < 0) 1904 return -1; 1905 1906 decomp = true; 1907 strcpy(symfs_filename, tmp); 1908 } 1909 1910 /* 1911 * For powerpc data type profiling, use the dso__data_read_offset 1912 * to read raw instruction directly and interpret the binary code 1913 * to understand instructions and register fields. For sort keys as 1914 * type and typeoff, disassemble to mnemonic notation is 1915 * not required in case of powerpc. 1916 */ 1917 if (arch__is(args->arch, "powerpc")) { 1918 extern const char *sort_order; 1919 1920 if (sort_order && !strstr(sort_order, "sym")) { 1921 err = symbol__disassemble_raw(symfs_filename, sym, args); 1922 if (err == 0) 1923 goto out_remove_tmp; 1924 #ifdef HAVE_LIBCAPSTONE_SUPPORT 1925 err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); 1926 if (err == 0) 1927 goto out_remove_tmp; 1928 #endif 1929 } 1930 } 1931 1932 #ifdef HAVE_LIBCAPSTONE_SUPPORT 1933 err = symbol__disassemble_capstone(symfs_filename, sym, args); 1934 if (err == 0) 1935 goto out_remove_tmp; 1936 #endif 1937 1938 err = asprintf(&command, 1939 "%s %s%s --start-address=0x%016" PRIx64 1940 " --stop-address=0x%016" PRIx64 1941 " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", 1942 opts->objdump_path ?: "objdump", 1943 opts->disassembler_style ? "-M " : "", 1944 opts->disassembler_style ?: "", 1945 map__rip_2objdump(map, sym->start), 1946 map__rip_2objdump(map, sym->end), 1947 opts->show_linenr ? "-l" : "", 1948 opts->show_asm_raw ? "" : "--no-show-raw-insn", 1949 opts->annotate_src ? "-S" : "", 1950 opts->prefix ? "--prefix " : "", 1951 opts->prefix ? '"' : ' ', 1952 opts->prefix ?: "", 1953 opts->prefix ? '"' : ' ', 1954 opts->prefix_strip ? "--prefix-strip=" : "", 1955 opts->prefix_strip ?: ""); 1956 1957 if (err < 0) { 1958 pr_err("Failure allocating memory for the command to run\n"); 1959 goto out_remove_tmp; 1960 } 1961 1962 pr_debug("Executing: %s\n", command); 1963 1964 objdump_argv[2] = command; 1965 objdump_argv[4] = symfs_filename; 1966 1967 /* Create a pipe to read from for stdout */ 1968 memset(&objdump_process, 0, sizeof(objdump_process)); 1969 objdump_process.argv = objdump_argv; 1970 objdump_process.out = -1; 1971 objdump_process.err = -1; 1972 objdump_process.no_stderr = 1; 1973 if (start_command(&objdump_process)) { 1974 pr_err("Failure starting to run %s\n", command); 1975 err = -1; 1976 goto out_free_command; 1977 } 1978 1979 file = fdopen(objdump_process.out, "r"); 1980 if (!file) { 1981 pr_err("Failure creating FILE stream for %s\n", command); 1982 /* 1983 * If we were using debug info should retry with 1984 * original binary. 1985 */ 1986 err = -1; 1987 goto out_close_stdout; 1988 } 1989 1990 /* Storage for getline. */ 1991 line = NULL; 1992 line_len = 0; 1993 1994 nline = 0; 1995 while (!feof(file)) { 1996 const char *match; 1997 char *expanded_line; 1998 1999 if (getline(&line, &line_len, file) < 0 || !line) 2000 break; 2001 2002 /* Skip lines containing "filename:" */ 2003 match = strstr(line, symfs_filename); 2004 if (match && match[strlen(symfs_filename)] == ':') 2005 continue; 2006 2007 expanded_line = strim(line); 2008 expanded_line = expand_tabs(expanded_line, &line, &line_len); 2009 if (!expanded_line) 2010 break; 2011 2012 /* 2013 * The source code line number (lineno) needs to be kept in 2014 * across calls to symbol__parse_objdump_line(), so that it 2015 * can associate it with the instructions till the next one. 2016 * See disasm_line__new() and struct disasm_line::line_nr. 2017 */ 2018 if (symbol__parse_objdump_line(sym, args, expanded_line, 2019 &lineno, &fileloc) < 0) 2020 break; 2021 nline++; 2022 } 2023 free(line); 2024 free(fileloc); 2025 2026 err = finish_command(&objdump_process); 2027 if (err) 2028 pr_err("Error running %s\n", command); 2029 2030 if (nline == 0) { 2031 err = -1; 2032 pr_err("No output from %s\n", command); 2033 } 2034 2035 /* 2036 * kallsyms does not have symbol sizes so there may a nop at the end. 2037 * Remove it. 2038 */ 2039 if (dso__is_kcore(dso)) 2040 delete_last_nop(sym); 2041 2042 fclose(file); 2043 2044 out_close_stdout: 2045 close(objdump_process.out); 2046 2047 out_free_command: 2048 free(command); 2049 2050 out_remove_tmp: 2051 if (decomp) 2052 unlink(symfs_filename); 2053 2054 if (delete_extract) 2055 kcore_extract__delete(&kce); 2056 2057 return err; 2058 } 2059