1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <ctype.h> 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <inttypes.h> 6 #include <libgen.h> 7 #include <regex.h> 8 #include <stdlib.h> 9 #include <unistd.h> 10 11 #include <linux/string.h> 12 #include <subcmd/run-command.h> 13 14 #include "annotate.h" 15 #include "annotate-data.h" 16 #include "build-id.h" 17 #include "debug.h" 18 #include "disasm.h" 19 #include "disasm_bpf.h" 20 #include "dso.h" 21 #include "env.h" 22 #include "evsel.h" 23 #include "map.h" 24 #include "maps.h" 25 #include "namespaces.h" 26 #include "srcline.h" 27 #include "symbol.h" 28 #include "util.h" 29 30 static regex_t file_lineno; 31 32 /* These can be referred from the arch-dependent code */ 33 static struct ins_ops call_ops; 34 static struct ins_ops dec_ops; 35 static struct ins_ops jump_ops; 36 static struct ins_ops mov_ops; 37 static struct ins_ops nop_ops; 38 static struct ins_ops lock_ops; 39 static struct ins_ops ret_ops; 40 static struct ins_ops load_store_ops; 41 static struct ins_ops arithmetic_ops; 42 43 static int jump__scnprintf(struct ins *ins, char *bf, size_t size, 44 struct ins_operands *ops, int max_ins_name); 45 static int call__scnprintf(struct ins *ins, char *bf, size_t size, 46 struct ins_operands *ops, int max_ins_name); 47 48 static void ins__sort(struct arch *arch); 49 static int disasm_line__parse(char *line, const char **namep, char **rawp); 50 static int disasm_line__parse_powerpc(struct disasm_line *dl); 51 52 static __attribute__((constructor)) void symbol__init_regexpr(void) 53 { 54 regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); 55 } 56 57 static int arch__grow_instructions(struct arch *arch) 58 { 59 struct ins *new_instructions; 60 size_t new_nr_allocated; 61 62 if (arch->nr_instructions_allocated == 0 && arch->instructions) 63 goto grow_from_non_allocated_table; 64 65 new_nr_allocated = arch->nr_instructions_allocated + 128; 66 new_instructions = realloc(arch->instructions, new_nr_allocated 
* sizeof(struct ins)); 67 if (new_instructions == NULL) 68 return -1; 69 70 out_update_instructions: 71 arch->instructions = new_instructions; 72 arch->nr_instructions_allocated = new_nr_allocated; 73 return 0; 74 75 grow_from_non_allocated_table: 76 new_nr_allocated = arch->nr_instructions + 128; 77 new_instructions = calloc(new_nr_allocated, sizeof(struct ins)); 78 if (new_instructions == NULL) 79 return -1; 80 81 memcpy(new_instructions, arch->instructions, arch->nr_instructions); 82 goto out_update_instructions; 83 } 84 85 static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops) 86 { 87 struct ins *ins; 88 89 if (arch->nr_instructions == arch->nr_instructions_allocated && 90 arch__grow_instructions(arch)) 91 return -1; 92 93 ins = &arch->instructions[arch->nr_instructions]; 94 ins->name = strdup(name); 95 if (!ins->name) 96 return -1; 97 98 ins->ops = ops; 99 arch->nr_instructions++; 100 101 ins__sort(arch); 102 return 0; 103 } 104 105 #include "arch/arc/annotate/instructions.c" 106 #include "arch/arm/annotate/instructions.c" 107 #include "arch/arm64/annotate/instructions.c" 108 #include "arch/csky/annotate/instructions.c" 109 #include "arch/loongarch/annotate/instructions.c" 110 #include "arch/mips/annotate/instructions.c" 111 #include "arch/x86/annotate/instructions.c" 112 #include "arch/powerpc/annotate/instructions.c" 113 #include "arch/riscv64/annotate/instructions.c" 114 #include "arch/s390/annotate/instructions.c" 115 #include "arch/sparc/annotate/instructions.c" 116 117 static struct arch architectures[] = { 118 { 119 .name = "arc", 120 .init = arc__annotate_init, 121 }, 122 { 123 .name = "arm", 124 .init = arm__annotate_init, 125 }, 126 { 127 .name = "arm64", 128 .init = arm64__annotate_init, 129 }, 130 { 131 .name = "csky", 132 .init = csky__annotate_init, 133 }, 134 { 135 .name = "mips", 136 .init = mips__annotate_init, 137 .objdump = { 138 .comment_char = '#', 139 }, 140 }, 141 { 142 .name = "x86", 143 .init = 
x86__annotate_init, 144 .instructions = x86__instructions, 145 .nr_instructions = ARRAY_SIZE(x86__instructions), 146 .insn_suffix = "bwlq", 147 .objdump = { 148 .comment_char = '#', 149 .register_char = '%', 150 .memory_ref_char = '(', 151 .imm_char = '$', 152 }, 153 #ifdef HAVE_DWARF_SUPPORT 154 .update_insn_state = update_insn_state_x86, 155 #endif 156 }, 157 { 158 .name = "powerpc", 159 .init = powerpc__annotate_init, 160 #ifdef HAVE_DWARF_SUPPORT 161 .update_insn_state = update_insn_state_powerpc, 162 #endif 163 }, 164 { 165 .name = "riscv64", 166 .init = riscv64__annotate_init, 167 }, 168 { 169 .name = "s390", 170 .init = s390__annotate_init, 171 .objdump = { 172 .comment_char = '#', 173 }, 174 }, 175 { 176 .name = "sparc", 177 .init = sparc__annotate_init, 178 .objdump = { 179 .comment_char = '#', 180 }, 181 }, 182 { 183 .name = "loongarch", 184 .init = loongarch__annotate_init, 185 .objdump = { 186 .comment_char = '#', 187 }, 188 }, 189 }; 190 191 static int arch__key_cmp(const void *name, const void *archp) 192 { 193 const struct arch *arch = archp; 194 195 return strcmp(name, arch->name); 196 } 197 198 static int arch__cmp(const void *a, const void *b) 199 { 200 const struct arch *aa = a; 201 const struct arch *ab = b; 202 203 return strcmp(aa->name, ab->name); 204 } 205 206 static void arch__sort(void) 207 { 208 const int nmemb = ARRAY_SIZE(architectures); 209 210 qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); 211 } 212 213 struct arch *arch__find(const char *name) 214 { 215 const int nmemb = ARRAY_SIZE(architectures); 216 static bool sorted; 217 218 if (!sorted) { 219 arch__sort(); 220 sorted = true; 221 } 222 223 return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); 224 } 225 226 bool arch__is(struct arch *arch, const char *name) 227 { 228 return !strcmp(arch->name, name); 229 } 230 231 static void ins_ops__delete(struct ins_operands *ops) 232 { 233 if (ops == NULL) 234 return; 235 zfree(&ops->source.raw); 236 
zfree(&ops->source.name); 237 zfree(&ops->target.raw); 238 zfree(&ops->target.name); 239 } 240 241 static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, 242 struct ins_operands *ops, int max_ins_name) 243 { 244 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); 245 } 246 247 int ins__scnprintf(struct ins *ins, char *bf, size_t size, 248 struct ins_operands *ops, int max_ins_name) 249 { 250 if (ins->ops->scnprintf) 251 return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); 252 253 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 254 } 255 256 bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) 257 { 258 if (!arch || !arch->ins_is_fused) 259 return false; 260 261 return arch->ins_is_fused(arch, ins1, ins2); 262 } 263 264 static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 265 struct disasm_line *dl __maybe_unused) 266 { 267 char *endptr, *tok, *name; 268 struct map *map = ms->map; 269 struct addr_map_symbol target = { 270 .ms = { .map = map, }, 271 }; 272 273 ops->target.addr = strtoull(ops->raw, &endptr, 16); 274 275 name = strchr(endptr, '<'); 276 if (name == NULL) 277 goto indirect_call; 278 279 name++; 280 281 if (arch->objdump.skip_functions_char && 282 strchr(name, arch->objdump.skip_functions_char)) 283 return -1; 284 285 tok = strchr(name, '>'); 286 if (tok == NULL) 287 return -1; 288 289 *tok = '\0'; 290 ops->target.name = strdup(name); 291 *tok = '>'; 292 293 if (ops->target.name == NULL) 294 return -1; 295 find_target: 296 target.addr = map__objdump_2mem(map, ops->target.addr); 297 298 if (maps__find_ams(ms->maps, &target) == 0 && 299 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 300 ops->target.sym = target.ms.sym; 301 302 return 0; 303 304 indirect_call: 305 tok = strchr(endptr, '*'); 306 if (tok != NULL) { 307 endptr++; 308 309 /* Indirect call can use a non-rip register and offset: callq 
*0x8(%rbx). 310 * Do not parse such instruction. */ 311 if (strstr(endptr, "(%r") == NULL) 312 ops->target.addr = strtoull(endptr, NULL, 16); 313 } 314 goto find_target; 315 } 316 317 static int call__scnprintf(struct ins *ins, char *bf, size_t size, 318 struct ins_operands *ops, int max_ins_name) 319 { 320 if (ops->target.sym) 321 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 322 323 if (ops->target.addr == 0) 324 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 325 326 if (ops->target.name) 327 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name); 328 329 return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr); 330 } 331 332 static struct ins_ops call_ops = { 333 .parse = call__parse, 334 .scnprintf = call__scnprintf, 335 }; 336 337 bool ins__is_call(const struct ins *ins) 338 { 339 return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops; 340 } 341 342 /* 343 * Prevents from matching commas in the comment section, e.g.: 344 * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast 345 * 346 * and skip comma as part of function arguments, e.g.: 347 * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> 348 */ 349 static inline const char *validate_comma(const char *c, struct ins_operands *ops) 350 { 351 if (ops->jump.raw_comment && c > ops->jump.raw_comment) 352 return NULL; 353 354 if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) 355 return NULL; 356 357 return c; 358 } 359 360 static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 361 struct disasm_line *dl __maybe_unused) 362 { 363 struct map *map = ms->map; 364 struct symbol *sym = ms->sym; 365 struct addr_map_symbol target = { 366 .ms = { .map = map, }, 367 }; 368 const char *c = strchr(ops->raw, ','); 369 u64 start, end; 370 371 ops->jump.raw_comment = strchr(ops->raw, 
arch->objdump.comment_char); 372 ops->jump.raw_func_start = strchr(ops->raw, '<'); 373 374 c = validate_comma(c, ops); 375 376 /* 377 * Examples of lines to parse for the _cpp_lex_token@@Base 378 * function: 379 * 380 * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> 381 * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> 382 * 383 * The first is a jump to an offset inside the same function, 384 * the second is to another function, i.e. that 0xa72 is an 385 * offset in the cpp_named_operator2name@@base function. 386 */ 387 /* 388 * skip over possible up to 2 operands to get to address, e.g.: 389 * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> 390 */ 391 if (c++ != NULL) { 392 ops->target.addr = strtoull(c, NULL, 16); 393 if (!ops->target.addr) { 394 c = strchr(c, ','); 395 c = validate_comma(c, ops); 396 if (c++ != NULL) 397 ops->target.addr = strtoull(c, NULL, 16); 398 } 399 } else { 400 ops->target.addr = strtoull(ops->raw, NULL, 16); 401 } 402 403 target.addr = map__objdump_2mem(map, ops->target.addr); 404 start = map__unmap_ip(map, sym->start); 405 end = map__unmap_ip(map, sym->end); 406 407 ops->target.outside = target.addr < start || target.addr > end; 408 409 /* 410 * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): 411 412 cpp_named_operator2name@@Base+0xa72 413 414 * Point to a place that is after the cpp_named_operator2name 415 * boundaries, i.e. in the ELF symbol table for cc1 416 * cpp_named_operator2name is marked as being 32-bytes long, but it in 417 * fact is much larger than that, so we seem to need a symbols__find() 418 * routine that looks for >= current->start and < next_symbol->start, 419 * possibly just for C++ objects? 420 * 421 * For now lets just make some progress by marking jumps to outside the 422 * current function as call like. 423 * 424 * Actual navigation will come next, with further understanding of how 425 * the symbol searching and disassembly should be done. 
426 */ 427 if (maps__find_ams(ms->maps, &target) == 0 && 428 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 429 ops->target.sym = target.ms.sym; 430 431 if (!ops->target.outside) { 432 ops->target.offset = target.addr - start; 433 ops->target.offset_avail = true; 434 } else { 435 ops->target.offset_avail = false; 436 } 437 438 return 0; 439 } 440 441 static int jump__scnprintf(struct ins *ins, char *bf, size_t size, 442 struct ins_operands *ops, int max_ins_name) 443 { 444 const char *c; 445 446 if (!ops->target.addr || ops->target.offset < 0) 447 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 448 449 if (ops->target.outside && ops->target.sym != NULL) 450 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 451 452 c = strchr(ops->raw, ','); 453 c = validate_comma(c, ops); 454 455 if (c != NULL) { 456 const char *c2 = strchr(c + 1, ','); 457 458 c2 = validate_comma(c2, ops); 459 /* check for 3-op insn */ 460 if (c2 != NULL) 461 c = c2; 462 c++; 463 464 /* mirror arch objdump's space-after-comma style */ 465 if (*c == ' ') 466 c++; 467 } 468 469 return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, 470 ins->name, c ? c - ops->raw : 0, ops->raw, 471 ops->target.offset); 472 } 473 474 static void jump__delete(struct ins_operands *ops __maybe_unused) 475 { 476 /* 477 * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the 478 * raw string, don't free them. 
479 */ 480 } 481 482 static struct ins_ops jump_ops = { 483 .free = jump__delete, 484 .parse = jump__parse, 485 .scnprintf = jump__scnprintf, 486 }; 487 488 bool ins__is_jump(const struct ins *ins) 489 { 490 return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops; 491 } 492 493 static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) 494 { 495 char *endptr, *name, *t; 496 497 if (strstr(raw, "(%rip)") == NULL) 498 return 0; 499 500 *addrp = strtoull(comment, &endptr, 16); 501 if (endptr == comment) 502 return 0; 503 name = strchr(endptr, '<'); 504 if (name == NULL) 505 return -1; 506 507 name++; 508 509 t = strchr(name, '>'); 510 if (t == NULL) 511 return 0; 512 513 *t = '\0'; 514 *namep = strdup(name); 515 *t = '>'; 516 517 return 0; 518 } 519 520 static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 521 struct disasm_line *dl __maybe_unused) 522 { 523 ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); 524 if (ops->locked.ops == NULL) 525 return 0; 526 527 if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) 528 goto out_free_ops; 529 530 ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0); 531 532 if (ops->locked.ins.ops == NULL) 533 goto out_free_ops; 534 535 if (ops->locked.ins.ops->parse && 536 ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0) 537 goto out_free_ops; 538 539 return 0; 540 541 out_free_ops: 542 zfree(&ops->locked.ops); 543 return 0; 544 } 545 546 static int lock__scnprintf(struct ins *ins, char *bf, size_t size, 547 struct ins_operands *ops, int max_ins_name) 548 { 549 int printed; 550 551 if (ops->locked.ins.ops == NULL) 552 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 553 554 printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); 555 return printed + ins__scnprintf(&ops->locked.ins, bf + printed, 556 size - printed, ops->locked.ops, max_ins_name); 557 } 558 559 static void lock__delete(struct 
ins_operands *ops) 560 { 561 struct ins *ins = &ops->locked.ins; 562 563 if (ins->ops && ins->ops->free) 564 ins->ops->free(ops->locked.ops); 565 else 566 ins_ops__delete(ops->locked.ops); 567 568 zfree(&ops->locked.ops); 569 zfree(&ops->target.raw); 570 zfree(&ops->target.name); 571 } 572 573 static struct ins_ops lock_ops = { 574 .free = lock__delete, 575 .parse = lock__parse, 576 .scnprintf = lock__scnprintf, 577 }; 578 579 /* 580 * Check if the operand has more than one registers like x86 SIB addressing: 581 * 0x1234(%rax, %rbx, 8) 582 * 583 * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check 584 * the input string after 'memory_ref_char' if exists. 585 */ 586 static bool check_multi_regs(struct arch *arch, const char *op) 587 { 588 int count = 0; 589 590 if (arch->objdump.register_char == 0) 591 return false; 592 593 if (arch->objdump.memory_ref_char) { 594 op = strchr(op, arch->objdump.memory_ref_char); 595 if (op == NULL) 596 return false; 597 } 598 599 while ((op = strchr(op, arch->objdump.register_char)) != NULL) { 600 count++; 601 op++; 602 } 603 604 return count > 1; 605 } 606 607 static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, 608 struct disasm_line *dl __maybe_unused) 609 { 610 char *s = strchr(ops->raw, ','), *target, *comment, prev; 611 612 if (s == NULL) 613 return -1; 614 615 *s = '\0'; 616 617 /* 618 * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) 619 * then it needs to have the closing parenthesis. 
620 */ 621 if (strchr(ops->raw, '(')) { 622 *s = ','; 623 s = strchr(ops->raw, ')'); 624 if (s == NULL || s[1] != ',') 625 return -1; 626 *++s = '\0'; 627 } 628 629 ops->source.raw = strdup(ops->raw); 630 *s = ','; 631 632 if (ops->source.raw == NULL) 633 return -1; 634 635 ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); 636 637 target = skip_spaces(++s); 638 comment = strchr(s, arch->objdump.comment_char); 639 640 if (comment != NULL) 641 s = comment - 1; 642 else 643 s = strchr(s, '\0') - 1; 644 645 while (s > target && isspace(s[0])) 646 --s; 647 s++; 648 prev = *s; 649 *s = '\0'; 650 651 ops->target.raw = strdup(target); 652 *s = prev; 653 654 if (ops->target.raw == NULL) 655 goto out_free_source; 656 657 ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); 658 659 if (comment == NULL) 660 return 0; 661 662 comment = skip_spaces(comment); 663 comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); 664 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 665 666 return 0; 667 668 out_free_source: 669 zfree(&ops->source.raw); 670 return -1; 671 } 672 673 static int mov__scnprintf(struct ins *ins, char *bf, size_t size, 674 struct ins_operands *ops, int max_ins_name) 675 { 676 return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, 677 ops->source.name ?: ops->source.raw, 678 ops->target.name ?: ops->target.raw); 679 } 680 681 static struct ins_ops mov_ops = { 682 .parse = mov__parse, 683 .scnprintf = mov__scnprintf, 684 }; 685 686 #define PPC_22_30(R) (((R) >> 1) & 0x1ff) 687 #define MINUS_EXT_XO_FORM 234 688 #define SUB_EXT_XO_FORM 232 689 #define ADD_ZERO_EXT_XO_FORM 202 690 #define SUB_ZERO_EXT_XO_FORM 200 691 692 static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size, 693 struct ins_operands *ops, int max_ins_name) 694 { 695 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 696 ops->raw); 697 } 698 699 /* 700 * Sets the fields: 
multi_regs and "mem_ref". 701 * "mem_ref" is set for ops->source which is later used to 702 * fill the objdump->memory_ref-char field. This ops is currently 703 * used by powerpc and since binary instruction code is used to 704 * extract opcode, regs and offset, no other parsing is needed here. 705 * 706 * Dont set multi regs for 4 cases since it has only one operand 707 * for source: 708 * - Add to Minus One Extended XO-form ( Ex: addme, addmeo ) 709 * - Subtract From Minus One Extended XO-form ( Ex: subfme ) 710 * - Add to Zero Extended XO-form ( Ex: addze, addzeo ) 711 * - Subtract From Zero Extended XO-form ( Ex: subfze ) 712 */ 713 static int arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, 714 struct map_symbol *ms __maybe_unused, struct disasm_line *dl) 715 { 716 int opcode = PPC_OP(dl->raw.raw_insn); 717 718 ops->source.mem_ref = false; 719 if (opcode == 31) { 720 if ((opcode != MINUS_EXT_XO_FORM) && (opcode != SUB_EXT_XO_FORM) \ 721 && (opcode != ADD_ZERO_EXT_XO_FORM) && (opcode != SUB_ZERO_EXT_XO_FORM)) 722 ops->source.multi_regs = true; 723 } 724 725 ops->target.mem_ref = false; 726 ops->target.multi_regs = false; 727 728 return 0; 729 } 730 731 static struct ins_ops arithmetic_ops = { 732 .parse = arithmetic__parse, 733 .scnprintf = arithmetic__scnprintf, 734 }; 735 736 static int load_store__scnprintf(struct ins *ins, char *bf, size_t size, 737 struct ins_operands *ops, int max_ins_name) 738 { 739 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 740 ops->raw); 741 } 742 743 /* 744 * Sets the fields: multi_regs and "mem_ref". 745 * "mem_ref" is set for ops->source which is later used to 746 * fill the objdump->memory_ref-char field. 
 * This ops is currently
 * used by powerpc and since binary instruction code is used to
 * extract opcode, regs and offset, no other parsing is needed here
 */
static int load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
		struct map_symbol *ms __maybe_unused, struct disasm_line *dl __maybe_unused)
{
	ops->source.mem_ref = true;
	ops->source.multi_regs = false;
	/* opcode 31 is of X form */
	if (PPC_OP(dl->raw.raw_insn) == 31)
		ops->source.multi_regs = true;

	ops->target.mem_ref = false;
	ops->target.multi_regs = false;

	return 0;
}

static struct ins_ops load_store_ops = {
	.parse     = load_store__parse,
	.scnprintf = load_store__scnprintf,
};

/*
 * Parse a single-operand instruction: the operand is the text up to the
 * first whitespace in ops->raw and is duplicated into ops->target.raw.
 * When a comment follows, comment__symbol() is given the chance to fill in
 * ops->target.addr / ops->target.name.
 *
 * Returns 0 on success, -1 when the operand could not be duplicated.
 */
static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused,
		struct disasm_line *dl __maybe_unused)
{
	char *target, *comment, *s, prev;

	target = s = ops->raw;

	while (s[0] != '\0' && !isspace(s[0]))
		++s;
	/* temporarily terminate after the operand so only it is duplicated */
	prev = *s;
	*s = '\0';

	ops->target.raw = strdup(target);
	*s = prev;

	if (ops->target.raw == NULL)
		return -1;

	comment = strchr(s, arch->objdump.comment_char);
	if (comment == NULL)
		return 0;

	comment = skip_spaces(comment);
	/* comment + 1 steps over the comment character itself */
	comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);

	return 0;
}

/* Print the mnemonic followed by the resolved target name, or the raw operand. */
static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
			   struct ins_operands *ops, int max_ins_name)
{
	return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
			 ops->target.name ?: ops->target.raw);
}

static struct ins_ops dec_ops = {
	.parse	   = dec__parse,
	.scnprintf = dec__scnprintf,
};

/* Every instruction using nop_ops is printed as a padded "nop", without operands. */
static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
			  struct ins_operands *ops __maybe_unused, int max_ins_name)
{
	return scnprintf(bf, size, "%-*s", max_ins_name, "nop");
}

static struct ins_ops nop_ops = {
	.scnprintf = nop__scnprintf,
};

static struct ins_ops ret_ops = {
	.scnprintf = ins__raw_scnprintf,
};

bool ins__is_nop(const struct ins *ins)
{
	return ins->ops == &nop_ops;
}

bool ins__is_ret(const struct ins *ins)
{
	return ins->ops == &ret_ops;
}

bool ins__is_lock(const struct ins *ins)
{
	return ins->ops == &lock_ops;
}

/* bsearch() comparator: key is an instruction name, element is a struct ins. */
static int ins__key_cmp(const void *name, const void *insp)
{
	const struct ins *ins = insp;

	return strcmp(name, ins->name);
}

/* qsort() comparator ordering struct ins entries by name. */
static int ins__cmp(const void *a, const void *b)
{
	const struct ins *ia = a;
	const struct ins *ib = b;

	return strcmp(ia->name, ib->name);
}

/* Sort the arch instruction table by name so it can be searched with bsearch(). */
static void ins__sort(struct arch *arch)
{
	const int nmemb = arch->nr_instructions;

	qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
}

/*
 * Look up the ops for an instruction name in the arch table.
 *
 * For powerpc the ops are first derived from the raw instruction in @dl.
 * Otherwise the (lazily sorted) table is searched by name and, when the arch
 * defines insn_suffix, searched again with a trailing suffix character
 * removed.
 *
 * Returns the ops, or NULL when nothing matched.
 */
static struct ins_ops *__ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
{
	struct ins *ins;
	const int nmemb = arch->nr_instructions;

	if (arch__is(arch, "powerpc")) {
		/*
		 * For powerpc, identify the instruction ops
		 * from the opcode using raw_insn.
		 */
		struct ins_ops *ops;

		ops = check_ppc_insn(dl);
		if (ops)
			return ops;
	}

	if (!arch->sorted_instructions) {
		ins__sort(arch);
		arch->sorted_instructions = true;
	}

	ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
	if (ins)
		return ins->ops;

	if (arch->insn_suffix) {
		char tmp[32];
		char suffix;
		size_t len = strlen(name);

		/* the length check also guarantees the strcpy() below fits in tmp */
		if (len == 0 || len >= sizeof(tmp))
			return NULL;

		suffix = name[len - 1];
		if (strchr(arch->insn_suffix, suffix) == NULL)
			return NULL;

		strcpy(tmp, name);
		tmp[len - 1] = '\0'; /* remove the suffix and check again */

		ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
	}
	return ins ? ins->ops : NULL;
}

/*
 * Find the ops for an instruction name; when the table lookup fails and the
 * arch provides associate_instruction_ops(), let it supply the ops.
 */
struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
{
	struct ins_ops *ops = __ins__find(arch, name, dl);

	if (!ops && arch->associate_instruction_ops)
		ops = arch->associate_instruction_ops(arch, name);

	return ops;
}

/*
 * Attach instruction ops to the line and run their parser on the operands.
 * The ops are dropped again (set to NULL) when parsing fails.
 */
static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
{
	dl->ins.ops = ins__find(arch, dl->ins.name, dl);

	if (!dl->ins.ops)
		return;

	if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0)
		dl->ins.ops = NULL;
}

/*
 * Split a disassembly line into instruction name and operand text.
 *
 * @line:  text to parse; it is modified in place
 * @namep: receives a strdup()'ed copy of the first whitespace-delimited word
 * @rawp:  receives a pointer into @line at the strim()'ed remainder
 *
 * Returns 0 on success, -1 when the line is empty or the name could not be
 * duplicated.
 */
static int disasm_line__parse(char *line, const char **namep, char **rawp)
{
	char tmp, *name = skip_spaces(line);

	if (name[0] == '\0')
		return -1;

	*rawp = name + 1;

	while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
		++*rawp;

	/* temporarily terminate after the name so only it is duplicated */
	tmp = (*rawp)[0];
	(*rawp)[0] = '\0';
	*namep = strdup(name);

	if (*namep == NULL)
		goto out;

	(*rawp)[0] = tmp;
	*rawp = strim(*rawp);

	return 0;

out:
	return -1;
}

/*
 * Parses the result captured from symbol__disassemble_*
Example, line read from DSO file in powerpc: 959 * line: 38 01 81 e8 960 * opcode: fetched from arch specific get_opcode_insn 961 * rawp_insn: e8810138 962 * 963 * rawp_insn is used later to extract the reg/offset fields 964 */ 965 #define PPC_OP(op) (((op) >> 26) & 0x3F) 966 #define RAW_BYTES 11 967 968 static int disasm_line__parse_powerpc(struct disasm_line *dl) 969 { 970 char *line = dl->al.line; 971 const char **namep = &dl->ins.name; 972 char **rawp = &dl->ops.raw; 973 char *tmp_raw_insn, *name_raw_insn = skip_spaces(line); 974 char *name = skip_spaces(name_raw_insn + RAW_BYTES); 975 int objdump = 0; 976 977 if (strlen(line) > RAW_BYTES) 978 objdump = 1; 979 980 if (name_raw_insn[0] == '\0') 981 return -1; 982 983 if (objdump) { 984 disasm_line__parse(name, namep, rawp); 985 } else 986 *namep = ""; 987 988 tmp_raw_insn = strndup(name_raw_insn, 11); 989 if (tmp_raw_insn == NULL) 990 return -1; 991 992 remove_spaces(tmp_raw_insn); 993 994 sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn); 995 if (objdump) 996 dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn); 997 998 return 0; 999 } 1000 1001 static void annotation_line__init(struct annotation_line *al, 1002 struct annotate_args *args, 1003 int nr) 1004 { 1005 al->offset = args->offset; 1006 al->line = strdup(args->line); 1007 al->line_nr = args->line_nr; 1008 al->fileloc = args->fileloc; 1009 al->data_nr = nr; 1010 } 1011 1012 static void annotation_line__exit(struct annotation_line *al) 1013 { 1014 zfree_srcline(&al->path); 1015 zfree(&al->line); 1016 zfree(&al->cycles); 1017 } 1018 1019 static size_t disasm_line_size(int nr) 1020 { 1021 struct annotation_line *al; 1022 1023 return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); 1024 } 1025 1026 /* 1027 * Allocating the disasm annotation line data with 1028 * following structure: 1029 * 1030 * ------------------------------------------- 1031 * struct disasm_line | struct annotation_line 1032 * ------------------------------------------- 1033 * 1034 * We 
have 'struct annotation_line' member as last member 1035 * of 'struct disasm_line' to have an easy access. 1036 */ 1037 struct disasm_line *disasm_line__new(struct annotate_args *args) 1038 { 1039 struct disasm_line *dl = NULL; 1040 struct annotation *notes = symbol__annotation(args->ms.sym); 1041 int nr = notes->src->nr_events; 1042 1043 dl = zalloc(disasm_line_size(nr)); 1044 if (!dl) 1045 return NULL; 1046 1047 annotation_line__init(&dl->al, args, nr); 1048 if (dl->al.line == NULL) 1049 goto out_delete; 1050 1051 if (args->offset != -1) { 1052 if (arch__is(args->arch, "powerpc")) { 1053 if (disasm_line__parse_powerpc(dl) < 0) 1054 goto out_free_line; 1055 } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) 1056 goto out_free_line; 1057 1058 disasm_line__init_ins(dl, args->arch, &args->ms); 1059 } 1060 1061 return dl; 1062 1063 out_free_line: 1064 zfree(&dl->al.line); 1065 out_delete: 1066 free(dl); 1067 return NULL; 1068 } 1069 1070 void disasm_line__free(struct disasm_line *dl) 1071 { 1072 if (dl->ins.ops && dl->ins.ops->free) 1073 dl->ins.ops->free(&dl->ops); 1074 else 1075 ins_ops__delete(&dl->ops); 1076 zfree(&dl->ins.name); 1077 annotation_line__exit(&dl->al); 1078 free(dl); 1079 } 1080 1081 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) 1082 { 1083 if (raw || !dl->ins.ops) 1084 return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw); 1085 1086 return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); 1087 } 1088 1089 /* 1090 * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) 1091 * which looks like following 1092 * 1093 * 0000000000415500 <_init>: 1094 * 415500: sub $0x8,%rsp 1095 * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8> 1096 * 41550b: test %rax,%rax 1097 * 41550e: je 415515 <_init+0x15> 1098 * 415510: callq 416e70 <__gmon_start__@plt> 1099 * 415515: add $0x8,%rsp 1100 * 415519: retq 1101 * 1102 * it 
will be parsed and saved into struct disasm_line as 1103 * <offset> <name> <ops.raw> 1104 * 1105 * The offset will be a relative offset from the start of the symbol and -1 1106 * means that it's not a disassembly line so should be treated differently. 1107 * The ops.raw part will be parsed further according to type of the instruction. 1108 */ 1109 static int symbol__parse_objdump_line(struct symbol *sym, 1110 struct annotate_args *args, 1111 char *parsed_line, int *line_nr, char **fileloc) 1112 { 1113 struct map *map = args->ms.map; 1114 struct annotation *notes = symbol__annotation(sym); 1115 struct disasm_line *dl; 1116 char *tmp; 1117 s64 line_ip, offset = -1; 1118 regmatch_t match[2]; 1119 1120 /* /filename:linenr ? Save line number and ignore. */ 1121 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { 1122 *line_nr = atoi(parsed_line + match[1].rm_so); 1123 free(*fileloc); 1124 *fileloc = strdup(parsed_line); 1125 return 0; 1126 } 1127 1128 /* Process hex address followed by ':'. 
*/ 1129 line_ip = strtoull(parsed_line, &tmp, 16); 1130 if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { 1131 u64 start = map__rip_2objdump(map, sym->start), 1132 end = map__rip_2objdump(map, sym->end); 1133 1134 offset = line_ip - start; 1135 if ((u64)line_ip < start || (u64)line_ip >= end) 1136 offset = -1; 1137 else 1138 parsed_line = tmp + 1; 1139 } 1140 1141 args->offset = offset; 1142 args->line = parsed_line; 1143 args->line_nr = *line_nr; 1144 args->fileloc = *fileloc; 1145 args->ms.sym = sym; 1146 1147 dl = disasm_line__new(args); 1148 (*line_nr)++; 1149 1150 if (dl == NULL) 1151 return -1; 1152 1153 if (!disasm_line__has_local_offset(dl)) { 1154 dl->ops.target.offset = dl->ops.target.addr - 1155 map__rip_2objdump(map, sym->start); 1156 dl->ops.target.offset_avail = true; 1157 } 1158 1159 /* kcore has no symbols, so add the call target symbol */ 1160 if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { 1161 struct addr_map_symbol target = { 1162 .addr = dl->ops.target.addr, 1163 .ms = { .map = map, }, 1164 }; 1165 1166 if (!maps__find_ams(args->ms.maps, &target) && 1167 target.ms.sym->start == target.al_addr) 1168 dl->ops.target.sym = target.ms.sym; 1169 } 1170 1171 annotation_line__add(&dl->al, ¬es->src->source); 1172 return 0; 1173 } 1174 1175 static void delete_last_nop(struct symbol *sym) 1176 { 1177 struct annotation *notes = symbol__annotation(sym); 1178 struct list_head *list = ¬es->src->source; 1179 struct disasm_line *dl; 1180 1181 while (!list_empty(list)) { 1182 dl = list_entry(list->prev, struct disasm_line, al.node); 1183 1184 if (dl->ins.ops) { 1185 if (!ins__is_nop(&dl->ins)) 1186 return; 1187 } else { 1188 if (!strstr(dl->al.line, " nop ") && 1189 !strstr(dl->al.line, " nopl ") && 1190 !strstr(dl->al.line, " nopw ")) 1191 return; 1192 } 1193 1194 list_del_init(&dl->al.node); 1195 disasm_line__free(dl); 1196 } 1197 } 1198 1199 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t 
buflen) 1200 { 1201 struct dso *dso = map__dso(ms->map); 1202 1203 BUG_ON(buflen == 0); 1204 1205 if (errnum >= 0) { 1206 str_error_r(errnum, buf, buflen); 1207 return 0; 1208 } 1209 1210 switch (errnum) { 1211 case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { 1212 char bf[SBUILD_ID_SIZE + 15] = " with build id "; 1213 char *build_id_msg = NULL; 1214 1215 if (dso__has_build_id(dso)) { 1216 build_id__sprintf(dso__bid(dso), bf + 15); 1217 build_id_msg = bf; 1218 } 1219 scnprintf(buf, buflen, 1220 "No vmlinux file%s\nwas found in the path.\n\n" 1221 "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" 1222 "Please use:\n\n" 1223 " perf buildid-cache -vu vmlinux\n\n" 1224 "or:\n\n" 1225 " --vmlinux vmlinux\n", build_id_msg ?: ""); 1226 } 1227 break; 1228 case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: 1229 scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); 1230 break; 1231 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: 1232 scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); 1233 break; 1234 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: 1235 scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); 1236 break; 1237 case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: 1238 scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso)); 1239 break; 1240 case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: 1241 scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", 1242 dso__long_name(dso)); 1243 break; 1244 default: 1245 scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); 1246 break; 1247 } 1248 1249 return 0; 1250 } 1251 1252 static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size) 1253 { 1254 char linkname[PATH_MAX]; 1255 char *build_id_filename; 1256 char *build_id_path = NULL; 1257 char *pos; 1258 int len; 1259 1260 if 
(dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS && 1261 !dso__is_kcore(dso)) 1262 return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; 1263 1264 build_id_filename = dso__build_id_filename(dso, NULL, 0, false); 1265 if (build_id_filename) { 1266 __symbol__join_symfs(filename, filename_size, build_id_filename); 1267 free(build_id_filename); 1268 } else { 1269 if (dso__has_build_id(dso)) 1270 return ENOMEM; 1271 goto fallback; 1272 } 1273 1274 build_id_path = strdup(filename); 1275 if (!build_id_path) 1276 return ENOMEM; 1277 1278 /* 1279 * old style build-id cache has name of XX/XXXXXXX.. while 1280 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. 1281 * extract the build-id part of dirname in the new style only. 1282 */ 1283 pos = strrchr(build_id_path, '/'); 1284 if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) 1285 dirname(build_id_path); 1286 1287 if (dso__is_kcore(dso)) 1288 goto fallback; 1289 1290 len = readlink(build_id_path, linkname, sizeof(linkname) - 1); 1291 if (len < 0) 1292 goto fallback; 1293 1294 linkname[len] = '\0'; 1295 if (strstr(linkname, DSO__NAME_KALLSYMS) || 1296 access(filename, R_OK)) { 1297 fallback: 1298 /* 1299 * If we don't have build-ids or the build-id file isn't in the 1300 * cache, or is just a kallsyms file, well, lets hope that this 1301 * DSO is the same as when 'perf record' ran. 
1302 */ 1303 if (dso__kernel(dso) && dso__long_name(dso)[0] == '/') 1304 snprintf(filename, filename_size, "%s", dso__long_name(dso)); 1305 else 1306 __symbol__join_symfs(filename, filename_size, dso__long_name(dso)); 1307 1308 mutex_lock(dso__lock(dso)); 1309 if (access(filename, R_OK) && errno == ENOENT && dso__nsinfo(dso)) { 1310 char *new_name = dso__filename_with_chroot(dso, filename); 1311 if (new_name) { 1312 strlcpy(filename, new_name, filename_size); 1313 free(new_name); 1314 } 1315 } 1316 mutex_unlock(dso__lock(dso)); 1317 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1318 dso__set_binary_type(dso, DSO_BINARY_TYPE__BUILD_ID_CACHE); 1319 } 1320 1321 free(build_id_path); 1322 return 0; 1323 } 1324 1325 #ifdef HAVE_LIBCAPSTONE_SUPPORT 1326 #include <capstone/capstone.h> 1327 1328 int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style); 1329 1330 static int open_capstone_handle(struct annotate_args *args, bool is_64bit, 1331 csh *handle) 1332 { 1333 struct annotation_options *opt = args->options; 1334 cs_mode mode = is_64bit ? CS_MODE_64 : CS_MODE_32; 1335 1336 /* TODO: support more architectures */ 1337 if (!arch__is(args->arch, "x86")) 1338 return -1; 1339 1340 if (cs_open(CS_ARCH_X86, mode, handle) != CS_ERR_OK) 1341 return -1; 1342 1343 if (!opt->disassembler_style || 1344 !strcmp(opt->disassembler_style, "att")) 1345 cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); 1346 1347 /* 1348 * Resolving address operands to symbols is implemented 1349 * on x86 by investigating instruction details. 
1350 */ 1351 cs_option(*handle, CS_OPT_DETAIL, CS_OPT_ON); 1352 1353 return 0; 1354 } 1355 1356 struct find_file_offset_data { 1357 u64 ip; 1358 u64 offset; 1359 }; 1360 1361 /* This will be called for each PHDR in an ELF binary */ 1362 static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg) 1363 { 1364 struct find_file_offset_data *data = arg; 1365 1366 if (start <= data->ip && data->ip < start + len) { 1367 data->offset = pgoff + data->ip - start; 1368 return 1; 1369 } 1370 return 0; 1371 } 1372 1373 static void print_capstone_detail(cs_insn *insn, char *buf, size_t len, 1374 struct annotate_args *args, u64 addr) 1375 { 1376 int i; 1377 struct map *map = args->ms.map; 1378 struct symbol *sym; 1379 1380 /* TODO: support more architectures */ 1381 if (!arch__is(args->arch, "x86")) 1382 return; 1383 1384 if (insn->detail == NULL) 1385 return; 1386 1387 for (i = 0; i < insn->detail->x86.op_count; i++) { 1388 cs_x86_op *op = &insn->detail->x86.operands[i]; 1389 u64 orig_addr; 1390 1391 if (op->type != X86_OP_MEM) 1392 continue; 1393 1394 /* only print RIP-based global symbols for now */ 1395 if (op->mem.base != X86_REG_RIP) 1396 continue; 1397 1398 /* get the target address */ 1399 orig_addr = addr + insn->size + op->mem.disp; 1400 addr = map__objdump_2mem(map, orig_addr); 1401 1402 if (dso__kernel(map__dso(map))) { 1403 /* 1404 * The kernel maps can be splitted into sections, 1405 * let's find the map first and the search the symbol. 
1406 */ 1407 map = maps__find(map__kmaps(map), addr); 1408 if (map == NULL) 1409 continue; 1410 } 1411 1412 /* convert it to map-relative address for search */ 1413 addr = map__map_ip(map, addr); 1414 1415 sym = map__find_symbol(map, addr); 1416 if (sym == NULL) 1417 continue; 1418 1419 if (addr == sym->start) { 1420 scnprintf(buf, len, "\t# %"PRIx64" <%s>", 1421 orig_addr, sym->name); 1422 } else { 1423 scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">", 1424 orig_addr, sym->name, addr - sym->start); 1425 } 1426 break; 1427 } 1428 } 1429 1430 static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym, 1431 struct annotate_args *args) 1432 { 1433 struct annotation *notes = symbol__annotation(sym); 1434 struct map *map = args->ms.map; 1435 struct dso *dso = map__dso(map); 1436 struct nscookie nsc; 1437 u64 start = map__rip_2objdump(map, sym->start); 1438 u64 end = map__rip_2objdump(map, sym->end); 1439 u64 len = end - start; 1440 u64 offset; 1441 int i, fd, count; 1442 bool is_64bit = false; 1443 bool needs_cs_close = false; 1444 u8 *buf = NULL; 1445 struct find_file_offset_data data = { 1446 .ip = start, 1447 }; 1448 csh handle; 1449 char disasm_buf[512]; 1450 struct disasm_line *dl; 1451 u32 *line; 1452 bool disassembler_style = false; 1453 1454 if (args->options->objdump_path) 1455 return -1; 1456 1457 nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); 1458 fd = open(filename, O_RDONLY); 1459 nsinfo__mountns_exit(&nsc); 1460 if (fd < 0) 1461 return -1; 1462 1463 if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, 1464 &is_64bit) == 0) 1465 goto err; 1466 1467 if (!args->options->disassembler_style || 1468 !strcmp(args->options->disassembler_style, "att")) 1469 disassembler_style = true; 1470 1471 if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0) 1472 goto err; 1473 1474 needs_cs_close = true; 1475 1476 buf = malloc(len); 1477 if (buf == NULL) 1478 goto err; 1479 1480 count = pread(fd, buf, 
len, data.offset); 1481 close(fd); 1482 fd = -1; 1483 1484 if ((u64)count != len) 1485 goto err; 1486 1487 line = (u32 *)buf; 1488 1489 /* add the function address and name */ 1490 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1491 start, sym->name); 1492 1493 args->offset = -1; 1494 args->line = disasm_buf; 1495 args->line_nr = 0; 1496 args->fileloc = NULL; 1497 args->ms.sym = sym; 1498 1499 dl = disasm_line__new(args); 1500 if (dl == NULL) 1501 goto err; 1502 1503 annotation_line__add(&dl->al, ¬es->src->source); 1504 1505 /* 1506 * TODO: enable disassm for powerpc 1507 * count = cs_disasm(handle, buf, len, start, len, &insn); 1508 * 1509 * For now, only binary code is saved in disassembled line 1510 * to be used in "type" and "typeoff" sort keys. Each raw code 1511 * is 32 bit instruction. So use "len/4" to get the number of 1512 * entries. 1513 */ 1514 count = len/4; 1515 1516 for (i = 0, offset = 0; i < count; i++) { 1517 args->offset = offset; 1518 sprintf(args->line, "%x", line[i]); 1519 1520 dl = disasm_line__new(args); 1521 if (dl == NULL) 1522 goto err; 1523 1524 annotation_line__add(&dl->al, ¬es->src->source); 1525 1526 offset += 4; 1527 } 1528 1529 /* It failed in the middle */ 1530 if (offset != len) { 1531 struct list_head *list = ¬es->src->source; 1532 1533 /* Discard all lines and fallback to objdump */ 1534 while (!list_empty(list)) { 1535 dl = list_first_entry(list, struct disasm_line, al.node); 1536 1537 list_del_init(&dl->al.node); 1538 disasm_line__free(dl); 1539 } 1540 count = -1; 1541 } 1542 1543 out: 1544 if (needs_cs_close) 1545 cs_close(&handle); 1546 free(buf); 1547 return count < 0 ? count : 0; 1548 1549 err: 1550 if (fd >= 0) 1551 close(fd); 1552 if (needs_cs_close) { 1553 struct disasm_line *tmp; 1554 1555 /* 1556 * It probably failed in the middle of the above loop. 1557 * Release any resources it might add. 
1558 */ 1559 list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { 1560 list_del(&dl->al.node); 1561 free(dl); 1562 } 1563 } 1564 count = -1; 1565 goto out; 1566 } 1567 1568 static int symbol__disassemble_capstone(char *filename, struct symbol *sym, 1569 struct annotate_args *args) 1570 { 1571 struct annotation *notes = symbol__annotation(sym); 1572 struct map *map = args->ms.map; 1573 struct dso *dso = map__dso(map); 1574 struct nscookie nsc; 1575 u64 start = map__rip_2objdump(map, sym->start); 1576 u64 end = map__rip_2objdump(map, sym->end); 1577 u64 len = end - start; 1578 u64 offset; 1579 int i, fd, count; 1580 bool is_64bit = false; 1581 bool needs_cs_close = false; 1582 u8 *buf = NULL; 1583 struct find_file_offset_data data = { 1584 .ip = start, 1585 }; 1586 csh handle; 1587 cs_insn *insn; 1588 char disasm_buf[512]; 1589 struct disasm_line *dl; 1590 1591 if (args->options->objdump_path) 1592 return -1; 1593 1594 nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); 1595 fd = open(filename, O_RDONLY); 1596 nsinfo__mountns_exit(&nsc); 1597 if (fd < 0) 1598 return -1; 1599 1600 if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, 1601 &is_64bit) == 0) 1602 goto err; 1603 1604 if (open_capstone_handle(args, is_64bit, &handle) < 0) 1605 goto err; 1606 1607 needs_cs_close = true; 1608 1609 buf = malloc(len); 1610 if (buf == NULL) 1611 goto err; 1612 1613 count = pread(fd, buf, len, data.offset); 1614 close(fd); 1615 fd = -1; 1616 1617 if ((u64)count != len) 1618 goto err; 1619 1620 /* add the function address and name */ 1621 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1622 start, sym->name); 1623 1624 args->offset = -1; 1625 args->line = disasm_buf; 1626 args->line_nr = 0; 1627 args->fileloc = NULL; 1628 args->ms.sym = sym; 1629 1630 dl = disasm_line__new(args); 1631 if (dl == NULL) 1632 goto err; 1633 1634 annotation_line__add(&dl->al, ¬es->src->source); 1635 1636 count = cs_disasm(handle, buf, len, start, len, &insn); 1637 for (i = 
0, offset = 0; i < count; i++) { 1638 int printed; 1639 1640 printed = scnprintf(disasm_buf, sizeof(disasm_buf), 1641 " %-7s %s", 1642 insn[i].mnemonic, insn[i].op_str); 1643 print_capstone_detail(&insn[i], disasm_buf + printed, 1644 sizeof(disasm_buf) - printed, args, 1645 start + offset); 1646 1647 args->offset = offset; 1648 args->line = disasm_buf; 1649 1650 dl = disasm_line__new(args); 1651 if (dl == NULL) 1652 goto err; 1653 1654 annotation_line__add(&dl->al, ¬es->src->source); 1655 1656 offset += insn[i].size; 1657 } 1658 1659 /* It failed in the middle: probably due to unknown instructions */ 1660 if (offset != len) { 1661 struct list_head *list = ¬es->src->source; 1662 1663 /* Discard all lines and fallback to objdump */ 1664 while (!list_empty(list)) { 1665 dl = list_first_entry(list, struct disasm_line, al.node); 1666 1667 list_del_init(&dl->al.node); 1668 disasm_line__free(dl); 1669 } 1670 count = -1; 1671 } 1672 1673 out: 1674 if (needs_cs_close) 1675 cs_close(&handle); 1676 free(buf); 1677 return count < 0 ? count : 0; 1678 1679 err: 1680 if (fd >= 0) 1681 close(fd); 1682 if (needs_cs_close) { 1683 struct disasm_line *tmp; 1684 1685 /* 1686 * It probably failed in the middle of the above loop. 1687 * Release any resources it might add. 
1688 */ 1689 list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { 1690 list_del(&dl->al.node); 1691 free(dl); 1692 } 1693 } 1694 count = -1; 1695 goto out; 1696 } 1697 #endif 1698 1699 static int symbol__disassemble_raw(char *filename, struct symbol *sym, 1700 struct annotate_args *args) 1701 { 1702 struct annotation *notes = symbol__annotation(sym); 1703 struct map *map = args->ms.map; 1704 struct dso *dso = map__dso(map); 1705 u64 start = map__rip_2objdump(map, sym->start); 1706 u64 end = map__rip_2objdump(map, sym->end); 1707 u64 len = end - start; 1708 u64 offset; 1709 int i, count; 1710 u8 *buf = NULL; 1711 char disasm_buf[512]; 1712 struct disasm_line *dl; 1713 u32 *line; 1714 1715 /* Return if objdump is specified explicitly */ 1716 if (args->options->objdump_path) 1717 return -1; 1718 1719 pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename); 1720 1721 buf = malloc(len); 1722 if (buf == NULL) 1723 goto err; 1724 1725 count = dso__data_read_offset(dso, NULL, sym->start, buf, len); 1726 1727 line = (u32 *)buf; 1728 1729 if ((u64)count != len) 1730 goto err; 1731 1732 /* add the function address and name */ 1733 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1734 start, sym->name); 1735 1736 args->offset = -1; 1737 args->line = disasm_buf; 1738 args->line_nr = 0; 1739 args->fileloc = NULL; 1740 args->ms.sym = sym; 1741 1742 dl = disasm_line__new(args); 1743 if (dl == NULL) 1744 goto err; 1745 1746 annotation_line__add(&dl->al, ¬es->src->source); 1747 1748 /* Each raw instruction is 4 byte */ 1749 count = len/4; 1750 1751 for (i = 0, offset = 0; i < count; i++) { 1752 args->offset = offset; 1753 sprintf(args->line, "%x", line[i]); 1754 dl = disasm_line__new(args); 1755 if (dl == NULL) 1756 goto err; 1757 1758 annotation_line__add(&dl->al, ¬es->src->source); 1759 offset += 4; 1760 } 1761 1762 /* It failed in the middle */ 1763 if (offset != len) { 1764 struct list_head *list = ¬es->src->source; 1765 1766 
/* Discard all lines and fallback to objdump */ 1767 while (!list_empty(list)) { 1768 dl = list_first_entry(list, struct disasm_line, al.node); 1769 1770 list_del_init(&dl->al.node); 1771 disasm_line__free(dl); 1772 } 1773 count = -1; 1774 } 1775 1776 out: 1777 free(buf); 1778 return count < 0 ? count : 0; 1779 1780 err: 1781 count = -1; 1782 goto out; 1783 } 1784 /* 1785 * Possibly create a new version of line with tabs expanded. Returns the 1786 * existing or new line, storage is updated if a new line is allocated. If 1787 * allocation fails then NULL is returned. 1788 */ 1789 static char *expand_tabs(char *line, char **storage, size_t *storage_len) 1790 { 1791 size_t i, src, dst, len, new_storage_len, num_tabs; 1792 char *new_line; 1793 size_t line_len = strlen(line); 1794 1795 for (num_tabs = 0, i = 0; i < line_len; i++) 1796 if (line[i] == '\t') 1797 num_tabs++; 1798 1799 if (num_tabs == 0) 1800 return line; 1801 1802 /* 1803 * Space for the line and '\0', less the leading and trailing 1804 * spaces. Each tab may introduce 7 additional spaces. 1805 */ 1806 new_storage_len = line_len + 1 + (num_tabs * 7); 1807 1808 new_line = malloc(new_storage_len); 1809 if (new_line == NULL) { 1810 pr_err("Failure allocating memory for tab expansion\n"); 1811 return NULL; 1812 } 1813 1814 /* 1815 * Copy regions starting at src and expand tabs. If there are two 1816 * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces 1817 * are inserted. 1818 */ 1819 for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { 1820 if (line[i] == '\t') { 1821 len = i - src; 1822 memcpy(&new_line[dst], &line[src], len); 1823 dst += len; 1824 new_line[dst++] = ' '; 1825 while (dst % 8 != 0) 1826 new_line[dst++] = ' '; 1827 src = i + 1; 1828 num_tabs--; 1829 } 1830 } 1831 1832 /* Expand the last region. 
*/ 1833 len = line_len - src; 1834 memcpy(&new_line[dst], &line[src], len); 1835 dst += len; 1836 new_line[dst] = '\0'; 1837 1838 free(*storage); 1839 *storage = new_line; 1840 *storage_len = new_storage_len; 1841 return new_line; 1842 } 1843 1844 int symbol__disassemble(struct symbol *sym, struct annotate_args *args) 1845 { 1846 struct annotation_options *opts = &annotate_opts; 1847 struct map *map = args->ms.map; 1848 struct dso *dso = map__dso(map); 1849 char *command; 1850 FILE *file; 1851 char symfs_filename[PATH_MAX]; 1852 struct kcore_extract kce; 1853 bool delete_extract = false; 1854 bool decomp = false; 1855 int lineno = 0; 1856 char *fileloc = NULL; 1857 int nline; 1858 char *line; 1859 size_t line_len; 1860 const char *objdump_argv[] = { 1861 "/bin/sh", 1862 "-c", 1863 NULL, /* Will be the objdump command to run. */ 1864 "--", 1865 NULL, /* Will be the symfs path. */ 1866 NULL, 1867 }; 1868 struct child_process objdump_process; 1869 int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); 1870 1871 if (err) 1872 return err; 1873 1874 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, 1875 symfs_filename, sym->name, map__unmap_ip(map, sym->start), 1876 map__unmap_ip(map, sym->end)); 1877 1878 pr_debug("annotating [%p] %30s : [%p] %30s\n", 1879 dso, dso__long_name(dso), sym, sym->name); 1880 1881 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { 1882 return symbol__disassemble_bpf(sym, args); 1883 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { 1884 return symbol__disassemble_bpf_image(sym, args); 1885 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1886 return -1; 1887 } else if (dso__is_kcore(dso)) { 1888 kce.kcore_filename = symfs_filename; 1889 kce.addr = map__rip_2objdump(map, sym->start); 1890 kce.offs = sym->start; 1891 kce.len = sym->end - sym->start; 1892 if (!kcore_extract__create(&kce)) { 1893 delete_extract = true; 1894 
strlcpy(symfs_filename, kce.extract_filename, 1895 sizeof(symfs_filename)); 1896 } 1897 } else if (dso__needs_decompress(dso)) { 1898 char tmp[KMOD_DECOMP_LEN]; 1899 1900 if (dso__decompress_kmodule_path(dso, symfs_filename, 1901 tmp, sizeof(tmp)) < 0) 1902 return -1; 1903 1904 decomp = true; 1905 strcpy(symfs_filename, tmp); 1906 } 1907 1908 /* 1909 * For powerpc data type profiling, use the dso__data_read_offset 1910 * to read raw instruction directly and interpret the binary code 1911 * to understand instructions and register fields. For sort keys as 1912 * type and typeoff, disassemble to mnemonic notation is 1913 * not required in case of powerpc. 1914 */ 1915 if (arch__is(args->arch, "powerpc")) { 1916 extern const char *sort_order; 1917 1918 if (sort_order && !strstr(sort_order, "sym")) { 1919 err = symbol__disassemble_raw(symfs_filename, sym, args); 1920 if (err == 0) 1921 goto out_remove_tmp; 1922 #ifdef HAVE_LIBCAPSTONE_SUPPORT 1923 err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); 1924 if (err == 0) 1925 goto out_remove_tmp; 1926 #endif 1927 } 1928 } 1929 1930 #ifdef HAVE_LIBCAPSTONE_SUPPORT 1931 err = symbol__disassemble_capstone(symfs_filename, sym, args); 1932 if (err == 0) 1933 goto out_remove_tmp; 1934 #endif 1935 1936 err = asprintf(&command, 1937 "%s %s%s --start-address=0x%016" PRIx64 1938 " --stop-address=0x%016" PRIx64 1939 " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", 1940 opts->objdump_path ?: "objdump", 1941 opts->disassembler_style ? "-M " : "", 1942 opts->disassembler_style ?: "", 1943 map__rip_2objdump(map, sym->start), 1944 map__rip_2objdump(map, sym->end), 1945 opts->show_linenr ? "-l" : "", 1946 opts->show_asm_raw ? "" : "--no-show-raw-insn", 1947 opts->annotate_src ? "-S" : "", 1948 opts->prefix ? "--prefix " : "", 1949 opts->prefix ? '"' : ' ', 1950 opts->prefix ?: "", 1951 opts->prefix ? '"' : ' ', 1952 opts->prefix_strip ? 
"--prefix-strip=" : "", 1953 opts->prefix_strip ?: ""); 1954 1955 if (err < 0) { 1956 pr_err("Failure allocating memory for the command to run\n"); 1957 goto out_remove_tmp; 1958 } 1959 1960 pr_debug("Executing: %s\n", command); 1961 1962 objdump_argv[2] = command; 1963 objdump_argv[4] = symfs_filename; 1964 1965 /* Create a pipe to read from for stdout */ 1966 memset(&objdump_process, 0, sizeof(objdump_process)); 1967 objdump_process.argv = objdump_argv; 1968 objdump_process.out = -1; 1969 objdump_process.err = -1; 1970 objdump_process.no_stderr = 1; 1971 if (start_command(&objdump_process)) { 1972 pr_err("Failure starting to run %s\n", command); 1973 err = -1; 1974 goto out_free_command; 1975 } 1976 1977 file = fdopen(objdump_process.out, "r"); 1978 if (!file) { 1979 pr_err("Failure creating FILE stream for %s\n", command); 1980 /* 1981 * If we were using debug info should retry with 1982 * original binary. 1983 */ 1984 err = -1; 1985 goto out_close_stdout; 1986 } 1987 1988 /* Storage for getline. */ 1989 line = NULL; 1990 line_len = 0; 1991 1992 nline = 0; 1993 while (!feof(file)) { 1994 const char *match; 1995 char *expanded_line; 1996 1997 if (getline(&line, &line_len, file) < 0 || !line) 1998 break; 1999 2000 /* Skip lines containing "filename:" */ 2001 match = strstr(line, symfs_filename); 2002 if (match && match[strlen(symfs_filename)] == ':') 2003 continue; 2004 2005 expanded_line = strim(line); 2006 expanded_line = expand_tabs(expanded_line, &line, &line_len); 2007 if (!expanded_line) 2008 break; 2009 2010 /* 2011 * The source code line number (lineno) needs to be kept in 2012 * across calls to symbol__parse_objdump_line(), so that it 2013 * can associate it with the instructions till the next one. 2014 * See disasm_line__new() and struct disasm_line::line_nr. 
2015 */ 2016 if (symbol__parse_objdump_line(sym, args, expanded_line, 2017 &lineno, &fileloc) < 0) 2018 break; 2019 nline++; 2020 } 2021 free(line); 2022 free(fileloc); 2023 2024 err = finish_command(&objdump_process); 2025 if (err) 2026 pr_err("Error running %s\n", command); 2027 2028 if (nline == 0) { 2029 err = -1; 2030 pr_err("No output from %s\n", command); 2031 } 2032 2033 /* 2034 * kallsyms does not have symbol sizes so there may a nop at the end. 2035 * Remove it. 2036 */ 2037 if (dso__is_kcore(dso)) 2038 delete_last_nop(sym); 2039 2040 fclose(file); 2041 2042 out_close_stdout: 2043 close(objdump_process.out); 2044 2045 out_free_command: 2046 free(command); 2047 2048 out_remove_tmp: 2049 if (decomp) 2050 unlink(symfs_filename); 2051 2052 if (delete_extract) 2053 kcore_extract__delete(&kce); 2054 2055 return err; 2056 } 2057