1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <ctype.h> 3 #include <elf.h> 4 #ifndef EF_CSKY_ABIMASK 5 #define EF_CSKY_ABIMASK 0XF0000000 6 #endif 7 #include <errno.h> 8 #include <fcntl.h> 9 #include <inttypes.h> 10 #include <libgen.h> 11 #include <regex.h> 12 #include <stdlib.h> 13 #include <unistd.h> 14 15 #include <linux/string.h> 16 #include <subcmd/run-command.h> 17 18 #include "annotate.h" 19 #include "annotate-data.h" 20 #include "build-id.h" 21 #include "capstone.h" 22 #include "debug.h" 23 #include "disasm.h" 24 #include "dso.h" 25 #include "dwarf-regs.h" 26 #include "env.h" 27 #include "evsel.h" 28 #include "libbfd.h" 29 #include "llvm.h" 30 #include "map.h" 31 #include "maps.h" 32 #include "namespaces.h" 33 #include "srcline.h" 34 #include "symbol.h" 35 #include "thread.h" 36 #include "util.h" 37 38 static regex_t file_lineno; 39 40 /* These can be referred from the arch-dependent code */ 41 const struct ins_ops call_ops; 42 const struct ins_ops dec_ops; 43 const struct ins_ops jump_ops; 44 const struct ins_ops mov_ops; 45 const struct ins_ops nop_ops; 46 const struct ins_ops lock_ops; 47 const struct ins_ops ret_ops; 48 const struct ins_ops load_store_ops; 49 const struct ins_ops arithmetic_ops; 50 51 static void ins__sort(struct arch *arch); 52 static int disasm_line__parse(char *line, const char **namep, char **rawp); 53 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args); 54 55 static __attribute__((constructor)) void symbol__init_regexpr(void) 56 { 57 regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); 58 } 59 60 static int arch__grow_instructions(struct arch *arch) 61 { 62 struct ins *new_instructions; 63 size_t new_nr_allocated; 64 65 if (arch->nr_instructions_allocated == 0 && arch->instructions) 66 goto grow_from_non_allocated_table; 67 68 new_nr_allocated = arch->nr_instructions_allocated + 128; 69 new_instructions = realloc((void *)arch->instructions, 70 new_nr_allocated * sizeof(struct ins)); 71 if (new_instructions == NULL) 72 return -1; 73 74 out_update_instructions: 75 arch->instructions = new_instructions; 76 arch->nr_instructions_allocated = new_nr_allocated; 77 return 0; 78 79 grow_from_non_allocated_table: 80 new_nr_allocated = arch->nr_instructions + 128; 81 new_instructions = calloc(new_nr_allocated, sizeof(struct ins)); 82 if (new_instructions == NULL) 83 return -1; 84 85 memcpy(new_instructions, arch->instructions, arch->nr_instructions * sizeof(struct ins)); 86 goto out_update_instructions; 87 } 88 89 int arch__associate_ins_ops(struct arch *arch, const char *name, const struct ins_ops *ops) 90 { 91 struct ins *ins; 92 93 if (arch->nr_instructions == arch->nr_instructions_allocated && 94 arch__grow_instructions(arch)) 95 return -1; 96 97 ins = (struct ins *)&arch->instructions[arch->nr_instructions]; 98 ins->name = strdup(name); 99 if (!ins->name) 100 return -1; 101 102 ins->ops = ops; 103 arch->nr_instructions++; 104 105 ins__sort(arch); 106 return 0; 107 } 108 109 static int e_machine_and_eflags__cmp(const struct e_machine_and_e_flags *val1, 110 const struct e_machine_and_e_flags *val2) 111 { 112 if (val1->e_machine == val2->e_machine) { 113 if (val1->e_machine != EM_CSKY) 114 return 0; 115 if ((val1->e_flags & EF_CSKY_ABIMASK) < (val2->e_flags & EF_CSKY_ABIMASK)) 116 return -1; 117 return (val1->e_flags & EF_CSKY_ABIMASK) > (val2->e_flags & EF_CSKY_ABIMASK); 118 } 119 return val1->e_machine < val2->e_machine ? -1 : 1; 120 } 121 122 static int arch__key_cmp(const void *key, const void *archp) 123 { 124 const struct arch *const *arch = archp; 125 126 return e_machine_and_eflags__cmp(key, &(*arch)->id); 127 } 128 129 static int arch__cmp(const void *a, const void *b) 130 { 131 const struct arch *const *aa = a; 132 const struct arch *const *ab = b; 133 134 return e_machine_and_eflags__cmp(&(*aa)->id, &(*ab)->id); 135 } 136 137 const struct arch *arch__find(uint16_t e_machine, uint32_t e_flags, const char *cpuid) 138 { 139 static const struct arch *(*const arch_new_fn[])(const struct e_machine_and_e_flags *id, 140 const char *cpuid) = { 141 [EM_386] = arch__new_x86, 142 [EM_ARC] = arch__new_arc, 143 [EM_ARM] = arch__new_arm, 144 [EM_AARCH64] = arch__new_arm64, 145 [EM_CSKY] = arch__new_csky, 146 [EM_LOONGARCH] = arch__new_loongarch, 147 [EM_MIPS] = arch__new_mips, 148 [EM_PPC64] = arch__new_powerpc, 149 [EM_PPC] = arch__new_powerpc, 150 [EM_RISCV] = arch__new_riscv64, 151 [EM_S390] = arch__new_s390, 152 [EM_SPARC] = arch__new_sparc, 153 [EM_SPARCV9] = arch__new_sparc, 154 [EM_X86_64] = arch__new_x86, 155 }; 156 static const struct arch **archs; 157 static size_t num_archs; 158 struct e_machine_and_e_flags key = { 159 .e_machine = e_machine, 160 .e_flags = e_flags, 161 }; 162 const struct arch *result = NULL, **tmp; 163 164 if (num_archs > 0) { 165 tmp = bsearch(&key, archs, num_archs, sizeof(*archs), arch__key_cmp); 166 if (tmp) 167 result = *tmp; 168 } 169 170 if (result) 171 return result; 172 173 if (e_machine >= ARRAY_SIZE(arch_new_fn) || arch_new_fn[e_machine] == NULL) { 174 errno = ENOTSUP; 175 return NULL; 176 } 177 178 tmp = reallocarray(archs, num_archs + 1, sizeof(*archs)); 179 if (!tmp) 180 return NULL; 181 182 result = arch_new_fn[e_machine](&key, cpuid); 183 if (!result) { 184 pr_err("%s: failed to initialize %s (%u) arch priv area\n", 185 __func__, result->name, e_machine); 186 free(tmp); 187 return NULL; 188 } 189 archs = tmp; 190 archs[num_archs++] = result; 191 qsort(archs, num_archs, sizeof(*archs), arch__cmp); 192 return result; 193 } 194 195 bool arch__is_x86(const struct arch *arch) 196 { 197 return arch->id.e_machine == EM_386 || arch->id.e_machine == EM_X86_64; 198 } 199 200 bool arch__is_powerpc(const struct arch *arch) 201 { 202 return arch->id.e_machine == EM_PPC || arch->id.e_machine == EM_PPC64; 203 } 204 205 static void ins_ops__delete(struct ins_operands *ops) 206 { 207 if (ops == NULL) 208 return; 209 zfree(&ops->source.raw); 210 zfree(&ops->source.name); 211 zfree(&ops->target.raw); 212 zfree(&ops->target.name); 213 } 214 215 int ins__raw_scnprintf(const struct ins *ins, char *bf, size_t size, 216 struct ins_operands *ops, int max_ins_name) 217 { 218 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); 219 } 220 221 int ins__scnprintf(const struct ins *ins, char *bf, size_t size, 222 struct ins_operands *ops, int max_ins_name) 223 { 224 if (ins->ops->scnprintf) 225 return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); 226 227 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 228 } 229 230 bool ins__is_fused(const struct arch *arch, const char *ins1, const char *ins2) 231 { 232 if (!arch || !arch->ins_is_fused) 233 return false; 234 235 return arch->ins_is_fused(arch, ins1, ins2); 236 } 237 238 static int call__parse(const struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 239 struct disasm_line *dl __maybe_unused) 240 { 241 char *endptr, *tok, *name; 242 struct map *map = ms->map; 243 struct addr_map_symbol target; 244 245 ops->target.addr = strtoull(ops->raw, &endptr, 16); 246 247 name = strchr(endptr, '<'); 248 if (name == NULL) 249 goto indirect_call; 250 251 name++; 252 253 if (arch->objdump.skip_functions_char && 254 strchr(name, arch->objdump.skip_functions_char)) 255 return -1; 256 257 tok = strchr(name, '>'); 258 if (tok == NULL) 259 return -1; 260 261 *tok = '\0'; 262 ops->target.name = strdup(name); 263 *tok = '>'; 264 265 if (ops->target.name == NULL) 266 return -1; 267 find_target: 268 target = (struct addr_map_symbol) { 269 .ms = { .map = map__get(map), }, 270 .addr = map__objdump_2mem(map, ops->target.addr), 271 }; 272 273 if (maps__find_ams(thread__maps(ms->thread), &target) == 0 && 274 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 275 ops->target.sym = target.ms.sym; 276 277 addr_map_symbol__exit(&target); 278 return 0; 279 280 indirect_call: 281 tok = strchr(endptr, '*'); 282 if (tok != NULL) { 283 endptr++; 284 285 /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). 286 * Do not parse such instruction. */ 287 if (strstr(endptr, "(%r") == NULL) 288 ops->target.addr = strtoull(endptr, NULL, 16); 289 } 290 goto find_target; 291 } 292 293 int call__scnprintf(const struct ins *ins, char *bf, size_t size, 294 struct ins_operands *ops, int max_ins_name) 295 { 296 if (ops->target.sym) 297 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 298 299 if (ops->target.addr == 0) 300 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 301 302 if (ops->target.name) 303 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name); 304 305 return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr); 306 } 307 308 const struct ins_ops call_ops = { 309 .parse = call__parse, 310 .scnprintf = call__scnprintf, 311 .is_call = true, 312 }; 313 314 bool ins__is_call(const struct ins *ins) 315 { 316 return ins->ops && ins->ops->is_call; 317 } 318 319 /* 320 * Prevents from matching commas in the comment section, e.g.: 321 * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast 322 * 323 * and skip comma as part of function arguments, e.g.: 324 * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> 325 */ 326 static inline const char *validate_comma(const char *c, struct ins_operands *ops) 327 { 328 if (ops->jump.raw_comment && c > ops->jump.raw_comment) 329 return NULL; 330 331 if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) 332 return NULL; 333 334 return c; 335 } 336 337 static int jump__parse(const struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 338 struct disasm_line *dl __maybe_unused) 339 { 340 struct map *map = ms->map; 341 struct symbol *sym = ms->sym; 342 struct addr_map_symbol target = { 343 .ms = { .map = map__get(map), }, 344 }; 345 const char *c = strchr(ops->raw, ','); 346 u64 start, end; 347 348 ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); 349 ops->jump.raw_func_start = strchr(ops->raw, '<'); 350 351 c = validate_comma(c, ops); 352 353 /* 354 * Examples of lines to parse for the _cpp_lex_token@@Base 355 * function: 356 * 357 * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> 358 * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> 359 * 360 * The first is a jump to an offset inside the same function, 361 * the second is to another function, i.e. that 0xa72 is an 362 * offset in the cpp_named_operator2name@@base function. 363 */ 364 /* 365 * skip over possible up to 2 operands to get to address, e.g.: 366 * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> 367 */ 368 if (c != NULL) { 369 c++; 370 ops->target.addr = strtoull(c, NULL, 16); 371 if (!ops->target.addr) { 372 c = strchr(c, ','); 373 c = validate_comma(c, ops); 374 if (c != NULL) { 375 c++; 376 ops->target.addr = strtoull(c, NULL, 16); 377 } 378 } 379 } else { 380 ops->target.addr = strtoull(ops->raw, NULL, 16); 381 } 382 383 target.addr = map__objdump_2mem(map, ops->target.addr); 384 start = map__unmap_ip(map, sym->start); 385 end = map__unmap_ip(map, sym->end); 386 387 ops->target.outside = target.addr < start || target.addr > end; 388 389 /* 390 * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): 391 392 cpp_named_operator2name@@Base+0xa72 393 394 * Point to a place that is after the cpp_named_operator2name 395 * boundaries, i.e. in the ELF symbol table for cc1 396 * cpp_named_operator2name is marked as being 32-bytes long, but it in 397 * fact is much larger than that, so we seem to need a symbols__find() 398 * routine that looks for >= current->start and < next_symbol->start, 399 * possibly just for C++ objects? 400 * 401 * For now lets just make some progress by marking jumps to outside the 402 * current function as call like. 403 * 404 * Actual navigation will come next, with further understanding of how 405 * the symbol searching and disassembly should be done. 406 */ 407 if (maps__find_ams(thread__maps(ms->thread), &target) == 0 && 408 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 409 ops->target.sym = target.ms.sym; 410 411 if (!ops->target.outside) { 412 ops->target.offset = target.addr - start; 413 ops->target.offset_avail = true; 414 } else { 415 ops->target.offset_avail = false; 416 } 417 addr_map_symbol__exit(&target); 418 return 0; 419 } 420 421 int jump__scnprintf(const struct ins *ins, char *bf, size_t size, 422 struct ins_operands *ops, int max_ins_name) 423 { 424 const char *c; 425 426 if (!ops->target.addr || ops->target.offset < 0) 427 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 428 429 if (ops->target.outside && ops->target.sym != NULL) 430 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 431 432 c = strchr(ops->raw, ','); 433 c = validate_comma(c, ops); 434 435 if (c != NULL) { 436 const char *c2 = strchr(c + 1, ','); 437 438 c2 = validate_comma(c2, ops); 439 /* check for 3-op insn */ 440 if (c2 != NULL) 441 c = c2; 442 c++; 443 444 /* mirror arch objdump's space-after-comma style */ 445 if (*c == ' ') 446 c++; 447 } 448 449 return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, 450 ins->name, c ? c - ops->raw : 0, ops->raw, 451 ops->target.offset); 452 } 453 454 static void jump__delete(struct ins_operands *ops __maybe_unused) 455 { 456 /* 457 * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the 458 * raw string, don't free them. 459 */ 460 } 461 462 const struct ins_ops jump_ops = { 463 .free = jump__delete, 464 .parse = jump__parse, 465 .scnprintf = jump__scnprintf, 466 .is_jump = true, 467 }; 468 469 bool ins__is_jump(const struct ins *ins) 470 { 471 return ins->ops && ins->ops->is_jump; 472 } 473 474 static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) 475 { 476 char *endptr, *name, *t; 477 478 if (strstr(raw, "(%rip)") == NULL) 479 return 0; 480 481 *addrp = strtoull(comment, &endptr, 16); 482 if (endptr == comment) 483 return 0; 484 name = strchr(endptr, '<'); 485 if (name == NULL) 486 return -1; 487 488 name++; 489 490 t = strchr(name, '>'); 491 if (t == NULL) 492 return 0; 493 494 *t = '\0'; 495 *namep = strdup(name); 496 *t = '>'; 497 498 return 0; 499 } 500 501 static int lock__parse(const struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 502 struct disasm_line *dl __maybe_unused) 503 { 504 ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); 505 if (ops->locked.ops == NULL) 506 return 0; 507 508 if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) 509 goto out_free_ops; 510 511 ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0); 512 513 if (ops->locked.ins.ops == NULL) 514 goto out_free_ops; 515 516 if (ops->locked.ins.ops->parse && 517 ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0) 518 goto out_free_ops; 519 520 return 0; 521 522 out_free_ops: 523 zfree(&ops->locked.ops); 524 return 0; 525 } 526 527 static int lock__scnprintf(const struct ins *ins, char *bf, size_t size, 528 struct ins_operands *ops, int max_ins_name) 529 { 530 int printed; 531 532 if (ops->locked.ins.ops == NULL) 533 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 534 535 printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); 536 return printed + ins__scnprintf(&ops->locked.ins, bf + printed, 537 size - printed, ops->locked.ops, max_ins_name); 538 } 539 540 static void lock__delete(struct ins_operands *ops) 541 { 542 struct ins *ins = &ops->locked.ins; 543 544 if (ins->ops && ins->ops->free) 545 ins->ops->free(ops->locked.ops); 546 else 547 ins_ops__delete(ops->locked.ops); 548 549 zfree(&ops->locked.ops); 550 zfree(&ops->locked.ins.name); 551 zfree(&ops->target.raw); 552 zfree(&ops->target.name); 553 } 554 555 const struct ins_ops lock_ops = { 556 .free = lock__delete, 557 .parse = lock__parse, 558 .scnprintf = lock__scnprintf, 559 }; 560 561 /* 562 * Check if the operand has more than one registers like x86 SIB addressing: 563 * 0x1234(%rax, %rbx, 8) 564 * 565 * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check 566 * the input string after 'memory_ref_char' if exists. 567 */ 568 static bool check_multi_regs(const struct arch *arch, const char *op) 569 { 570 int count = 0; 571 572 if (arch->objdump.register_char == 0) 573 return false; 574 575 if (arch->objdump.memory_ref_char) { 576 op = strchr(op, arch->objdump.memory_ref_char); 577 if (op == NULL) 578 return false; 579 } 580 581 while ((op = strchr(op, arch->objdump.register_char)) != NULL) { 582 count++; 583 op++; 584 } 585 586 return count > 1; 587 } 588 589 static int mov__parse(const struct arch *arch, struct ins_operands *ops, 590 struct map_symbol *ms __maybe_unused, 591 struct disasm_line *dl __maybe_unused) 592 { 593 char *s = strchr(ops->raw, ','), *target, *comment, prev; 594 595 if (s == NULL) 596 return -1; 597 598 *s = '\0'; 599 600 /* 601 * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) 602 * then it needs to have the closing parenthesis. 603 */ 604 if (strchr(ops->raw, '(')) { 605 *s = ','; 606 s = strchr(ops->raw, ')'); 607 if (s == NULL || s[1] != ',') 608 return -1; 609 *++s = '\0'; 610 } 611 612 ops->source.raw = strdup(ops->raw); 613 *s = ','; 614 615 if (ops->source.raw == NULL) 616 return -1; 617 618 ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); 619 620 target = skip_spaces(++s); 621 comment = strchr(s, arch->objdump.comment_char); 622 623 if (comment != NULL) 624 s = comment - 1; 625 else 626 s = strchr(s, '\0') - 1; 627 628 while (s > target && isspace(s[0])) 629 --s; 630 s++; 631 prev = *s; 632 *s = '\0'; 633 634 ops->target.raw = strdup(target); 635 *s = prev; 636 637 if (ops->target.raw == NULL) 638 goto out_free_source; 639 640 ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); 641 642 if (comment == NULL) 643 return 0; 644 645 comment = skip_spaces(comment); 646 comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); 647 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 648 649 return 0; 650 651 out_free_source: 652 zfree(&ops->source.raw); 653 return -1; 654 } 655 656 int mov__scnprintf(const struct ins *ins, char *bf, size_t size, 657 struct ins_operands *ops, int max_ins_name) 658 { 659 return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, 660 ops->source.name ?: ops->source.raw, 661 ops->target.name ?: ops->target.raw); 662 } 663 664 const struct ins_ops mov_ops = { 665 .parse = mov__parse, 666 .scnprintf = mov__scnprintf, 667 }; 668 669 static int dec__parse(const struct arch *arch __maybe_unused, struct ins_operands *ops, 670 struct map_symbol *ms __maybe_unused, 671 struct disasm_line *dl __maybe_unused) 672 { 673 char *target, *comment, *s, prev; 674 675 target = s = ops->raw; 676 677 while (s[0] != '\0' && !isspace(s[0])) 678 ++s; 679 prev = *s; 680 *s = '\0'; 681 682 ops->target.raw = strdup(target); 683 *s = prev; 684 685 if (ops->target.raw == NULL) 686 return -1; 687 688 comment = strchr(s, arch->objdump.comment_char); 689 if (comment == NULL) 690 return 0; 691 692 comment = skip_spaces(comment); 693 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 694 695 return 0; 696 } 697 698 static int dec__scnprintf(const struct ins *ins, char *bf, size_t size, 699 struct ins_operands *ops, int max_ins_name) 700 { 701 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 702 ops->target.name ?: ops->target.raw); 703 } 704 705 const struct ins_ops dec_ops = { 706 .parse = dec__parse, 707 .scnprintf = dec__scnprintf, 708 }; 709 710 static int nop__scnprintf(const struct ins *ins __maybe_unused, char *bf, size_t size, 711 struct ins_operands *ops __maybe_unused, int max_ins_name) 712 { 713 return scnprintf(bf, size, "%-*s", max_ins_name, "nop"); 714 } 715 716 const struct ins_ops nop_ops = { 717 .scnprintf = nop__scnprintf, 718 }; 719 720 const struct ins_ops ret_ops = { 721 .scnprintf = ins__raw_scnprintf, 722 }; 723 724 static bool ins__is_nop(const struct ins *ins) 725 { 726 return ins->ops == &nop_ops; 727 } 728 729 bool ins__is_ret(const struct ins *ins) 730 { 731 return ins->ops == &ret_ops; 732 } 733 734 bool ins__is_lock(const struct ins *ins) 735 { 736 return ins->ops == &lock_ops; 737 } 738 739 static int ins__key_cmp(const void *name, const void *insp) 740 { 741 const struct ins *ins = insp; 742 743 return strcmp(name, ins->name); 744 } 745 746 static int ins__cmp(const void *a, const void *b) 747 { 748 const struct ins *ia = a; 749 const struct ins *ib = b; 750 751 return strcmp(ia->name, ib->name); 752 } 753 754 static void ins__sort(struct arch *arch) 755 { 756 const int nmemb = arch->nr_instructions; 757 758 qsort((void *)arch->instructions, nmemb, sizeof(struct ins), ins__cmp); 759 } 760 761 static const struct ins_ops *__ins__find(const struct arch *arch, const char *name, 762 struct disasm_line *dl) 763 { 764 const struct ins *ins; 765 const int nmemb = arch->nr_instructions; 766 767 if (arch__is_powerpc(arch)) { 768 /* 769 * For powerpc, identify the instruction ops 770 * from the opcode using raw_insn. 771 */ 772 const struct ins_ops *ops; 773 774 ops = check_ppc_insn(dl); 775 if (ops) 776 return ops; 777 } 778 779 if (!arch->sorted_instructions) { 780 ins__sort((struct arch *)arch); 781 ((struct arch *)arch)->sorted_instructions = true; 782 } 783 784 ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 785 if (ins) 786 return ins->ops; 787 788 if (arch->insn_suffix) { 789 char tmp[32]; 790 char suffix; 791 size_t len = strlen(name); 792 793 if (len == 0 || len >= sizeof(tmp)) 794 return NULL; 795 796 suffix = name[len - 1]; 797 if (strchr(arch->insn_suffix, suffix) == NULL) 798 return NULL; 799 800 strcpy(tmp, name); 801 tmp[len - 1] = '\0'; /* remove the suffix and check again */ 802 803 ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 804 } 805 return ins ? ins->ops : NULL; 806 } 807 808 const struct ins_ops *ins__find(const struct arch *arch, const char *name, struct disasm_line *dl) 809 { 810 const struct ins_ops *ops = __ins__find(arch, name, dl); 811 812 if (!ops && arch->associate_instruction_ops) 813 ops = arch->associate_instruction_ops((struct arch *)arch, name); 814 815 return ops; 816 } 817 818 static void disasm_line__init_ins(struct disasm_line *dl, const struct arch *arch, 819 struct map_symbol *ms) 820 { 821 dl->ins.ops = ins__find(arch, dl->ins.name, dl); 822 823 if (!dl->ins.ops) 824 return; 825 826 if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0) 827 dl->ins.ops = NULL; 828 } 829 830 static int disasm_line__parse(char *line, const char **namep, char **rawp) 831 { 832 char tmp, *name = skip_spaces(line); 833 834 if (name[0] == '\0') 835 return -1; 836 837 *rawp = name + 1; 838 839 while ((*rawp)[0] != '\0' && !isspace((*rawp)[0])) 840 ++*rawp; 841 842 tmp = (*rawp)[0]; 843 (*rawp)[0] = '\0'; 844 *namep = strdup(name); 845 846 if (*namep == NULL) 847 goto out; 848 849 (*rawp)[0] = tmp; 850 *rawp = strim(*rawp); 851 852 return 0; 853 854 out: 855 return -1; 856 } 857 858 /* 859 * Parses the result captured from symbol__disassemble_* 860 * Example, line read from DSO file in powerpc: 861 * line: 38 01 81 e8 862 * opcode: fetched from arch specific get_opcode_insn 863 * rawp_insn: e8810138 864 * 865 * rawp_insn is used later to extract the reg/offset fields 866 */ 867 #define PPC_OP(op) (((op) >> 26) & 0x3F) 868 #define RAW_BYTES 11 869 870 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args) 871 { 872 char *line = dl->al.line; 873 const char **namep = &dl->ins.name; 874 char **rawp = &dl->ops.raw; 875 char *tmp_raw_insn, *name_raw_insn = skip_spaces(line); 876 char *name = skip_spaces(name_raw_insn + RAW_BYTES); 877 int disasm = 0; 878 int ret = 0; 879 880 if (args->options->disassembler_used) 881 disasm = 1; 882 883 if (name_raw_insn[0] == '\0') 884 return -1; 885 886 if (disasm) 887 ret = disasm_line__parse(name, namep, rawp); 888 else 889 *namep = ""; 890 891 tmp_raw_insn = strndup(name_raw_insn, 11); 892 if (tmp_raw_insn == NULL) 893 return -1; 894 895 remove_spaces(tmp_raw_insn); 896 897 sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn); 898 if (disasm) 899 dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn); 900 901 return ret; 902 } 903 904 static void annotation_line__init(struct annotation_line *al, 905 struct annotate_args *args, 906 int nr) 907 { 908 al->offset = args->offset; 909 al->line = strdup(args->line); 910 al->line_nr = args->line_nr; 911 al->fileloc = args->fileloc; 912 al->data_nr = nr; 913 } 914 915 static void annotation_line__exit(struct annotation_line *al) 916 { 917 zfree_srcline(&al->path); 918 zfree(&al->line); 919 zfree(&al->cycles); 920 zfree(&al->br_cntr); 921 } 922 923 static size_t disasm_line_size(int nr) 924 { 925 struct annotation_line *al; 926 927 return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); 928 } 929 930 /* 931 * Allocating the disasm annotation line data with 932 * following structure: 933 * 934 * ------------------------------------------- 935 * struct disasm_line | struct annotation_line 936 * ------------------------------------------- 937 * 938 * We have 'struct annotation_line' member as last member 939 * of 'struct disasm_line' to have an easy access. 940 */ 941 struct disasm_line *disasm_line__new(struct annotate_args *args) 942 { 943 struct disasm_line *dl = NULL; 944 struct annotation *notes = symbol__annotation(args->ms->sym); 945 int nr = notes->src->nr_events; 946 947 dl = zalloc(disasm_line_size(nr)); 948 if (!dl) 949 return NULL; 950 951 annotation_line__init(&dl->al, args, nr); 952 if (dl->al.line == NULL) 953 goto out_delete; 954 955 if (args->offset != -1) { 956 if (arch__is_powerpc(args->arch)) { 957 if (disasm_line__parse_powerpc(dl, args) < 0) 958 goto out_free_line; 959 } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) 960 goto out_free_line; 961 962 disasm_line__init_ins(dl, args->arch, args->ms); 963 } 964 965 return dl; 966 967 out_free_line: 968 zfree(&dl->al.line); 969 out_delete: 970 free(dl); 971 return NULL; 972 } 973 974 void disasm_line__free(struct disasm_line *dl) 975 { 976 if (dl->ins.ops && dl->ins.ops->free) 977 dl->ins.ops->free(&dl->ops); 978 else 979 ins_ops__delete(&dl->ops); 980 zfree(&dl->ins.name); 981 annotation_line__exit(&dl->al); 982 free(dl); 983 } 984 985 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) 986 { 987 if (raw || !dl->ins.ops) 988 return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw); 989 990 return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); 991 } 992 993 /* 994 * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) 995 * which looks like following 996 * 997 * 0000000000415500 <_init>: 998 * 415500: sub $0x8,%rsp 999 * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8> 1000 * 41550b: test %rax,%rax 1001 * 41550e: je 415515 <_init+0x15> 1002 * 415510: callq 416e70 <__gmon_start__@plt> 1003 * 415515: add $0x8,%rsp 1004 * 415519: retq 1005 * 1006 * it will be parsed and saved into struct disasm_line as 1007 * <offset> <name> <ops.raw> 1008 * 1009 * The offset will be a relative offset from the start of the symbol and -1 1010 * means that it's not a disassembly line so should be treated differently. 1011 * The ops.raw part will be parsed further according to type of the instruction. 1012 */ 1013 static int symbol__parse_objdump_line(struct symbol *sym, 1014 struct annotate_args *args, 1015 char *parsed_line, int *line_nr, char **fileloc) 1016 { 1017 struct map *map = args->ms->map; 1018 struct annotation *notes = symbol__annotation(sym); 1019 struct disasm_line *dl; 1020 char *tmp; 1021 s64 line_ip, offset = -1; 1022 regmatch_t match[2]; 1023 1024 /* /filename:linenr ? Save line number and ignore. */ 1025 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { 1026 *line_nr = atoi(parsed_line + match[1].rm_so); 1027 free(*fileloc); 1028 *fileloc = strdup(parsed_line); 1029 return 0; 1030 } 1031 1032 /* Process hex address followed by ':'. */ 1033 line_ip = strtoull(parsed_line, &tmp, 16); 1034 if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { 1035 u64 start = map__rip_2objdump(map, sym->start), 1036 end = map__rip_2objdump(map, sym->end); 1037 1038 offset = line_ip - start; 1039 if ((u64)line_ip < start || (u64)line_ip >= end) 1040 offset = -1; 1041 else 1042 parsed_line = tmp + 1; 1043 } 1044 1045 args->offset = offset; 1046 args->line = parsed_line; 1047 args->line_nr = *line_nr; 1048 args->fileloc = *fileloc; 1049 args->ms->sym = sym; 1050 1051 dl = disasm_line__new(args); 1052 (*line_nr)++; 1053 1054 if (dl == NULL) 1055 return -1; 1056 1057 if (!disasm_line__has_local_offset(dl)) { 1058 dl->ops.target.offset = dl->ops.target.addr - 1059 map__rip_2objdump(map, sym->start); 1060 dl->ops.target.offset_avail = true; 1061 } 1062 1063 /* kcore has no symbols, so add the call target symbol */ 1064 if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { 1065 struct addr_map_symbol target = { 1066 .addr = dl->ops.target.addr, 1067 .ms = { .map = map__get(map), }, 1068 }; 1069 1070 if (!maps__find_ams(thread__maps(args->ms->thread), &target) && 1071 target.ms.sym->start == target.al_addr) 1072 dl->ops.target.sym = target.ms.sym; 1073 1074 addr_map_symbol__exit(&target); 1075 } 1076 1077 annotation_line__add(&dl->al, ¬es->src->source); 1078 return 0; 1079 } 1080 1081 static void delete_last_nop(struct symbol *sym) 1082 { 1083 struct annotation *notes = symbol__annotation(sym); 1084 struct list_head *list = ¬es->src->source; 1085 struct disasm_line *dl; 1086 1087 while (!list_empty(list)) { 1088 dl = list_entry(list->prev, struct disasm_line, al.node); 1089 1090 if (dl->ins.ops) { 1091 if (!ins__is_nop(&dl->ins)) 1092 return; 1093 } else { 1094 if (!strstr(dl->al.line, " nop ") && 1095 !strstr(dl->al.line, " nopl ") && 1096 !strstr(dl->al.line, " nopw ")) 1097 return; 1098 } 1099 1100 list_del_init(&dl->al.node); 1101 disasm_line__free(dl); 1102 } 1103 } 1104 1105 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen) 1106 { 1107 struct dso *dso = map__dso(ms->map); 1108 1109 BUG_ON(buflen == 0); 1110 1111 if (errnum >= 0) { 1112 str_error_r(errnum, buf, buflen); 1113 return 0; 1114 } 1115 1116 switch (errnum) { 1117 case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { 1118 char bf[SBUILD_ID_SIZE + 15] = " with build id "; 1119 char *build_id_msg = NULL; 1120 1121 if (dso__has_build_id(dso)) { 1122 build_id__snprintf(dso__bid(dso), bf + 15, sizeof(bf) - 15); 1123 build_id_msg = bf; 1124 } 1125 scnprintf(buf, buflen, 1126 "No vmlinux file%s\nwas found in the path.\n\n" 1127 "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" 1128 "Please use:\n\n" 1129 " perf buildid-cache -vu vmlinux\n\n" 1130 "or:\n\n" 1131 " --vmlinux vmlinux\n", build_id_msg ?: ""); 1132 } 1133 break; 1134 case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: 1135 scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); 1136 break; 1137 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: 1138 scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); 1139 break; 1140 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: 1141 scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); 1142 break; 1143 case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: 1144 scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso)); 1145 break; 1146 case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: 1147 scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", 1148 dso__long_name(dso)); 1149 break; 1150 case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE: 1151 scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso)); 1152 break; 1153 default: 1154 scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); 1155 break; 1156 } 1157 1158 return 0; 1159 } 1160 1161 static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size) 1162 { 1163 char linkname[PATH_MAX]; 1164 char *build_id_filename; 1165 char *build_id_path = NULL; 1166 char *pos; 1167 int len; 1168 1169 if (dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS && 1170 !dso__is_kcore(dso)) 1171 return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; 1172 1173 build_id_filename = dso__build_id_filename(dso, NULL, 0, false); 1174 if (build_id_filename) { 1175 __symbol__join_symfs(filename, filename_size, build_id_filename); 1176 free(build_id_filename); 1177 } else { 1178 if (dso__has_build_id(dso)) 1179 return ENOMEM; 1180 goto fallback; 1181 } 1182 1183 build_id_path = strdup(filename); 1184 if (!build_id_path) 1185 return ENOMEM; 1186 1187 /* 1188 * old style build-id cache has name of XX/XXXXXXX.. while 1189 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. 1190 * extract the build-id part of dirname in the new style only. 1191 */ 1192 pos = strrchr(build_id_path, '/'); 1193 if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) 1194 dirname(build_id_path); 1195 1196 if (dso__is_kcore(dso)) 1197 goto fallback; 1198 1199 len = readlink(build_id_path, linkname, sizeof(linkname) - 1); 1200 if (len < 0) 1201 goto fallback; 1202 1203 linkname[len] = '\0'; 1204 if (strstr(linkname, DSO__NAME_KALLSYMS) || 1205 access(filename, R_OK)) { 1206 fallback: 1207 /* 1208 * If we don't have build-ids or the build-id file isn't in the 1209 * cache, or is just a kallsyms file, well, lets hope that this 1210 * DSO is the same as when 'perf record' ran. 1211 */ 1212 if (dso__kernel(dso) && dso__long_name(dso)[0] == '/') 1213 snprintf(filename, filename_size, "%s", dso__long_name(dso)); 1214 else 1215 __symbol__join_symfs(filename, filename_size, dso__long_name(dso)); 1216 1217 mutex_lock(dso__lock(dso)); 1218 if (access(filename, R_OK) && errno == ENOENT && dso__nsinfo(dso)) { 1219 char *new_name = dso__filename_with_chroot(dso, filename); 1220 if (new_name) { 1221 strlcpy(filename, new_name, filename_size); 1222 free(new_name); 1223 } 1224 } 1225 mutex_unlock(dso__lock(dso)); 1226 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1227 dso__set_binary_type(dso, DSO_BINARY_TYPE__BUILD_ID_CACHE); 1228 } 1229 1230 free(build_id_path); 1231 return 0; 1232 } 1233 1234 static int symbol__disassemble_raw(char *filename, struct symbol *sym, 1235 struct annotate_args *args) 1236 { 1237 struct annotation *notes = symbol__annotation(sym); 1238 struct map *map = args->ms->map; 1239 struct dso *dso = map__dso(map); 1240 u64 start = map__rip_2objdump(map, sym->start); 1241 u64 end = map__rip_2objdump(map, sym->end); 1242 u64 len = end - start; 1243 u64 offset; 1244 int i, count; 1245 u8 *buf = NULL; 1246 char disasm_buf[512]; 1247 struct disasm_line *dl; 1248 u32 *line; 1249 1250 /* Return if objdump is specified explicitly */ 1251 if (args->options->objdump_path) 1252 return -1; 1253 1254 pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename); 1255 1256 buf = malloc(len); 1257 if (buf == NULL) 1258 goto err; 1259 1260 count = dso__data_read_offset(dso, NULL, sym->start, buf, len); 1261 1262 line = (u32 *)buf; 1263 1264 if ((u64)count != len) 1265 goto err; 1266 1267 /* add the function address and name */ 1268 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1269 start, sym->name); 1270 1271 args->offset = -1; 1272 args->line = disasm_buf; 1273 args->line_nr = 0; 1274 args->fileloc = NULL; 1275 args->ms->sym = sym; 1276 1277 dl = disasm_line__new(args); 1278 if (dl == NULL) 1279 goto err; 1280 1281 annotation_line__add(&dl->al, ¬es->src->source); 1282 1283 /* Each raw instruction is 4 byte */ 1284 count = len/4; 1285 1286 for (i = 0, offset = 0; i < count; i++) { 1287 args->offset = offset; 1288 sprintf(args->line, "%x", line[i]); 1289 dl = disasm_line__new(args); 1290 if (dl == NULL) 1291 break; 1292 1293 annotation_line__add(&dl->al, ¬es->src->source); 1294 offset += 4; 1295 } 1296 1297 /* It failed in the middle */ 1298 if (offset != len) { 1299 struct list_head *list = ¬es->src->source; 1300 1301 /* Discard all lines and fallback to objdump */ 1302 while (!list_empty(list)) { 1303 dl = list_first_entry(list, struct disasm_line, al.node); 1304 1305 list_del_init(&dl->al.node); 1306 disasm_line__free(dl); 1307 } 1308 count = -1; 1309 } 1310 1311 out: 1312 free(buf); 1313 return count < 0 ? count : 0; 1314 1315 err: 1316 count = -1; 1317 goto out; 1318 } 1319 1320 /* 1321 * Possibly create a new version of line with tabs expanded. Returns the 1322 * existing or new line, storage is updated if a new line is allocated. If 1323 * allocation fails then NULL is returned. 1324 */ 1325 char *expand_tabs(char *line, char **storage, size_t *storage_len) 1326 { 1327 size_t i, src, dst, len, new_storage_len, num_tabs; 1328 char *new_line; 1329 size_t line_len = strlen(line); 1330 1331 for (num_tabs = 0, i = 0; i < line_len; i++) 1332 if (line[i] == '\t') 1333 num_tabs++; 1334 1335 if (num_tabs == 0) 1336 return line; 1337 1338 /* 1339 * Space for the line and '\0', less the leading and trailing 1340 * spaces. Each tab may introduce 7 additional spaces. 1341 */ 1342 new_storage_len = line_len + 1 + (num_tabs * 7); 1343 1344 new_line = malloc(new_storage_len); 1345 if (new_line == NULL) { 1346 pr_err("Failure allocating memory for tab expansion\n"); 1347 return NULL; 1348 } 1349 1350 /* 1351 * Copy regions starting at src and expand tabs. If there are two 1352 * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces 1353 * are inserted. 1354 */ 1355 for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { 1356 if (line[i] == '\t') { 1357 len = i - src; 1358 memcpy(&new_line[dst], &line[src], len); 1359 dst += len; 1360 new_line[dst++] = ' '; 1361 while (dst % 8 != 0) 1362 new_line[dst++] = ' '; 1363 src = i + 1; 1364 num_tabs--; 1365 } 1366 } 1367 1368 /* Expand the last region. */ 1369 len = line_len - src; 1370 memcpy(&new_line[dst], &line[src], len); 1371 dst += len; 1372 new_line[dst] = '\0'; 1373 1374 free(*storage); 1375 *storage = new_line; 1376 *storage_len = new_storage_len; 1377 return new_line; 1378 } 1379 1380 static int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args) 1381 { 1382 struct annotation *notes = symbol__annotation(sym); 1383 struct disasm_line *dl; 1384 1385 args->offset = -1; 1386 args->line = strdup("to be implemented"); 1387 args->line_nr = 0; 1388 args->fileloc = NULL; 1389 dl = disasm_line__new(args); 1390 if (dl) 1391 annotation_line__add(&dl->al, ¬es->src->source); 1392 1393 zfree(&args->line); 1394 return 0; 1395 } 1396 1397 static int symbol__disassemble_objdump(const char *filename, struct symbol *sym, 1398 struct annotate_args *args) 1399 { 1400 struct annotation_options *opts = &annotate_opts; 1401 struct map *map = args->ms->map; 1402 struct dso *dso = map__dso(map); 1403 char *command; 1404 FILE *file; 1405 int lineno = 0; 1406 char *fileloc = NULL; 1407 int nline; 1408 char *line; 1409 size_t line_len; 1410 const char *objdump_argv[] = { 1411 "/bin/sh", 1412 "-c", 1413 NULL, /* Will be the objdump command to run. */ 1414 "--", 1415 NULL, /* Will be the symfs path. */ 1416 NULL, 1417 }; 1418 struct child_process objdump_process; 1419 int err; 1420 1421 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) 1422 return symbol__disassemble_bpf_libbfd(sym, args); 1423 1424 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) 1425 return symbol__disassemble_bpf_image(sym, args); 1426 1427 err = asprintf(&command, 1428 "%s %s%s --start-address=0x%016" PRIx64 1429 " --stop-address=0x%016" PRIx64 1430 " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", 1431 opts->objdump_path ?: "objdump", 1432 opts->disassembler_style ? "-M " : "", 1433 opts->disassembler_style ?: "", 1434 map__rip_2objdump(map, sym->start), 1435 map__rip_2objdump(map, sym->end), 1436 opts->show_linenr ? "-l" : "", 1437 opts->show_asm_raw ? "" : "--no-show-raw-insn", 1438 opts->annotate_src ? "-S" : "", 1439 opts->prefix ? "--prefix " : "", 1440 opts->prefix ? '"' : ' ', 1441 opts->prefix ?: "", 1442 opts->prefix ? '"' : ' ', 1443 opts->prefix_strip ? "--prefix-strip=" : "", 1444 opts->prefix_strip ?: ""); 1445 1446 if (err < 0) { 1447 pr_err("Failure allocating memory for the command to run\n"); 1448 return err; 1449 } 1450 1451 pr_debug("Executing: %s\n", command); 1452 1453 objdump_argv[2] = command; 1454 objdump_argv[4] = filename; 1455 1456 /* Create a pipe to read from for stdout */ 1457 memset(&objdump_process, 0, sizeof(objdump_process)); 1458 objdump_process.argv = objdump_argv; 1459 objdump_process.out = -1; 1460 objdump_process.err = -1; 1461 objdump_process.no_stderr = 1; 1462 if (start_command(&objdump_process)) { 1463 pr_err("Failure starting to run %s\n", command); 1464 err = -1; 1465 goto out_free_command; 1466 } 1467 1468 file = fdopen(objdump_process.out, "r"); 1469 if (!file) { 1470 pr_err("Failure creating FILE stream for %s\n", command); 1471 /* 1472 * If we were using debug info should retry with 1473 * original binary. 1474 */ 1475 err = -1; 1476 goto out_close_stdout; 1477 } 1478 1479 /* Storage for getline. */ 1480 line = NULL; 1481 line_len = 0; 1482 1483 nline = 0; 1484 while (!feof(file)) { 1485 const char *match; 1486 char *expanded_line; 1487 1488 if (getline(&line, &line_len, file) < 0 || !line) 1489 break; 1490 1491 /* Skip lines containing "filename:" */ 1492 match = strstr(line, filename); 1493 if (match && match[strlen(filename)] == ':') 1494 continue; 1495 1496 expanded_line = strim(line); 1497 expanded_line = expand_tabs(expanded_line, &line, &line_len); 1498 if (!expanded_line) 1499 break; 1500 1501 /* 1502 * The source code line number (lineno) needs to be kept in 1503 * across calls to symbol__parse_objdump_line(), so that it 1504 * can associate it with the instructions till the next one. 1505 * See disasm_line__new() and struct disasm_line::line_nr. 1506 */ 1507 if (symbol__parse_objdump_line(sym, args, expanded_line, 1508 &lineno, &fileloc) < 0) 1509 break; 1510 nline++; 1511 } 1512 free(line); 1513 free(fileloc); 1514 1515 err = finish_command(&objdump_process); 1516 if (err) 1517 pr_err("Error running %s\n", command); 1518 1519 if (nline == 0) { 1520 err = -1; 1521 pr_err("No output from %s\n", command); 1522 } 1523 1524 /* 1525 * kallsyms does not have symbol sizes so there may a nop at the end. 1526 * Remove it. 1527 */ 1528 if (dso__is_kcore(dso)) 1529 delete_last_nop(sym); 1530 1531 fclose(file); 1532 1533 out_close_stdout: 1534 close(objdump_process.out); 1535 1536 out_free_command: 1537 free(command); 1538 return err; 1539 } 1540 1541 int symbol__disassemble(struct symbol *sym, struct annotate_args *args) 1542 { 1543 struct annotation_options *options = args->options; 1544 struct map *map = args->ms->map; 1545 struct dso *dso = map__dso(map); 1546 char symfs_filename[PATH_MAX]; 1547 bool delete_extract = false; 1548 struct kcore_extract kce; 1549 bool decomp = false; 1550 int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); 1551 1552 if (err) 1553 return err; 1554 1555 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, 1556 symfs_filename, sym->name, map__unmap_ip(map, sym->start), 1557 map__unmap_ip(map, sym->end)); 1558 1559 pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name); 1560 1561 if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1562 return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE; 1563 } else if (dso__is_kcore(dso)) { 1564 kce.addr = map__rip_2objdump(map, sym->start); 1565 kce.kcore_filename = symfs_filename; 1566 kce.len = sym->end - sym->start; 1567 kce.offs = sym->start; 1568 1569 if (!kcore_extract__create(&kce)) { 1570 delete_extract = true; 1571 strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename)); 1572 } 1573 } else if (dso__needs_decompress(dso)) { 1574 char tmp[KMOD_DECOMP_LEN]; 1575 1576 if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) 1577 return -1; 1578 1579 decomp = true; 1580 strcpy(symfs_filename, tmp); 1581 } 1582 1583 /* 1584 * For powerpc data type profiling, use the dso__data_read_offset to 1585 * read raw instruction directly and interpret the binary code to 1586 * understand instructions and register fields. For sort keys as type 1587 * and typeoff, disassemble to mnemonic notation is not required in 1588 * case of powerpc. 1589 */ 1590 if (arch__is_powerpc(args->arch)) { 1591 extern const char *sort_order; 1592 1593 if (sort_order && !strstr(sort_order, "sym")) { 1594 err = symbol__disassemble_raw(symfs_filename, sym, args); 1595 if (err == 0) 1596 goto out_remove_tmp; 1597 1598 err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); 1599 if (err == 0) 1600 goto out_remove_tmp; 1601 } 1602 } 1603 1604 /* FIXME: LLVM and CAPSTONE should support source code */ 1605 if (options->annotate_src && !options->hide_src_code) { 1606 err = symbol__disassemble_objdump(symfs_filename, sym, args); 1607 if (err == 0) 1608 goto out_remove_tmp; 1609 } 1610 1611 err = -1; 1612 for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { 1613 enum perf_disassembler dis = options->disassemblers[i]; 1614 1615 switch (dis) { 1616 case PERF_DISASM_LLVM: 1617 args->options->disassembler_used = PERF_DISASM_LLVM; 1618 err = symbol__disassemble_llvm(symfs_filename, sym, args); 1619 break; 1620 case PERF_DISASM_CAPSTONE: 1621 args->options->disassembler_used = PERF_DISASM_CAPSTONE; 1622 err = symbol__disassemble_capstone(symfs_filename, sym, args); 1623 break; 1624 case PERF_DISASM_OBJDUMP: 1625 args->options->disassembler_used = PERF_DISASM_OBJDUMP; 1626 err = symbol__disassemble_objdump(symfs_filename, sym, args); 1627 break; 1628 case PERF_DISASM_UNKNOWN: /* End of disassemblers. */ 1629 default: 1630 args->options->disassembler_used = PERF_DISASM_UNKNOWN; 1631 goto out_remove_tmp; 1632 } 1633 if (err == 0) 1634 pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]); 1635 } 1636 out_remove_tmp: 1637 if (decomp) 1638 unlink(symfs_filename); 1639 1640 if (delete_extract) 1641 kcore_extract__delete(&kce); 1642 1643 return err; 1644 } 1645