1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <ctype.h> 3 #include <elf.h> 4 #ifndef EF_CSKY_ABIMASK 5 #define EF_CSKY_ABIMASK 0XF0000000 6 #endif 7 #include <errno.h> 8 #include <fcntl.h> 9 #include <inttypes.h> 10 #include <libgen.h> 11 #include <regex.h> 12 #include <stdlib.h> 13 #include <unistd.h> 14 15 #include <linux/string.h> 16 #include <subcmd/run-command.h> 17 18 #include "annotate.h" 19 #include "annotate-data.h" 20 #include "build-id.h" 21 #include "capstone.h" 22 #include "debug.h" 23 #include "disasm.h" 24 #include "dso.h" 25 #include "dwarf-regs.h" 26 #include "env.h" 27 #include "evsel.h" 28 #include "libbfd.h" 29 #include "llvm.h" 30 #include "map.h" 31 #include "maps.h" 32 #include "namespaces.h" 33 #include "srcline.h" 34 #include "symbol.h" 35 #include "thread.h" 36 #include "util.h" 37 38 static regex_t file_lineno; 39 40 /* These can be referred from the arch-dependent code */ 41 const struct ins_ops call_ops; 42 const struct ins_ops dec_ops; 43 const struct ins_ops jump_ops; 44 const struct ins_ops mov_ops; 45 const struct ins_ops nop_ops; 46 const struct ins_ops lock_ops; 47 const struct ins_ops ret_ops; 48 const struct ins_ops load_store_ops; 49 const struct ins_ops arithmetic_ops; 50 51 static void ins__sort(struct arch *arch); 52 static int disasm_line__parse(char *line, const char **namep, char **rawp); 53 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args); 54 55 static __attribute__((constructor)) void symbol__init_regexpr(void) 56 { 57 regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); 58 } 59 60 static int arch__grow_instructions(struct arch *arch) 61 { 62 struct ins *new_instructions; 63 size_t new_nr_allocated; 64 65 if (arch->nr_instructions_allocated == 0 && arch->instructions) 66 goto grow_from_non_allocated_table; 67 68 new_nr_allocated = arch->nr_instructions_allocated + 128; 69 new_instructions = realloc((void *)arch->instructions, 70 new_nr_allocated * sizeof(struct ins)); 71 if (new_instructions == NULL) 72 return -1; 73 74 out_update_instructions: 75 arch->instructions = new_instructions; 76 arch->nr_instructions_allocated = new_nr_allocated; 77 return 0; 78 79 grow_from_non_allocated_table: 80 new_nr_allocated = arch->nr_instructions + 128; 81 new_instructions = calloc(new_nr_allocated, sizeof(struct ins)); 82 if (new_instructions == NULL) 83 return -1; 84 85 memcpy(new_instructions, arch->instructions, arch->nr_instructions * sizeof(struct ins)); 86 goto out_update_instructions; 87 } 88 89 int arch__associate_ins_ops(struct arch *arch, const char *name, const struct ins_ops *ops) 90 { 91 struct ins *ins; 92 93 if (arch->nr_instructions == arch->nr_instructions_allocated && 94 arch__grow_instructions(arch)) 95 return -1; 96 97 ins = (struct ins *)&arch->instructions[arch->nr_instructions]; 98 ins->name = strdup(name); 99 if (!ins->name) 100 return -1; 101 102 ins->ops = ops; 103 arch->nr_instructions++; 104 105 ins__sort(arch); 106 return 0; 107 } 108 109 static int e_machine_and_eflags__cmp(const struct e_machine_and_e_flags *val1, 110 const struct e_machine_and_e_flags *val2) 111 { 112 if (val1->e_machine == val2->e_machine) { 113 if (val1->e_machine != EM_CSKY) 114 return 0; 115 if ((val1->e_flags & EF_CSKY_ABIMASK) < (val2->e_flags & EF_CSKY_ABIMASK)) 116 return -1; 117 return (val1->e_flags & EF_CSKY_ABIMASK) > (val2->e_flags & EF_CSKY_ABIMASK); 118 } 119 return val1->e_machine < val2->e_machine ? -1 : 1; 120 } 121 122 static int arch__key_cmp(const void *key, const void *archp) 123 { 124 const struct arch *const *arch = archp; 125 126 return e_machine_and_eflags__cmp(key, &(*arch)->id); 127 } 128 129 static int arch__cmp(const void *a, const void *b) 130 { 131 const struct arch *const *aa = a; 132 const struct arch *const *ab = b; 133 134 return e_machine_and_eflags__cmp(&(*aa)->id, &(*ab)->id); 135 } 136 137 const struct arch *arch__find(uint16_t e_machine, const char *cpuid) 138 { 139 static const struct arch *(*const arch_new_fn[])(const struct e_machine_and_e_flags *id, 140 const char *cpuid) = { 141 [EM_386] = arch__new_x86, 142 [EM_ARC] = arch__new_arc, 143 [EM_ARM] = arch__new_arm, 144 [EM_AARCH64] = arch__new_arm64, 145 [EM_CSKY] = arch__new_csky, 146 [EM_LOONGARCH] = arch__new_loongarch, 147 [EM_MIPS] = arch__new_mips, 148 [EM_PPC64] = arch__new_powerpc, 149 [EM_PPC] = arch__new_powerpc, 150 [EM_RISCV] = arch__new_riscv64, 151 [EM_S390] = arch__new_s390, 152 [EM_SPARC] = arch__new_sparc, 153 [EM_SPARCV9] = arch__new_sparc, 154 [EM_X86_64] = arch__new_x86, 155 }; 156 static const struct arch **archs; 157 static size_t num_archs; 158 struct e_machine_and_e_flags key = { 159 .e_machine = e_machine, 160 // TODO: e_flags should really come from the same source as e_machine. 161 .e_flags = EF_HOST, 162 }; 163 const struct arch *result = NULL, **tmp; 164 165 if (num_archs > 0) { 166 tmp = bsearch(&key, archs, num_archs, sizeof(*archs), arch__key_cmp); 167 if (tmp) 168 result = *tmp; 169 } 170 171 if (result) 172 return result; 173 174 if (e_machine >= ARRAY_SIZE(arch_new_fn) || arch_new_fn[e_machine] == NULL) { 175 errno = ENOTSUP; 176 return NULL; 177 } 178 179 tmp = reallocarray(archs, num_archs + 1, sizeof(*archs)); 180 if (!tmp) 181 return NULL; 182 183 result = arch_new_fn[e_machine](&key, cpuid); 184 if (!result) { 185 pr_err("%s: failed to initialize %s (%u) arch priv area\n", 186 __func__, result->name, e_machine); 187 free(tmp); 188 return NULL; 189 } 190 archs = tmp; 191 archs[num_archs++] = result; 192 qsort(archs, num_archs, sizeof(*archs), arch__cmp); 193 return result; 194 } 195 196 bool arch__is_x86(const struct arch *arch) 197 { 198 return arch->id.e_machine == EM_386 || arch->id.e_machine == EM_X86_64; 199 } 200 201 bool arch__is_powerpc(const struct arch *arch) 202 { 203 return arch->id.e_machine == EM_PPC || arch->id.e_machine == EM_PPC64; 204 } 205 206 static void ins_ops__delete(struct ins_operands *ops) 207 { 208 if (ops == NULL) 209 return; 210 zfree(&ops->source.raw); 211 zfree(&ops->source.name); 212 zfree(&ops->target.raw); 213 zfree(&ops->target.name); 214 } 215 216 int ins__raw_scnprintf(const struct ins *ins, char *bf, size_t size, 217 struct ins_operands *ops, int max_ins_name) 218 { 219 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); 220 } 221 222 int ins__scnprintf(const struct ins *ins, char *bf, size_t size, 223 struct ins_operands *ops, int max_ins_name) 224 { 225 if (ins->ops->scnprintf) 226 return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); 227 228 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 229 } 230 231 bool ins__is_fused(const struct arch *arch, const char *ins1, const char *ins2) 232 { 233 if (!arch || !arch->ins_is_fused) 234 return false; 235 236 return arch->ins_is_fused(arch, ins1, ins2); 237 } 238 239 static int call__parse(const struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 240 struct disasm_line *dl __maybe_unused) 241 { 242 char *endptr, *tok, *name; 243 struct map *map = ms->map; 244 struct addr_map_symbol target; 245 246 ops->target.addr = strtoull(ops->raw, &endptr, 16); 247 248 name = strchr(endptr, '<'); 249 if (name == NULL) 250 goto indirect_call; 251 252 name++; 253 254 if (arch->objdump.skip_functions_char && 255 strchr(name, arch->objdump.skip_functions_char)) 256 return -1; 257 258 tok = strchr(name, '>'); 259 if (tok == NULL) 260 return -1; 261 262 *tok = '\0'; 263 ops->target.name = strdup(name); 264 *tok = '>'; 265 266 if (ops->target.name == NULL) 267 return -1; 268 find_target: 269 target = (struct addr_map_symbol) { 270 .ms = { .map = map__get(map), }, 271 .addr = map__objdump_2mem(map, ops->target.addr), 272 }; 273 274 if (maps__find_ams(thread__maps(ms->thread), &target) == 0 && 275 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 276 ops->target.sym = target.ms.sym; 277 278 addr_map_symbol__exit(&target); 279 return 0; 280 281 indirect_call: 282 tok = strchr(endptr, '*'); 283 if (tok != NULL) { 284 endptr++; 285 286 /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). 287 * Do not parse such instruction. */ 288 if (strstr(endptr, "(%r") == NULL) 289 ops->target.addr = strtoull(endptr, NULL, 16); 290 } 291 goto find_target; 292 } 293 294 int call__scnprintf(const struct ins *ins, char *bf, size_t size, 295 struct ins_operands *ops, int max_ins_name) 296 { 297 if (ops->target.sym) 298 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 299 300 if (ops->target.addr == 0) 301 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 302 303 if (ops->target.name) 304 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name); 305 306 return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr); 307 } 308 309 const struct ins_ops call_ops = { 310 .parse = call__parse, 311 .scnprintf = call__scnprintf, 312 .is_call = true, 313 }; 314 315 bool ins__is_call(const struct ins *ins) 316 { 317 return ins->ops && ins->ops->is_call; 318 } 319 320 /* 321 * Prevents from matching commas in the comment section, e.g.: 322 * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast 323 * 324 * and skip comma as part of function arguments, e.g.: 325 * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> 326 */ 327 static inline const char *validate_comma(const char *c, struct ins_operands *ops) 328 { 329 if (ops->jump.raw_comment && c > ops->jump.raw_comment) 330 return NULL; 331 332 if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) 333 return NULL; 334 335 return c; 336 } 337 338 static int jump__parse(const struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 339 struct disasm_line *dl __maybe_unused) 340 { 341 struct map *map = ms->map; 342 struct symbol *sym = ms->sym; 343 struct addr_map_symbol target = { 344 .ms = { .map = map__get(map), }, 345 }; 346 const char *c = strchr(ops->raw, ','); 347 u64 start, end; 348 349 ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); 350 ops->jump.raw_func_start = strchr(ops->raw, '<'); 351 352 c = validate_comma(c, ops); 353 354 /* 355 * Examples of lines to parse for the _cpp_lex_token@@Base 356 * function: 357 * 358 * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> 359 * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> 360 * 361 * The first is a jump to an offset inside the same function, 362 * the second is to another function, i.e. that 0xa72 is an 363 * offset in the cpp_named_operator2name@@base function. 364 */ 365 /* 366 * skip over possible up to 2 operands to get to address, e.g.: 367 * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> 368 */ 369 if (c != NULL) { 370 c++; 371 ops->target.addr = strtoull(c, NULL, 16); 372 if (!ops->target.addr) { 373 c = strchr(c, ','); 374 c = validate_comma(c, ops); 375 if (c != NULL) { 376 c++; 377 ops->target.addr = strtoull(c, NULL, 16); 378 } 379 } 380 } else { 381 ops->target.addr = strtoull(ops->raw, NULL, 16); 382 } 383 384 target.addr = map__objdump_2mem(map, ops->target.addr); 385 start = map__unmap_ip(map, sym->start); 386 end = map__unmap_ip(map, sym->end); 387 388 ops->target.outside = target.addr < start || target.addr > end; 389 390 /* 391 * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): 392 393 cpp_named_operator2name@@Base+0xa72 394 395 * Point to a place that is after the cpp_named_operator2name 396 * boundaries, i.e. in the ELF symbol table for cc1 397 * cpp_named_operator2name is marked as being 32-bytes long, but it in 398 * fact is much larger than that, so we seem to need a symbols__find() 399 * routine that looks for >= current->start and < next_symbol->start, 400 * possibly just for C++ objects? 401 * 402 * For now lets just make some progress by marking jumps to outside the 403 * current function as call like. 404 * 405 * Actual navigation will come next, with further understanding of how 406 * the symbol searching and disassembly should be done. 407 */ 408 if (maps__find_ams(thread__maps(ms->thread), &target) == 0 && 409 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 410 ops->target.sym = target.ms.sym; 411 412 if (!ops->target.outside) { 413 ops->target.offset = target.addr - start; 414 ops->target.offset_avail = true; 415 } else { 416 ops->target.offset_avail = false; 417 } 418 addr_map_symbol__exit(&target); 419 return 0; 420 } 421 422 int jump__scnprintf(const struct ins *ins, char *bf, size_t size, 423 struct ins_operands *ops, int max_ins_name) 424 { 425 const char *c; 426 427 if (!ops->target.addr || ops->target.offset < 0) 428 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 429 430 if (ops->target.outside && ops->target.sym != NULL) 431 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 432 433 c = strchr(ops->raw, ','); 434 c = validate_comma(c, ops); 435 436 if (c != NULL) { 437 const char *c2 = strchr(c + 1, ','); 438 439 c2 = validate_comma(c2, ops); 440 /* check for 3-op insn */ 441 if (c2 != NULL) 442 c = c2; 443 c++; 444 445 /* mirror arch objdump's space-after-comma style */ 446 if (*c == ' ') 447 c++; 448 } 449 450 return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, 451 ins->name, c ? c - ops->raw : 0, ops->raw, 452 ops->target.offset); 453 } 454 455 static void jump__delete(struct ins_operands *ops __maybe_unused) 456 { 457 /* 458 * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the 459 * raw string, don't free them. 460 */ 461 } 462 463 const struct ins_ops jump_ops = { 464 .free = jump__delete, 465 .parse = jump__parse, 466 .scnprintf = jump__scnprintf, 467 .is_jump = true, 468 }; 469 470 bool ins__is_jump(const struct ins *ins) 471 { 472 return ins->ops && ins->ops->is_jump; 473 } 474 475 static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) 476 { 477 char *endptr, *name, *t; 478 479 if (strstr(raw, "(%rip)") == NULL) 480 return 0; 481 482 *addrp = strtoull(comment, &endptr, 16); 483 if (endptr == comment) 484 return 0; 485 name = strchr(endptr, '<'); 486 if (name == NULL) 487 return -1; 488 489 name++; 490 491 t = strchr(name, '>'); 492 if (t == NULL) 493 return 0; 494 495 *t = '\0'; 496 *namep = strdup(name); 497 *t = '>'; 498 499 return 0; 500 } 501 502 static int lock__parse(const struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 503 struct disasm_line *dl __maybe_unused) 504 { 505 ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); 506 if (ops->locked.ops == NULL) 507 return 0; 508 509 if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) 510 goto out_free_ops; 511 512 ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0); 513 514 if (ops->locked.ins.ops == NULL) 515 goto out_free_ops; 516 517 if (ops->locked.ins.ops->parse && 518 ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0) 519 goto out_free_ops; 520 521 return 0; 522 523 out_free_ops: 524 zfree(&ops->locked.ops); 525 return 0; 526 } 527 528 static int lock__scnprintf(const struct ins *ins, char *bf, size_t size, 529 struct ins_operands *ops, int max_ins_name) 530 { 531 int printed; 532 533 if (ops->locked.ins.ops == NULL) 534 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 535 536 printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); 537 return printed + ins__scnprintf(&ops->locked.ins, bf + printed, 538 size - printed, ops->locked.ops, max_ins_name); 539 } 540 541 static void lock__delete(struct ins_operands *ops) 542 { 543 struct ins *ins = &ops->locked.ins; 544 545 if (ins->ops && ins->ops->free) 546 ins->ops->free(ops->locked.ops); 547 else 548 ins_ops__delete(ops->locked.ops); 549 550 zfree(&ops->locked.ops); 551 zfree(&ops->locked.ins.name); 552 zfree(&ops->target.raw); 553 zfree(&ops->target.name); 554 } 555 556 const struct ins_ops lock_ops = { 557 .free = lock__delete, 558 .parse = lock__parse, 559 .scnprintf = lock__scnprintf, 560 }; 561 562 /* 563 * Check if the operand has more than one registers like x86 SIB addressing: 564 * 0x1234(%rax, %rbx, 8) 565 * 566 * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check 567 * the input string after 'memory_ref_char' if exists. 568 */ 569 static bool check_multi_regs(const struct arch *arch, const char *op) 570 { 571 int count = 0; 572 573 if (arch->objdump.register_char == 0) 574 return false; 575 576 if (arch->objdump.memory_ref_char) { 577 op = strchr(op, arch->objdump.memory_ref_char); 578 if (op == NULL) 579 return false; 580 } 581 582 while ((op = strchr(op, arch->objdump.register_char)) != NULL) { 583 count++; 584 op++; 585 } 586 587 return count > 1; 588 } 589 590 static int mov__parse(const struct arch *arch, struct ins_operands *ops, 591 struct map_symbol *ms __maybe_unused, 592 struct disasm_line *dl __maybe_unused) 593 { 594 char *s = strchr(ops->raw, ','), *target, *comment, prev; 595 596 if (s == NULL) 597 return -1; 598 599 *s = '\0'; 600 601 /* 602 * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) 603 * then it needs to have the closing parenthesis. 604 */ 605 if (strchr(ops->raw, '(')) { 606 *s = ','; 607 s = strchr(ops->raw, ')'); 608 if (s == NULL || s[1] != ',') 609 return -1; 610 *++s = '\0'; 611 } 612 613 ops->source.raw = strdup(ops->raw); 614 *s = ','; 615 616 if (ops->source.raw == NULL) 617 return -1; 618 619 ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); 620 621 target = skip_spaces(++s); 622 comment = strchr(s, arch->objdump.comment_char); 623 624 if (comment != NULL) 625 s = comment - 1; 626 else 627 s = strchr(s, '\0') - 1; 628 629 while (s > target && isspace(s[0])) 630 --s; 631 s++; 632 prev = *s; 633 *s = '\0'; 634 635 ops->target.raw = strdup(target); 636 *s = prev; 637 638 if (ops->target.raw == NULL) 639 goto out_free_source; 640 641 ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); 642 643 if (comment == NULL) 644 return 0; 645 646 comment = skip_spaces(comment); 647 comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); 648 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 649 650 return 0; 651 652 out_free_source: 653 zfree(&ops->source.raw); 654 return -1; 655 } 656 657 int mov__scnprintf(const struct ins *ins, char *bf, size_t size, 658 struct ins_operands *ops, int max_ins_name) 659 { 660 return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, 661 ops->source.name ?: ops->source.raw, 662 ops->target.name ?: ops->target.raw); 663 } 664 665 const struct ins_ops mov_ops = { 666 .parse = mov__parse, 667 .scnprintf = mov__scnprintf, 668 }; 669 670 static int dec__parse(const struct arch *arch __maybe_unused, struct ins_operands *ops, 671 struct map_symbol *ms __maybe_unused, 672 struct disasm_line *dl __maybe_unused) 673 { 674 char *target, *comment, *s, prev; 675 676 target = s = ops->raw; 677 678 while (s[0] != '\0' && !isspace(s[0])) 679 ++s; 680 prev = *s; 681 *s = '\0'; 682 683 ops->target.raw = strdup(target); 684 *s = prev; 685 686 if (ops->target.raw == NULL) 687 return -1; 688 689 comment = strchr(s, arch->objdump.comment_char); 690 if (comment == NULL) 691 return 0; 692 693 comment = skip_spaces(comment); 694 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 695 696 return 0; 697 } 698 699 static int dec__scnprintf(const struct ins *ins, char *bf, size_t size, 700 struct ins_operands *ops, int max_ins_name) 701 { 702 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 703 ops->target.name ?: ops->target.raw); 704 } 705 706 const struct ins_ops dec_ops = { 707 .parse = dec__parse, 708 .scnprintf = dec__scnprintf, 709 }; 710 711 static int nop__scnprintf(const struct ins *ins __maybe_unused, char *bf, size_t size, 712 struct ins_operands *ops __maybe_unused, int max_ins_name) 713 { 714 return scnprintf(bf, size, "%-*s", max_ins_name, "nop"); 715 } 716 717 const struct ins_ops nop_ops = { 718 .scnprintf = nop__scnprintf, 719 }; 720 721 const struct ins_ops ret_ops = { 722 .scnprintf = ins__raw_scnprintf, 723 }; 724 725 static bool ins__is_nop(const struct ins *ins) 726 { 727 return ins->ops == &nop_ops; 728 } 729 730 bool ins__is_ret(const struct ins *ins) 731 { 732 return ins->ops == &ret_ops; 733 } 734 735 bool ins__is_lock(const struct ins *ins) 736 { 737 return ins->ops == &lock_ops; 738 } 739 740 static int ins__key_cmp(const void *name, const void *insp) 741 { 742 const struct ins *ins = insp; 743 744 return strcmp(name, ins->name); 745 } 746 747 static int ins__cmp(const void *a, const void *b) 748 { 749 const struct ins *ia = a; 750 const struct ins *ib = b; 751 752 return strcmp(ia->name, ib->name); 753 } 754 755 static void ins__sort(struct arch *arch) 756 { 757 const int nmemb = arch->nr_instructions; 758 759 qsort((void *)arch->instructions, nmemb, sizeof(struct ins), ins__cmp); 760 } 761 762 static const struct ins_ops *__ins__find(const struct arch *arch, const char *name, 763 struct disasm_line *dl) 764 { 765 struct ins *ins; 766 const int nmemb = arch->nr_instructions; 767 768 if (arch__is_powerpc(arch)) { 769 /* 770 * For powerpc, identify the instruction ops 771 * from the opcode using raw_insn. 772 */ 773 const struct ins_ops *ops; 774 775 ops = check_ppc_insn(dl); 776 if (ops) 777 return ops; 778 } 779 780 if (!arch->sorted_instructions) { 781 ins__sort((struct arch *)arch); 782 ((struct arch *)arch)->sorted_instructions = true; 783 } 784 785 ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 786 if (ins) 787 return ins->ops; 788 789 if (arch->insn_suffix) { 790 char tmp[32]; 791 char suffix; 792 size_t len = strlen(name); 793 794 if (len == 0 || len >= sizeof(tmp)) 795 return NULL; 796 797 suffix = name[len - 1]; 798 if (strchr(arch->insn_suffix, suffix) == NULL) 799 return NULL; 800 801 strcpy(tmp, name); 802 tmp[len - 1] = '\0'; /* remove the suffix and check again */ 803 804 ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 805 } 806 return ins ? ins->ops : NULL; 807 } 808 809 const struct ins_ops *ins__find(const struct arch *arch, const char *name, struct disasm_line *dl) 810 { 811 const struct ins_ops *ops = __ins__find(arch, name, dl); 812 813 if (!ops && arch->associate_instruction_ops) 814 ops = arch->associate_instruction_ops((struct arch *)arch, name); 815 816 return ops; 817 } 818 819 static void disasm_line__init_ins(struct disasm_line *dl, const struct arch *arch, 820 struct map_symbol *ms) 821 { 822 dl->ins.ops = ins__find(arch, dl->ins.name, dl); 823 824 if (!dl->ins.ops) 825 return; 826 827 if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0) 828 dl->ins.ops = NULL; 829 } 830 831 static int disasm_line__parse(char *line, const char **namep, char **rawp) 832 { 833 char tmp, *name = skip_spaces(line); 834 835 if (name[0] == '\0') 836 return -1; 837 838 *rawp = name + 1; 839 840 while ((*rawp)[0] != '\0' && !isspace((*rawp)[0])) 841 ++*rawp; 842 843 tmp = (*rawp)[0]; 844 (*rawp)[0] = '\0'; 845 *namep = strdup(name); 846 847 if (*namep == NULL) 848 goto out; 849 850 (*rawp)[0] = tmp; 851 *rawp = strim(*rawp); 852 853 return 0; 854 855 out: 856 return -1; 857 } 858 859 /* 860 * Parses the result captured from symbol__disassemble_* 861 * Example, line read from DSO file in powerpc: 862 * line: 38 01 81 e8 863 * opcode: fetched from arch specific get_opcode_insn 864 * rawp_insn: e8810138 865 * 866 * rawp_insn is used later to extract the reg/offset fields 867 */ 868 #define PPC_OP(op) (((op) >> 26) & 0x3F) 869 #define RAW_BYTES 11 870 871 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args) 872 { 873 char *line = dl->al.line; 874 const char **namep = &dl->ins.name; 875 char **rawp = &dl->ops.raw; 876 char *tmp_raw_insn, *name_raw_insn = skip_spaces(line); 877 char *name = skip_spaces(name_raw_insn + RAW_BYTES); 878 int disasm = 0; 879 int ret = 0; 880 881 if (args->options->disassembler_used) 882 disasm = 1; 883 884 if (name_raw_insn[0] == '\0') 885 return -1; 886 887 if (disasm) 888 ret = disasm_line__parse(name, namep, rawp); 889 else 890 *namep = ""; 891 892 tmp_raw_insn = strndup(name_raw_insn, 11); 893 if (tmp_raw_insn == NULL) 894 return -1; 895 896 remove_spaces(tmp_raw_insn); 897 898 sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn); 899 if (disasm) 900 dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn); 901 902 return ret; 903 } 904 905 static void annotation_line__init(struct annotation_line *al, 906 struct annotate_args *args, 907 int nr) 908 { 909 al->offset = args->offset; 910 al->line = strdup(args->line); 911 al->line_nr = args->line_nr; 912 al->fileloc = args->fileloc; 913 al->data_nr = nr; 914 } 915 916 static void annotation_line__exit(struct annotation_line *al) 917 { 918 zfree_srcline(&al->path); 919 zfree(&al->line); 920 zfree(&al->cycles); 921 zfree(&al->br_cntr); 922 } 923 924 static size_t disasm_line_size(int nr) 925 { 926 struct annotation_line *al; 927 928 return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); 929 } 930 931 /* 932 * Allocating the disasm annotation line data with 933 * following structure: 934 * 935 * ------------------------------------------- 936 * struct disasm_line | struct annotation_line 937 * ------------------------------------------- 938 * 939 * We have 'struct annotation_line' member as last member 940 * of 'struct disasm_line' to have an easy access. 941 */ 942 struct disasm_line *disasm_line__new(struct annotate_args *args) 943 { 944 struct disasm_line *dl = NULL; 945 struct annotation *notes = symbol__annotation(args->ms->sym); 946 int nr = notes->src->nr_events; 947 948 dl = zalloc(disasm_line_size(nr)); 949 if (!dl) 950 return NULL; 951 952 annotation_line__init(&dl->al, args, nr); 953 if (dl->al.line == NULL) 954 goto out_delete; 955 956 if (args->offset != -1) { 957 if (arch__is_powerpc(args->arch)) { 958 if (disasm_line__parse_powerpc(dl, args) < 0) 959 goto out_free_line; 960 } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) 961 goto out_free_line; 962 963 disasm_line__init_ins(dl, args->arch, args->ms); 964 } 965 966 return dl; 967 968 out_free_line: 969 zfree(&dl->al.line); 970 out_delete: 971 free(dl); 972 return NULL; 973 } 974 975 void disasm_line__free(struct disasm_line *dl) 976 { 977 if (dl->ins.ops && dl->ins.ops->free) 978 dl->ins.ops->free(&dl->ops); 979 else 980 ins_ops__delete(&dl->ops); 981 zfree(&dl->ins.name); 982 annotation_line__exit(&dl->al); 983 free(dl); 984 } 985 986 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) 987 { 988 if (raw || !dl->ins.ops) 989 return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw); 990 991 return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); 992 } 993 994 /* 995 * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) 996 * which looks like following 997 * 998 * 0000000000415500 <_init>: 999 * 415500: sub $0x8,%rsp 1000 * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8> 1001 * 41550b: test %rax,%rax 1002 * 41550e: je 415515 <_init+0x15> 1003 * 415510: callq 416e70 <__gmon_start__@plt> 1004 * 415515: add $0x8,%rsp 1005 * 415519: retq 1006 * 1007 * it will be parsed and saved into struct disasm_line as 1008 * <offset> <name> <ops.raw> 1009 * 1010 * The offset will be a relative offset from the start of the symbol and -1 1011 * means that it's not a disassembly line so should be treated differently. 1012 * The ops.raw part will be parsed further according to type of the instruction. 1013 */ 1014 static int symbol__parse_objdump_line(struct symbol *sym, 1015 struct annotate_args *args, 1016 char *parsed_line, int *line_nr, char **fileloc) 1017 { 1018 struct map *map = args->ms->map; 1019 struct annotation *notes = symbol__annotation(sym); 1020 struct disasm_line *dl; 1021 char *tmp; 1022 s64 line_ip, offset = -1; 1023 regmatch_t match[2]; 1024 1025 /* /filename:linenr ? Save line number and ignore. */ 1026 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { 1027 *line_nr = atoi(parsed_line + match[1].rm_so); 1028 free(*fileloc); 1029 *fileloc = strdup(parsed_line); 1030 return 0; 1031 } 1032 1033 /* Process hex address followed by ':'. */ 1034 line_ip = strtoull(parsed_line, &tmp, 16); 1035 if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { 1036 u64 start = map__rip_2objdump(map, sym->start), 1037 end = map__rip_2objdump(map, sym->end); 1038 1039 offset = line_ip - start; 1040 if ((u64)line_ip < start || (u64)line_ip >= end) 1041 offset = -1; 1042 else 1043 parsed_line = tmp + 1; 1044 } 1045 1046 args->offset = offset; 1047 args->line = parsed_line; 1048 args->line_nr = *line_nr; 1049 args->fileloc = *fileloc; 1050 args->ms->sym = sym; 1051 1052 dl = disasm_line__new(args); 1053 (*line_nr)++; 1054 1055 if (dl == NULL) 1056 return -1; 1057 1058 if (!disasm_line__has_local_offset(dl)) { 1059 dl->ops.target.offset = dl->ops.target.addr - 1060 map__rip_2objdump(map, sym->start); 1061 dl->ops.target.offset_avail = true; 1062 } 1063 1064 /* kcore has no symbols, so add the call target symbol */ 1065 if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { 1066 struct addr_map_symbol target = { 1067 .addr = dl->ops.target.addr, 1068 .ms = { .map = map__get(map), }, 1069 }; 1070 1071 if (!maps__find_ams(thread__maps(args->ms->thread), &target) && 1072 target.ms.sym->start == target.al_addr) 1073 dl->ops.target.sym = target.ms.sym; 1074 1075 addr_map_symbol__exit(&target); 1076 } 1077 1078 annotation_line__add(&dl->al, ¬es->src->source); 1079 return 0; 1080 } 1081 1082 static void delete_last_nop(struct symbol *sym) 1083 { 1084 struct annotation *notes = symbol__annotation(sym); 1085 struct list_head *list = ¬es->src->source; 1086 struct disasm_line *dl; 1087 1088 while (!list_empty(list)) { 1089 dl = list_entry(list->prev, struct disasm_line, al.node); 1090 1091 if (dl->ins.ops) { 1092 if (!ins__is_nop(&dl->ins)) 1093 return; 1094 } else { 1095 if (!strstr(dl->al.line, " nop ") && 1096 !strstr(dl->al.line, " nopl ") && 1097 !strstr(dl->al.line, " nopw ")) 1098 return; 1099 } 1100 1101 list_del_init(&dl->al.node); 1102 disasm_line__free(dl); 1103 } 1104 } 1105 1106 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen) 1107 { 1108 struct dso *dso = map__dso(ms->map); 1109 1110 BUG_ON(buflen == 0); 1111 1112 if (errnum >= 0) { 1113 str_error_r(errnum, buf, buflen); 1114 return 0; 1115 } 1116 1117 switch (errnum) { 1118 case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { 1119 char bf[SBUILD_ID_SIZE + 15] = " with build id "; 1120 char *build_id_msg = NULL; 1121 1122 if (dso__has_build_id(dso)) { 1123 build_id__snprintf(dso__bid(dso), bf + 15, sizeof(bf) - 15); 1124 build_id_msg = bf; 1125 } 1126 scnprintf(buf, buflen, 1127 "No vmlinux file%s\nwas found in the path.\n\n" 1128 "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" 1129 "Please use:\n\n" 1130 " perf buildid-cache -vu vmlinux\n\n" 1131 "or:\n\n" 1132 " --vmlinux vmlinux\n", build_id_msg ?: ""); 1133 } 1134 break; 1135 case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: 1136 scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); 1137 break; 1138 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: 1139 scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); 1140 break; 1141 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: 1142 scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); 1143 break; 1144 case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: 1145 scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso)); 1146 break; 1147 case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: 1148 scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", 1149 dso__long_name(dso)); 1150 break; 1151 case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE: 1152 scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso)); 1153 break; 1154 default: 1155 scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); 1156 break; 1157 } 1158 1159 return 0; 1160 } 1161 1162 static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size) 1163 { 1164 char linkname[PATH_MAX]; 1165 char *build_id_filename; 1166 char *build_id_path = NULL; 1167 char *pos; 1168 int len; 1169 1170 if (dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS && 1171 !dso__is_kcore(dso)) 1172 return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; 1173 1174 build_id_filename = dso__build_id_filename(dso, NULL, 0, false); 1175 if (build_id_filename) { 1176 __symbol__join_symfs(filename, filename_size, build_id_filename); 1177 free(build_id_filename); 1178 } else { 1179 if (dso__has_build_id(dso)) 1180 return ENOMEM; 1181 goto fallback; 1182 } 1183 1184 build_id_path = strdup(filename); 1185 if (!build_id_path) 1186 return ENOMEM; 1187 1188 /* 1189 * old style build-id cache has name of XX/XXXXXXX.. while 1190 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. 1191 * extract the build-id part of dirname in the new style only. 1192 */ 1193 pos = strrchr(build_id_path, '/'); 1194 if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) 1195 dirname(build_id_path); 1196 1197 if (dso__is_kcore(dso)) 1198 goto fallback; 1199 1200 len = readlink(build_id_path, linkname, sizeof(linkname) - 1); 1201 if (len < 0) 1202 goto fallback; 1203 1204 linkname[len] = '\0'; 1205 if (strstr(linkname, DSO__NAME_KALLSYMS) || 1206 access(filename, R_OK)) { 1207 fallback: 1208 /* 1209 * If we don't have build-ids or the build-id file isn't in the 1210 * cache, or is just a kallsyms file, well, lets hope that this 1211 * DSO is the same as when 'perf record' ran. 1212 */ 1213 if (dso__kernel(dso) && dso__long_name(dso)[0] == '/') 1214 snprintf(filename, filename_size, "%s", dso__long_name(dso)); 1215 else 1216 __symbol__join_symfs(filename, filename_size, dso__long_name(dso)); 1217 1218 mutex_lock(dso__lock(dso)); 1219 if (access(filename, R_OK) && errno == ENOENT && dso__nsinfo(dso)) { 1220 char *new_name = dso__filename_with_chroot(dso, filename); 1221 if (new_name) { 1222 strlcpy(filename, new_name, filename_size); 1223 free(new_name); 1224 } 1225 } 1226 mutex_unlock(dso__lock(dso)); 1227 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1228 dso__set_binary_type(dso, DSO_BINARY_TYPE__BUILD_ID_CACHE); 1229 } 1230 1231 free(build_id_path); 1232 return 0; 1233 } 1234 1235 static int symbol__disassemble_raw(char *filename, struct symbol *sym, 1236 struct annotate_args *args) 1237 { 1238 struct annotation *notes = symbol__annotation(sym); 1239 struct map *map = args->ms->map; 1240 struct dso *dso = map__dso(map); 1241 u64 start = map__rip_2objdump(map, sym->start); 1242 u64 end = map__rip_2objdump(map, sym->end); 1243 u64 len = end - start; 1244 u64 offset; 1245 int i, count; 1246 u8 *buf = NULL; 1247 char disasm_buf[512]; 1248 struct disasm_line *dl; 1249 u32 *line; 1250 1251 /* Return if objdump is specified explicitly */ 1252 if (args->options->objdump_path) 1253 return -1; 1254 1255 pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename); 1256 1257 buf = malloc(len); 1258 if (buf == NULL) 1259 goto err; 1260 1261 count = dso__data_read_offset(dso, NULL, sym->start, buf, len); 1262 1263 line = (u32 *)buf; 1264 1265 if ((u64)count != len) 1266 goto err; 1267 1268 /* add the function address and name */ 1269 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1270 start, sym->name); 1271 1272 args->offset = -1; 1273 args->line = disasm_buf; 1274 args->line_nr = 0; 1275 args->fileloc = NULL; 1276 args->ms->sym = sym; 1277 1278 dl = disasm_line__new(args); 1279 if (dl == NULL) 1280 goto err; 1281 1282 annotation_line__add(&dl->al, ¬es->src->source); 1283 1284 /* Each raw instruction is 4 byte */ 1285 count = len/4; 1286 1287 for (i = 0, offset = 0; i < count; i++) { 1288 args->offset = offset; 1289 sprintf(args->line, "%x", line[i]); 1290 dl = disasm_line__new(args); 1291 if (dl == NULL) 1292 break; 1293 1294 annotation_line__add(&dl->al, ¬es->src->source); 1295 offset += 4; 1296 } 1297 1298 /* It failed in the middle */ 1299 if (offset != len) { 1300 struct list_head *list = ¬es->src->source; 1301 1302 /* Discard all lines and fallback to objdump */ 1303 while (!list_empty(list)) { 1304 dl = list_first_entry(list, struct disasm_line, al.node); 1305 1306 list_del_init(&dl->al.node); 1307 disasm_line__free(dl); 1308 } 1309 count = -1; 1310 } 1311 1312 out: 1313 free(buf); 1314 return count < 0 ? count : 0; 1315 1316 err: 1317 count = -1; 1318 goto out; 1319 } 1320 1321 /* 1322 * Possibly create a new version of line with tabs expanded. Returns the 1323 * existing or new line, storage is updated if a new line is allocated. If 1324 * allocation fails then NULL is returned. 1325 */ 1326 char *expand_tabs(char *line, char **storage, size_t *storage_len) 1327 { 1328 size_t i, src, dst, len, new_storage_len, num_tabs; 1329 char *new_line; 1330 size_t line_len = strlen(line); 1331 1332 for (num_tabs = 0, i = 0; i < line_len; i++) 1333 if (line[i] == '\t') 1334 num_tabs++; 1335 1336 if (num_tabs == 0) 1337 return line; 1338 1339 /* 1340 * Space for the line and '\0', less the leading and trailing 1341 * spaces. Each tab may introduce 7 additional spaces. 1342 */ 1343 new_storage_len = line_len + 1 + (num_tabs * 7); 1344 1345 new_line = malloc(new_storage_len); 1346 if (new_line == NULL) { 1347 pr_err("Failure allocating memory for tab expansion\n"); 1348 return NULL; 1349 } 1350 1351 /* 1352 * Copy regions starting at src and expand tabs. If there are two 1353 * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces 1354 * are inserted. 1355 */ 1356 for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { 1357 if (line[i] == '\t') { 1358 len = i - src; 1359 memcpy(&new_line[dst], &line[src], len); 1360 dst += len; 1361 new_line[dst++] = ' '; 1362 while (dst % 8 != 0) 1363 new_line[dst++] = ' '; 1364 src = i + 1; 1365 num_tabs--; 1366 } 1367 } 1368 1369 /* Expand the last region. */ 1370 len = line_len - src; 1371 memcpy(&new_line[dst], &line[src], len); 1372 dst += len; 1373 new_line[dst] = '\0'; 1374 1375 free(*storage); 1376 *storage = new_line; 1377 *storage_len = new_storage_len; 1378 return new_line; 1379 } 1380 1381 static int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args) 1382 { 1383 struct annotation *notes = symbol__annotation(sym); 1384 struct disasm_line *dl; 1385 1386 args->offset = -1; 1387 args->line = strdup("to be implemented"); 1388 args->line_nr = 0; 1389 args->fileloc = NULL; 1390 dl = disasm_line__new(args); 1391 if (dl) 1392 annotation_line__add(&dl->al, ¬es->src->source); 1393 1394 zfree(&args->line); 1395 return 0; 1396 } 1397 1398 static int symbol__disassemble_objdump(const char *filename, struct symbol *sym, 1399 struct annotate_args *args) 1400 { 1401 struct annotation_options *opts = &annotate_opts; 1402 struct map *map = args->ms->map; 1403 struct dso *dso = map__dso(map); 1404 char *command; 1405 FILE *file; 1406 int lineno = 0; 1407 char *fileloc = NULL; 1408 int nline; 1409 char *line; 1410 size_t line_len; 1411 const char *objdump_argv[] = { 1412 "/bin/sh", 1413 "-c", 1414 NULL, /* Will be the objdump command to run. */ 1415 "--", 1416 NULL, /* Will be the symfs path. */ 1417 NULL, 1418 }; 1419 struct child_process objdump_process; 1420 int err; 1421 1422 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) 1423 return symbol__disassemble_bpf_libbfd(sym, args); 1424 1425 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) 1426 return symbol__disassemble_bpf_image(sym, args); 1427 1428 err = asprintf(&command, 1429 "%s %s%s --start-address=0x%016" PRIx64 1430 " --stop-address=0x%016" PRIx64 1431 " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", 1432 opts->objdump_path ?: "objdump", 1433 opts->disassembler_style ? "-M " : "", 1434 opts->disassembler_style ?: "", 1435 map__rip_2objdump(map, sym->start), 1436 map__rip_2objdump(map, sym->end), 1437 opts->show_linenr ? "-l" : "", 1438 opts->show_asm_raw ? "" : "--no-show-raw-insn", 1439 opts->annotate_src ? "-S" : "", 1440 opts->prefix ? "--prefix " : "", 1441 opts->prefix ? '"' : ' ', 1442 opts->prefix ?: "", 1443 opts->prefix ? '"' : ' ', 1444 opts->prefix_strip ? "--prefix-strip=" : "", 1445 opts->prefix_strip ?: ""); 1446 1447 if (err < 0) { 1448 pr_err("Failure allocating memory for the command to run\n"); 1449 return err; 1450 } 1451 1452 pr_debug("Executing: %s\n", command); 1453 1454 objdump_argv[2] = command; 1455 objdump_argv[4] = filename; 1456 1457 /* Create a pipe to read from for stdout */ 1458 memset(&objdump_process, 0, sizeof(objdump_process)); 1459 objdump_process.argv = objdump_argv; 1460 objdump_process.out = -1; 1461 objdump_process.err = -1; 1462 objdump_process.no_stderr = 1; 1463 if (start_command(&objdump_process)) { 1464 pr_err("Failure starting to run %s\n", command); 1465 err = -1; 1466 goto out_free_command; 1467 } 1468 1469 file = fdopen(objdump_process.out, "r"); 1470 if (!file) { 1471 pr_err("Failure creating FILE stream for %s\n", command); 1472 /* 1473 * If we were using debug info should retry with 1474 * original binary. 1475 */ 1476 err = -1; 1477 goto out_close_stdout; 1478 } 1479 1480 /* Storage for getline. */ 1481 line = NULL; 1482 line_len = 0; 1483 1484 nline = 0; 1485 while (!feof(file)) { 1486 const char *match; 1487 char *expanded_line; 1488 1489 if (getline(&line, &line_len, file) < 0 || !line) 1490 break; 1491 1492 /* Skip lines containing "filename:" */ 1493 match = strstr(line, filename); 1494 if (match && match[strlen(filename)] == ':') 1495 continue; 1496 1497 expanded_line = strim(line); 1498 expanded_line = expand_tabs(expanded_line, &line, &line_len); 1499 if (!expanded_line) 1500 break; 1501 1502 /* 1503 * The source code line number (lineno) needs to be kept in 1504 * across calls to symbol__parse_objdump_line(), so that it 1505 * can associate it with the instructions till the next one. 1506 * See disasm_line__new() and struct disasm_line::line_nr. 1507 */ 1508 if (symbol__parse_objdump_line(sym, args, expanded_line, 1509 &lineno, &fileloc) < 0) 1510 break; 1511 nline++; 1512 } 1513 free(line); 1514 free(fileloc); 1515 1516 err = finish_command(&objdump_process); 1517 if (err) 1518 pr_err("Error running %s\n", command); 1519 1520 if (nline == 0) { 1521 err = -1; 1522 pr_err("No output from %s\n", command); 1523 } 1524 1525 /* 1526 * kallsyms does not have symbol sizes so there may a nop at the end. 1527 * Remove it. 1528 */ 1529 if (dso__is_kcore(dso)) 1530 delete_last_nop(sym); 1531 1532 fclose(file); 1533 1534 out_close_stdout: 1535 close(objdump_process.out); 1536 1537 out_free_command: 1538 free(command); 1539 return err; 1540 } 1541 1542 int symbol__disassemble(struct symbol *sym, struct annotate_args *args) 1543 { 1544 struct annotation_options *options = args->options; 1545 struct map *map = args->ms->map; 1546 struct dso *dso = map__dso(map); 1547 char symfs_filename[PATH_MAX]; 1548 bool delete_extract = false; 1549 struct kcore_extract kce; 1550 bool decomp = false; 1551 int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); 1552 1553 if (err) 1554 return err; 1555 1556 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, 1557 symfs_filename, sym->name, map__unmap_ip(map, sym->start), 1558 map__unmap_ip(map, sym->end)); 1559 1560 pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name); 1561 1562 if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1563 return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE; 1564 } else if (dso__is_kcore(dso)) { 1565 kce.addr = map__rip_2objdump(map, sym->start); 1566 kce.kcore_filename = symfs_filename; 1567 kce.len = sym->end - sym->start; 1568 kce.offs = sym->start; 1569 1570 if (!kcore_extract__create(&kce)) { 1571 delete_extract = true; 1572 strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename)); 1573 } 1574 } else if (dso__needs_decompress(dso)) { 1575 char tmp[KMOD_DECOMP_LEN]; 1576 1577 if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) 1578 return -1; 1579 1580 decomp = true; 1581 strcpy(symfs_filename, tmp); 1582 } 1583 1584 /* 1585 * For powerpc data type profiling, use the dso__data_read_offset to 1586 * read raw instruction directly and interpret the binary code to 1587 * understand instructions and register fields. For sort keys as type 1588 * and typeoff, disassemble to mnemonic notation is not required in 1589 * case of powerpc. 1590 */ 1591 if (arch__is_powerpc(args->arch)) { 1592 extern const char *sort_order; 1593 1594 if (sort_order && !strstr(sort_order, "sym")) { 1595 err = symbol__disassemble_raw(symfs_filename, sym, args); 1596 if (err == 0) 1597 goto out_remove_tmp; 1598 1599 err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); 1600 if (err == 0) 1601 goto out_remove_tmp; 1602 } 1603 } 1604 1605 /* FIXME: LLVM and CAPSTONE should support source code */ 1606 if (options->annotate_src && !options->hide_src_code) { 1607 err = symbol__disassemble_objdump(symfs_filename, sym, args); 1608 if (err == 0) 1609 goto out_remove_tmp; 1610 } 1611 1612 err = -1; 1613 for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { 1614 enum perf_disassembler dis = options->disassemblers[i]; 1615 1616 switch (dis) { 1617 case PERF_DISASM_LLVM: 1618 args->options->disassembler_used = PERF_DISASM_LLVM; 1619 err = symbol__disassemble_llvm(symfs_filename, sym, args); 1620 break; 1621 case PERF_DISASM_CAPSTONE: 1622 args->options->disassembler_used = PERF_DISASM_CAPSTONE; 1623 err = symbol__disassemble_capstone(symfs_filename, sym, args); 1624 break; 1625 case PERF_DISASM_OBJDUMP: 1626 args->options->disassembler_used = PERF_DISASM_OBJDUMP; 1627 err = symbol__disassemble_objdump(symfs_filename, sym, args); 1628 break; 1629 case PERF_DISASM_UNKNOWN: /* End of disassemblers. */ 1630 default: 1631 args->options->disassembler_used = PERF_DISASM_UNKNOWN; 1632 goto out_remove_tmp; 1633 } 1634 if (err == 0) 1635 pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]); 1636 } 1637 out_remove_tmp: 1638 if (decomp) 1639 unlink(symfs_filename); 1640 1641 if (delete_extract) 1642 kcore_extract__delete(&kce); 1643 1644 return err; 1645 } 1646