1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <ctype.h> 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <inttypes.h> 6 #include <libgen.h> 7 #include <regex.h> 8 #include <stdlib.h> 9 #include <unistd.h> 10 11 #include <linux/string.h> 12 #include <subcmd/run-command.h> 13 14 #include "annotate.h" 15 #include "annotate-data.h" 16 #include "build-id.h" 17 #include "capstone.h" 18 #include "debug.h" 19 #include "disasm.h" 20 #include "dso.h" 21 #include "dwarf-regs.h" 22 #include "env.h" 23 #include "evsel.h" 24 #include "libbfd.h" 25 #include "llvm.h" 26 #include "map.h" 27 #include "maps.h" 28 #include "namespaces.h" 29 #include "srcline.h" 30 #include "symbol.h" 31 #include "util.h" 32 33 static regex_t file_lineno; 34 35 /* These can be referred from the arch-dependent code */ 36 static const struct ins_ops call_ops; 37 static const struct ins_ops dec_ops; 38 static const struct ins_ops jump_ops; 39 static const struct ins_ops mov_ops; 40 static const struct ins_ops nop_ops; 41 static const struct ins_ops lock_ops; 42 static const struct ins_ops ret_ops; 43 static const struct ins_ops load_store_ops; 44 static const struct ins_ops arithmetic_ops; 45 46 static int jump__scnprintf(struct ins *ins, char *bf, size_t size, 47 struct ins_operands *ops, int max_ins_name); 48 static int call__scnprintf(struct ins *ins, char *bf, size_t size, 49 struct ins_operands *ops, int max_ins_name); 50 51 static void ins__sort(struct arch *arch); 52 static int disasm_line__parse(char *line, const char **namep, char **rawp); 53 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args); 54 55 static __attribute__((constructor)) void symbol__init_regexpr(void) 56 { 57 regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); 58 } 59 60 static int arch__grow_instructions(struct arch *arch) 61 { 62 struct ins *new_instructions; 63 size_t new_nr_allocated; 64 65 if (arch->nr_instructions_allocated == 0 && arch->instructions) 66 goto grow_from_non_allocated_table; 67 68 new_nr_allocated = arch->nr_instructions_allocated + 128; 69 new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins)); 70 if (new_instructions == NULL) 71 return -1; 72 73 out_update_instructions: 74 arch->instructions = new_instructions; 75 arch->nr_instructions_allocated = new_nr_allocated; 76 return 0; 77 78 grow_from_non_allocated_table: 79 new_nr_allocated = arch->nr_instructions + 128; 80 new_instructions = calloc(new_nr_allocated, sizeof(struct ins)); 81 if (new_instructions == NULL) 82 return -1; 83 84 memcpy(new_instructions, arch->instructions, arch->nr_instructions); 85 goto out_update_instructions; 86 } 87 88 static int arch__associate_ins_ops(struct arch *arch, const char *name, const struct ins_ops *ops) 89 { 90 struct ins *ins; 91 92 if (arch->nr_instructions == arch->nr_instructions_allocated && 93 arch__grow_instructions(arch)) 94 return -1; 95 96 ins = &arch->instructions[arch->nr_instructions]; 97 ins->name = strdup(name); 98 if (!ins->name) 99 return -1; 100 101 ins->ops = ops; 102 arch->nr_instructions++; 103 104 ins__sort(arch); 105 return 0; 106 } 107 108 #include "arch/arc/annotate/instructions.c" 109 #include "arch/arm/annotate/instructions.c" 110 #include "arch/arm64/annotate/instructions.c" 111 #include "arch/csky/annotate/instructions.c" 112 #include "arch/loongarch/annotate/instructions.c" 113 #include "arch/mips/annotate/instructions.c" 114 #include "arch/x86/annotate/instructions.c" 115 #include "arch/powerpc/annotate/instructions.c" 116 #include "arch/riscv64/annotate/instructions.c" 117 #include "arch/s390/annotate/instructions.c" 118 #include "arch/sparc/annotate/instructions.c" 119 120 static struct arch architectures[] = { 121 { 122 .name = "arc", 123 .init = arc__annotate_init, 124 }, 125 { 126 .name = "arm", 127 .init = arm__annotate_init, 128 }, 129 { 130 .name = "arm64", 131 .init = arm64__annotate_init, 132 }, 133 { 134 .name = "csky", 135 .init = csky__annotate_init, 136 }, 137 { 138 .name = "mips", 139 .init = mips__annotate_init, 140 .objdump = { 141 .comment_char = '#', 142 }, 143 }, 144 { 145 .name = "x86", 146 .init = x86__annotate_init, 147 .instructions = x86__instructions, 148 .nr_instructions = ARRAY_SIZE(x86__instructions), 149 .insn_suffix = "bwlq", 150 .objdump = { 151 .comment_char = '#', 152 .register_char = '%', 153 .memory_ref_char = '(', 154 .imm_char = '$', 155 }, 156 #ifdef HAVE_LIBDW_SUPPORT 157 .update_insn_state = update_insn_state_x86, 158 #endif 159 }, 160 { 161 .name = "powerpc", 162 .init = powerpc__annotate_init, 163 #ifdef HAVE_LIBDW_SUPPORT 164 .update_insn_state = update_insn_state_powerpc, 165 #endif 166 }, 167 { 168 .name = "riscv64", 169 .init = riscv64__annotate_init, 170 }, 171 { 172 .name = "s390", 173 .init = s390__annotate_init, 174 .objdump = { 175 .comment_char = '#', 176 }, 177 }, 178 { 179 .name = "sparc", 180 .init = sparc__annotate_init, 181 .objdump = { 182 .comment_char = '#', 183 }, 184 }, 185 { 186 .name = "loongarch", 187 .init = loongarch__annotate_init, 188 .objdump = { 189 .comment_char = '#', 190 }, 191 }, 192 }; 193 194 static int arch__key_cmp(const void *name, const void *archp) 195 { 196 const struct arch *arch = archp; 197 198 return strcmp(name, arch->name); 199 } 200 201 static int arch__cmp(const void *a, const void *b) 202 { 203 const struct arch *aa = a; 204 const struct arch *ab = b; 205 206 return strcmp(aa->name, ab->name); 207 } 208 209 static void arch__sort(void) 210 { 211 const int nmemb = ARRAY_SIZE(architectures); 212 213 qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); 214 } 215 216 const struct arch *arch__find(const char *name) 217 { 218 const int nmemb = ARRAY_SIZE(architectures); 219 static bool sorted; 220 221 if (!sorted) { 222 arch__sort(); 223 sorted = true; 224 } 225 226 return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); 227 } 228 229 bool arch__is(const struct arch *arch, const char *name) 230 { 231 return !strcmp(arch->name, name); 232 } 233 234 static void ins_ops__delete(struct ins_operands *ops) 235 { 236 if (ops == NULL) 237 return; 238 zfree(&ops->source.raw); 239 zfree(&ops->source.name); 240 zfree(&ops->target.raw); 241 zfree(&ops->target.name); 242 } 243 244 static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, 245 struct ins_operands *ops, int max_ins_name) 246 { 247 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); 248 } 249 250 static int ins__scnprintf(struct ins *ins, char *bf, size_t size, 251 struct ins_operands *ops, int max_ins_name) 252 { 253 if (ins->ops->scnprintf) 254 return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); 255 256 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 257 } 258 259 bool ins__is_fused(const struct arch *arch, const char *ins1, const char *ins2) 260 { 261 if (!arch || !arch->ins_is_fused) 262 return false; 263 264 return arch->ins_is_fused(arch, ins1, ins2); 265 } 266 267 static int call__parse(const struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 268 struct disasm_line *dl __maybe_unused) 269 { 270 char *endptr, *tok, *name; 271 struct map *map = ms->map; 272 struct addr_map_symbol target; 273 274 ops->target.addr = strtoull(ops->raw, &endptr, 16); 275 276 name = strchr(endptr, '<'); 277 if (name == NULL) 278 goto indirect_call; 279 280 name++; 281 282 if (arch->objdump.skip_functions_char && 283 strchr(name, arch->objdump.skip_functions_char)) 284 return -1; 285 286 tok = strchr(name, '>'); 287 if (tok == NULL) 288 return -1; 289 290 *tok = '\0'; 291 ops->target.name = strdup(name); 292 *tok = '>'; 293 294 if (ops->target.name == NULL) 295 return -1; 296 find_target: 297 target = (struct addr_map_symbol) { 298 .ms = { .map = map__get(map), }, 299 .addr = map__objdump_2mem(map, ops->target.addr), 300 }; 301 302 if (maps__find_ams(ms->maps, &target) == 0 && 303 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 304 ops->target.sym = target.ms.sym; 305 306 addr_map_symbol__exit(&target); 307 return 0; 308 309 indirect_call: 310 tok = strchr(endptr, '*'); 311 if (tok != NULL) { 312 endptr++; 313 314 /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). 315 * Do not parse such instruction. */ 316 if (strstr(endptr, "(%r") == NULL) 317 ops->target.addr = strtoull(endptr, NULL, 16); 318 } 319 goto find_target; 320 } 321 322 static int call__scnprintf(struct ins *ins, char *bf, size_t size, 323 struct ins_operands *ops, int max_ins_name) 324 { 325 if (ops->target.sym) 326 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 327 328 if (ops->target.addr == 0) 329 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 330 331 if (ops->target.name) 332 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name); 333 334 return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr); 335 } 336 337 static const struct ins_ops call_ops = { 338 .parse = call__parse, 339 .scnprintf = call__scnprintf, 340 }; 341 342 bool ins__is_call(const struct ins *ins) 343 { 344 return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops; 345 } 346 347 /* 348 * Prevents from matching commas in the comment section, e.g.: 349 * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast 350 * 351 * and skip comma as part of function arguments, e.g.: 352 * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> 353 */ 354 static inline const char *validate_comma(const char *c, struct ins_operands *ops) 355 { 356 if (ops->jump.raw_comment && c > ops->jump.raw_comment) 357 return NULL; 358 359 if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) 360 return NULL; 361 362 return c; 363 } 364 365 static int jump__parse(const struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 366 struct disasm_line *dl __maybe_unused) 367 { 368 struct map *map = ms->map; 369 struct symbol *sym = ms->sym; 370 struct addr_map_symbol target = { 371 .ms = { .map = map__get(map), }, 372 }; 373 const char *c = strchr(ops->raw, ','); 374 u64 start, end; 375 376 ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); 377 ops->jump.raw_func_start = strchr(ops->raw, '<'); 378 379 c = validate_comma(c, ops); 380 381 /* 382 * Examples of lines to parse for the _cpp_lex_token@@Base 383 * function: 384 * 385 * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> 386 * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> 387 * 388 * The first is a jump to an offset inside the same function, 389 * the second is to another function, i.e. that 0xa72 is an 390 * offset in the cpp_named_operator2name@@base function. 391 */ 392 /* 393 * skip over possible up to 2 operands to get to address, e.g.: 394 * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> 395 */ 396 if (c != NULL) { 397 c++; 398 ops->target.addr = strtoull(c, NULL, 16); 399 if (!ops->target.addr) { 400 c = strchr(c, ','); 401 c = validate_comma(c, ops); 402 if (c != NULL) { 403 c++; 404 ops->target.addr = strtoull(c, NULL, 16); 405 } 406 } 407 } else { 408 ops->target.addr = strtoull(ops->raw, NULL, 16); 409 } 410 411 target.addr = map__objdump_2mem(map, ops->target.addr); 412 start = map__unmap_ip(map, sym->start); 413 end = map__unmap_ip(map, sym->end); 414 415 ops->target.outside = target.addr < start || target.addr > end; 416 417 /* 418 * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): 419 420 cpp_named_operator2name@@Base+0xa72 421 422 * Point to a place that is after the cpp_named_operator2name 423 * boundaries, i.e. in the ELF symbol table for cc1 424 * cpp_named_operator2name is marked as being 32-bytes long, but it in 425 * fact is much larger than that, so we seem to need a symbols__find() 426 * routine that looks for >= current->start and < next_symbol->start, 427 * possibly just for C++ objects? 428 * 429 * For now lets just make some progress by marking jumps to outside the 430 * current function as call like. 431 * 432 * Actual navigation will come next, with further understanding of how 433 * the symbol searching and disassembly should be done. 434 */ 435 if (maps__find_ams(ms->maps, &target) == 0 && 436 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 437 ops->target.sym = target.ms.sym; 438 439 if (!ops->target.outside) { 440 ops->target.offset = target.addr - start; 441 ops->target.offset_avail = true; 442 } else { 443 ops->target.offset_avail = false; 444 } 445 addr_map_symbol__exit(&target); 446 return 0; 447 } 448 449 static int jump__scnprintf(struct ins *ins, char *bf, size_t size, 450 struct ins_operands *ops, int max_ins_name) 451 { 452 const char *c; 453 454 if (!ops->target.addr || ops->target.offset < 0) 455 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 456 457 if (ops->target.outside && ops->target.sym != NULL) 458 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 459 460 c = strchr(ops->raw, ','); 461 c = validate_comma(c, ops); 462 463 if (c != NULL) { 464 const char *c2 = strchr(c + 1, ','); 465 466 c2 = validate_comma(c2, ops); 467 /* check for 3-op insn */ 468 if (c2 != NULL) 469 c = c2; 470 c++; 471 472 /* mirror arch objdump's space-after-comma style */ 473 if (*c == ' ') 474 c++; 475 } 476 477 return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, 478 ins->name, c ? c - ops->raw : 0, ops->raw, 479 ops->target.offset); 480 } 481 482 static void jump__delete(struct ins_operands *ops __maybe_unused) 483 { 484 /* 485 * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the 486 * raw string, don't free them. 487 */ 488 } 489 490 static const struct ins_ops jump_ops = { 491 .free = jump__delete, 492 .parse = jump__parse, 493 .scnprintf = jump__scnprintf, 494 }; 495 496 bool ins__is_jump(const struct ins *ins) 497 { 498 return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops; 499 } 500 501 static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) 502 { 503 char *endptr, *name, *t; 504 505 if (strstr(raw, "(%rip)") == NULL) 506 return 0; 507 508 *addrp = strtoull(comment, &endptr, 16); 509 if (endptr == comment) 510 return 0; 511 name = strchr(endptr, '<'); 512 if (name == NULL) 513 return -1; 514 515 name++; 516 517 t = strchr(name, '>'); 518 if (t == NULL) 519 return 0; 520 521 *t = '\0'; 522 *namep = strdup(name); 523 *t = '>'; 524 525 return 0; 526 } 527 528 static int lock__parse(const struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 529 struct disasm_line *dl __maybe_unused) 530 { 531 ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); 532 if (ops->locked.ops == NULL) 533 return 0; 534 535 if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) 536 goto out_free_ops; 537 538 ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0); 539 540 if (ops->locked.ins.ops == NULL) 541 goto out_free_ops; 542 543 if (ops->locked.ins.ops->parse && 544 ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0) 545 goto out_free_ops; 546 547 return 0; 548 549 out_free_ops: 550 zfree(&ops->locked.ops); 551 return 0; 552 } 553 554 static int lock__scnprintf(struct ins *ins, char *bf, size_t size, 555 struct ins_operands *ops, int max_ins_name) 556 { 557 int printed; 558 559 if (ops->locked.ins.ops == NULL) 560 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 561 562 printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); 563 return printed + ins__scnprintf(&ops->locked.ins, bf + printed, 564 size - printed, ops->locked.ops, max_ins_name); 565 } 566 567 static void lock__delete(struct ins_operands *ops) 568 { 569 struct ins *ins = &ops->locked.ins; 570 571 if (ins->ops && ins->ops->free) 572 ins->ops->free(ops->locked.ops); 573 else 574 ins_ops__delete(ops->locked.ops); 575 576 zfree(&ops->locked.ops); 577 zfree(&ops->locked.ins.name); 578 zfree(&ops->target.raw); 579 zfree(&ops->target.name); 580 } 581 582 static const struct ins_ops lock_ops = { 583 .free = lock__delete, 584 .parse = lock__parse, 585 .scnprintf = lock__scnprintf, 586 }; 587 588 /* 589 * Check if the operand has more than one registers like x86 SIB addressing: 590 * 0x1234(%rax, %rbx, 8) 591 * 592 * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check 593 * the input string after 'memory_ref_char' if exists. 594 */ 595 static bool check_multi_regs(const struct arch *arch, const char *op) 596 { 597 int count = 0; 598 599 if (arch->objdump.register_char == 0) 600 return false; 601 602 if (arch->objdump.memory_ref_char) { 603 op = strchr(op, arch->objdump.memory_ref_char); 604 if (op == NULL) 605 return false; 606 } 607 608 while ((op = strchr(op, arch->objdump.register_char)) != NULL) { 609 count++; 610 op++; 611 } 612 613 return count > 1; 614 } 615 616 static int mov__parse(const struct arch *arch, struct ins_operands *ops, 617 struct map_symbol *ms __maybe_unused, 618 struct disasm_line *dl __maybe_unused) 619 { 620 char *s = strchr(ops->raw, ','), *target, *comment, prev; 621 622 if (s == NULL) 623 return -1; 624 625 *s = '\0'; 626 627 /* 628 * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) 629 * then it needs to have the closing parenthesis. 630 */ 631 if (strchr(ops->raw, '(')) { 632 *s = ','; 633 s = strchr(ops->raw, ')'); 634 if (s == NULL || s[1] != ',') 635 return -1; 636 *++s = '\0'; 637 } 638 639 ops->source.raw = strdup(ops->raw); 640 *s = ','; 641 642 if (ops->source.raw == NULL) 643 return -1; 644 645 ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); 646 647 target = skip_spaces(++s); 648 comment = strchr(s, arch->objdump.comment_char); 649 650 if (comment != NULL) 651 s = comment - 1; 652 else 653 s = strchr(s, '\0') - 1; 654 655 while (s > target && isspace(s[0])) 656 --s; 657 s++; 658 prev = *s; 659 *s = '\0'; 660 661 ops->target.raw = strdup(target); 662 *s = prev; 663 664 if (ops->target.raw == NULL) 665 goto out_free_source; 666 667 ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); 668 669 if (comment == NULL) 670 return 0; 671 672 comment = skip_spaces(comment); 673 comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); 674 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 675 676 return 0; 677 678 out_free_source: 679 zfree(&ops->source.raw); 680 return -1; 681 } 682 683 static int mov__scnprintf(struct ins *ins, char *bf, size_t size, 684 struct ins_operands *ops, int max_ins_name) 685 { 686 return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, 687 ops->source.name ?: ops->source.raw, 688 ops->target.name ?: ops->target.raw); 689 } 690 691 static const struct ins_ops mov_ops = { 692 .parse = mov__parse, 693 .scnprintf = mov__scnprintf, 694 }; 695 696 #define PPC_22_30(R) (((R) >> 1) & 0x1ff) 697 #define MINUS_EXT_XO_FORM 234 698 #define SUB_EXT_XO_FORM 232 699 #define ADD_ZERO_EXT_XO_FORM 202 700 #define SUB_ZERO_EXT_XO_FORM 200 701 702 static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size, 703 struct ins_operands *ops, int max_ins_name) 704 { 705 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 706 ops->raw); 707 } 708 709 /* 710 * Sets the fields: multi_regs and "mem_ref". 711 * "mem_ref" is set for ops->source which is later used to 712 * fill the objdump->memory_ref-char field. This ops is currently 713 * used by powerpc and since binary instruction code is used to 714 * extract opcode, regs and offset, no other parsing is needed here. 715 * 716 * Dont set multi regs for 4 cases since it has only one operand 717 * for source: 718 * - Add to Minus One Extended XO-form ( Ex: addme, addmeo ) 719 * - Subtract From Minus One Extended XO-form ( Ex: subfme ) 720 * - Add to Zero Extended XO-form ( Ex: addze, addzeo ) 721 * - Subtract From Zero Extended XO-form ( Ex: subfze ) 722 */ 723 static int arithmetic__parse(const struct arch *arch __maybe_unused, struct ins_operands *ops, 724 struct map_symbol *ms __maybe_unused, struct disasm_line *dl) 725 { 726 int opcode = PPC_OP(dl->raw.raw_insn); 727 728 ops->source.mem_ref = false; 729 if (opcode == 31) { 730 if ((opcode != MINUS_EXT_XO_FORM) && (opcode != SUB_EXT_XO_FORM) \ 731 && (opcode != ADD_ZERO_EXT_XO_FORM) && (opcode != SUB_ZERO_EXT_XO_FORM)) 732 ops->source.multi_regs = true; 733 } 734 735 ops->target.mem_ref = false; 736 ops->target.multi_regs = false; 737 738 return 0; 739 } 740 741 static const struct ins_ops arithmetic_ops = { 742 .parse = arithmetic__parse, 743 .scnprintf = arithmetic__scnprintf, 744 }; 745 746 static int load_store__scnprintf(struct ins *ins, char *bf, size_t size, 747 struct ins_operands *ops, int max_ins_name) 748 { 749 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 750 ops->raw); 751 } 752 753 /* 754 * Sets the fields: multi_regs and "mem_ref". 755 * "mem_ref" is set for ops->source which is later used to 756 * fill the objdump->memory_ref-char field. This ops is currently 757 * used by powerpc and since binary instruction code is used to 758 * extract opcode, regs and offset, no other parsing is needed here 759 */ 760 static int load_store__parse(const struct arch *arch __maybe_unused, struct ins_operands *ops, 761 struct map_symbol *ms __maybe_unused, struct disasm_line *dl __maybe_unused) 762 { 763 ops->source.mem_ref = true; 764 ops->source.multi_regs = false; 765 /* opcode 31 is of X form */ 766 if (PPC_OP(dl->raw.raw_insn) == 31) 767 ops->source.multi_regs = true; 768 769 ops->target.mem_ref = false; 770 ops->target.multi_regs = false; 771 772 return 0; 773 } 774 775 static const struct ins_ops load_store_ops = { 776 .parse = load_store__parse, 777 .scnprintf = load_store__scnprintf, 778 }; 779 780 static int dec__parse(const struct arch *arch __maybe_unused, struct ins_operands *ops, 781 struct map_symbol *ms __maybe_unused, 782 struct disasm_line *dl __maybe_unused) 783 { 784 char *target, *comment, *s, prev; 785 786 target = s = ops->raw; 787 788 while (s[0] != '\0' && !isspace(s[0])) 789 ++s; 790 prev = *s; 791 *s = '\0'; 792 793 ops->target.raw = strdup(target); 794 *s = prev; 795 796 if (ops->target.raw == NULL) 797 return -1; 798 799 comment = strchr(s, arch->objdump.comment_char); 800 if (comment == NULL) 801 return 0; 802 803 comment = skip_spaces(comment); 804 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 805 806 return 0; 807 } 808 809 static int dec__scnprintf(struct ins *ins, char *bf, size_t size, 810 struct ins_operands *ops, int max_ins_name) 811 { 812 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 813 ops->target.name ?: ops->target.raw); 814 } 815 816 static const struct ins_ops dec_ops = { 817 .parse = dec__parse, 818 .scnprintf = dec__scnprintf, 819 }; 820 821 static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, 822 struct ins_operands *ops __maybe_unused, int max_ins_name) 823 { 824 return scnprintf(bf, size, "%-*s", max_ins_name, "nop"); 825 } 826 827 static const struct ins_ops nop_ops = { 828 .scnprintf = nop__scnprintf, 829 }; 830 831 static const struct ins_ops ret_ops = { 832 .scnprintf = ins__raw_scnprintf, 833 }; 834 835 static bool ins__is_nop(const struct ins *ins) 836 { 837 return ins->ops == &nop_ops; 838 } 839 840 bool ins__is_ret(const struct ins *ins) 841 { 842 return ins->ops == &ret_ops; 843 } 844 845 bool ins__is_lock(const struct ins *ins) 846 { 847 return ins->ops == &lock_ops; 848 } 849 850 static int ins__key_cmp(const void *name, const void *insp) 851 { 852 const struct ins *ins = insp; 853 854 return strcmp(name, ins->name); 855 } 856 857 static int ins__cmp(const void *a, const void *b) 858 { 859 const struct ins *ia = a; 860 const struct ins *ib = b; 861 862 return strcmp(ia->name, ib->name); 863 } 864 865 static void ins__sort(struct arch *arch) 866 { 867 const int nmemb = arch->nr_instructions; 868 869 qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); 870 } 871 872 static const struct ins_ops *__ins__find(const struct arch *arch, const char *name, 873 struct disasm_line *dl) 874 { 875 struct ins *ins; 876 const int nmemb = arch->nr_instructions; 877 878 if (arch__is(arch, "powerpc")) { 879 /* 880 * For powerpc, identify the instruction ops 881 * from the opcode using raw_insn. 882 */ 883 const struct ins_ops *ops; 884 885 ops = check_ppc_insn(dl); 886 if (ops) 887 return ops; 888 } 889 890 if (!arch->sorted_instructions) { 891 ins__sort((struct arch *)arch); 892 ((struct arch *)arch)->sorted_instructions = true; 893 } 894 895 ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 896 if (ins) 897 return ins->ops; 898 899 if (arch->insn_suffix) { 900 char tmp[32]; 901 char suffix; 902 size_t len = strlen(name); 903 904 if (len == 0 || len >= sizeof(tmp)) 905 return NULL; 906 907 suffix = name[len - 1]; 908 if (strchr(arch->insn_suffix, suffix) == NULL) 909 return NULL; 910 911 strcpy(tmp, name); 912 tmp[len - 1] = '\0'; /* remove the suffix and check again */ 913 914 ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 915 } 916 return ins ? ins->ops : NULL; 917 } 918 919 const struct ins_ops *ins__find(const struct arch *arch, const char *name, struct disasm_line *dl) 920 { 921 const struct ins_ops *ops = __ins__find(arch, name, dl); 922 923 if (!ops && arch->associate_instruction_ops) 924 ops = arch->associate_instruction_ops((struct arch *)arch, name); 925 926 return ops; 927 } 928 929 static void disasm_line__init_ins(struct disasm_line *dl, const struct arch *arch, 930 struct map_symbol *ms) 931 { 932 dl->ins.ops = ins__find(arch, dl->ins.name, dl); 933 934 if (!dl->ins.ops) 935 return; 936 937 if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0) 938 dl->ins.ops = NULL; 939 } 940 941 static int disasm_line__parse(char *line, const char **namep, char **rawp) 942 { 943 char tmp, *name = skip_spaces(line); 944 945 if (name[0] == '\0') 946 return -1; 947 948 *rawp = name + 1; 949 950 while ((*rawp)[0] != '\0' && !isspace((*rawp)[0])) 951 ++*rawp; 952 953 tmp = (*rawp)[0]; 954 (*rawp)[0] = '\0'; 955 *namep = strdup(name); 956 957 if (*namep == NULL) 958 goto out; 959 960 (*rawp)[0] = tmp; 961 *rawp = strim(*rawp); 962 963 return 0; 964 965 out: 966 return -1; 967 } 968 969 /* 970 * Parses the result captured from symbol__disassemble_* 971 * Example, line read from DSO file in powerpc: 972 * line: 38 01 81 e8 973 * opcode: fetched from arch specific get_opcode_insn 974 * rawp_insn: e8810138 975 * 976 * rawp_insn is used later to extract the reg/offset fields 977 */ 978 #define PPC_OP(op) (((op) >> 26) & 0x3F) 979 #define RAW_BYTES 11 980 981 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args) 982 { 983 char *line = dl->al.line; 984 const char **namep = &dl->ins.name; 985 char **rawp = &dl->ops.raw; 986 char *tmp_raw_insn, *name_raw_insn = skip_spaces(line); 987 char *name = skip_spaces(name_raw_insn + RAW_BYTES); 988 int disasm = 0; 989 int ret = 0; 990 991 if (args->options->disassembler_used) 992 disasm = 1; 993 994 if (name_raw_insn[0] == '\0') 995 return -1; 996 997 if (disasm) 998 ret = disasm_line__parse(name, namep, rawp); 999 else 1000 *namep = ""; 1001 1002 tmp_raw_insn = strndup(name_raw_insn, 11); 1003 if (tmp_raw_insn == NULL) 1004 return -1; 1005 1006 remove_spaces(tmp_raw_insn); 1007 1008 sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn); 1009 if (disasm) 1010 dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn); 1011 1012 return ret; 1013 } 1014 1015 static void annotation_line__init(struct annotation_line *al, 1016 struct annotate_args *args, 1017 int nr) 1018 { 1019 al->offset = args->offset; 1020 al->line = strdup(args->line); 1021 al->line_nr = args->line_nr; 1022 al->fileloc = args->fileloc; 1023 al->data_nr = nr; 1024 } 1025 1026 static void annotation_line__exit(struct annotation_line *al) 1027 { 1028 zfree_srcline(&al->path); 1029 zfree(&al->line); 1030 zfree(&al->cycles); 1031 zfree(&al->br_cntr); 1032 } 1033 1034 static size_t disasm_line_size(int nr) 1035 { 1036 struct annotation_line *al; 1037 1038 return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); 1039 } 1040 1041 /* 1042 * Allocating the disasm annotation line data with 1043 * following structure: 1044 * 1045 * ------------------------------------------- 1046 * struct disasm_line | struct annotation_line 1047 * ------------------------------------------- 1048 * 1049 * We have 'struct annotation_line' member as last member 1050 * of 'struct disasm_line' to have an easy access. 1051 */ 1052 struct disasm_line *disasm_line__new(struct annotate_args *args) 1053 { 1054 struct disasm_line *dl = NULL; 1055 struct annotation *notes = symbol__annotation(args->ms->sym); 1056 int nr = notes->src->nr_events; 1057 1058 dl = zalloc(disasm_line_size(nr)); 1059 if (!dl) 1060 return NULL; 1061 1062 annotation_line__init(&dl->al, args, nr); 1063 if (dl->al.line == NULL) 1064 goto out_delete; 1065 1066 if (args->offset != -1) { 1067 if (arch__is(args->arch, "powerpc")) { 1068 if (disasm_line__parse_powerpc(dl, args) < 0) 1069 goto out_free_line; 1070 } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) 1071 goto out_free_line; 1072 1073 disasm_line__init_ins(dl, args->arch, args->ms); 1074 } 1075 1076 return dl; 1077 1078 out_free_line: 1079 zfree(&dl->al.line); 1080 out_delete: 1081 free(dl); 1082 return NULL; 1083 } 1084 1085 void disasm_line__free(struct disasm_line *dl) 1086 { 1087 if (dl->ins.ops && dl->ins.ops->free) 1088 dl->ins.ops->free(&dl->ops); 1089 else 1090 ins_ops__delete(&dl->ops); 1091 zfree(&dl->ins.name); 1092 annotation_line__exit(&dl->al); 1093 free(dl); 1094 } 1095 1096 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) 1097 { 1098 if (raw || !dl->ins.ops) 1099 return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw); 1100 1101 return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); 1102 } 1103 1104 /* 1105 * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) 1106 * which looks like following 1107 * 1108 * 0000000000415500 <_init>: 1109 * 415500: sub $0x8,%rsp 1110 * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8> 1111 * 41550b: test %rax,%rax 1112 * 41550e: je 415515 <_init+0x15> 1113 * 415510: callq 416e70 <__gmon_start__@plt> 1114 * 415515: add $0x8,%rsp 1115 * 415519: retq 1116 * 1117 * it will be parsed and saved into struct disasm_line as 1118 * <offset> <name> <ops.raw> 1119 * 1120 * The offset will be a relative offset from the start of the symbol and -1 1121 * means that it's not a disassembly line so should be treated differently. 1122 * The ops.raw part will be parsed further according to type of the instruction. 1123 */ 1124 static int symbol__parse_objdump_line(struct symbol *sym, 1125 struct annotate_args *args, 1126 char *parsed_line, int *line_nr, char **fileloc) 1127 { 1128 struct map *map = args->ms->map; 1129 struct annotation *notes = symbol__annotation(sym); 1130 struct disasm_line *dl; 1131 char *tmp; 1132 s64 line_ip, offset = -1; 1133 regmatch_t match[2]; 1134 1135 /* /filename:linenr ? Save line number and ignore. */ 1136 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { 1137 *line_nr = atoi(parsed_line + match[1].rm_so); 1138 free(*fileloc); 1139 *fileloc = strdup(parsed_line); 1140 return 0; 1141 } 1142 1143 /* Process hex address followed by ':'. */ 1144 line_ip = strtoull(parsed_line, &tmp, 16); 1145 if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { 1146 u64 start = map__rip_2objdump(map, sym->start), 1147 end = map__rip_2objdump(map, sym->end); 1148 1149 offset = line_ip - start; 1150 if ((u64)line_ip < start || (u64)line_ip >= end) 1151 offset = -1; 1152 else 1153 parsed_line = tmp + 1; 1154 } 1155 1156 args->offset = offset; 1157 args->line = parsed_line; 1158 args->line_nr = *line_nr; 1159 args->fileloc = *fileloc; 1160 args->ms->sym = sym; 1161 1162 dl = disasm_line__new(args); 1163 (*line_nr)++; 1164 1165 if (dl == NULL) 1166 return -1; 1167 1168 if (!disasm_line__has_local_offset(dl)) { 1169 dl->ops.target.offset = dl->ops.target.addr - 1170 map__rip_2objdump(map, sym->start); 1171 dl->ops.target.offset_avail = true; 1172 } 1173 1174 /* kcore has no symbols, so add the call target symbol */ 1175 if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { 1176 struct addr_map_symbol target = { 1177 .addr = dl->ops.target.addr, 1178 .ms = { .map = map__get(map), }, 1179 }; 1180 1181 if (!maps__find_ams(args->ms->maps, &target) && 1182 target.ms.sym->start == target.al_addr) 1183 dl->ops.target.sym = target.ms.sym; 1184 1185 addr_map_symbol__exit(&target); 1186 } 1187 1188 annotation_line__add(&dl->al, ¬es->src->source); 1189 return 0; 1190 } 1191 1192 static void delete_last_nop(struct symbol *sym) 1193 { 1194 struct annotation *notes = symbol__annotation(sym); 1195 struct list_head *list = ¬es->src->source; 1196 struct disasm_line *dl; 1197 1198 while (!list_empty(list)) { 1199 dl = list_entry(list->prev, struct disasm_line, al.node); 1200 1201 if (dl->ins.ops) { 1202 if (!ins__is_nop(&dl->ins)) 1203 return; 1204 } else { 1205 if (!strstr(dl->al.line, " nop ") && 1206 !strstr(dl->al.line, " nopl ") && 1207 !strstr(dl->al.line, " nopw ")) 1208 return; 1209 } 1210 1211 list_del_init(&dl->al.node); 1212 disasm_line__free(dl); 1213 } 1214 } 1215 1216 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen) 1217 { 1218 struct dso *dso = map__dso(ms->map); 1219 1220 BUG_ON(buflen == 0); 1221 1222 if (errnum >= 0) { 1223 str_error_r(errnum, buf, buflen); 1224 return 0; 1225 } 1226 1227 switch (errnum) { 1228 case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { 1229 char bf[SBUILD_ID_SIZE + 15] = " with build id "; 1230 char *build_id_msg = NULL; 1231 1232 if (dso__has_build_id(dso)) { 1233 build_id__snprintf(dso__bid(dso), bf + 15, sizeof(bf) - 15); 1234 build_id_msg = bf; 1235 } 1236 scnprintf(buf, buflen, 1237 "No vmlinux file%s\nwas found in the path.\n\n" 1238 "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" 1239 "Please use:\n\n" 1240 " perf buildid-cache -vu vmlinux\n\n" 1241 "or:\n\n" 1242 " --vmlinux vmlinux\n", build_id_msg ?: ""); 1243 } 1244 break; 1245 case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: 1246 scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); 1247 break; 1248 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: 1249 scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); 1250 break; 1251 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: 1252 scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); 1253 break; 1254 case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: 1255 scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso)); 1256 break; 1257 case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: 1258 scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", 1259 dso__long_name(dso)); 1260 break; 1261 case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE: 1262 scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso)); 1263 break; 1264 default: 1265 scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); 1266 break; 1267 } 1268 1269 return 0; 1270 } 1271 1272 static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size) 1273 { 1274 char linkname[PATH_MAX]; 1275 char *build_id_filename; 1276 char *build_id_path = NULL; 1277 char *pos; 1278 int len; 1279 1280 if (dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS && 1281 !dso__is_kcore(dso)) 1282 return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; 1283 1284 build_id_filename = dso__build_id_filename(dso, NULL, 0, false); 1285 if (build_id_filename) { 1286 __symbol__join_symfs(filename, filename_size, build_id_filename); 1287 free(build_id_filename); 1288 } else { 1289 if (dso__has_build_id(dso)) 1290 return ENOMEM; 1291 goto fallback; 1292 } 1293 1294 build_id_path = strdup(filename); 1295 if (!build_id_path) 1296 return ENOMEM; 1297 1298 /* 1299 * old style build-id cache has name of XX/XXXXXXX.. while 1300 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. 1301 * extract the build-id part of dirname in the new style only. 1302 */ 1303 pos = strrchr(build_id_path, '/'); 1304 if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) 1305 dirname(build_id_path); 1306 1307 if (dso__is_kcore(dso)) 1308 goto fallback; 1309 1310 len = readlink(build_id_path, linkname, sizeof(linkname) - 1); 1311 if (len < 0) 1312 goto fallback; 1313 1314 linkname[len] = '\0'; 1315 if (strstr(linkname, DSO__NAME_KALLSYMS) || 1316 access(filename, R_OK)) { 1317 fallback: 1318 /* 1319 * If we don't have build-ids or the build-id file isn't in the 1320 * cache, or is just a kallsyms file, well, lets hope that this 1321 * DSO is the same as when 'perf record' ran. 1322 */ 1323 if (dso__kernel(dso) && dso__long_name(dso)[0] == '/') 1324 snprintf(filename, filename_size, "%s", dso__long_name(dso)); 1325 else 1326 __symbol__join_symfs(filename, filename_size, dso__long_name(dso)); 1327 1328 mutex_lock(dso__lock(dso)); 1329 if (access(filename, R_OK) && errno == ENOENT && dso__nsinfo(dso)) { 1330 char *new_name = dso__filename_with_chroot(dso, filename); 1331 if (new_name) { 1332 strlcpy(filename, new_name, filename_size); 1333 free(new_name); 1334 } 1335 } 1336 mutex_unlock(dso__lock(dso)); 1337 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1338 dso__set_binary_type(dso, DSO_BINARY_TYPE__BUILD_ID_CACHE); 1339 } 1340 1341 free(build_id_path); 1342 return 0; 1343 } 1344 1345 static int symbol__disassemble_raw(char *filename, struct symbol *sym, 1346 struct annotate_args *args) 1347 { 1348 struct annotation *notes = symbol__annotation(sym); 1349 struct map *map = args->ms->map; 1350 struct dso *dso = map__dso(map); 1351 u64 start = map__rip_2objdump(map, sym->start); 1352 u64 end = map__rip_2objdump(map, sym->end); 1353 u64 len = end - start; 1354 u64 offset; 1355 int i, count; 1356 u8 *buf = NULL; 1357 char disasm_buf[512]; 1358 struct disasm_line *dl; 1359 u32 *line; 1360 1361 /* Return if objdump is specified explicitly */ 1362 if (args->options->objdump_path) 1363 return -1; 1364 1365 pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename); 1366 1367 buf = malloc(len); 1368 if (buf == NULL) 1369 goto err; 1370 1371 count = dso__data_read_offset(dso, NULL, sym->start, buf, len); 1372 1373 line = (u32 *)buf; 1374 1375 if ((u64)count != len) 1376 goto err; 1377 1378 /* add the function address and name */ 1379 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1380 start, sym->name); 1381 1382 args->offset = -1; 1383 args->line = disasm_buf; 1384 args->line_nr = 0; 1385 args->fileloc = NULL; 1386 args->ms->sym = sym; 1387 1388 dl = disasm_line__new(args); 1389 if (dl == NULL) 1390 goto err; 1391 1392 annotation_line__add(&dl->al, ¬es->src->source); 1393 1394 /* Each raw instruction is 4 byte */ 1395 count = len/4; 1396 1397 for (i = 0, offset = 0; i < count; i++) { 1398 args->offset = offset; 1399 sprintf(args->line, "%x", line[i]); 1400 dl = disasm_line__new(args); 1401 if (dl == NULL) 1402 break; 1403 1404 annotation_line__add(&dl->al, ¬es->src->source); 1405 offset += 4; 1406 } 1407 1408 /* It failed in the middle */ 1409 if (offset != len) { 1410 struct list_head *list = ¬es->src->source; 1411 1412 /* Discard all lines and fallback to objdump */ 1413 while (!list_empty(list)) { 1414 dl = list_first_entry(list, struct disasm_line, al.node); 1415 1416 list_del_init(&dl->al.node); 1417 disasm_line__free(dl); 1418 } 1419 count = -1; 1420 } 1421 1422 out: 1423 free(buf); 1424 return count < 0 ? count : 0; 1425 1426 err: 1427 count = -1; 1428 goto out; 1429 } 1430 1431 /* 1432 * Possibly create a new version of line with tabs expanded. Returns the 1433 * existing or new line, storage is updated if a new line is allocated. If 1434 * allocation fails then NULL is returned. 1435 */ 1436 char *expand_tabs(char *line, char **storage, size_t *storage_len) 1437 { 1438 size_t i, src, dst, len, new_storage_len, num_tabs; 1439 char *new_line; 1440 size_t line_len = strlen(line); 1441 1442 for (num_tabs = 0, i = 0; i < line_len; i++) 1443 if (line[i] == '\t') 1444 num_tabs++; 1445 1446 if (num_tabs == 0) 1447 return line; 1448 1449 /* 1450 * Space for the line and '\0', less the leading and trailing 1451 * spaces. Each tab may introduce 7 additional spaces. 1452 */ 1453 new_storage_len = line_len + 1 + (num_tabs * 7); 1454 1455 new_line = malloc(new_storage_len); 1456 if (new_line == NULL) { 1457 pr_err("Failure allocating memory for tab expansion\n"); 1458 return NULL; 1459 } 1460 1461 /* 1462 * Copy regions starting at src and expand tabs. If there are two 1463 * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces 1464 * are inserted. 1465 */ 1466 for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { 1467 if (line[i] == '\t') { 1468 len = i - src; 1469 memcpy(&new_line[dst], &line[src], len); 1470 dst += len; 1471 new_line[dst++] = ' '; 1472 while (dst % 8 != 0) 1473 new_line[dst++] = ' '; 1474 src = i + 1; 1475 num_tabs--; 1476 } 1477 } 1478 1479 /* Expand the last region. */ 1480 len = line_len - src; 1481 memcpy(&new_line[dst], &line[src], len); 1482 dst += len; 1483 new_line[dst] = '\0'; 1484 1485 free(*storage); 1486 *storage = new_line; 1487 *storage_len = new_storage_len; 1488 return new_line; 1489 } 1490 1491 static int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args) 1492 { 1493 struct annotation *notes = symbol__annotation(sym); 1494 struct disasm_line *dl; 1495 1496 args->offset = -1; 1497 args->line = strdup("to be implemented"); 1498 args->line_nr = 0; 1499 args->fileloc = NULL; 1500 dl = disasm_line__new(args); 1501 if (dl) 1502 annotation_line__add(&dl->al, ¬es->src->source); 1503 1504 zfree(&args->line); 1505 return 0; 1506 } 1507 1508 static int symbol__disassemble_objdump(const char *filename, struct symbol *sym, 1509 struct annotate_args *args) 1510 { 1511 struct annotation_options *opts = &annotate_opts; 1512 struct map *map = args->ms->map; 1513 struct dso *dso = map__dso(map); 1514 char *command; 1515 FILE *file; 1516 int lineno = 0; 1517 char *fileloc = NULL; 1518 int nline; 1519 char *line; 1520 size_t line_len; 1521 const char *objdump_argv[] = { 1522 "/bin/sh", 1523 "-c", 1524 NULL, /* Will be the objdump command to run. */ 1525 "--", 1526 NULL, /* Will be the symfs path. */ 1527 NULL, 1528 }; 1529 struct child_process objdump_process; 1530 int err; 1531 1532 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) 1533 return symbol__disassemble_bpf_libbfd(sym, args); 1534 1535 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) 1536 return symbol__disassemble_bpf_image(sym, args); 1537 1538 err = asprintf(&command, 1539 "%s %s%s --start-address=0x%016" PRIx64 1540 " --stop-address=0x%016" PRIx64 1541 " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", 1542 opts->objdump_path ?: "objdump", 1543 opts->disassembler_style ? "-M " : "", 1544 opts->disassembler_style ?: "", 1545 map__rip_2objdump(map, sym->start), 1546 map__rip_2objdump(map, sym->end), 1547 opts->show_linenr ? "-l" : "", 1548 opts->show_asm_raw ? "" : "--no-show-raw-insn", 1549 opts->annotate_src ? "-S" : "", 1550 opts->prefix ? "--prefix " : "", 1551 opts->prefix ? '"' : ' ', 1552 opts->prefix ?: "", 1553 opts->prefix ? '"' : ' ', 1554 opts->prefix_strip ? "--prefix-strip=" : "", 1555 opts->prefix_strip ?: ""); 1556 1557 if (err < 0) { 1558 pr_err("Failure allocating memory for the command to run\n"); 1559 return err; 1560 } 1561 1562 pr_debug("Executing: %s\n", command); 1563 1564 objdump_argv[2] = command; 1565 objdump_argv[4] = filename; 1566 1567 /* Create a pipe to read from for stdout */ 1568 memset(&objdump_process, 0, sizeof(objdump_process)); 1569 objdump_process.argv = objdump_argv; 1570 objdump_process.out = -1; 1571 objdump_process.err = -1; 1572 objdump_process.no_stderr = 1; 1573 if (start_command(&objdump_process)) { 1574 pr_err("Failure starting to run %s\n", command); 1575 err = -1; 1576 goto out_free_command; 1577 } 1578 1579 file = fdopen(objdump_process.out, "r"); 1580 if (!file) { 1581 pr_err("Failure creating FILE stream for %s\n", command); 1582 /* 1583 * If we were using debug info should retry with 1584 * original binary. 1585 */ 1586 err = -1; 1587 goto out_close_stdout; 1588 } 1589 1590 /* Storage for getline. */ 1591 line = NULL; 1592 line_len = 0; 1593 1594 nline = 0; 1595 while (!feof(file)) { 1596 const char *match; 1597 char *expanded_line; 1598 1599 if (getline(&line, &line_len, file) < 0 || !line) 1600 break; 1601 1602 /* Skip lines containing "filename:" */ 1603 match = strstr(line, filename); 1604 if (match && match[strlen(filename)] == ':') 1605 continue; 1606 1607 expanded_line = strim(line); 1608 expanded_line = expand_tabs(expanded_line, &line, &line_len); 1609 if (!expanded_line) 1610 break; 1611 1612 /* 1613 * The source code line number (lineno) needs to be kept in 1614 * across calls to symbol__parse_objdump_line(), so that it 1615 * can associate it with the instructions till the next one. 1616 * See disasm_line__new() and struct disasm_line::line_nr. 1617 */ 1618 if (symbol__parse_objdump_line(sym, args, expanded_line, 1619 &lineno, &fileloc) < 0) 1620 break; 1621 nline++; 1622 } 1623 free(line); 1624 free(fileloc); 1625 1626 err = finish_command(&objdump_process); 1627 if (err) 1628 pr_err("Error running %s\n", command); 1629 1630 if (nline == 0) { 1631 err = -1; 1632 pr_err("No output from %s\n", command); 1633 } 1634 1635 /* 1636 * kallsyms does not have symbol sizes so there may a nop at the end. 1637 * Remove it. 1638 */ 1639 if (dso__is_kcore(dso)) 1640 delete_last_nop(sym); 1641 1642 fclose(file); 1643 1644 out_close_stdout: 1645 close(objdump_process.out); 1646 1647 out_free_command: 1648 free(command); 1649 return err; 1650 } 1651 1652 int symbol__disassemble(struct symbol *sym, struct annotate_args *args) 1653 { 1654 struct annotation_options *options = args->options; 1655 struct map *map = args->ms->map; 1656 struct dso *dso = map__dso(map); 1657 char symfs_filename[PATH_MAX]; 1658 bool delete_extract = false; 1659 struct kcore_extract kce; 1660 bool decomp = false; 1661 int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); 1662 1663 if (err) 1664 return err; 1665 1666 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, 1667 symfs_filename, sym->name, map__unmap_ip(map, sym->start), 1668 map__unmap_ip(map, sym->end)); 1669 1670 pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name); 1671 1672 if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1673 return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE; 1674 } else if (dso__is_kcore(dso)) { 1675 kce.addr = map__rip_2objdump(map, sym->start); 1676 kce.kcore_filename = symfs_filename; 1677 kce.len = sym->end - sym->start; 1678 kce.offs = sym->start; 1679 1680 if (!kcore_extract__create(&kce)) { 1681 delete_extract = true; 1682 strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename)); 1683 } 1684 } else if (dso__needs_decompress(dso)) { 1685 char tmp[KMOD_DECOMP_LEN]; 1686 1687 if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) 1688 return -1; 1689 1690 decomp = true; 1691 strcpy(symfs_filename, tmp); 1692 } 1693 1694 /* 1695 * For powerpc data type profiling, use the dso__data_read_offset to 1696 * read raw instruction directly and interpret the binary code to 1697 * understand instructions and register fields. For sort keys as type 1698 * and typeoff, disassemble to mnemonic notation is not required in 1699 * case of powerpc. 1700 */ 1701 if (arch__is(args->arch, "powerpc")) { 1702 extern const char *sort_order; 1703 1704 if (sort_order && !strstr(sort_order, "sym")) { 1705 err = symbol__disassemble_raw(symfs_filename, sym, args); 1706 if (err == 0) 1707 goto out_remove_tmp; 1708 1709 err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); 1710 if (err == 0) 1711 goto out_remove_tmp; 1712 } 1713 } 1714 1715 /* FIXME: LLVM and CAPSTONE should support source code */ 1716 if (options->annotate_src && !options->hide_src_code) { 1717 err = symbol__disassemble_objdump(symfs_filename, sym, args); 1718 if (err == 0) 1719 goto out_remove_tmp; 1720 } 1721 1722 err = -1; 1723 for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { 1724 enum perf_disassembler dis = options->disassemblers[i]; 1725 1726 switch (dis) { 1727 case PERF_DISASM_LLVM: 1728 args->options->disassembler_used = PERF_DISASM_LLVM; 1729 err = symbol__disassemble_llvm(symfs_filename, sym, args); 1730 break; 1731 case PERF_DISASM_CAPSTONE: 1732 args->options->disassembler_used = PERF_DISASM_CAPSTONE; 1733 err = symbol__disassemble_capstone(symfs_filename, sym, args); 1734 break; 1735 case PERF_DISASM_OBJDUMP: 1736 args->options->disassembler_used = PERF_DISASM_OBJDUMP; 1737 err = symbol__disassemble_objdump(symfs_filename, sym, args); 1738 break; 1739 case PERF_DISASM_UNKNOWN: /* End of disassemblers. */ 1740 default: 1741 args->options->disassembler_used = PERF_DISASM_UNKNOWN; 1742 goto out_remove_tmp; 1743 } 1744 if (err == 0) 1745 pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]); 1746 } 1747 out_remove_tmp: 1748 if (decomp) 1749 unlink(symfs_filename); 1750 1751 if (delete_extract) 1752 kcore_extract__delete(&kce); 1753 1754 return err; 1755 } 1756