1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <ctype.h> 3 #include <errno.h> 4 #include <fcntl.h> 5 #include <inttypes.h> 6 #include <libgen.h> 7 #include <regex.h> 8 #include <stdlib.h> 9 #include <unistd.h> 10 11 #include <linux/string.h> 12 #include <subcmd/run-command.h> 13 14 #include "annotate.h" 15 #include "annotate-data.h" 16 #include "build-id.h" 17 #include "capstone.h" 18 #include "debug.h" 19 #include "disasm.h" 20 #include "dso.h" 21 #include "dwarf-regs.h" 22 #include "env.h" 23 #include "evsel.h" 24 #include "libbfd.h" 25 #include "llvm.h" 26 #include "map.h" 27 #include "maps.h" 28 #include "namespaces.h" 29 #include "srcline.h" 30 #include "symbol.h" 31 #include "util.h" 32 33 static regex_t file_lineno; 34 35 /* These can be referred from the arch-dependent code */ 36 static struct ins_ops call_ops; 37 static struct ins_ops dec_ops; 38 static struct ins_ops jump_ops; 39 static struct ins_ops mov_ops; 40 static struct ins_ops nop_ops; 41 static struct ins_ops lock_ops; 42 static struct ins_ops ret_ops; 43 static struct ins_ops load_store_ops; 44 static struct ins_ops arithmetic_ops; 45 46 static int jump__scnprintf(struct ins *ins, char *bf, size_t size, 47 struct ins_operands *ops, int max_ins_name); 48 static int call__scnprintf(struct ins *ins, char *bf, size_t size, 49 struct ins_operands *ops, int max_ins_name); 50 51 static void ins__sort(struct arch *arch); 52 static int disasm_line__parse(char *line, const char **namep, char **rawp); 53 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args); 54 55 static __attribute__((constructor)) void symbol__init_regexpr(void) 56 { 57 regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); 58 } 59 60 static int arch__grow_instructions(struct arch *arch) 61 { 62 struct ins *new_instructions; 63 size_t new_nr_allocated; 64 65 if (arch->nr_instructions_allocated == 0 && arch->instructions) 66 goto grow_from_non_allocated_table; 67 68 new_nr_allocated = arch->nr_instructions_allocated + 128; 69 new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins)); 70 if (new_instructions == NULL) 71 return -1; 72 73 out_update_instructions: 74 arch->instructions = new_instructions; 75 arch->nr_instructions_allocated = new_nr_allocated; 76 return 0; 77 78 grow_from_non_allocated_table: 79 new_nr_allocated = arch->nr_instructions + 128; 80 new_instructions = calloc(new_nr_allocated, sizeof(struct ins)); 81 if (new_instructions == NULL) 82 return -1; 83 84 memcpy(new_instructions, arch->instructions, arch->nr_instructions); 85 goto out_update_instructions; 86 } 87 88 static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops) 89 { 90 struct ins *ins; 91 92 if (arch->nr_instructions == arch->nr_instructions_allocated && 93 arch__grow_instructions(arch)) 94 return -1; 95 96 ins = &arch->instructions[arch->nr_instructions]; 97 ins->name = strdup(name); 98 if (!ins->name) 99 return -1; 100 101 ins->ops = ops; 102 arch->nr_instructions++; 103 104 ins__sort(arch); 105 return 0; 106 } 107 108 #include "arch/arc/annotate/instructions.c" 109 #include "arch/arm/annotate/instructions.c" 110 #include "arch/arm64/annotate/instructions.c" 111 #include "arch/csky/annotate/instructions.c" 112 #include "arch/loongarch/annotate/instructions.c" 113 #include "arch/mips/annotate/instructions.c" 114 #include "arch/x86/annotate/instructions.c" 115 #include "arch/powerpc/annotate/instructions.c" 116 #include "arch/riscv64/annotate/instructions.c" 117 #include "arch/s390/annotate/instructions.c" 118 #include "arch/sparc/annotate/instructions.c" 119 120 static struct arch architectures[] = { 121 { 122 .name = "arc", 123 .init = arc__annotate_init, 124 }, 125 { 126 .name = "arm", 127 .init = arm__annotate_init, 128 }, 129 { 130 .name = "arm64", 131 .init = arm64__annotate_init, 132 }, 133 { 134 .name = "csky", 135 .init = csky__annotate_init, 136 }, 137 { 138 .name = "mips", 139 .init = mips__annotate_init, 140 .objdump = { 141 .comment_char = '#', 142 }, 143 }, 144 { 145 .name = "x86", 146 .init = x86__annotate_init, 147 .instructions = x86__instructions, 148 .nr_instructions = ARRAY_SIZE(x86__instructions), 149 .insn_suffix = "bwlq", 150 .objdump = { 151 .comment_char = '#', 152 .register_char = '%', 153 .memory_ref_char = '(', 154 .imm_char = '$', 155 }, 156 #ifdef HAVE_LIBDW_SUPPORT 157 .update_insn_state = update_insn_state_x86, 158 #endif 159 }, 160 { 161 .name = "powerpc", 162 .init = powerpc__annotate_init, 163 #ifdef HAVE_LIBDW_SUPPORT 164 .update_insn_state = update_insn_state_powerpc, 165 #endif 166 }, 167 { 168 .name = "riscv64", 169 .init = riscv64__annotate_init, 170 }, 171 { 172 .name = "s390", 173 .init = s390__annotate_init, 174 .objdump = { 175 .comment_char = '#', 176 }, 177 }, 178 { 179 .name = "sparc", 180 .init = sparc__annotate_init, 181 .objdump = { 182 .comment_char = '#', 183 }, 184 }, 185 { 186 .name = "loongarch", 187 .init = loongarch__annotate_init, 188 .objdump = { 189 .comment_char = '#', 190 }, 191 }, 192 }; 193 194 static int arch__key_cmp(const void *name, const void *archp) 195 { 196 const struct arch *arch = archp; 197 198 return strcmp(name, arch->name); 199 } 200 201 static int arch__cmp(const void *a, const void *b) 202 { 203 const struct arch *aa = a; 204 const struct arch *ab = b; 205 206 return strcmp(aa->name, ab->name); 207 } 208 209 static void arch__sort(void) 210 { 211 const int nmemb = ARRAY_SIZE(architectures); 212 213 qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); 214 } 215 216 struct arch *arch__find(const char *name) 217 { 218 const int nmemb = ARRAY_SIZE(architectures); 219 static bool sorted; 220 221 if (!sorted) { 222 arch__sort(); 223 sorted = true; 224 } 225 226 return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); 227 } 228 229 bool arch__is(struct arch *arch, const char *name) 230 { 231 return !strcmp(arch->name, name); 232 } 233 234 static void ins_ops__delete(struct ins_operands *ops) 235 { 236 if (ops == NULL) 237 return; 238 zfree(&ops->source.raw); 239 zfree(&ops->source.name); 240 zfree(&ops->target.raw); 241 zfree(&ops->target.name); 242 } 243 244 static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, 245 struct ins_operands *ops, int max_ins_name) 246 { 247 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); 248 } 249 250 static int ins__scnprintf(struct ins *ins, char *bf, size_t size, 251 struct ins_operands *ops, int max_ins_name) 252 { 253 if (ins->ops->scnprintf) 254 return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); 255 256 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 257 } 258 259 bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) 260 { 261 if (!arch || !arch->ins_is_fused) 262 return false; 263 264 return arch->ins_is_fused(arch, ins1, ins2); 265 } 266 267 static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 268 struct disasm_line *dl __maybe_unused) 269 { 270 char *endptr, *tok, *name; 271 struct map *map = ms->map; 272 struct addr_map_symbol target = { 273 .ms = { .map = map, }, 274 }; 275 276 ops->target.addr = strtoull(ops->raw, &endptr, 16); 277 278 name = strchr(endptr, '<'); 279 if (name == NULL) 280 goto indirect_call; 281 282 name++; 283 284 if (arch->objdump.skip_functions_char && 285 strchr(name, arch->objdump.skip_functions_char)) 286 return -1; 287 288 tok = strchr(name, '>'); 289 if (tok == NULL) 290 return -1; 291 292 *tok = '\0'; 293 ops->target.name = strdup(name); 294 *tok = '>'; 295 296 if (ops->target.name == NULL) 297 return -1; 298 find_target: 299 target.addr = map__objdump_2mem(map, ops->target.addr); 300 301 if (maps__find_ams(ms->maps, &target) == 0 && 302 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 303 ops->target.sym = target.ms.sym; 304 305 return 0; 306 307 indirect_call: 308 tok = strchr(endptr, '*'); 309 if (tok != NULL) { 310 endptr++; 311 312 /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). 313 * Do not parse such instruction. */ 314 if (strstr(endptr, "(%r") == NULL) 315 ops->target.addr = strtoull(endptr, NULL, 16); 316 } 317 goto find_target; 318 } 319 320 static int call__scnprintf(struct ins *ins, char *bf, size_t size, 321 struct ins_operands *ops, int max_ins_name) 322 { 323 if (ops->target.sym) 324 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 325 326 if (ops->target.addr == 0) 327 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 328 329 if (ops->target.name) 330 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name); 331 332 return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr); 333 } 334 335 static struct ins_ops call_ops = { 336 .parse = call__parse, 337 .scnprintf = call__scnprintf, 338 }; 339 340 bool ins__is_call(const struct ins *ins) 341 { 342 return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops; 343 } 344 345 /* 346 * Prevents from matching commas in the comment section, e.g.: 347 * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast 348 * 349 * and skip comma as part of function arguments, e.g.: 350 * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> 351 */ 352 static inline const char *validate_comma(const char *c, struct ins_operands *ops) 353 { 354 if (ops->jump.raw_comment && c > ops->jump.raw_comment) 355 return NULL; 356 357 if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) 358 return NULL; 359 360 return c; 361 } 362 363 static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 364 struct disasm_line *dl __maybe_unused) 365 { 366 struct map *map = ms->map; 367 struct symbol *sym = ms->sym; 368 struct addr_map_symbol target = { 369 .ms = { .map = map, }, 370 }; 371 const char *c = strchr(ops->raw, ','); 372 u64 start, end; 373 374 ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); 375 ops->jump.raw_func_start = strchr(ops->raw, '<'); 376 377 c = validate_comma(c, ops); 378 379 /* 380 * Examples of lines to parse for the _cpp_lex_token@@Base 381 * function: 382 * 383 * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> 384 * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> 385 * 386 * The first is a jump to an offset inside the same function, 387 * the second is to another function, i.e. that 0xa72 is an 388 * offset in the cpp_named_operator2name@@base function. 389 */ 390 /* 391 * skip over possible up to 2 operands to get to address, e.g.: 392 * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> 393 */ 394 if (c != NULL) { 395 c++; 396 ops->target.addr = strtoull(c, NULL, 16); 397 if (!ops->target.addr) { 398 c = strchr(c, ','); 399 c = validate_comma(c, ops); 400 if (c != NULL) { 401 c++; 402 ops->target.addr = strtoull(c, NULL, 16); 403 } 404 } 405 } else { 406 ops->target.addr = strtoull(ops->raw, NULL, 16); 407 } 408 409 target.addr = map__objdump_2mem(map, ops->target.addr); 410 start = map__unmap_ip(map, sym->start); 411 end = map__unmap_ip(map, sym->end); 412 413 ops->target.outside = target.addr < start || target.addr > end; 414 415 /* 416 * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): 417 418 cpp_named_operator2name@@Base+0xa72 419 420 * Point to a place that is after the cpp_named_operator2name 421 * boundaries, i.e. in the ELF symbol table for cc1 422 * cpp_named_operator2name is marked as being 32-bytes long, but it in 423 * fact is much larger than that, so we seem to need a symbols__find() 424 * routine that looks for >= current->start and < next_symbol->start, 425 * possibly just for C++ objects? 426 * 427 * For now lets just make some progress by marking jumps to outside the 428 * current function as call like. 429 * 430 * Actual navigation will come next, with further understanding of how 431 * the symbol searching and disassembly should be done. 432 */ 433 if (maps__find_ams(ms->maps, &target) == 0 && 434 map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) 435 ops->target.sym = target.ms.sym; 436 437 if (!ops->target.outside) { 438 ops->target.offset = target.addr - start; 439 ops->target.offset_avail = true; 440 } else { 441 ops->target.offset_avail = false; 442 } 443 444 return 0; 445 } 446 447 static int jump__scnprintf(struct ins *ins, char *bf, size_t size, 448 struct ins_operands *ops, int max_ins_name) 449 { 450 const char *c; 451 452 if (!ops->target.addr || ops->target.offset < 0) 453 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 454 455 if (ops->target.outside && ops->target.sym != NULL) 456 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); 457 458 c = strchr(ops->raw, ','); 459 c = validate_comma(c, ops); 460 461 if (c != NULL) { 462 const char *c2 = strchr(c + 1, ','); 463 464 c2 = validate_comma(c2, ops); 465 /* check for 3-op insn */ 466 if (c2 != NULL) 467 c = c2; 468 c++; 469 470 /* mirror arch objdump's space-after-comma style */ 471 if (*c == ' ') 472 c++; 473 } 474 475 return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, 476 ins->name, c ? c - ops->raw : 0, ops->raw, 477 ops->target.offset); 478 } 479 480 static void jump__delete(struct ins_operands *ops __maybe_unused) 481 { 482 /* 483 * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the 484 * raw string, don't free them. 485 */ 486 } 487 488 static struct ins_ops jump_ops = { 489 .free = jump__delete, 490 .parse = jump__parse, 491 .scnprintf = jump__scnprintf, 492 }; 493 494 bool ins__is_jump(const struct ins *ins) 495 { 496 return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops; 497 } 498 499 static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) 500 { 501 char *endptr, *name, *t; 502 503 if (strstr(raw, "(%rip)") == NULL) 504 return 0; 505 506 *addrp = strtoull(comment, &endptr, 16); 507 if (endptr == comment) 508 return 0; 509 name = strchr(endptr, '<'); 510 if (name == NULL) 511 return -1; 512 513 name++; 514 515 t = strchr(name, '>'); 516 if (t == NULL) 517 return 0; 518 519 *t = '\0'; 520 *namep = strdup(name); 521 *t = '>'; 522 523 return 0; 524 } 525 526 static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, 527 struct disasm_line *dl __maybe_unused) 528 { 529 ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); 530 if (ops->locked.ops == NULL) 531 return 0; 532 533 if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) 534 goto out_free_ops; 535 536 ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0); 537 538 if (ops->locked.ins.ops == NULL) 539 goto out_free_ops; 540 541 if (ops->locked.ins.ops->parse && 542 ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0) 543 goto out_free_ops; 544 545 return 0; 546 547 out_free_ops: 548 zfree(&ops->locked.ops); 549 return 0; 550 } 551 552 static int lock__scnprintf(struct ins *ins, char *bf, size_t size, 553 struct ins_operands *ops, int max_ins_name) 554 { 555 int printed; 556 557 if (ops->locked.ins.ops == NULL) 558 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); 559 560 printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); 561 return printed + ins__scnprintf(&ops->locked.ins, bf + printed, 562 size - printed, ops->locked.ops, max_ins_name); 563 } 564 565 static void lock__delete(struct ins_operands *ops) 566 { 567 struct ins *ins = &ops->locked.ins; 568 569 if (ins->ops && ins->ops->free) 570 ins->ops->free(ops->locked.ops); 571 else 572 ins_ops__delete(ops->locked.ops); 573 574 zfree(&ops->locked.ops); 575 zfree(&ops->locked.ins.name); 576 zfree(&ops->target.raw); 577 zfree(&ops->target.name); 578 } 579 580 static struct ins_ops lock_ops = { 581 .free = lock__delete, 582 .parse = lock__parse, 583 .scnprintf = lock__scnprintf, 584 }; 585 586 /* 587 * Check if the operand has more than one registers like x86 SIB addressing: 588 * 0x1234(%rax, %rbx, 8) 589 * 590 * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check 591 * the input string after 'memory_ref_char' if exists. 592 */ 593 static bool check_multi_regs(struct arch *arch, const char *op) 594 { 595 int count = 0; 596 597 if (arch->objdump.register_char == 0) 598 return false; 599 600 if (arch->objdump.memory_ref_char) { 601 op = strchr(op, arch->objdump.memory_ref_char); 602 if (op == NULL) 603 return false; 604 } 605 606 while ((op = strchr(op, arch->objdump.register_char)) != NULL) { 607 count++; 608 op++; 609 } 610 611 return count > 1; 612 } 613 614 static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, 615 struct disasm_line *dl __maybe_unused) 616 { 617 char *s = strchr(ops->raw, ','), *target, *comment, prev; 618 619 if (s == NULL) 620 return -1; 621 622 *s = '\0'; 623 624 /* 625 * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) 626 * then it needs to have the closing parenthesis. 627 */ 628 if (strchr(ops->raw, '(')) { 629 *s = ','; 630 s = strchr(ops->raw, ')'); 631 if (s == NULL || s[1] != ',') 632 return -1; 633 *++s = '\0'; 634 } 635 636 ops->source.raw = strdup(ops->raw); 637 *s = ','; 638 639 if (ops->source.raw == NULL) 640 return -1; 641 642 ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); 643 644 target = skip_spaces(++s); 645 comment = strchr(s, arch->objdump.comment_char); 646 647 if (comment != NULL) 648 s = comment - 1; 649 else 650 s = strchr(s, '\0') - 1; 651 652 while (s > target && isspace(s[0])) 653 --s; 654 s++; 655 prev = *s; 656 *s = '\0'; 657 658 ops->target.raw = strdup(target); 659 *s = prev; 660 661 if (ops->target.raw == NULL) 662 goto out_free_source; 663 664 ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); 665 666 if (comment == NULL) 667 return 0; 668 669 comment = skip_spaces(comment); 670 comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); 671 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 672 673 return 0; 674 675 out_free_source: 676 zfree(&ops->source.raw); 677 return -1; 678 } 679 680 static int mov__scnprintf(struct ins *ins, char *bf, size_t size, 681 struct ins_operands *ops, int max_ins_name) 682 { 683 return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, 684 ops->source.name ?: ops->source.raw, 685 ops->target.name ?: ops->target.raw); 686 } 687 688 static struct ins_ops mov_ops = { 689 .parse = mov__parse, 690 .scnprintf = mov__scnprintf, 691 }; 692 693 #define PPC_22_30(R) (((R) >> 1) & 0x1ff) 694 #define MINUS_EXT_XO_FORM 234 695 #define SUB_EXT_XO_FORM 232 696 #define ADD_ZERO_EXT_XO_FORM 202 697 #define SUB_ZERO_EXT_XO_FORM 200 698 699 static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size, 700 struct ins_operands *ops, int max_ins_name) 701 { 702 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 703 ops->raw); 704 } 705 706 /* 707 * Sets the fields: multi_regs and "mem_ref". 708 * "mem_ref" is set for ops->source which is later used to 709 * fill the objdump->memory_ref-char field. This ops is currently 710 * used by powerpc and since binary instruction code is used to 711 * extract opcode, regs and offset, no other parsing is needed here. 712 * 713 * Dont set multi regs for 4 cases since it has only one operand 714 * for source: 715 * - Add to Minus One Extended XO-form ( Ex: addme, addmeo ) 716 * - Subtract From Minus One Extended XO-form ( Ex: subfme ) 717 * - Add to Zero Extended XO-form ( Ex: addze, addzeo ) 718 * - Subtract From Zero Extended XO-form ( Ex: subfze ) 719 */ 720 static int arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, 721 struct map_symbol *ms __maybe_unused, struct disasm_line *dl) 722 { 723 int opcode = PPC_OP(dl->raw.raw_insn); 724 725 ops->source.mem_ref = false; 726 if (opcode == 31) { 727 if ((opcode != MINUS_EXT_XO_FORM) && (opcode != SUB_EXT_XO_FORM) \ 728 && (opcode != ADD_ZERO_EXT_XO_FORM) && (opcode != SUB_ZERO_EXT_XO_FORM)) 729 ops->source.multi_regs = true; 730 } 731 732 ops->target.mem_ref = false; 733 ops->target.multi_regs = false; 734 735 return 0; 736 } 737 738 static struct ins_ops arithmetic_ops = { 739 .parse = arithmetic__parse, 740 .scnprintf = arithmetic__scnprintf, 741 }; 742 743 static int load_store__scnprintf(struct ins *ins, char *bf, size_t size, 744 struct ins_operands *ops, int max_ins_name) 745 { 746 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 747 ops->raw); 748 } 749 750 /* 751 * Sets the fields: multi_regs and "mem_ref". 752 * "mem_ref" is set for ops->source which is later used to 753 * fill the objdump->memory_ref-char field. This ops is currently 754 * used by powerpc and since binary instruction code is used to 755 * extract opcode, regs and offset, no other parsing is needed here 756 */ 757 static int load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, 758 struct map_symbol *ms __maybe_unused, struct disasm_line *dl __maybe_unused) 759 { 760 ops->source.mem_ref = true; 761 ops->source.multi_regs = false; 762 /* opcode 31 is of X form */ 763 if (PPC_OP(dl->raw.raw_insn) == 31) 764 ops->source.multi_regs = true; 765 766 ops->target.mem_ref = false; 767 ops->target.multi_regs = false; 768 769 return 0; 770 } 771 772 static struct ins_ops load_store_ops = { 773 .parse = load_store__parse, 774 .scnprintf = load_store__scnprintf, 775 }; 776 777 static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, 778 struct disasm_line *dl __maybe_unused) 779 { 780 char *target, *comment, *s, prev; 781 782 target = s = ops->raw; 783 784 while (s[0] != '\0' && !isspace(s[0])) 785 ++s; 786 prev = *s; 787 *s = '\0'; 788 789 ops->target.raw = strdup(target); 790 *s = prev; 791 792 if (ops->target.raw == NULL) 793 return -1; 794 795 comment = strchr(s, arch->objdump.comment_char); 796 if (comment == NULL) 797 return 0; 798 799 comment = skip_spaces(comment); 800 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 801 802 return 0; 803 } 804 805 static int dec__scnprintf(struct ins *ins, char *bf, size_t size, 806 struct ins_operands *ops, int max_ins_name) 807 { 808 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, 809 ops->target.name ?: ops->target.raw); 810 } 811 812 static struct ins_ops dec_ops = { 813 .parse = dec__parse, 814 .scnprintf = dec__scnprintf, 815 }; 816 817 static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, 818 struct ins_operands *ops __maybe_unused, int max_ins_name) 819 { 820 return scnprintf(bf, size, "%-*s", max_ins_name, "nop"); 821 } 822 823 static struct ins_ops nop_ops = { 824 .scnprintf = nop__scnprintf, 825 }; 826 827 static struct ins_ops ret_ops = { 828 .scnprintf = ins__raw_scnprintf, 829 }; 830 831 static bool ins__is_nop(const struct ins *ins) 832 { 833 return ins->ops == &nop_ops; 834 } 835 836 bool ins__is_ret(const struct ins *ins) 837 { 838 return ins->ops == &ret_ops; 839 } 840 841 bool ins__is_lock(const struct ins *ins) 842 { 843 return ins->ops == &lock_ops; 844 } 845 846 static int ins__key_cmp(const void *name, const void *insp) 847 { 848 const struct ins *ins = insp; 849 850 return strcmp(name, ins->name); 851 } 852 853 static int ins__cmp(const void *a, const void *b) 854 { 855 const struct ins *ia = a; 856 const struct ins *ib = b; 857 858 return strcmp(ia->name, ib->name); 859 } 860 861 static void ins__sort(struct arch *arch) 862 { 863 const int nmemb = arch->nr_instructions; 864 865 qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); 866 } 867 868 static struct ins_ops *__ins__find(struct arch *arch, const char *name, struct disasm_line *dl) 869 { 870 struct ins *ins; 871 const int nmemb = arch->nr_instructions; 872 873 if (arch__is(arch, "powerpc")) { 874 /* 875 * For powerpc, identify the instruction ops 876 * from the opcode using raw_insn. 877 */ 878 struct ins_ops *ops; 879 880 ops = check_ppc_insn(dl); 881 if (ops) 882 return ops; 883 } 884 885 if (!arch->sorted_instructions) { 886 ins__sort(arch); 887 arch->sorted_instructions = true; 888 } 889 890 ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 891 if (ins) 892 return ins->ops; 893 894 if (arch->insn_suffix) { 895 char tmp[32]; 896 char suffix; 897 size_t len = strlen(name); 898 899 if (len == 0 || len >= sizeof(tmp)) 900 return NULL; 901 902 suffix = name[len - 1]; 903 if (strchr(arch->insn_suffix, suffix) == NULL) 904 return NULL; 905 906 strcpy(tmp, name); 907 tmp[len - 1] = '\0'; /* remove the suffix and check again */ 908 909 ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); 910 } 911 return ins ? ins->ops : NULL; 912 } 913 914 struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl) 915 { 916 struct ins_ops *ops = __ins__find(arch, name, dl); 917 918 if (!ops && arch->associate_instruction_ops) 919 ops = arch->associate_instruction_ops(arch, name); 920 921 return ops; 922 } 923 924 static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms) 925 { 926 dl->ins.ops = ins__find(arch, dl->ins.name, dl); 927 928 if (!dl->ins.ops) 929 return; 930 931 if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0) 932 dl->ins.ops = NULL; 933 } 934 935 static int disasm_line__parse(char *line, const char **namep, char **rawp) 936 { 937 char tmp, *name = skip_spaces(line); 938 939 if (name[0] == '\0') 940 return -1; 941 942 *rawp = name + 1; 943 944 while ((*rawp)[0] != '\0' && !isspace((*rawp)[0])) 945 ++*rawp; 946 947 tmp = (*rawp)[0]; 948 (*rawp)[0] = '\0'; 949 *namep = strdup(name); 950 951 if (*namep == NULL) 952 goto out; 953 954 (*rawp)[0] = tmp; 955 *rawp = strim(*rawp); 956 957 return 0; 958 959 out: 960 return -1; 961 } 962 963 /* 964 * Parses the result captured from symbol__disassemble_* 965 * Example, line read from DSO file in powerpc: 966 * line: 38 01 81 e8 967 * opcode: fetched from arch specific get_opcode_insn 968 * rawp_insn: e8810138 969 * 970 * rawp_insn is used later to extract the reg/offset fields 971 */ 972 #define PPC_OP(op) (((op) >> 26) & 0x3F) 973 #define RAW_BYTES 11 974 975 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args) 976 { 977 char *line = dl->al.line; 978 const char **namep = &dl->ins.name; 979 char **rawp = &dl->ops.raw; 980 char *tmp_raw_insn, *name_raw_insn = skip_spaces(line); 981 char *name = skip_spaces(name_raw_insn + RAW_BYTES); 982 int disasm = 0; 983 int ret = 0; 984 985 if (args->options->disassembler_used) 986 disasm = 1; 987 988 if (name_raw_insn[0] == '\0') 989 return -1; 990 991 if (disasm) 992 ret = disasm_line__parse(name, namep, rawp); 993 else 994 *namep = ""; 995 996 tmp_raw_insn = strndup(name_raw_insn, 11); 997 if (tmp_raw_insn == NULL) 998 return -1; 999 1000 remove_spaces(tmp_raw_insn); 1001 1002 sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn); 1003 if (disasm) 1004 dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn); 1005 1006 return ret; 1007 } 1008 1009 static void annotation_line__init(struct annotation_line *al, 1010 struct annotate_args *args, 1011 int nr) 1012 { 1013 al->offset = args->offset; 1014 al->line = strdup(args->line); 1015 al->line_nr = args->line_nr; 1016 al->fileloc = args->fileloc; 1017 al->data_nr = nr; 1018 } 1019 1020 static void annotation_line__exit(struct annotation_line *al) 1021 { 1022 zfree_srcline(&al->path); 1023 zfree(&al->line); 1024 zfree(&al->cycles); 1025 zfree(&al->br_cntr); 1026 } 1027 1028 static size_t disasm_line_size(int nr) 1029 { 1030 struct annotation_line *al; 1031 1032 return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); 1033 } 1034 1035 /* 1036 * Allocating the disasm annotation line data with 1037 * following structure: 1038 * 1039 * ------------------------------------------- 1040 * struct disasm_line | struct annotation_line 1041 * ------------------------------------------- 1042 * 1043 * We have 'struct annotation_line' member as last member 1044 * of 'struct disasm_line' to have an easy access. 1045 */ 1046 struct disasm_line *disasm_line__new(struct annotate_args *args) 1047 { 1048 struct disasm_line *dl = NULL; 1049 struct annotation *notes = symbol__annotation(args->ms.sym); 1050 int nr = notes->src->nr_events; 1051 1052 dl = zalloc(disasm_line_size(nr)); 1053 if (!dl) 1054 return NULL; 1055 1056 annotation_line__init(&dl->al, args, nr); 1057 if (dl->al.line == NULL) 1058 goto out_delete; 1059 1060 if (args->offset != -1) { 1061 if (arch__is(args->arch, "powerpc")) { 1062 if (disasm_line__parse_powerpc(dl, args) < 0) 1063 goto out_free_line; 1064 } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) 1065 goto out_free_line; 1066 1067 disasm_line__init_ins(dl, args->arch, &args->ms); 1068 } 1069 1070 return dl; 1071 1072 out_free_line: 1073 zfree(&dl->al.line); 1074 out_delete: 1075 free(dl); 1076 return NULL; 1077 } 1078 1079 void disasm_line__free(struct disasm_line *dl) 1080 { 1081 if (dl->ins.ops && dl->ins.ops->free) 1082 dl->ins.ops->free(&dl->ops); 1083 else 1084 ins_ops__delete(&dl->ops); 1085 zfree(&dl->ins.name); 1086 annotation_line__exit(&dl->al); 1087 free(dl); 1088 } 1089 1090 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) 1091 { 1092 if (raw || !dl->ins.ops) 1093 return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw); 1094 1095 return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); 1096 } 1097 1098 /* 1099 * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) 1100 * which looks like following 1101 * 1102 * 0000000000415500 <_init>: 1103 * 415500: sub $0x8,%rsp 1104 * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8> 1105 * 41550b: test %rax,%rax 1106 * 41550e: je 415515 <_init+0x15> 1107 * 415510: callq 416e70 <__gmon_start__@plt> 1108 * 415515: add $0x8,%rsp 1109 * 415519: retq 1110 * 1111 * it will be parsed and saved into struct disasm_line as 1112 * <offset> <name> <ops.raw> 1113 * 1114 * The offset will be a relative offset from the start of the symbol and -1 1115 * means that it's not a disassembly line so should be treated differently. 1116 * The ops.raw part will be parsed further according to type of the instruction. 1117 */ 1118 static int symbol__parse_objdump_line(struct symbol *sym, 1119 struct annotate_args *args, 1120 char *parsed_line, int *line_nr, char **fileloc) 1121 { 1122 struct map *map = args->ms.map; 1123 struct annotation *notes = symbol__annotation(sym); 1124 struct disasm_line *dl; 1125 char *tmp; 1126 s64 line_ip, offset = -1; 1127 regmatch_t match[2]; 1128 1129 /* /filename:linenr ? Save line number and ignore. */ 1130 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { 1131 *line_nr = atoi(parsed_line + match[1].rm_so); 1132 free(*fileloc); 1133 *fileloc = strdup(parsed_line); 1134 return 0; 1135 } 1136 1137 /* Process hex address followed by ':'. */ 1138 line_ip = strtoull(parsed_line, &tmp, 16); 1139 if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { 1140 u64 start = map__rip_2objdump(map, sym->start), 1141 end = map__rip_2objdump(map, sym->end); 1142 1143 offset = line_ip - start; 1144 if ((u64)line_ip < start || (u64)line_ip >= end) 1145 offset = -1; 1146 else 1147 parsed_line = tmp + 1; 1148 } 1149 1150 args->offset = offset; 1151 args->line = parsed_line; 1152 args->line_nr = *line_nr; 1153 args->fileloc = *fileloc; 1154 args->ms.sym = sym; 1155 1156 dl = disasm_line__new(args); 1157 (*line_nr)++; 1158 1159 if (dl == NULL) 1160 return -1; 1161 1162 if (!disasm_line__has_local_offset(dl)) { 1163 dl->ops.target.offset = dl->ops.target.addr - 1164 map__rip_2objdump(map, sym->start); 1165 dl->ops.target.offset_avail = true; 1166 } 1167 1168 /* kcore has no symbols, so add the call target symbol */ 1169 if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { 1170 struct addr_map_symbol target = { 1171 .addr = dl->ops.target.addr, 1172 .ms = { .map = map, }, 1173 }; 1174 1175 if (!maps__find_ams(args->ms.maps, &target) && 1176 target.ms.sym->start == target.al_addr) 1177 dl->ops.target.sym = target.ms.sym; 1178 } 1179 1180 annotation_line__add(&dl->al, ¬es->src->source); 1181 return 0; 1182 } 1183 1184 static void delete_last_nop(struct symbol *sym) 1185 { 1186 struct annotation *notes = symbol__annotation(sym); 1187 struct list_head *list = ¬es->src->source; 1188 struct disasm_line *dl; 1189 1190 while (!list_empty(list)) { 1191 dl = list_entry(list->prev, struct disasm_line, al.node); 1192 1193 if (dl->ins.ops) { 1194 if (!ins__is_nop(&dl->ins)) 1195 return; 1196 } else { 1197 if (!strstr(dl->al.line, " nop ") && 1198 !strstr(dl->al.line, " nopl ") && 1199 !strstr(dl->al.line, " nopw ")) 1200 return; 1201 } 1202 1203 list_del_init(&dl->al.node); 1204 disasm_line__free(dl); 1205 } 1206 } 1207 1208 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen) 1209 { 1210 struct dso *dso = map__dso(ms->map); 1211 1212 BUG_ON(buflen == 0); 1213 1214 if (errnum >= 0) { 1215 str_error_r(errnum, buf, buflen); 1216 return 0; 1217 } 1218 1219 switch (errnum) { 1220 case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { 1221 char bf[SBUILD_ID_SIZE + 15] = " with build id "; 1222 char *build_id_msg = NULL; 1223 1224 if (dso__has_build_id(dso)) { 1225 build_id__snprintf(dso__bid(dso), bf + 15, sizeof(bf) - 15); 1226 build_id_msg = bf; 1227 } 1228 scnprintf(buf, buflen, 1229 "No vmlinux file%s\nwas found in the path.\n\n" 1230 "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" 1231 "Please use:\n\n" 1232 " perf buildid-cache -vu vmlinux\n\n" 1233 "or:\n\n" 1234 " --vmlinux vmlinux\n", build_id_msg ?: ""); 1235 } 1236 break; 1237 case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: 1238 scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); 1239 break; 1240 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: 1241 scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); 1242 break; 1243 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: 1244 scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); 1245 break; 1246 case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: 1247 scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso)); 1248 break; 1249 case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: 1250 scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", 1251 dso__long_name(dso)); 1252 break; 1253 case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE: 1254 scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso)); 1255 break; 1256 default: 1257 scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); 1258 break; 1259 } 1260 1261 return 0; 1262 } 1263 1264 static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size) 1265 { 1266 char linkname[PATH_MAX]; 1267 char *build_id_filename; 1268 char *build_id_path = NULL; 1269 char *pos; 1270 int len; 1271 1272 if (dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS && 1273 !dso__is_kcore(dso)) 1274 return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; 1275 1276 build_id_filename = dso__build_id_filename(dso, NULL, 0, false); 1277 if (build_id_filename) { 1278 __symbol__join_symfs(filename, filename_size, build_id_filename); 1279 free(build_id_filename); 1280 } else { 1281 if (dso__has_build_id(dso)) 1282 return ENOMEM; 1283 goto fallback; 1284 } 1285 1286 build_id_path = strdup(filename); 1287 if (!build_id_path) 1288 return ENOMEM; 1289 1290 /* 1291 * old style build-id cache has name of XX/XXXXXXX.. while 1292 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. 1293 * extract the build-id part of dirname in the new style only. 1294 */ 1295 pos = strrchr(build_id_path, '/'); 1296 if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) 1297 dirname(build_id_path); 1298 1299 if (dso__is_kcore(dso)) 1300 goto fallback; 1301 1302 len = readlink(build_id_path, linkname, sizeof(linkname) - 1); 1303 if (len < 0) 1304 goto fallback; 1305 1306 linkname[len] = '\0'; 1307 if (strstr(linkname, DSO__NAME_KALLSYMS) || 1308 access(filename, R_OK)) { 1309 fallback: 1310 /* 1311 * If we don't have build-ids or the build-id file isn't in the 1312 * cache, or is just a kallsyms file, well, lets hope that this 1313 * DSO is the same as when 'perf record' ran. 1314 */ 1315 if (dso__kernel(dso) && dso__long_name(dso)[0] == '/') 1316 snprintf(filename, filename_size, "%s", dso__long_name(dso)); 1317 else 1318 __symbol__join_symfs(filename, filename_size, dso__long_name(dso)); 1319 1320 mutex_lock(dso__lock(dso)); 1321 if (access(filename, R_OK) && errno == ENOENT && dso__nsinfo(dso)) { 1322 char *new_name = dso__filename_with_chroot(dso, filename); 1323 if (new_name) { 1324 strlcpy(filename, new_name, filename_size); 1325 free(new_name); 1326 } 1327 } 1328 mutex_unlock(dso__lock(dso)); 1329 } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1330 dso__set_binary_type(dso, DSO_BINARY_TYPE__BUILD_ID_CACHE); 1331 } 1332 1333 free(build_id_path); 1334 return 0; 1335 } 1336 1337 static int symbol__disassemble_raw(char *filename, struct symbol *sym, 1338 struct annotate_args *args) 1339 { 1340 struct annotation *notes = symbol__annotation(sym); 1341 struct map *map = args->ms.map; 1342 struct dso *dso = map__dso(map); 1343 u64 start = map__rip_2objdump(map, sym->start); 1344 u64 end = map__rip_2objdump(map, sym->end); 1345 u64 len = end - start; 1346 u64 offset; 1347 int i, count; 1348 u8 *buf = NULL; 1349 char disasm_buf[512]; 1350 struct disasm_line *dl; 1351 u32 *line; 1352 1353 /* Return if objdump is specified explicitly */ 1354 if (args->options->objdump_path) 1355 return -1; 1356 1357 pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename); 1358 1359 buf = malloc(len); 1360 if (buf == NULL) 1361 goto err; 1362 1363 count = dso__data_read_offset(dso, NULL, sym->start, buf, len); 1364 1365 line = (u32 *)buf; 1366 1367 if ((u64)count != len) 1368 goto err; 1369 1370 /* add the function address and name */ 1371 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", 1372 start, sym->name); 1373 1374 args->offset = -1; 1375 args->line = disasm_buf; 1376 args->line_nr = 0; 1377 args->fileloc = NULL; 1378 args->ms.sym = sym; 1379 1380 dl = disasm_line__new(args); 1381 if (dl == NULL) 1382 goto err; 1383 1384 annotation_line__add(&dl->al, ¬es->src->source); 1385 1386 /* Each raw instruction is 4 byte */ 1387 count = len/4; 1388 1389 for (i = 0, offset = 0; i < count; i++) { 1390 args->offset = offset; 1391 sprintf(args->line, "%x", line[i]); 1392 dl = disasm_line__new(args); 1393 if (dl == NULL) 1394 break; 1395 1396 annotation_line__add(&dl->al, ¬es->src->source); 1397 offset += 4; 1398 } 1399 1400 /* It failed in the middle */ 1401 if (offset != len) { 1402 struct list_head *list = ¬es->src->source; 1403 1404 /* Discard all lines and fallback to objdump */ 1405 while (!list_empty(list)) { 1406 dl = list_first_entry(list, struct disasm_line, al.node); 1407 1408 list_del_init(&dl->al.node); 1409 disasm_line__free(dl); 1410 } 1411 count = -1; 1412 } 1413 1414 out: 1415 free(buf); 1416 return count < 0 ? count : 0; 1417 1418 err: 1419 count = -1; 1420 goto out; 1421 } 1422 1423 /* 1424 * Possibly create a new version of line with tabs expanded. Returns the 1425 * existing or new line, storage is updated if a new line is allocated. If 1426 * allocation fails then NULL is returned. 1427 */ 1428 char *expand_tabs(char *line, char **storage, size_t *storage_len) 1429 { 1430 size_t i, src, dst, len, new_storage_len, num_tabs; 1431 char *new_line; 1432 size_t line_len = strlen(line); 1433 1434 for (num_tabs = 0, i = 0; i < line_len; i++) 1435 if (line[i] == '\t') 1436 num_tabs++; 1437 1438 if (num_tabs == 0) 1439 return line; 1440 1441 /* 1442 * Space for the line and '\0', less the leading and trailing 1443 * spaces. Each tab may introduce 7 additional spaces. 1444 */ 1445 new_storage_len = line_len + 1 + (num_tabs * 7); 1446 1447 new_line = malloc(new_storage_len); 1448 if (new_line == NULL) { 1449 pr_err("Failure allocating memory for tab expansion\n"); 1450 return NULL; 1451 } 1452 1453 /* 1454 * Copy regions starting at src and expand tabs. If there are two 1455 * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces 1456 * are inserted. 1457 */ 1458 for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { 1459 if (line[i] == '\t') { 1460 len = i - src; 1461 memcpy(&new_line[dst], &line[src], len); 1462 dst += len; 1463 new_line[dst++] = ' '; 1464 while (dst % 8 != 0) 1465 new_line[dst++] = ' '; 1466 src = i + 1; 1467 num_tabs--; 1468 } 1469 } 1470 1471 /* Expand the last region. */ 1472 len = line_len - src; 1473 memcpy(&new_line[dst], &line[src], len); 1474 dst += len; 1475 new_line[dst] = '\0'; 1476 1477 free(*storage); 1478 *storage = new_line; 1479 *storage_len = new_storage_len; 1480 return new_line; 1481 } 1482 1483 static int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args) 1484 { 1485 struct annotation *notes = symbol__annotation(sym); 1486 struct disasm_line *dl; 1487 1488 args->offset = -1; 1489 args->line = strdup("to be implemented"); 1490 args->line_nr = 0; 1491 args->fileloc = NULL; 1492 dl = disasm_line__new(args); 1493 if (dl) 1494 annotation_line__add(&dl->al, ¬es->src->source); 1495 1496 zfree(&args->line); 1497 return 0; 1498 } 1499 1500 static int symbol__disassemble_objdump(const char *filename, struct symbol *sym, 1501 struct annotate_args *args) 1502 { 1503 struct annotation_options *opts = &annotate_opts; 1504 struct map *map = args->ms.map; 1505 struct dso *dso = map__dso(map); 1506 char *command; 1507 FILE *file; 1508 int lineno = 0; 1509 char *fileloc = NULL; 1510 int nline; 1511 char *line; 1512 size_t line_len; 1513 const char *objdump_argv[] = { 1514 "/bin/sh", 1515 "-c", 1516 NULL, /* Will be the objdump command to run. */ 1517 "--", 1518 NULL, /* Will be the symfs path. */ 1519 NULL, 1520 }; 1521 struct child_process objdump_process; 1522 int err; 1523 1524 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) 1525 return symbol__disassemble_bpf_libbfd(sym, args); 1526 1527 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) 1528 return symbol__disassemble_bpf_image(sym, args); 1529 1530 err = asprintf(&command, 1531 "%s %s%s --start-address=0x%016" PRIx64 1532 " --stop-address=0x%016" PRIx64 1533 " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", 1534 opts->objdump_path ?: "objdump", 1535 opts->disassembler_style ? "-M " : "", 1536 opts->disassembler_style ?: "", 1537 map__rip_2objdump(map, sym->start), 1538 map__rip_2objdump(map, sym->end), 1539 opts->show_linenr ? "-l" : "", 1540 opts->show_asm_raw ? "" : "--no-show-raw-insn", 1541 opts->annotate_src ? "-S" : "", 1542 opts->prefix ? "--prefix " : "", 1543 opts->prefix ? '"' : ' ', 1544 opts->prefix ?: "", 1545 opts->prefix ? '"' : ' ', 1546 opts->prefix_strip ? "--prefix-strip=" : "", 1547 opts->prefix_strip ?: ""); 1548 1549 if (err < 0) { 1550 pr_err("Failure allocating memory for the command to run\n"); 1551 return err; 1552 } 1553 1554 pr_debug("Executing: %s\n", command); 1555 1556 objdump_argv[2] = command; 1557 objdump_argv[4] = filename; 1558 1559 /* Create a pipe to read from for stdout */ 1560 memset(&objdump_process, 0, sizeof(objdump_process)); 1561 objdump_process.argv = objdump_argv; 1562 objdump_process.out = -1; 1563 objdump_process.err = -1; 1564 objdump_process.no_stderr = 1; 1565 if (start_command(&objdump_process)) { 1566 pr_err("Failure starting to run %s\n", command); 1567 err = -1; 1568 goto out_free_command; 1569 } 1570 1571 file = fdopen(objdump_process.out, "r"); 1572 if (!file) { 1573 pr_err("Failure creating FILE stream for %s\n", command); 1574 /* 1575 * If we were using debug info should retry with 1576 * original binary. 1577 */ 1578 err = -1; 1579 goto out_close_stdout; 1580 } 1581 1582 /* Storage for getline. */ 1583 line = NULL; 1584 line_len = 0; 1585 1586 nline = 0; 1587 while (!feof(file)) { 1588 const char *match; 1589 char *expanded_line; 1590 1591 if (getline(&line, &line_len, file) < 0 || !line) 1592 break; 1593 1594 /* Skip lines containing "filename:" */ 1595 match = strstr(line, filename); 1596 if (match && match[strlen(filename)] == ':') 1597 continue; 1598 1599 expanded_line = strim(line); 1600 expanded_line = expand_tabs(expanded_line, &line, &line_len); 1601 if (!expanded_line) 1602 break; 1603 1604 /* 1605 * The source code line number (lineno) needs to be kept in 1606 * across calls to symbol__parse_objdump_line(), so that it 1607 * can associate it with the instructions till the next one. 1608 * See disasm_line__new() and struct disasm_line::line_nr. 1609 */ 1610 if (symbol__parse_objdump_line(sym, args, expanded_line, 1611 &lineno, &fileloc) < 0) 1612 break; 1613 nline++; 1614 } 1615 free(line); 1616 free(fileloc); 1617 1618 err = finish_command(&objdump_process); 1619 if (err) 1620 pr_err("Error running %s\n", command); 1621 1622 if (nline == 0) { 1623 err = -1; 1624 pr_err("No output from %s\n", command); 1625 } 1626 1627 /* 1628 * kallsyms does not have symbol sizes so there may a nop at the end. 1629 * Remove it. 1630 */ 1631 if (dso__is_kcore(dso)) 1632 delete_last_nop(sym); 1633 1634 fclose(file); 1635 1636 out_close_stdout: 1637 close(objdump_process.out); 1638 1639 out_free_command: 1640 free(command); 1641 return err; 1642 } 1643 1644 int symbol__disassemble(struct symbol *sym, struct annotate_args *args) 1645 { 1646 struct annotation_options *options = args->options; 1647 struct map *map = args->ms.map; 1648 struct dso *dso = map__dso(map); 1649 char symfs_filename[PATH_MAX]; 1650 bool delete_extract = false; 1651 struct kcore_extract kce; 1652 bool decomp = false; 1653 int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); 1654 1655 if (err) 1656 return err; 1657 1658 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, 1659 symfs_filename, sym->name, map__unmap_ip(map, sym->start), 1660 map__unmap_ip(map, sym->end)); 1661 1662 pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name); 1663 1664 if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { 1665 return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE; 1666 } else if (dso__is_kcore(dso)) { 1667 kce.addr = map__rip_2objdump(map, sym->start); 1668 kce.kcore_filename = symfs_filename; 1669 kce.len = sym->end - sym->start; 1670 kce.offs = sym->start; 1671 1672 if (!kcore_extract__create(&kce)) { 1673 delete_extract = true; 1674 strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename)); 1675 } 1676 } else if (dso__needs_decompress(dso)) { 1677 char tmp[KMOD_DECOMP_LEN]; 1678 1679 if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) 1680 return -1; 1681 1682 decomp = true; 1683 strcpy(symfs_filename, tmp); 1684 } 1685 1686 /* 1687 * For powerpc data type profiling, use the dso__data_read_offset to 1688 * read raw instruction directly and interpret the binary code to 1689 * understand instructions and register fields. For sort keys as type 1690 * and typeoff, disassemble to mnemonic notation is not required in 1691 * case of powerpc. 1692 */ 1693 if (arch__is(args->arch, "powerpc")) { 1694 extern const char *sort_order; 1695 1696 if (sort_order && !strstr(sort_order, "sym")) { 1697 err = symbol__disassemble_raw(symfs_filename, sym, args); 1698 if (err == 0) 1699 goto out_remove_tmp; 1700 1701 err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); 1702 if (err == 0) 1703 goto out_remove_tmp; 1704 } 1705 } 1706 1707 /* FIXME: LLVM and CAPSTONE should support source code */ 1708 if (options->annotate_src && !options->hide_src_code) { 1709 err = symbol__disassemble_objdump(symfs_filename, sym, args); 1710 if (err == 0) 1711 goto out_remove_tmp; 1712 } 1713 1714 err = -1; 1715 for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { 1716 enum perf_disassembler dis = options->disassemblers[i]; 1717 1718 switch (dis) { 1719 case PERF_DISASM_LLVM: 1720 args->options->disassembler_used = PERF_DISASM_LLVM; 1721 err = symbol__disassemble_llvm(symfs_filename, sym, args); 1722 break; 1723 case PERF_DISASM_CAPSTONE: 1724 args->options->disassembler_used = PERF_DISASM_CAPSTONE; 1725 err = symbol__disassemble_capstone(symfs_filename, sym, args); 1726 break; 1727 case PERF_DISASM_OBJDUMP: 1728 args->options->disassembler_used = PERF_DISASM_OBJDUMP; 1729 err = symbol__disassemble_objdump(symfs_filename, sym, args); 1730 break; 1731 case PERF_DISASM_UNKNOWN: /* End of disassemblers. */ 1732 default: 1733 args->options->disassembler_used = PERF_DISASM_UNKNOWN; 1734 goto out_remove_tmp; 1735 } 1736 if (err == 0) 1737 pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]); 1738 } 1739 out_remove_tmp: 1740 if (decomp) 1741 unlink(symfs_filename); 1742 1743 if (delete_extract) 1744 kcore_extract__delete(&kce); 1745 1746 return err; 1747 } 1748