1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <ctype.h>
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <inttypes.h>
6 #include <libgen.h>
7 #include <regex.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10
11 #include <linux/string.h>
12 #include <subcmd/run-command.h>
13
14 #include "annotate.h"
15 #include "annotate-data.h"
16 #include "build-id.h"
17 #include "capstone.h"
18 #include "debug.h"
19 #include "disasm.h"
20 #include "dso.h"
21 #include "dwarf-regs.h"
22 #include "env.h"
23 #include "evsel.h"
24 #include "libbfd.h"
25 #include "llvm.h"
26 #include "map.h"
27 #include "maps.h"
28 #include "namespaces.h"
29 #include "srcline.h"
30 #include "symbol.h"
31 #include "util.h"
32
33 static regex_t file_lineno;
34
35 /* These can be referred from the arch-dependent code */
36 static struct ins_ops call_ops;
37 static struct ins_ops dec_ops;
38 static struct ins_ops jump_ops;
39 static struct ins_ops mov_ops;
40 static struct ins_ops nop_ops;
41 static struct ins_ops lock_ops;
42 static struct ins_ops ret_ops;
43 static struct ins_ops load_store_ops;
44 static struct ins_ops arithmetic_ops;
45
46 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
47 struct ins_operands *ops, int max_ins_name);
48 static int call__scnprintf(struct ins *ins, char *bf, size_t size,
49 struct ins_operands *ops, int max_ins_name);
50
51 static void ins__sort(struct arch *arch);
52 static int disasm_line__parse(char *line, const char **namep, char **rawp);
53 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args);
54
symbol__init_regexpr(void)55 static __attribute__((constructor)) void symbol__init_regexpr(void)
56 {
57 regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
58 }
59
arch__grow_instructions(struct arch * arch)60 static int arch__grow_instructions(struct arch *arch)
61 {
62 struct ins *new_instructions;
63 size_t new_nr_allocated;
64
65 if (arch->nr_instructions_allocated == 0 && arch->instructions)
66 goto grow_from_non_allocated_table;
67
68 new_nr_allocated = arch->nr_instructions_allocated + 128;
69 new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins));
70 if (new_instructions == NULL)
71 return -1;
72
73 out_update_instructions:
74 arch->instructions = new_instructions;
75 arch->nr_instructions_allocated = new_nr_allocated;
76 return 0;
77
78 grow_from_non_allocated_table:
79 new_nr_allocated = arch->nr_instructions + 128;
80 new_instructions = calloc(new_nr_allocated, sizeof(struct ins));
81 if (new_instructions == NULL)
82 return -1;
83
84 memcpy(new_instructions, arch->instructions, arch->nr_instructions);
85 goto out_update_instructions;
86 }
87
arch__associate_ins_ops(struct arch * arch,const char * name,struct ins_ops * ops)88 static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops)
89 {
90 struct ins *ins;
91
92 if (arch->nr_instructions == arch->nr_instructions_allocated &&
93 arch__grow_instructions(arch))
94 return -1;
95
96 ins = &arch->instructions[arch->nr_instructions];
97 ins->name = strdup(name);
98 if (!ins->name)
99 return -1;
100
101 ins->ops = ops;
102 arch->nr_instructions++;
103
104 ins__sort(arch);
105 return 0;
106 }
107
108 #include "arch/arc/annotate/instructions.c"
109 #include "arch/arm/annotate/instructions.c"
110 #include "arch/arm64/annotate/instructions.c"
111 #include "arch/csky/annotate/instructions.c"
112 #include "arch/loongarch/annotate/instructions.c"
113 #include "arch/mips/annotate/instructions.c"
114 #include "arch/x86/annotate/instructions.c"
115 #include "arch/powerpc/annotate/instructions.c"
116 #include "arch/riscv64/annotate/instructions.c"
117 #include "arch/s390/annotate/instructions.c"
118 #include "arch/sparc/annotate/instructions.c"
119
120 static struct arch architectures[] = {
121 {
122 .name = "arc",
123 .init = arc__annotate_init,
124 },
125 {
126 .name = "arm",
127 .init = arm__annotate_init,
128 },
129 {
130 .name = "arm64",
131 .init = arm64__annotate_init,
132 },
133 {
134 .name = "csky",
135 .init = csky__annotate_init,
136 },
137 {
138 .name = "mips",
139 .init = mips__annotate_init,
140 .objdump = {
141 .comment_char = '#',
142 },
143 },
144 {
145 .name = "x86",
146 .init = x86__annotate_init,
147 .instructions = x86__instructions,
148 .nr_instructions = ARRAY_SIZE(x86__instructions),
149 .insn_suffix = "bwlq",
150 .objdump = {
151 .comment_char = '#',
152 .register_char = '%',
153 .memory_ref_char = '(',
154 .imm_char = '$',
155 },
156 #ifdef HAVE_LIBDW_SUPPORT
157 .update_insn_state = update_insn_state_x86,
158 #endif
159 },
160 {
161 .name = "powerpc",
162 .init = powerpc__annotate_init,
163 #ifdef HAVE_LIBDW_SUPPORT
164 .update_insn_state = update_insn_state_powerpc,
165 #endif
166 },
167 {
168 .name = "riscv64",
169 .init = riscv64__annotate_init,
170 },
171 {
172 .name = "s390",
173 .init = s390__annotate_init,
174 .objdump = {
175 .comment_char = '#',
176 },
177 },
178 {
179 .name = "sparc",
180 .init = sparc__annotate_init,
181 .objdump = {
182 .comment_char = '#',
183 },
184 },
185 {
186 .name = "loongarch",
187 .init = loongarch__annotate_init,
188 .objdump = {
189 .comment_char = '#',
190 },
191 },
192 };
193
arch__key_cmp(const void * name,const void * archp)194 static int arch__key_cmp(const void *name, const void *archp)
195 {
196 const struct arch *arch = archp;
197
198 return strcmp(name, arch->name);
199 }
200
arch__cmp(const void * a,const void * b)201 static int arch__cmp(const void *a, const void *b)
202 {
203 const struct arch *aa = a;
204 const struct arch *ab = b;
205
206 return strcmp(aa->name, ab->name);
207 }
208
arch__sort(void)209 static void arch__sort(void)
210 {
211 const int nmemb = ARRAY_SIZE(architectures);
212
213 qsort(architectures, nmemb, sizeof(struct arch), arch__cmp);
214 }
215
arch__find(const char * name)216 struct arch *arch__find(const char *name)
217 {
218 const int nmemb = ARRAY_SIZE(architectures);
219 static bool sorted;
220
221 if (!sorted) {
222 arch__sort();
223 sorted = true;
224 }
225
226 return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
227 }
228
arch__is(struct arch * arch,const char * name)229 bool arch__is(struct arch *arch, const char *name)
230 {
231 return !strcmp(arch->name, name);
232 }
233
ins_ops__delete(struct ins_operands * ops)234 static void ins_ops__delete(struct ins_operands *ops)
235 {
236 if (ops == NULL)
237 return;
238 zfree(&ops->source.raw);
239 zfree(&ops->source.name);
240 zfree(&ops->target.raw);
241 zfree(&ops->target.name);
242 }
243
ins__raw_scnprintf(struct ins * ins,char * bf,size_t size,struct ins_operands * ops,int max_ins_name)244 static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
245 struct ins_operands *ops, int max_ins_name)
246 {
247 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw);
248 }
249
ins__scnprintf(struct ins * ins,char * bf,size_t size,struct ins_operands * ops,int max_ins_name)250 static int ins__scnprintf(struct ins *ins, char *bf, size_t size,
251 struct ins_operands *ops, int max_ins_name)
252 {
253 if (ins->ops->scnprintf)
254 return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name);
255
256 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
257 }
258
ins__is_fused(struct arch * arch,const char * ins1,const char * ins2)259 bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
260 {
261 if (!arch || !arch->ins_is_fused)
262 return false;
263
264 return arch->ins_is_fused(arch, ins1, ins2);
265 }
266
/*
 * Parse the operands of a call instruction.
 *
 * The leading hexadecimal number of ops->raw becomes ops->target.addr.  If
 * a "<name>" part follows, the name is duplicated into ops->target.name.
 * Otherwise, if a '*' follows (indirect call), the address is re-read from
 * the text after it unless that text contains "(%r".  Finally the address
 * is resolved through ms->maps and, on an exact match, ops->target.sym is
 * set.
 *
 * Returns 0 on success; -1 when the name contains the arch's
 * skip_functions_char, when the closing '>' is missing, or when
 * duplicating the name fails.
 */
static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
		struct disasm_line *dl __maybe_unused)
{
	struct map *map = ms->map;
	struct addr_map_symbol target = {
		.ms = { .map = map, },
	};
	char *after_addr, *sym_name, *closing;

	ops->target.addr = strtoull(ops->raw, &after_addr, 16);

	sym_name = strchr(after_addr, '<');
	if (sym_name != NULL) {
		sym_name++;

		if (arch->objdump.skip_functions_char &&
		    strchr(sym_name, arch->objdump.skip_functions_char))
			return -1;

		closing = strchr(sym_name, '>');
		if (closing == NULL)
			return -1;

		/* Cut the string at '>' only for the duration of strdup(). */
		*closing = '\0';
		ops->target.name = strdup(sym_name);
		*closing = '>';

		if (ops->target.name == NULL)
			return -1;
	} else if (strchr(after_addr, '*') != NULL) {
		after_addr++;

		/*
		 * Indirect call can use a non-rip register and offset:
		 * callq *0x8(%rbx).  Do not parse such instruction.
		 */
		if (strstr(after_addr, "(%r") == NULL)
			ops->target.addr = strtoull(after_addr, NULL, 16);
	}

	target.addr = map__objdump_2mem(map, ops->target.addr);

	if (maps__find_ams(ms->maps, &target) == 0 &&
	    map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
		ops->target.sym = target.ms.sym;

	return 0;
}
319
call__scnprintf(struct ins * ins,char * bf,size_t size,struct ins_operands * ops,int max_ins_name)320 static int call__scnprintf(struct ins *ins, char *bf, size_t size,
321 struct ins_operands *ops, int max_ins_name)
322 {
323 if (ops->target.sym)
324 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
325
326 if (ops->target.addr == 0)
327 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
328
329 if (ops->target.name)
330 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name);
331
332 return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr);
333 }
334
335 static struct ins_ops call_ops = {
336 .parse = call__parse,
337 .scnprintf = call__scnprintf,
338 };
339
ins__is_call(const struct ins * ins)340 bool ins__is_call(const struct ins *ins)
341 {
342 return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops;
343 }
344
345 /*
346 * Prevents from matching commas in the comment section, e.g.:
347 * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast
348 *
349 * and skip comma as part of function arguments, e.g.:
350 * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc>
351 */
validate_comma(const char * c,struct ins_operands * ops)352 static inline const char *validate_comma(const char *c, struct ins_operands *ops)
353 {
354 if (ops->jump.raw_comment && c > ops->jump.raw_comment)
355 return NULL;
356
357 if (ops->jump.raw_func_start && c > ops->jump.raw_func_start)
358 return NULL;
359
360 return c;
361 }
362
/*
 * Parse the operands of a jump instruction.
 *
 * Records where the comment and the "<function+offset>" part start in the
 * raw string, extracts the target address (skipping up to two leading
 * operands), and fills in ops->target.{addr,outside,sym,offset,offset_avail}.
 * Always returns 0.
 */
static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
		struct disasm_line *dl __maybe_unused)
{
	struct map *map = ms->map;
	struct symbol *sym = ms->sym;
	struct addr_map_symbol target = {
		.ms = { .map = map, },
	};
	const char *comma;
	u64 start, end;

	ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char);
	ops->jump.raw_func_start = strchr(ops->raw, '<');

	comma = validate_comma(strchr(ops->raw, ','), ops);

	/*
	 * Examples of lines to parse for the _cpp_lex_token@@Base
	 * function:
	 *
	 * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92>
	 * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72>
	 *
	 * The first is a jump to an offset inside the same function,
	 * the second is to another function, i.e. that 0xa72 is an
	 * offset in the cpp_named_operator2name@@base function.
	 *
	 * The address may be preceded by up to two operands, e.g.:
	 * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
	 */
	if (comma == NULL) {
		ops->target.addr = strtoull(ops->raw, NULL, 16);
	} else {
		ops->target.addr = strtoull(comma + 1, NULL, 16);
		if (!ops->target.addr) {
			/* Nothing parsed after the first comma, try the next one. */
			comma = validate_comma(strchr(comma + 1, ','), ops);
			if (comma != NULL)
				ops->target.addr = strtoull(comma + 1, NULL, 16);
		}
	}

	target.addr = map__objdump_2mem(map, ops->target.addr);
	start = map__unmap_ip(map, sym->start);
	end = map__unmap_ip(map, sym->end);

	ops->target.outside = target.addr < start || target.addr > end;

	/*
	 * FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
	 *
	 *	cpp_named_operator2name@@Base+0xa72
	 *
	 * Point to a place that is after the cpp_named_operator2name
	 * boundaries, i.e. in the ELF symbol table for cc1
	 * cpp_named_operator2name is marked as being 32-bytes long, but it in
	 * fact is much larger than that, so we seem to need a symbols__find()
	 * routine that looks for >= current->start and < next_symbol->start,
	 * possibly just for C++ objects?
	 *
	 * For now lets just make some progress by marking jumps to outside the
	 * current function as call like.
	 *
	 * Actual navigation will come next, with further understanding of how
	 * the symbol searching and disassembly should be done.
	 */
	if (maps__find_ams(ms->maps, &target) == 0 &&
	    map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
		ops->target.sym = target.ms.sym;

	/* An offset is only meaningful for targets inside the current symbol. */
	if (ops->target.outside) {
		ops->target.offset_avail = false;
	} else {
		ops->target.offset = target.addr - start;
		ops->target.offset_avail = true;
	}

	return 0;
}
446
jump__scnprintf(struct ins * ins,char * bf,size_t size,struct ins_operands * ops,int max_ins_name)447 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
448 struct ins_operands *ops, int max_ins_name)
449 {
450 const char *c;
451
452 if (!ops->target.addr || ops->target.offset < 0)
453 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
454
455 if (ops->target.outside && ops->target.sym != NULL)
456 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
457
458 c = strchr(ops->raw, ',');
459 c = validate_comma(c, ops);
460
461 if (c != NULL) {
462 const char *c2 = strchr(c + 1, ',');
463
464 c2 = validate_comma(c2, ops);
465 /* check for 3-op insn */
466 if (c2 != NULL)
467 c = c2;
468 c++;
469
470 /* mirror arch objdump's space-after-comma style */
471 if (*c == ' ')
472 c++;
473 }
474
475 return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name,
476 ins->name, c ? c - ops->raw : 0, ops->raw,
477 ops->target.offset);
478 }
479
/* ->free callback of jump_ops: intentionally releases nothing. */
static void jump__delete(struct ins_operands *ops __maybe_unused)
{
	/*
	 * ops->jump.raw_comment and ops->jump.raw_func_start only point into
	 * the raw operand string, so they must not be freed here.
	 */
}
487
488 static struct ins_ops jump_ops = {
489 .free = jump__delete,
490 .parse = jump__parse,
491 .scnprintf = jump__scnprintf,
492 };
493
ins__is_jump(const struct ins * ins)494 bool ins__is_jump(const struct ins *ins)
495 {
496 return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops;
497 }
498
/*
 * Extract "<address> <name>" from an objdump comment for an operand that
 * contains "(%rip)".
 *
 * @raw:     the operand text; nothing is done unless it contains "(%rip)"
 * @comment: text to parse, starting with a hexadecimal address
 * @addrp:   receives the parsed address
 * @namep:   receives a strdup()ed copy of the text between '<' and '>'
 *
 * Returns -1 only when an address was parsed but no '<' follows it, and 0
 * in every other case (including when nothing was extracted).
 */
static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
{
	char *after_addr, *sym_name, *closing;

	if (!strstr(raw, "(%rip)"))
		return 0;

	*addrp = strtoull(comment, &after_addr, 16);
	if (after_addr == comment)
		return 0;

	sym_name = strchr(after_addr, '<');
	if (!sym_name)
		return -1;
	sym_name++;

	closing = strchr(sym_name, '>');
	if (!closing)
		return 0;

	/* Cut the string at '>' only for the duration of strdup(). */
	*closing = '\0';
	*namep = strdup(sym_name);
	*closing = '>';

	return 0;
}
525
/*
 * Parse the instruction that follows a "lock" prefix.
 *
 * ops->raw is split into the nested instruction's name
 * (ops->locked.ins.name) and operands (ops->locked.ops->raw), the nested
 * instruction's ops table is looked up and, if it has a ->parse callback,
 * that is run on the nested operands.
 *
 * Always returns 0: a failure is not fatal, it just leaves the locked
 * instruction unparsed.  In that case both ops->locked.ops and
 * ops->locked.ins.ops are NULL, which makes lock__scnprintf() fall back to
 * the raw printer and lock__delete() use the default cleanup.
 */
static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
		struct disasm_line *dl __maybe_unused)
{
	ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
	if (ops->locked.ops == NULL)
		return 0;

	if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
		goto out_free_ops;

	ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, NULL);

	if (ops->locked.ins.ops == NULL)
		goto out_free_ops;

	if (ops->locked.ins.ops->parse &&
	    ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0)
		goto out_free_ops;

	return 0;

out_free_ops:
	zfree(&ops->locked.ops);
	/*
	 * Do not leave an ops table behind without operands to go with it:
	 * lock__scnprintf() keys off locked.ins.ops and would otherwise hand
	 * the NULL locked.ops to the nested printer.
	 */
	ops->locked.ins.ops = NULL;
	return 0;
}
551
lock__scnprintf(struct ins * ins,char * bf,size_t size,struct ins_operands * ops,int max_ins_name)552 static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
553 struct ins_operands *ops, int max_ins_name)
554 {
555 int printed;
556
557 if (ops->locked.ins.ops == NULL)
558 return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
559
560 printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name);
561 return printed + ins__scnprintf(&ops->locked.ins, bf + printed,
562 size - printed, ops->locked.ops, max_ins_name);
563 }
564
lock__delete(struct ins_operands * ops)565 static void lock__delete(struct ins_operands *ops)
566 {
567 struct ins *ins = &ops->locked.ins;
568
569 if (ins->ops && ins->ops->free)
570 ins->ops->free(ops->locked.ops);
571 else
572 ins_ops__delete(ops->locked.ops);
573
574 zfree(&ops->locked.ops);
575 zfree(&ops->locked.ins.name);
576 zfree(&ops->target.raw);
577 zfree(&ops->target.name);
578 }
579
580 static struct ins_ops lock_ops = {
581 .free = lock__delete,
582 .parse = lock__parse,
583 .scnprintf = lock__scnprintf,
584 };
585
586 /*
587 * Check if the operand has more than one registers like x86 SIB addressing:
588 * 0x1234(%rax, %rbx, 8)
589 *
590 * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check
591 * the input string after 'memory_ref_char' if exists.
592 */
check_multi_regs(struct arch * arch,const char * op)593 static bool check_multi_regs(struct arch *arch, const char *op)
594 {
595 int count = 0;
596
597 if (arch->objdump.register_char == 0)
598 return false;
599
600 if (arch->objdump.memory_ref_char) {
601 op = strchr(op, arch->objdump.memory_ref_char);
602 if (op == NULL)
603 return false;
604 }
605
606 while ((op = strchr(op, arch->objdump.register_char)) != NULL) {
607 count++;
608 op++;
609 }
610
611 return count > 1;
612 }
613
/*
 * Split "source,target [comment]" operands.
 *
 * ops->source.raw and ops->target.raw receive duplicated copies of the two
 * operands (the target with trailing whitespace and any comment removed),
 * the multi_regs flags are computed for both, and when a comment is
 * present comment__symbol() is given the chance to fill in address and
 * name for each operand.  ops->raw is modified only temporarily.
 *
 * Returns 0 on success; -1 if there is no comma, if a parenthesised source
 * is not closed by ")," or if an allocation fails.
 */
static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused,
		struct disasm_line *dl __maybe_unused)
{
	char *cut = strchr(ops->raw, ',');
	char *target, *comment, *end, saved;

	if (cut == NULL)
		return -1;

	/* Terminate at the first comma so that only the source is searched. */
	*cut = '\0';

	/*
	 * x86 SIB addressing has something like 0x8(%rax, %rcx, 1)
	 * then it needs to have the closing parenthesis.
	 */
	if (strchr(ops->raw, '(')) {
		*cut = ',';
		cut = strchr(ops->raw, ')');
		if (cut == NULL || cut[1] != ',')
			return -1;
		cut++;
		*cut = '\0';
	}

	ops->source.raw = strdup(ops->raw);
	*cut = ',';

	if (ops->source.raw == NULL)
		return -1;

	ops->source.multi_regs = check_multi_regs(arch, ops->source.raw);

	cut++;
	target = skip_spaces(cut);
	comment = strchr(cut, arch->objdump.comment_char);

	/* Last character that can belong to the target operand. */
	end = (comment != NULL) ? comment - 1 : strchr(cut, '\0') - 1;

	while (end > target && isspace(end[0]))
		--end;
	end++;

	saved = *end;
	*end = '\0';
	ops->target.raw = strdup(target);
	*end = saved;

	if (ops->target.raw == NULL) {
		zfree(&ops->source.raw);
		return -1;
	}

	ops->target.multi_regs = check_multi_regs(arch, ops->target.raw);

	if (comment == NULL)
		return 0;

	comment = skip_spaces(comment);
	comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
	comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);

	return 0;
}
679
mov__scnprintf(struct ins * ins,char * bf,size_t size,struct ins_operands * ops,int max_ins_name)680 static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
681 struct ins_operands *ops, int max_ins_name)
682 {
683 return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name,
684 ops->source.name ?: ops->source.raw,
685 ops->target.name ?: ops->target.raw);
686 }
687
688 static struct ins_ops mov_ops = {
689 .parse = mov__parse,
690 .scnprintf = mov__scnprintf,
691 };
692
/* 9-bit field obtained by dropping the lowest bit of the raw instruction. */
#define PPC_22_30(R)		(((R) >> 1) & 0x1ff)

/* Values named after the XO-form instruction groups listed at arithmetic__parse(). */
#define MINUS_EXT_XO_FORM	234
#define SUB_EXT_XO_FORM		232
#define ADD_ZERO_EXT_XO_FORM	202
#define SUB_ZERO_EXT_XO_FORM	200
698
/* Arithmetic instructions are shown as name plus unparsed operands. */
static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size,
			   struct ins_operands *ops, int max_ins_name)
{
	return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
}
705
706 /*
707 * Sets the fields: multi_regs and "mem_ref".
708 * "mem_ref" is set for ops->source which is later used to
709 * fill the objdump->memory_ref-char field. This ops is currently
710 * used by powerpc and since binary instruction code is used to
711 * extract opcode, regs and offset, no other parsing is needed here.
712 *
713 * Dont set multi regs for 4 cases since it has only one operand
714 * for source:
715 * - Add to Minus One Extended XO-form ( Ex: addme, addmeo )
716 * - Subtract From Minus One Extended XO-form ( Ex: subfme )
717 * - Add to Zero Extended XO-form ( Ex: addze, addzeo )
718 * - Subtract From Zero Extended XO-form ( Ex: subfze )
719 */
arithmetic__parse(struct arch * arch __maybe_unused,struct ins_operands * ops,struct map_symbol * ms __maybe_unused,struct disasm_line * dl)720 static int arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
721 struct map_symbol *ms __maybe_unused, struct disasm_line *dl)
722 {
723 int opcode = PPC_OP(dl->raw.raw_insn);
724
725 ops->source.mem_ref = false;
726 if (opcode == 31) {
727 if ((opcode != MINUS_EXT_XO_FORM) && (opcode != SUB_EXT_XO_FORM) \
728 && (opcode != ADD_ZERO_EXT_XO_FORM) && (opcode != SUB_ZERO_EXT_XO_FORM))
729 ops->source.multi_regs = true;
730 }
731
732 ops->target.mem_ref = false;
733 ops->target.multi_regs = false;
734
735 return 0;
736 }
737
738 static struct ins_ops arithmetic_ops = {
739 .parse = arithmetic__parse,
740 .scnprintf = arithmetic__scnprintf,
741 };
742
/* Load/store instructions are shown as name plus unparsed operands. */
static int load_store__scnprintf(struct ins *ins, char *bf, size_t size,
			   struct ins_operands *ops, int max_ins_name)
{
	return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
}
749
750 /*
751 * Sets the fields: multi_regs and "mem_ref".
752 * "mem_ref" is set for ops->source which is later used to
753 * fill the objdump->memory_ref-char field. This ops is currently
754 * used by powerpc and since binary instruction code is used to
755 * extract opcode, regs and offset, no other parsing is needed here
756 */
load_store__parse(struct arch * arch __maybe_unused,struct ins_operands * ops,struct map_symbol * ms __maybe_unused,struct disasm_line * dl __maybe_unused)757 static int load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
758 struct map_symbol *ms __maybe_unused, struct disasm_line *dl __maybe_unused)
759 {
760 ops->source.mem_ref = true;
761 ops->source.multi_regs = false;
762 /* opcode 31 is of X form */
763 if (PPC_OP(dl->raw.raw_insn) == 31)
764 ops->source.multi_regs = true;
765
766 ops->target.mem_ref = false;
767 ops->target.multi_regs = false;
768
769 return 0;
770 }
771
772 static struct ins_ops load_store_ops = {
773 .parse = load_store__parse,
774 .scnprintf = load_store__scnprintf,
775 };
776
/*
 * Parse a single-operand instruction.
 *
 * The text of ops->raw up to the first whitespace is duplicated into
 * ops->target.raw.  If a comment follows, comment__symbol() may fill in
 * ops->target.addr and ops->target.name from it.
 *
 * Returns 0 on success and -1 when duplicating the operand fails.  @arch
 * is used despite its __maybe_unused tag.
 */
static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused,
		struct disasm_line *dl __maybe_unused)
{
	char *target = ops->raw;
	char *end = ops->raw;
	char *comment, saved;

	while (*end != '\0' && !isspace(*end))
		end++;

	/* Terminate the operand only for the duration of strdup(). */
	saved = *end;
	*end = '\0';
	ops->target.raw = strdup(target);
	*end = saved;

	if (ops->target.raw == NULL)
		return -1;

	comment = strchr(end, arch->objdump.comment_char);
	if (comment != NULL) {
		comment = skip_spaces(comment);
		comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
	}

	return 0;
}
804
dec__scnprintf(struct ins * ins,char * bf,size_t size,struct ins_operands * ops,int max_ins_name)805 static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
806 struct ins_operands *ops, int max_ins_name)
807 {
808 return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
809 ops->target.name ?: ops->target.raw);
810 }
811
812 static struct ins_ops dec_ops = {
813 .parse = dec__parse,
814 .scnprintf = dec__scnprintf,
815 };
816
/*
 * Every instruction using nop_ops is printed as the literal "nop",
 * left-justified in @max_ins_name columns; @ins and @ops are ignored.
 */
static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
			  struct ins_operands *ops __maybe_unused, int max_ins_name)
{
	static const char nop_name[] = "nop";

	return scnprintf(bf, size, "%-*s", max_ins_name, nop_name);
}
822
823 static struct ins_ops nop_ops = {
824 .scnprintf = nop__scnprintf,
825 };
826
827 static struct ins_ops ret_ops = {
828 .scnprintf = ins__raw_scnprintf,
829 };
830
ins__is_nop(const struct ins * ins)831 static bool ins__is_nop(const struct ins *ins)
832 {
833 return ins->ops == &nop_ops;
834 }
835
ins__is_ret(const struct ins * ins)836 bool ins__is_ret(const struct ins *ins)
837 {
838 return ins->ops == &ret_ops;
839 }
840
ins__is_lock(const struct ins * ins)841 bool ins__is_lock(const struct ins *ins)
842 {
843 return ins->ops == &lock_ops;
844 }
845
ins__key_cmp(const void * name,const void * insp)846 static int ins__key_cmp(const void *name, const void *insp)
847 {
848 const struct ins *ins = insp;
849
850 return strcmp(name, ins->name);
851 }
852
ins__cmp(const void * a,const void * b)853 static int ins__cmp(const void *a, const void *b)
854 {
855 const struct ins *ia = a;
856 const struct ins *ib = b;
857
858 return strcmp(ia->name, ib->name);
859 }
860
ins__sort(struct arch * arch)861 static void ins__sort(struct arch *arch)
862 {
863 const int nmemb = arch->nr_instructions;
864
865 qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
866 }
867
__ins__find(struct arch * arch,const char * name,struct disasm_line * dl)868 static struct ins_ops *__ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
869 {
870 struct ins *ins;
871 const int nmemb = arch->nr_instructions;
872
873 if (arch__is(arch, "powerpc")) {
874 /*
875 * For powerpc, identify the instruction ops
876 * from the opcode using raw_insn.
877 */
878 struct ins_ops *ops;
879
880 ops = check_ppc_insn(dl);
881 if (ops)
882 return ops;
883 }
884
885 if (!arch->sorted_instructions) {
886 ins__sort(arch);
887 arch->sorted_instructions = true;
888 }
889
890 ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
891 if (ins)
892 return ins->ops;
893
894 if (arch->insn_suffix) {
895 char tmp[32];
896 char suffix;
897 size_t len = strlen(name);
898
899 if (len == 0 || len >= sizeof(tmp))
900 return NULL;
901
902 suffix = name[len - 1];
903 if (strchr(arch->insn_suffix, suffix) == NULL)
904 return NULL;
905
906 strcpy(tmp, name);
907 tmp[len - 1] = '\0'; /* remove the suffix and check again */
908
909 ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
910 }
911 return ins ? ins->ops : NULL;
912 }
913
ins__find(struct arch * arch,const char * name,struct disasm_line * dl)914 struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
915 {
916 struct ins_ops *ops = __ins__find(arch, name, dl);
917
918 if (!ops && arch->associate_instruction_ops)
919 ops = arch->associate_instruction_ops(arch, name);
920
921 return ops;
922 }
923
disasm_line__init_ins(struct disasm_line * dl,struct arch * arch,struct map_symbol * ms)924 static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
925 {
926 dl->ins.ops = ins__find(arch, dl->ins.name, dl);
927
928 if (!dl->ins.ops)
929 return;
930
931 if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0)
932 dl->ins.ops = NULL;
933 }
934
/*
 * Split a disassembly line into instruction name and operands.
 *
 * @line:  text to parse; leading whitespace is skipped.  The operand part
 *         is trimmed in place by strim().
 * @namep: receives a strdup()ed copy of the first whitespace-delimited
 *         word; the caller owns it.
 * @rawp:  receives a pointer into @line at the trimmed operands.
 *
 * Returns 0 on success, -1 when the line is empty or the name cannot be
 * duplicated.  On failure @line keeps its original content.
 */
static int disasm_line__parse(char *line, const char **namep, char **rawp)
{
	char saved, *name = skip_spaces(line);
	char *end;

	if (name[0] == '\0')
		return -1;

	/* The name has at least one character; find where it ends. */
	end = name + 1;
	while (end[0] != '\0' && !isspace(end[0]))
		++end;
	*rawp = end;

	saved = end[0];
	end[0] = '\0';
	*namep = strdup(name);
	/*
	 * Put the overwritten character back before looking at the result,
	 * so that a failed allocation does not leave the caller's line cut
	 * short after the instruction name.
	 */
	end[0] = saved;

	if (*namep == NULL)
		return -1;

	*rawp = strim(end);

	return 0;
}
962
963 /*
964 * Parses the result captured from symbol__disassemble_*
965 * Example, line read from DSO file in powerpc:
966 * line: 38 01 81 e8
967 * opcode: fetched from arch specific get_opcode_insn
968 * rawp_insn: e8810138
969 *
970 * rawp_insn is used later to extract the reg/offset fields
971 */
972 #define PPC_OP(op) (((op) >> 26) & 0x3F)
973 #define RAW_BYTES 11
974
disasm_line__parse_powerpc(struct disasm_line * dl,struct annotate_args * args)975 static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args)
976 {
977 char *line = dl->al.line;
978 const char **namep = &dl->ins.name;
979 char **rawp = &dl->ops.raw;
980 char *tmp_raw_insn, *name_raw_insn = skip_spaces(line);
981 char *name = skip_spaces(name_raw_insn + RAW_BYTES);
982 int disasm = 0;
983 int ret = 0;
984
985 if (args->options->disassembler_used)
986 disasm = 1;
987
988 if (name_raw_insn[0] == '\0')
989 return -1;
990
991 if (disasm)
992 ret = disasm_line__parse(name, namep, rawp);
993 else
994 *namep = "";
995
996 tmp_raw_insn = strndup(name_raw_insn, 11);
997 if (tmp_raw_insn == NULL)
998 return -1;
999
1000 remove_spaces(tmp_raw_insn);
1001
1002 sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn);
1003 if (disasm)
1004 dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn);
1005
1006 return ret;
1007 }
1008
annotation_line__init(struct annotation_line * al,struct annotate_args * args,int nr)1009 static void annotation_line__init(struct annotation_line *al,
1010 struct annotate_args *args,
1011 int nr)
1012 {
1013 al->offset = args->offset;
1014 al->line = strdup(args->line);
1015 al->line_nr = args->line_nr;
1016 al->fileloc = args->fileloc;
1017 al->data_nr = nr;
1018 }
1019
annotation_line__exit(struct annotation_line * al)1020 static void annotation_line__exit(struct annotation_line *al)
1021 {
1022 zfree_srcline(&al->path);
1023 zfree(&al->line);
1024 zfree(&al->cycles);
1025 zfree(&al->br_cntr);
1026 }
1027
disasm_line_size(int nr)1028 static size_t disasm_line_size(int nr)
1029 {
1030 struct annotation_line *al;
1031
1032 return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
1033 }
1034
1035 /*
1036 * Allocating the disasm annotation line data with
1037 * following structure:
1038 *
1039 * -------------------------------------------
1040 * struct disasm_line | struct annotation_line
1041 * -------------------------------------------
1042 *
1043 * We have 'struct annotation_line' member as last member
1044 * of 'struct disasm_line' to have an easy access.
1045 */
disasm_line__new(struct annotate_args * args)1046 struct disasm_line *disasm_line__new(struct annotate_args *args)
1047 {
1048 struct disasm_line *dl = NULL;
1049 struct annotation *notes = symbol__annotation(args->ms.sym);
1050 int nr = notes->src->nr_events;
1051
1052 dl = zalloc(disasm_line_size(nr));
1053 if (!dl)
1054 return NULL;
1055
1056 annotation_line__init(&dl->al, args, nr);
1057 if (dl->al.line == NULL)
1058 goto out_delete;
1059
1060 if (args->offset != -1) {
1061 if (arch__is(args->arch, "powerpc")) {
1062 if (disasm_line__parse_powerpc(dl, args) < 0)
1063 goto out_free_line;
1064 } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
1065 goto out_free_line;
1066
1067 disasm_line__init_ins(dl, args->arch, &args->ms);
1068 }
1069
1070 return dl;
1071
1072 out_free_line:
1073 zfree(&dl->al.line);
1074 out_delete:
1075 free(dl);
1076 return NULL;
1077 }
1078
disasm_line__free(struct disasm_line * dl)1079 void disasm_line__free(struct disasm_line *dl)
1080 {
1081 if (dl->ins.ops && dl->ins.ops->free)
1082 dl->ins.ops->free(&dl->ops);
1083 else
1084 ins_ops__delete(&dl->ops);
1085 zfree(&dl->ins.name);
1086 annotation_line__exit(&dl->al);
1087 free(dl);
1088 }
1089
disasm_line__scnprintf(struct disasm_line * dl,char * bf,size_t size,bool raw,int max_ins_name)1090 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
1091 {
1092 if (raw || !dl->ins.ops)
1093 return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw);
1094
1095 return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
1096 }
1097
1098 /*
1099 * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
1100 * which looks like following
1101 *
1102 * 0000000000415500 <_init>:
1103 * 415500: sub $0x8,%rsp
1104 * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8>
1105 * 41550b: test %rax,%rax
1106 * 41550e: je 415515 <_init+0x15>
1107 * 415510: callq 416e70 <__gmon_start__@plt>
1108 * 415515: add $0x8,%rsp
1109 * 415519: retq
1110 *
1111 * it will be parsed and saved into struct disasm_line as
1112 * <offset> <name> <ops.raw>
1113 *
1114 * The offset will be a relative offset from the start of the symbol and -1
1115 * means that it's not a disassembly line so should be treated differently.
1116 * The ops.raw part will be parsed further according to type of the instruction.
1117 */
symbol__parse_objdump_line(struct symbol * sym,struct annotate_args * args,char * parsed_line,int * line_nr,char ** fileloc)1118 static int symbol__parse_objdump_line(struct symbol *sym,
1119 struct annotate_args *args,
1120 char *parsed_line, int *line_nr, char **fileloc)
1121 {
1122 struct map *map = args->ms.map;
1123 struct annotation *notes = symbol__annotation(sym);
1124 struct disasm_line *dl;
1125 char *tmp;
1126 s64 line_ip, offset = -1;
1127 regmatch_t match[2];
1128
1129 /* /filename:linenr ? Save line number and ignore. */
1130 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
1131 *line_nr = atoi(parsed_line + match[1].rm_so);
1132 free(*fileloc);
1133 *fileloc = strdup(parsed_line);
1134 return 0;
1135 }
1136
1137 /* Process hex address followed by ':'. */
1138 line_ip = strtoull(parsed_line, &tmp, 16);
1139 if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') {
1140 u64 start = map__rip_2objdump(map, sym->start),
1141 end = map__rip_2objdump(map, sym->end);
1142
1143 offset = line_ip - start;
1144 if ((u64)line_ip < start || (u64)line_ip >= end)
1145 offset = -1;
1146 else
1147 parsed_line = tmp + 1;
1148 }
1149
1150 args->offset = offset;
1151 args->line = parsed_line;
1152 args->line_nr = *line_nr;
1153 args->fileloc = *fileloc;
1154 args->ms.sym = sym;
1155
1156 dl = disasm_line__new(args);
1157 (*line_nr)++;
1158
1159 if (dl == NULL)
1160 return -1;
1161
1162 if (!disasm_line__has_local_offset(dl)) {
1163 dl->ops.target.offset = dl->ops.target.addr -
1164 map__rip_2objdump(map, sym->start);
1165 dl->ops.target.offset_avail = true;
1166 }
1167
1168 /* kcore has no symbols, so add the call target symbol */
1169 if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) {
1170 struct addr_map_symbol target = {
1171 .addr = dl->ops.target.addr,
1172 .ms = { .map = map, },
1173 };
1174
1175 if (!maps__find_ams(args->ms.maps, &target) &&
1176 target.ms.sym->start == target.al_addr)
1177 dl->ops.target.sym = target.ms.sym;
1178 }
1179
1180 annotation_line__add(&dl->al, ¬es->src->source);
1181 return 0;
1182 }
1183
delete_last_nop(struct symbol * sym)1184 static void delete_last_nop(struct symbol *sym)
1185 {
1186 struct annotation *notes = symbol__annotation(sym);
1187 struct list_head *list = ¬es->src->source;
1188 struct disasm_line *dl;
1189
1190 while (!list_empty(list)) {
1191 dl = list_entry(list->prev, struct disasm_line, al.node);
1192
1193 if (dl->ins.ops) {
1194 if (!ins__is_nop(&dl->ins))
1195 return;
1196 } else {
1197 if (!strstr(dl->al.line, " nop ") &&
1198 !strstr(dl->al.line, " nopl ") &&
1199 !strstr(dl->al.line, " nopw "))
1200 return;
1201 }
1202
1203 list_del_init(&dl->al.node);
1204 disasm_line__free(dl);
1205 }
1206 }
1207
symbol__strerror_disassemble(struct map_symbol * ms,int errnum,char * buf,size_t buflen)1208 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen)
1209 {
1210 struct dso *dso = map__dso(ms->map);
1211
1212 BUG_ON(buflen == 0);
1213
1214 if (errnum >= 0) {
1215 str_error_r(errnum, buf, buflen);
1216 return 0;
1217 }
1218
1219 switch (errnum) {
1220 case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
1221 char bf[SBUILD_ID_SIZE + 15] = " with build id ";
1222 char *build_id_msg = NULL;
1223
1224 if (dso__has_build_id(dso)) {
1225 build_id__snprintf(dso__bid(dso), bf + 15, sizeof(bf) - 15);
1226 build_id_msg = bf;
1227 }
1228 scnprintf(buf, buflen,
1229 "No vmlinux file%s\nwas found in the path.\n\n"
1230 "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
1231 "Please use:\n\n"
1232 " perf buildid-cache -vu vmlinux\n\n"
1233 "or:\n\n"
1234 " --vmlinux vmlinux\n", build_id_msg ?: "");
1235 }
1236 break;
1237 case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
1238 scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
1239 break;
1240 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
1241 scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions.");
1242 break;
1243 case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
1244 scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization.");
1245 break;
1246 case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
1247 scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso));
1248 break;
1249 case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
1250 scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
1251 dso__long_name(dso));
1252 break;
1253 case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE:
1254 scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso));
1255 break;
1256 default:
1257 scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
1258 break;
1259 }
1260
1261 return 0;
1262 }
1263
/*
 * Work out which file should be handed to the disassembler for @dso and
 * write its path into @filename (at most @filename_size bytes).
 *
 * The build-id cache entry is preferred.  The DSO's long name (joined with
 * symfs, or tried under the DSO's chroot) is used as a fallback when there is
 * no usable cache entry, when the DSO is kcore, or when the cache entry turns
 * out to be a kallsyms file or is not readable.
 *
 * Returns 0 on success, SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX when only kallsyms
 * symbols are available and the DSO is not kcore, or a positive ENOMEM.
 */
static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
{
	char linkname[PATH_MAX];
	char *build_id_filename;
	char *build_id_path = NULL;
	char *pos;
	int len;

	if (dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS &&
	    !dso__is_kcore(dso))
		return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;

	build_id_filename = dso__build_id_filename(dso, NULL, 0, false);
	if (build_id_filename) {
		__symbol__join_symfs(filename, filename_size, build_id_filename);
		free(build_id_filename);
	} else {
		/* A DSO with a build id but no cache filename is reported as ENOMEM. */
		if (dso__has_build_id(dso))
			return ENOMEM;
		/* build_id_path is still NULL here, so the free() at the end is a no-op. */
		goto fallback;
	}

	build_id_path = strdup(filename);
	if (!build_id_path)
		return ENOMEM;

	/*
	 * old style build-id cache has name of XX/XXXXXXX.. while
	 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}.
	 * extract the build-id part of dirname in the new style only.
	 */
	pos = strrchr(build_id_path, '/');
	/*
	 * NOTE(review): the return value of dirname() is not used, so this
	 * relies on dirname() shortening build_id_path in place - confirm for
	 * the libc in use.
	 */
	if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
		dirname(build_id_path);

	if (dso__is_kcore(dso))
		goto fallback;

	/* Leave room for the terminator, readlink() does not add one. */
	len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
	if (len < 0)
		goto fallback;

	linkname[len] = '\0';
	if (strstr(linkname, DSO__NAME_KALLSYMS) ||
		access(filename, R_OK)) {
fallback:
		/*
		 * If we don't have build-ids or the build-id file isn't in the
		 * cache, or is just a kallsyms file, well, lets hope that this
		 * DSO is the same as when 'perf record' ran.
		 */
		if (dso__kernel(dso) && dso__long_name(dso)[0] == '/')
			snprintf(filename, filename_size, "%s", dso__long_name(dso));
		else
			__symbol__join_symfs(filename, filename_size, dso__long_name(dso));

		/*
		 * If the file does not exist and the DSO has namespace info,
		 * try the path returned by dso__filename_with_chroot().
		 */
		mutex_lock(dso__lock(dso));
		if (access(filename, R_OK) && errno == ENOENT && dso__nsinfo(dso)) {
			char *new_name = dso__filename_with_chroot(dso, filename);
			if (new_name) {
				strlcpy(filename, new_name, filename_size);
				free(new_name);
			}
		}
		mutex_unlock(dso__lock(dso));
	} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
		/* The cache entry is usable: record that as the binary type. */
		dso__set_binary_type(dso, DSO_BINARY_TYPE__BUILD_ID_CACHE);
	}

	free(build_id_path);
	return 0;
}
1336
symbol__disassemble_raw(char * filename,struct symbol * sym,struct annotate_args * args)1337 static int symbol__disassemble_raw(char *filename, struct symbol *sym,
1338 struct annotate_args *args)
1339 {
1340 struct annotation *notes = symbol__annotation(sym);
1341 struct map *map = args->ms.map;
1342 struct dso *dso = map__dso(map);
1343 u64 start = map__rip_2objdump(map, sym->start);
1344 u64 end = map__rip_2objdump(map, sym->end);
1345 u64 len = end - start;
1346 u64 offset;
1347 int i, count;
1348 u8 *buf = NULL;
1349 char disasm_buf[512];
1350 struct disasm_line *dl;
1351 u32 *line;
1352
1353 /* Return if objdump is specified explicitly */
1354 if (args->options->objdump_path)
1355 return -1;
1356
1357 pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename);
1358
1359 buf = malloc(len);
1360 if (buf == NULL)
1361 goto err;
1362
1363 count = dso__data_read_offset(dso, NULL, sym->start, buf, len);
1364
1365 line = (u32 *)buf;
1366
1367 if ((u64)count != len)
1368 goto err;
1369
1370 /* add the function address and name */
1371 scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
1372 start, sym->name);
1373
1374 args->offset = -1;
1375 args->line = disasm_buf;
1376 args->line_nr = 0;
1377 args->fileloc = NULL;
1378 args->ms.sym = sym;
1379
1380 dl = disasm_line__new(args);
1381 if (dl == NULL)
1382 goto err;
1383
1384 annotation_line__add(&dl->al, ¬es->src->source);
1385
1386 /* Each raw instruction is 4 byte */
1387 count = len/4;
1388
1389 for (i = 0, offset = 0; i < count; i++) {
1390 args->offset = offset;
1391 sprintf(args->line, "%x", line[i]);
1392 dl = disasm_line__new(args);
1393 if (dl == NULL)
1394 break;
1395
1396 annotation_line__add(&dl->al, ¬es->src->source);
1397 offset += 4;
1398 }
1399
1400 /* It failed in the middle */
1401 if (offset != len) {
1402 struct list_head *list = ¬es->src->source;
1403
1404 /* Discard all lines and fallback to objdump */
1405 while (!list_empty(list)) {
1406 dl = list_first_entry(list, struct disasm_line, al.node);
1407
1408 list_del_init(&dl->al.node);
1409 disasm_line__free(dl);
1410 }
1411 count = -1;
1412 }
1413
1414 out:
1415 free(buf);
1416 return count < 0 ? count : 0;
1417
1418 err:
1419 count = -1;
1420 goto out;
1421 }
1422
/*
 * Return @line with every tab replaced by spaces up to the next multiple of
 * eight columns.
 *
 * A line without tabs is returned unchanged and @storage is left alone.
 * Otherwise a new buffer is allocated, the old *storage is freed, and
 * *storage / *storage_len are updated to describe the new buffer, which is
 * also the return value.  NULL is returned if the allocation fails.
 */
char *expand_tabs(char *line, char **storage, size_t *storage_len)
{
	const size_t in_len = strlen(line);
	size_t tabs = 0, out = 0, alloc_len;
	const char *p;
	char *expanded;

	for (p = line; *p != '\0'; p++) {
		if (*p == '\t')
			tabs++;
	}

	if (!tabs)
		return line;

	/*
	 * Room for the line and its '\0'. A tab already occupies one byte
	 * and grows to at most eight spaces, i.e. 7 extra bytes per tab.
	 */
	alloc_len = in_len + 1 + tabs * 7;

	expanded = malloc(alloc_len);
	if (!expanded) {
		pr_err("Failure allocating memory for tab expansion\n");
		return NULL;
	}

	for (p = line; *p != '\0'; p++) {
		if (*p != '\t') {
			expanded[out++] = *p;
			continue;
		}

		/* Always at least one space, then pad to the tab stop. */
		do {
			expanded[out++] = ' ';
		} while (out % 8 != 0);
	}
	expanded[out] = '\0';

	/* @line may point into *storage, so free it only after copying. */
	free(*storage);
	*storage = expanded;
	*storage_len = alloc_len;
	return expanded;
}
1482
symbol__disassemble_bpf_image(struct symbol * sym,struct annotate_args * args)1483 static int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args)
1484 {
1485 struct annotation *notes = symbol__annotation(sym);
1486 struct disasm_line *dl;
1487
1488 args->offset = -1;
1489 args->line = strdup("to be implemented");
1490 args->line_nr = 0;
1491 args->fileloc = NULL;
1492 dl = disasm_line__new(args);
1493 if (dl)
1494 annotation_line__add(&dl->al, ¬es->src->source);
1495
1496 zfree(&args->line);
1497 return 0;
1498 }
1499
symbol__disassemble_objdump(const char * filename,struct symbol * sym,struct annotate_args * args)1500 static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
1501 struct annotate_args *args)
1502 {
1503 struct annotation_options *opts = &annotate_opts;
1504 struct map *map = args->ms.map;
1505 struct dso *dso = map__dso(map);
1506 char *command;
1507 FILE *file;
1508 int lineno = 0;
1509 char *fileloc = NULL;
1510 int nline;
1511 char *line;
1512 size_t line_len;
1513 const char *objdump_argv[] = {
1514 "/bin/sh",
1515 "-c",
1516 NULL, /* Will be the objdump command to run. */
1517 "--",
1518 NULL, /* Will be the symfs path. */
1519 NULL,
1520 };
1521 struct child_process objdump_process;
1522 int err;
1523
1524 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO)
1525 return symbol__disassemble_bpf_libbfd(sym, args);
1526
1527 if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE)
1528 return symbol__disassemble_bpf_image(sym, args);
1529
1530 err = asprintf(&command,
1531 "%s %s%s --start-address=0x%016" PRIx64
1532 " --stop-address=0x%016" PRIx64
1533 " %s -d %s %s %s %c%s%c %s%s -C \"$1\"",
1534 opts->objdump_path ?: "objdump",
1535 opts->disassembler_style ? "-M " : "",
1536 opts->disassembler_style ?: "",
1537 map__rip_2objdump(map, sym->start),
1538 map__rip_2objdump(map, sym->end),
1539 opts->show_linenr ? "-l" : "",
1540 opts->show_asm_raw ? "" : "--no-show-raw-insn",
1541 opts->annotate_src ? "-S" : "",
1542 opts->prefix ? "--prefix " : "",
1543 opts->prefix ? '"' : ' ',
1544 opts->prefix ?: "",
1545 opts->prefix ? '"' : ' ',
1546 opts->prefix_strip ? "--prefix-strip=" : "",
1547 opts->prefix_strip ?: "");
1548
1549 if (err < 0) {
1550 pr_err("Failure allocating memory for the command to run\n");
1551 return err;
1552 }
1553
1554 pr_debug("Executing: %s\n", command);
1555
1556 objdump_argv[2] = command;
1557 objdump_argv[4] = filename;
1558
1559 /* Create a pipe to read from for stdout */
1560 memset(&objdump_process, 0, sizeof(objdump_process));
1561 objdump_process.argv = objdump_argv;
1562 objdump_process.out = -1;
1563 objdump_process.err = -1;
1564 objdump_process.no_stderr = 1;
1565 if (start_command(&objdump_process)) {
1566 pr_err("Failure starting to run %s\n", command);
1567 err = -1;
1568 goto out_free_command;
1569 }
1570
1571 file = fdopen(objdump_process.out, "r");
1572 if (!file) {
1573 pr_err("Failure creating FILE stream for %s\n", command);
1574 /*
1575 * If we were using debug info should retry with
1576 * original binary.
1577 */
1578 err = -1;
1579 goto out_close_stdout;
1580 }
1581
1582 /* Storage for getline. */
1583 line = NULL;
1584 line_len = 0;
1585
1586 nline = 0;
1587 while (!feof(file)) {
1588 const char *match;
1589 char *expanded_line;
1590
1591 if (getline(&line, &line_len, file) < 0 || !line)
1592 break;
1593
1594 /* Skip lines containing "filename:" */
1595 match = strstr(line, filename);
1596 if (match && match[strlen(filename)] == ':')
1597 continue;
1598
1599 expanded_line = strim(line);
1600 expanded_line = expand_tabs(expanded_line, &line, &line_len);
1601 if (!expanded_line)
1602 break;
1603
1604 /*
1605 * The source code line number (lineno) needs to be kept in
1606 * across calls to symbol__parse_objdump_line(), so that it
1607 * can associate it with the instructions till the next one.
1608 * See disasm_line__new() and struct disasm_line::line_nr.
1609 */
1610 if (symbol__parse_objdump_line(sym, args, expanded_line,
1611 &lineno, &fileloc) < 0)
1612 break;
1613 nline++;
1614 }
1615 free(line);
1616 free(fileloc);
1617
1618 err = finish_command(&objdump_process);
1619 if (err)
1620 pr_err("Error running %s\n", command);
1621
1622 if (nline == 0) {
1623 err = -1;
1624 pr_err("No output from %s\n", command);
1625 }
1626
1627 /*
1628 * kallsyms does not have symbol sizes so there may a nop at the end.
1629 * Remove it.
1630 */
1631 if (dso__is_kcore(dso))
1632 delete_last_nop(sym);
1633
1634 fclose(file);
1635
1636 out_close_stdout:
1637 close(objdump_process.out);
1638
1639 out_free_command:
1640 free(command);
1641 return err;
1642 }
1643
symbol__disassemble(struct symbol * sym,struct annotate_args * args)1644 int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
1645 {
1646 struct annotation_options *options = args->options;
1647 struct map *map = args->ms.map;
1648 struct dso *dso = map__dso(map);
1649 char symfs_filename[PATH_MAX];
1650 bool delete_extract = false;
1651 struct kcore_extract kce;
1652 bool decomp = false;
1653 int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
1654
1655 if (err)
1656 return err;
1657
1658 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
1659 symfs_filename, sym->name, map__unmap_ip(map, sym->start),
1660 map__unmap_ip(map, sym->end));
1661
1662 pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name);
1663
1664 if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
1665 return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE;
1666 } else if (dso__is_kcore(dso)) {
1667 kce.addr = map__rip_2objdump(map, sym->start);
1668 kce.kcore_filename = symfs_filename;
1669 kce.len = sym->end - sym->start;
1670 kce.offs = sym->start;
1671
1672 if (!kcore_extract__create(&kce)) {
1673 delete_extract = true;
1674 strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename));
1675 }
1676 } else if (dso__needs_decompress(dso)) {
1677 char tmp[KMOD_DECOMP_LEN];
1678
1679 if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0)
1680 return -1;
1681
1682 decomp = true;
1683 strcpy(symfs_filename, tmp);
1684 }
1685
1686 /*
1687 * For powerpc data type profiling, use the dso__data_read_offset to
1688 * read raw instruction directly and interpret the binary code to
1689 * understand instructions and register fields. For sort keys as type
1690 * and typeoff, disassemble to mnemonic notation is not required in
1691 * case of powerpc.
1692 */
1693 if (arch__is(args->arch, "powerpc")) {
1694 extern const char *sort_order;
1695
1696 if (sort_order && !strstr(sort_order, "sym")) {
1697 err = symbol__disassemble_raw(symfs_filename, sym, args);
1698 if (err == 0)
1699 goto out_remove_tmp;
1700
1701 err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
1702 if (err == 0)
1703 goto out_remove_tmp;
1704 }
1705 }
1706
1707 /* FIXME: LLVM and CAPSTONE should support source code */
1708 if (options->annotate_src && !options->hide_src_code) {
1709 err = symbol__disassemble_objdump(symfs_filename, sym, args);
1710 if (err == 0)
1711 goto out_remove_tmp;
1712 }
1713
1714 err = -1;
1715 for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) {
1716 enum perf_disassembler dis = options->disassemblers[i];
1717
1718 switch (dis) {
1719 case PERF_DISASM_LLVM:
1720 args->options->disassembler_used = PERF_DISASM_LLVM;
1721 err = symbol__disassemble_llvm(symfs_filename, sym, args);
1722 break;
1723 case PERF_DISASM_CAPSTONE:
1724 args->options->disassembler_used = PERF_DISASM_CAPSTONE;
1725 err = symbol__disassemble_capstone(symfs_filename, sym, args);
1726 break;
1727 case PERF_DISASM_OBJDUMP:
1728 args->options->disassembler_used = PERF_DISASM_OBJDUMP;
1729 err = symbol__disassemble_objdump(symfs_filename, sym, args);
1730 break;
1731 case PERF_DISASM_UNKNOWN: /* End of disassemblers. */
1732 default:
1733 args->options->disassembler_used = PERF_DISASM_UNKNOWN;
1734 goto out_remove_tmp;
1735 }
1736 if (err == 0)
1737 pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]);
1738 }
1739 out_remove_tmp:
1740 if (decomp)
1741 unlink(symfs_filename);
1742
1743 if (delete_extract)
1744 kcore_extract__delete(&kce);
1745
1746 return err;
1747 }
1748