/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "env.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/*
 * State of retpoline detection.
 *
 * RETPOLINE_NONE: no retpoline detection
 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
 * X86_RETPOLINE_DETECTED: x86 retpoline detected
 */
enum retpoline_state_t {
	RETPOLINE_NONE,
	X86_RETPOLINE_POSSIBLE,
	X86_RETPOLINE_DETECTED,
};

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
	bool non_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 * @rstate: used to detect retpolines
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
	enum retpoline_state_t rstate;
};
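
/*
 * Example (illustrative only, not part of the build): the running
 * branch_count is snapshotted into each entry when it is pushed, so the
 * number of branches executed inside a call/return region is obtained by a
 * simple subtraction when the entry is popped:
 *
 *	branches_in_region = ts->branch_count - tse->branch_count;
 *
 * This is exactly what thread_stack__call_return() below reports in
 * cr.branch_count.
 */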

/*
 * Assume pid == tid == 0 identifies the idle task as defined by
 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
 * and therefore requires a stack for each cpu.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return !(thread->tid || thread->pid_);
}

static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp)
{
	int err;

	err = thread_stack__grow(ts);
	if (err)
		return err;

	if (thread->mg && thread->mg->machine) {
		struct machine *machine = thread->mg->machine;
		const char *arch = perf_env__arch(machine->env);

		ts->kernel_start = machine__kernel_start(machine);
		if (!strcmp(arch, "x86"))
			ts->rstate = X86_RETPOLINE_POSSIBLE;
	} else {
		ts->kernel_start = 1ULL << 63;
	}
	ts->crp = crp;

	return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts, *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = 1;

	if (thread_stack__per_cpu(thread) && cpu > 0)
		new_sz = roundup_pow_of_two(cpu + 1);

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;
		zfree(&thread->ts);
		thread->ts = new_ts;
		ts = new_ts;
	}

	if (thread_stack__per_cpu(thread) && cpu > 0 &&
	    (unsigned int)cpu < ts->arr_sz)
		ts += cpu;

	if (!ts->stack &&
	    thread_stack__init(ts, thread, crp))
		return NULL;

	return ts;
}

static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread->ts;

	if (cpu < 0)
		cpu = 0;

	if (!ts || (unsigned int)cpu >= ts->arr_sz)
		return NULL;

	ts += cpu;

	if (!ts->stack)
		return NULL;

	return ts;
}

static inline struct thread_stack *thread__stack(struct thread *thread,
						 int cpu)
{
	if (!thread)
		return NULL;

	if (thread_stack__per_cpu(thread))
		return thread__cpu_stack(thread, cpu);

	return thread->ts;
}

static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}
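
/*
 * Example (illustrative only): for a branch with PERF_IP_FLAG_CALL, the
 * address pushed is that of the instruction following the call, and a later
 * PERF_IP_FLAG_RETURN branch is matched against it by its target address:
 *
 *	thread_stack__push(ts, from_ip + insn_len, false);
 *	...
 *	thread_stack__pop(ts, to_ip);
 *
 * where to_ip of the 'return' equals the saved return address.
 * thread_stack__event() below follows exactly this pattern.
 */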

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path. When that happens the return address will be
	 * further down the stack. If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;
	if (tse->non_call)
		cr.flags |= CALL_RETURN_NON_CALL;

	return crp->process(&cr, crp->data);
}

static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}

int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the stack.
		 * Possibly, tracing began after returning to the current
		 * address, so try to pop that. Also, a call made when the
		 * trace ended is not expected to return, so pop that too.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}
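
/*
 * Example (illustrative, hypothetical caller): a decoder that produces one
 * branch record per sample might feed the thread stack like this; 'sample'
 * and 'trace_nr' stand for whatever state that caller keeps and are not
 * defined in this file:
 *
 *	err = thread_stack__event(thread, sample->cpu, sample->flags,
 *				  sample->ip, sample->addr,
 *				  sample->insn_len, trace_nr);
 *	if (err)
 *		return err;
 */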

void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		zfree(&thread->ts);
	}
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}
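
/*
 * Example (illustrative, hypothetical callback): a consumer that wants
 * call/return events allocates a processor once and then passes it to
 * thread_stack__process() for every branch sample. The callback name, the
 * 'struct my_db' type and my_db__export_call_return() below are made up for
 * illustration only:
 *
 *	static int process_call_return(struct call_return *cr, void *data)
 *	{
 *		struct my_db *db = data;
 *
 *		return my_db__export_call_return(db, cr);
 *	}
 *
 *	crp = call_return_processor__new(process_call_return, db);
 *	if (!crp)
 *		return -ENOMEM;
 */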

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (!cp)
		return -ENOMEM;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;
	tse->non_call = false;

	return 0;
}

static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
	    !ts->stack[ts->cnt - 1].non_call) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr ||
			    ts->stack[i].non_call)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = root;

	if (parent->sym == from_al->sym) {
		/*
		 * At the bottom of the stack, assume the missing 'call' was
		 * before the trace started. So, pop the current symbol and push
		 * the 'to' symbol.
		 */
		if (ts->cnt == 1) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, false);
			if (err)
				return err;
		}

		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);

			return thread_stack__push_cp(ts, addr, tm, ref, cp,
						     true, false);
		}

		/*
		 * Otherwise assume the 'return' is being used as a jump (e.g.
		 * retpoline) and just push the 'to' symbol.
		 */
		cp = call_path__findnew(cpr, parent, tsym, addr, ks);

		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;

		return err;
	}

	/*
	 * Assume 'parent' has not yet returned, so push 'to', and then push and
	 * pop 'from'.
	 */

	cp = call_path__findnew(cpr, parent, tsym, addr, ks);

	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
	if (err)
		return err;

	cp = call_path__findnew(cpr, cp, fsym, ip, ks);

	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
	if (err)
		return err;

	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}

static bool is_x86_retpoline(const char *name)
{
	const char *p = strstr(name, "__x86_indirect_thunk_");

	return p == name || !strcmp(name, "__indirect_thunk_start");
}
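
/*
 * Example (illustrative): because strstr()'s result is compared against
 * 'name' itself, only names that begin with the thunk prefix match:
 *
 *	is_x86_retpoline("__x86_indirect_thunk_rax")	-> true
 *	is_x86_retpoline("__indirect_thunk_start")	-> true
 *	is_x86_retpoline("my__x86_indirect_thunk_rax")	-> false
 */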

/*
 * x86 retpoline functions pollute the call graph. This function removes them.
 * This does not handle function return thunks, nor is there any improvement
 * for the handling of inline thunks or extern thunks.
 */
static int thread_stack__x86_retpoline(struct thread_stack *ts,
				       struct perf_sample *sample,
				       struct addr_location *to_al)
{
	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
	struct call_path_root *cpr = ts->crp->cpr;
	struct symbol *sym = tse->cp->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp;

	if (sym && is_x86_retpoline(sym->name)) {
		/*
		 * This is an x86 retpoline function. It pollutes the call
		 * graph by showing up everywhere there is an indirect branch,
		 * but does not itself mean anything. Here the top-of-stack is
		 * removed by decrementing the stack count, and then, further
		 * down, the resulting top-of-stack is replaced with the actual
		 * target. The result is that the retpoline functions will no
		 * longer appear in the call graph. Note this only affects the
		 * call graph, since all the original branches are left
		 * unchanged.
		 */
		ts->cnt -= 1;
		sym = ts->stack[ts->cnt - 2].cp->sym;
		if (sym && sym == tsym && to_al->addr != tsym->start) {
			/*
			 * Target is back to the middle of the symbol we came
			 * from so assume it is an indirect jmp and forget it
			 * altogether.
			 */
			ts->cnt -= 1;
			return 0;
		}
	} else if (sym && sym == tsym) {
		/*
		 * Target is back to the symbol we came from so assume it is an
		 * indirect jmp and forget it altogether.
		 */
		ts->cnt -= 1;
		return 0;
	}

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
				sample->addr, ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	/* Replace the top-of-stack with the actual target */
	ts->stack[ts->cnt - 1].cp = cp;

	return 0;
}
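
/*
 * Example (illustrative, hypothetical symbol names): without this pass, an
 * indirect call through a retpoline would show up in the synthesized call
 * graph as
 *
 *	main -> __x86_indirect_thunk_rax -> foo
 *
 * With the thunk entry removed and the top-of-stack retargeted, the call
 * graph instead reads
 *
 *	main -> foo
 *
 * Only the call graph is affected; the branch records themselves are left
 * unchanged.
 */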

int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	enum retpoline_state_t rstate;
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	rstate = ts->rstate;
	if (rstate == X86_RETPOLINE_DETECTED)
		ts->rstate = X86_RETPOLINE_POSSIBLE;

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);

		/*
		 * A call to the same symbol, but not to the start of the
		 * symbol, may be the start of an x86 retpoline.
		 */
		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
		    from_al->sym == to_al->sym &&
		    to_al->addr != to_al->sym->start)
			ts->rstate = X86_RETPOLINE_DETECTED;

	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		/* x86 retpoline 'return' doesn't match the stack */
		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
			return thread_stack__x86_retpoline(ts, sample, to_al);

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
		   from_al->sym != to_al->sym && to_al->sym &&
		   to_al->addr == to_al->sym->start) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by making
		 * it a jmp. Make that visible by recording on the stack a
		 * branch to the start of a different symbol. Note, that means
		 * when a ret pops the stack, all jmps must be popped off first.
		 */
		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}
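
/*
 * Example (illustrative, hypothetical caller): putting the pieces together,
 * a tool that wants call/return events resolves the 'from' and 'to'
 * addresses of each branch sample and then calls:
 *
 *	err = thread_stack__process(thread, comm, sample, &from_al, &to_al,
 *				    db_id, crp);
 *
 * where 'crp' came from call_return_processor__new() and 'db_id' is whatever
 * reference the caller wants echoed back in cr->call_ref / cr->return_ref.
 * Callers that only need a synthesized callchain, not call/return events,
 * use thread_stack__event() and thread_stack__sample() instead.
 */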