/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "thread-stack.h"

#define CALL_PATH_BLOCK_SHIFT 8
#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)

struct call_path_block {
	struct call_path cp[CALL_PATH_BLOCK_SIZE];
	struct list_head node;
};

/**
 * struct call_path_root - root of all call paths.
 * @call_path: root call path
 * @blocks: list of blocks to store call paths
 * @next: next free space
 * @sz: number of spaces
 */
struct call_path_root {
	struct call_path call_path;
	struct list_head blocks;
	size_t next;
	size_t sz;
};

/**
 * struct call_return_processor - provides a call-back to consume call-return
 *                                information.
 * @cpr: call path root
 * @process: call-back that accepts call/return information
 * @data: anonymous data for call-back
 */
struct call_return_processor {
	struct call_path_root *cpr;
	int (*process)(struct call_return *cr, void *data);
	void *data;
};

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
};

static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts;

	ts = zalloc(sizeof(struct thread_stack));
	if (!ts)
		return NULL;

	if (thread_stack__grow(ts)) {
		free(ts);
		return NULL;
	}

	if (thread->mg && thread->mg->machine)
		ts->kernel_start = machine__kernel_start(thread->mg->machine);
	else
		ts->kernel_start = 1ULL << 63;
	ts->crp = crp;

	return ts;
}

static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path. When that happens the return address will be
	 * further down the stack. If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;

	return crp->process(&cr, crp->data);
}

static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

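/*
 * thread_stack__event() maintains a simple stack of return addresses for use
 * by thread_stack__sample(). If a call_return_processor has been installed
 * via thread_stack__process(), that mechanism supersedes this one and the
 * code below only handles trace discontinuities.
 */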
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	if (!thread)
		return -EINVAL;

	if (!thread->ts) {
		thread->ts = thread_stack__new(thread, NULL);
		if (!thread->ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		thread->ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (thread->ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(thread->ts, ret_addr);
	} else if (flags & PERF_IP_FLAG_RETURN) {
		if (!from_ip)
			return 0;
		thread_stack__pop(thread->ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
	if (!thread || !thread->ts)
		return;

	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}
}

void thread_stack__free(struct thread *thread)
{
	if (thread->ts) {
		thread_stack__flush(thread, thread->ts);
		zfree(&thread->ts->stack);
		zfree(&thread->ts);
	}
}

void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
			  size_t sz, u64 ip)
{
	size_t i;

	if (!thread || !thread->ts)
		chain->nr = 1;
	else
		chain->nr = min(sz, thread->ts->cnt + 1);

	chain->ips[0] = ip;

	for (i = 1; i < chain->nr; i++)
		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
}

static void call_path__init(struct call_path *cp, struct call_path *parent,
			    struct symbol *sym, u64 ip, bool in_kernel)
{
	cp->parent = parent;
	cp->sym = sym;
	cp->ip = sym ? 0 : ip;
	cp->db_id = 0;
	cp->in_kernel = in_kernel;
	RB_CLEAR_NODE(&cp->rb_node);
	cp->children = RB_ROOT;
}

static struct call_path_root *call_path_root__new(void)
{
	struct call_path_root *cpr;

	cpr = zalloc(sizeof(struct call_path_root));
	if (!cpr)
		return NULL;
	call_path__init(&cpr->call_path, NULL, NULL, 0, false);
	INIT_LIST_HEAD(&cpr->blocks);
	return cpr;
}

static void call_path_root__free(struct call_path_root *cpr)
{
	struct call_path_block *pos, *n;

	list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
		list_del(&pos->node);
		free(pos);
	}
	free(cpr);
}

static struct call_path *call_path__new(struct call_path_root *cpr,
					struct call_path *parent,
					struct symbol *sym, u64 ip,
					bool in_kernel)
{
	struct call_path_block *cpb;
	struct call_path *cp;
	size_t n;

	if (cpr->next < cpr->sz) {
		cpb = list_last_entry(&cpr->blocks, struct call_path_block,
				      node);
	} else {
		cpb = zalloc(sizeof(struct call_path_block));
		if (!cpb)
			return NULL;
		list_add_tail(&cpb->node, &cpr->blocks);
		cpr->sz += CALL_PATH_BLOCK_SIZE;
	}

	n = cpr->next++ & CALL_PATH_BLOCK_MASK;
	cp = &cpb->cp[n];

	call_path__init(cp, parent, sym, ip, in_kernel);

	return cp;
}

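/*
 * Find the child of 'parent' with the given symbol and ip in the parent's
 * rbtree of children, creating it if it does not exist. When a symbol is
 * known the ip is ignored (zeroed), so all call sites resolving to the same
 * function share one call path node.
 */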
static struct call_path *call_path__findnew(struct call_path_root *cpr,
					    struct call_path *parent,
					    struct symbol *sym, u64 ip, u64 ks)
{
	struct rb_node **p;
	struct rb_node *node_parent = NULL;
	struct call_path *cp;
	bool in_kernel = ip >= ks;

	if (sym)
		ip = 0;

	if (!parent)
		return call_path__new(cpr, parent, sym, ip, in_kernel);

	p = &parent->children.rb_node;
	while (*p != NULL) {
		node_parent = *p;
		cp = rb_entry(node_parent, struct call_path, rb_node);

		if (cp->sym == sym && cp->ip == ip)
			return cp;

		if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	cp = call_path__new(cpr, parent, sym, ip, in_kernel);
	if (!cp)
		return NULL;

	rb_link_node(&cp->rb_node, node_parent, p);
	rb_insert_color(&cp->rb_node, &parent->children);

	return cp;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call)
{
	struct thread_stack_entry *tse;
	int err;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;

	return 0;
}

static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
				     true);
}

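/*
 * Handle a 'return' for which no matching 'call' was seen, e.g. because
 * tracing started in the middle of a function or because the return crosses
 * from the kernel back to user space. Kernel entries are popped first if
 * needed, then a synthetic entry is pushed and immediately popped so that
 * the call-return is still reported, flagged with CALL_RETURN_NO_CALL.
 */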
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	int err;

	if (sample->ip >= ks && sample->addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, &cpr->call_path,
						to_al->sym, sample->addr,
						ts->kernel_start);
			if (!cp)
				return -ENOMEM;
			return thread_stack__push_cp(ts, 0, sample->time, ref,
						     cp, true);
		}
	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = &cpr->call_path;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
				    true);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
				    to_al->sym);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false);
}

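/*
 * Process one branch sample for call/return synthesis: allocate or reset the
 * thread stack as needed, flush it on exec, seed the bottom of the stack from
 * the first sample, and then interpret the sample flags as a call, return,
 * trace begin or trace end.
 */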
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts;
	int err = 0;

	if (ts) {
		if (!ts->crp) {
			/* Supersede thread_stack__event() */
			thread_stack__free(thread);
			thread->ts = thread_stack__new(thread, crp);
			if (!thread->ts)
				return -ENOMEM;
			ts = thread->ts;
			ts->comm = comm;
		}
	} else {
		thread->ts = thread_stack__new(thread, crp);
		if (!thread->ts)
			return -ENOMEM;
		ts = thread->ts;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
					   ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		if (!cp)
			return -ENOMEM;
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	}

	return err;
}
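/*
 * Usage sketch (illustrative only, not part of the original file): a consumer
 * that wants synthesized call-return information registers a callback and
 * feeds every branch sample through thread_stack__process(). The callback
 * name 'export_call_return' and the 'db' cookie below are hypothetical.
 *
 *	static int export_call_return(struct call_return *cr, void *db)
 *	{
 *		...record cr->cp, cr->call_time, cr->return_time, etc...
 *		return 0;
 *	}
 *
 *	crp = call_return_processor__new(export_call_return, db);
 *	...
 *	for each branch sample:
 *		err = thread_stack__process(thread, comm, sample, from_al,
 *					    to_al, ref, crp);
 *	...
 *	call_return_processor__free(crp);
 */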