/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "builtin.h"

#include "perf.h"

#include "util/color.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/debug.h"

#include <assert.h>
#include <fcntl.h>

#include <stdio.h>
#include <termios.h>
#include <unistd.h>

#include <errno.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

/* Counter file descriptors, indexed [cpu slot][counter]; filled by start_counter(). */
static int			fd[MAX_NR_CPUS][MAX_COUNTERS];

/* -a: system-wide collection from all CPUs. */
static int			system_wide			=      0;

/* -c: event period to sample; when non-zero it overrides the -F frequency. */
static int			default_interval		=      0;

/* -f / [f] key: rows whose snapshot count is below this are not displayed. */
static int			count_filter			=      5;
/* -E / [e] key: number of rows to display; 0 means "derive from terminal height". */
static int			print_entries;

/* -p: existing pid to profile, -1 when unset. */
static int			target_pid			=     -1;
/* -i: child tasks inherit counters. */
static int			inherit				=      0;
/* -C: CPU to profile on, -1 when unset. */
static int			profile_cpu			=     -1;
/* Number of CPU slots counters are opened for; set in cmd_top(). */
static int			nr_cpus				=      0;
/* -r: SCHED_FIFO priority to collect data with; 0 leaves scheduling alone. */
static unsigned int		realtime_prio			=      0;
/* -g: put the counters into a counter group. */
static int			group				=      0;
/* sysconf(_SC_PAGE_SIZE), read in cmd_top(). */
static unsigned int		page_size;
/* -m: number of mmap data pages per counter. */
static unsigned int		mmap_pages			=     16;
/* -F: sampling frequency. */
static int			freq				=   1000; /* 1 KHz */

/* -d / [d] key: seconds to delay between refreshes. */
static int			delay_secs			=      2;
/* -z / [z] key: zero history across updates instead of decaying it. */
static int			zero				=      0;
/* -D: dump the symbol table used for profiling when quitting. */
static int			dump_symtab			=      0;

/* -K / [K] key and -U / [U] key: hide kernel / user symbols. */
static bool			hide_kernel_symbols		=  false;
static bool			hide_user_symbols		=  false;
/* Terminal dimensions, refreshed by get_term_dimensions(). */
static struct winsize		winsize;
83 84 /* 85 * Source 86 */ 87 88 struct source_line { 89 u64 eip; 90 unsigned long count[MAX_COUNTERS]; 91 char *line; 92 struct source_line *next; 93 }; 94 95 static char *sym_filter = NULL; 96 struct sym_entry *sym_filter_entry = NULL; 97 struct sym_entry *sym_filter_entry_sched = NULL; 98 static int sym_pcnt_filter = 5; 99 static int sym_counter = 0; 100 static int display_weighted = -1; 101 102 /* 103 * Symbols 104 */ 105 106 struct sym_entry_source { 107 struct source_line *source; 108 struct source_line *lines; 109 struct source_line **lines_tail; 110 pthread_mutex_t lock; 111 }; 112 113 struct sym_entry { 114 struct rb_node rb_node; 115 struct list_head node; 116 unsigned long snap_count; 117 double weight; 118 int skip; 119 u16 name_len; 120 u8 origin; 121 struct map *map; 122 struct sym_entry_source *src; 123 unsigned long count[0]; 124 }; 125 126 /* 127 * Source functions 128 */ 129 130 static inline struct symbol *sym_entry__symbol(struct sym_entry *self) 131 { 132 return ((void *)self) + symbol_conf.priv_size; 133 } 134 135 static void get_term_dimensions(struct winsize *ws) 136 { 137 char *s = getenv("LINES"); 138 139 if (s != NULL) { 140 ws->ws_row = atoi(s); 141 s = getenv("COLUMNS"); 142 if (s != NULL) { 143 ws->ws_col = atoi(s); 144 if (ws->ws_row && ws->ws_col) 145 return; 146 } 147 } 148 #ifdef TIOCGWINSZ 149 if (ioctl(1, TIOCGWINSZ, ws) == 0 && 150 ws->ws_row && ws->ws_col) 151 return; 152 #endif 153 ws->ws_row = 25; 154 ws->ws_col = 80; 155 } 156 157 static void update_print_entries(struct winsize *ws) 158 { 159 print_entries = ws->ws_row; 160 161 if (print_entries > 9) 162 print_entries -= 9; 163 } 164 165 static void sig_winch_handler(int sig __used) 166 { 167 get_term_dimensions(&winsize); 168 update_print_entries(&winsize); 169 } 170 171 static void parse_source(struct sym_entry *syme) 172 { 173 struct symbol *sym; 174 struct sym_entry_source *source; 175 struct map *map; 176 FILE *file; 177 char command[PATH_MAX*2]; 178 const char *path; 
179 u64 len; 180 181 if (!syme) 182 return; 183 184 if (syme->src == NULL) { 185 syme->src = zalloc(sizeof(*source)); 186 if (syme->src == NULL) 187 return; 188 pthread_mutex_init(&syme->src->lock, NULL); 189 } 190 191 source = syme->src; 192 193 if (source->lines) { 194 pthread_mutex_lock(&source->lock); 195 goto out_assign; 196 } 197 198 sym = sym_entry__symbol(syme); 199 map = syme->map; 200 path = map->dso->long_name; 201 202 len = sym->end - sym->start; 203 204 sprintf(command, 205 "objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s", 206 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start), 207 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path); 208 209 file = popen(command, "r"); 210 if (!file) 211 return; 212 213 pthread_mutex_lock(&source->lock); 214 source->lines_tail = &source->lines; 215 while (!feof(file)) { 216 struct source_line *src; 217 size_t dummy = 0; 218 char *c, *sep; 219 220 src = malloc(sizeof(struct source_line)); 221 assert(src != NULL); 222 memset(src, 0, sizeof(struct source_line)); 223 224 if (getline(&src->line, &dummy, file) < 0) 225 break; 226 if (!src->line) 227 break; 228 229 c = strchr(src->line, '\n'); 230 if (c) 231 *c = 0; 232 233 src->next = NULL; 234 *source->lines_tail = src; 235 source->lines_tail = &src->next; 236 237 src->eip = strtoull(src->line, &sep, 16); 238 if (*sep == ':') 239 src->eip = map__objdump_2ip(map, src->eip); 240 else /* this line has no ip info (e.g. 
source line) */ 241 src->eip = 0; 242 } 243 pclose(file); 244 out_assign: 245 sym_filter_entry = syme; 246 pthread_mutex_unlock(&source->lock); 247 } 248 249 static void __zero_source_counters(struct sym_entry *syme) 250 { 251 int i; 252 struct source_line *line; 253 254 line = syme->src->lines; 255 while (line) { 256 for (i = 0; i < nr_counters; i++) 257 line->count[i] = 0; 258 line = line->next; 259 } 260 } 261 262 static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) 263 { 264 struct source_line *line; 265 266 if (syme != sym_filter_entry) 267 return; 268 269 if (pthread_mutex_trylock(&syme->src->lock)) 270 return; 271 272 if (syme->src == NULL || syme->src->source == NULL) 273 goto out_unlock; 274 275 for (line = syme->src->lines; line; line = line->next) { 276 /* skip lines without IP info */ 277 if (line->eip == 0) 278 continue; 279 if (line->eip == ip) { 280 line->count[counter]++; 281 break; 282 } 283 if (line->eip > ip) 284 break; 285 } 286 out_unlock: 287 pthread_mutex_unlock(&syme->src->lock); 288 } 289 290 #define PATTERN_LEN (BITS_PER_LONG / 4 + 2) 291 292 static void lookup_sym_source(struct sym_entry *syme) 293 { 294 struct symbol *symbol = sym_entry__symbol(syme); 295 struct source_line *line; 296 char pattern[PATTERN_LEN + 1]; 297 298 sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4, 299 map__rip_2objdump(syme->map, symbol->start)); 300 301 pthread_mutex_lock(&syme->src->lock); 302 for (line = syme->src->lines; line; line = line->next) { 303 if (memcmp(line->line, pattern, PATTERN_LEN) == 0) { 304 syme->src->source = line; 305 break; 306 } 307 } 308 pthread_mutex_unlock(&syme->src->lock); 309 } 310 311 static void show_lines(struct source_line *queue, int count, int total) 312 { 313 int i; 314 struct source_line *line; 315 316 line = queue; 317 for (i = 0; i < count; i++) { 318 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; 319 320 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); 321 line 
= line->next; 322 } 323 } 324 325 #define TRACE_COUNT 3 326 327 static void show_details(struct sym_entry *syme) 328 { 329 struct symbol *symbol; 330 struct source_line *line; 331 struct source_line *line_queue = NULL; 332 int displayed = 0; 333 int line_queue_count = 0, total = 0, more = 0; 334 335 if (!syme) 336 return; 337 338 if (!syme->src->source) 339 lookup_sym_source(syme); 340 341 if (!syme->src->source) 342 return; 343 344 symbol = sym_entry__symbol(syme); 345 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); 346 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 347 348 pthread_mutex_lock(&syme->src->lock); 349 line = syme->src->source; 350 while (line) { 351 total += line->count[sym_counter]; 352 line = line->next; 353 } 354 355 line = syme->src->source; 356 while (line) { 357 float pcnt = 0.0; 358 359 if (!line_queue_count) 360 line_queue = line; 361 line_queue_count++; 362 363 if (line->count[sym_counter]) 364 pcnt = 100.0 * line->count[sym_counter] / (float)total; 365 if (pcnt >= (float)sym_pcnt_filter) { 366 if (displayed <= print_entries) 367 show_lines(line_queue, line_queue_count, total); 368 else more++; 369 displayed += line_queue_count; 370 line_queue_count = 0; 371 line_queue = NULL; 372 } else if (line_queue_count > TRACE_COUNT) { 373 line_queue = line_queue->next; 374 line_queue_count--; 375 } 376 377 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; 378 line = line->next; 379 } 380 pthread_mutex_unlock(&syme->src->lock); 381 if (more) 382 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 383 } 384 385 /* 386 * Symbols will be added here in event__process_sample and will get out 387 * after decayed. 388 */ 389 static LIST_HEAD(active_symbols); 390 static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; 391 392 /* 393 * Ordering weight: count-1 * count-2 * ... 
/ count-n 394 */ 395 static double sym_weight(const struct sym_entry *sym) 396 { 397 double weight = sym->snap_count; 398 int counter; 399 400 if (!display_weighted) 401 return weight; 402 403 for (counter = 1; counter < nr_counters-1; counter++) 404 weight *= sym->count[counter]; 405 406 weight /= (sym->count[counter] + 1); 407 408 return weight; 409 } 410 411 static long samples; 412 static long userspace_samples; 413 static const char CONSOLE_CLEAR[] = "[H[2J"; 414 415 static void __list_insert_active_sym(struct sym_entry *syme) 416 { 417 list_add(&syme->node, &active_symbols); 418 } 419 420 static void list_remove_active_sym(struct sym_entry *syme) 421 { 422 pthread_mutex_lock(&active_symbols_lock); 423 list_del_init(&syme->node); 424 pthread_mutex_unlock(&active_symbols_lock); 425 } 426 427 static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) 428 { 429 struct rb_node **p = &tree->rb_node; 430 struct rb_node *parent = NULL; 431 struct sym_entry *iter; 432 433 while (*p != NULL) { 434 parent = *p; 435 iter = rb_entry(parent, struct sym_entry, rb_node); 436 437 if (se->weight > iter->weight) 438 p = &(*p)->rb_left; 439 else 440 p = &(*p)->rb_right; 441 } 442 443 rb_link_node(&se->rb_node, parent, p); 444 rb_insert_color(&se->rb_node, tree); 445 } 446 447 static void print_sym_table(void) 448 { 449 int printed = 0, j; 450 int counter, snap = !display_weighted ? 
sym_counter : 0; 451 float samples_per_sec = samples/delay_secs; 452 float ksamples_per_sec = (samples-userspace_samples)/delay_secs; 453 float sum_ksamples = 0.0; 454 struct sym_entry *syme, *n; 455 struct rb_root tmp = RB_ROOT; 456 struct rb_node *nd; 457 int sym_width = 0, dso_width = 0, max_dso_width; 458 const int win_width = winsize.ws_col - 1; 459 460 samples = userspace_samples = 0; 461 462 /* Sort the active symbols */ 463 pthread_mutex_lock(&active_symbols_lock); 464 syme = list_entry(active_symbols.next, struct sym_entry, node); 465 pthread_mutex_unlock(&active_symbols_lock); 466 467 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 468 syme->snap_count = syme->count[snap]; 469 if (syme->snap_count != 0) { 470 471 if ((hide_user_symbols && 472 syme->origin == PERF_RECORD_MISC_USER) || 473 (hide_kernel_symbols && 474 syme->origin == PERF_RECORD_MISC_KERNEL)) { 475 list_remove_active_sym(syme); 476 continue; 477 } 478 syme->weight = sym_weight(syme); 479 rb_insert_active_sym(&tmp, syme); 480 sum_ksamples += syme->snap_count; 481 482 for (j = 0; j < nr_counters; j++) 483 syme->count[j] = zero ? 
0 : syme->count[j] * 7 / 8; 484 } else 485 list_remove_active_sym(syme); 486 } 487 488 puts(CONSOLE_CLEAR); 489 490 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 491 printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", 492 samples_per_sec, 493 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 494 495 if (nr_counters == 1 || !display_weighted) { 496 printf("%Ld", (u64)attrs[0].sample_period); 497 if (freq) 498 printf("Hz "); 499 else 500 printf(" "); 501 } 502 503 if (!display_weighted) 504 printf("%s", event_name(sym_counter)); 505 else for (counter = 0; counter < nr_counters; counter++) { 506 if (counter) 507 printf("/"); 508 509 printf("%s", event_name(counter)); 510 } 511 512 printf( "], "); 513 514 if (target_pid != -1) 515 printf(" (target_pid: %d", target_pid); 516 else 517 printf(" (all"); 518 519 if (profile_cpu != -1) 520 printf(", cpu: %d)\n", profile_cpu); 521 else { 522 if (target_pid != -1) 523 printf(")\n"); 524 else 525 printf(", %d CPUs)\n", nr_cpus); 526 } 527 528 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 529 530 if (sym_filter_entry) { 531 show_details(sym_filter_entry); 532 return; 533 } 534 535 /* 536 * Find the longest symbol name that will be displayed 537 */ 538 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 539 syme = rb_entry(nd, struct sym_entry, rb_node); 540 if (++printed > print_entries || 541 (int)syme->snap_count < count_filter) 542 continue; 543 544 if (syme->map->dso->long_name_len > dso_width) 545 dso_width = syme->map->dso->long_name_len; 546 547 if (syme->name_len > sym_width) 548 sym_width = syme->name_len; 549 } 550 551 printed = 0; 552 553 max_dso_width = winsize.ws_col - sym_width - 29; 554 if (dso_width > max_dso_width) 555 dso_width = max_dso_width; 556 putchar('\n'); 557 if (nr_counters == 1) 558 printf(" samples pcnt"); 559 else 560 printf(" weight samples pcnt"); 561 562 if (verbose) 563 printf(" RIP "); 564 printf(" %-*.*s DSO\n", sym_width, sym_width, 
"function"); 565 printf(" %s _______ _____", 566 nr_counters == 1 ? " " : "______"); 567 if (verbose) 568 printf(" ________________"); 569 printf(" %-*.*s", sym_width, sym_width, graph_line); 570 printf(" %-*.*s", dso_width, dso_width, graph_line); 571 puts("\n"); 572 573 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 574 struct symbol *sym; 575 double pcnt; 576 577 syme = rb_entry(nd, struct sym_entry, rb_node); 578 sym = sym_entry__symbol(syme); 579 580 if (++printed > print_entries || (int)syme->snap_count < count_filter) 581 continue; 582 583 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 584 sum_ksamples)); 585 586 if (nr_counters == 1 || !display_weighted) 587 printf("%20.2f ", syme->weight); 588 else 589 printf("%9.1f %10ld ", syme->weight, syme->snap_count); 590 591 percent_color_fprintf(stdout, "%4.1f%%", pcnt); 592 if (verbose) 593 printf(" %016llx", sym->start); 594 printf(" %-*.*s", sym_width, sym_width, sym->name); 595 printf(" %-*.*s\n", dso_width, dso_width, 596 dso_width >= syme->map->dso->long_name_len ? 
597 syme->map->dso->long_name : 598 syme->map->dso->short_name); 599 } 600 } 601 602 static void prompt_integer(int *target, const char *msg) 603 { 604 char *buf = malloc(0), *p; 605 size_t dummy = 0; 606 int tmp; 607 608 fprintf(stdout, "\n%s: ", msg); 609 if (getline(&buf, &dummy, stdin) < 0) 610 return; 611 612 p = strchr(buf, '\n'); 613 if (p) 614 *p = 0; 615 616 p = buf; 617 while(*p) { 618 if (!isdigit(*p)) 619 goto out_free; 620 p++; 621 } 622 tmp = strtoul(buf, NULL, 10); 623 *target = tmp; 624 out_free: 625 free(buf); 626 } 627 628 static void prompt_percent(int *target, const char *msg) 629 { 630 int tmp = 0; 631 632 prompt_integer(&tmp, msg); 633 if (tmp >= 0 && tmp <= 100) 634 *target = tmp; 635 } 636 637 static void prompt_symbol(struct sym_entry **target, const char *msg) 638 { 639 char *buf = malloc(0), *p; 640 struct sym_entry *syme = *target, *n, *found = NULL; 641 size_t dummy = 0; 642 643 /* zero counters of active symbol */ 644 if (syme) { 645 pthread_mutex_lock(&syme->src->lock); 646 __zero_source_counters(syme); 647 *target = NULL; 648 pthread_mutex_unlock(&syme->src->lock); 649 } 650 651 fprintf(stdout, "\n%s: ", msg); 652 if (getline(&buf, &dummy, stdin) < 0) 653 goto out_free; 654 655 p = strchr(buf, '\n'); 656 if (p) 657 *p = 0; 658 659 pthread_mutex_lock(&active_symbols_lock); 660 syme = list_entry(active_symbols.next, struct sym_entry, node); 661 pthread_mutex_unlock(&active_symbols_lock); 662 663 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 664 struct symbol *sym = sym_entry__symbol(syme); 665 666 if (!strcmp(buf, sym->name)) { 667 found = syme; 668 break; 669 } 670 } 671 672 if (!found) { 673 fprintf(stderr, "Sorry, %s is not active.\n", buf); 674 sleep(1); 675 return; 676 } else 677 parse_source(found); 678 679 out_free: 680 free(buf); 681 } 682 683 static void print_mapped_keys(void) 684 { 685 char *name = NULL; 686 687 if (sym_filter_entry) { 688 struct symbol *sym = sym_entry__symbol(sym_filter_entry); 689 name = 
sym->name; 690 } 691 692 fprintf(stdout, "\nMapped keys:\n"); 693 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); 694 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 695 696 if (nr_counters > 1) 697 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); 698 699 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 700 701 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 702 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 703 fprintf(stdout, "\t[S] stop annotation.\n"); 704 705 if (nr_counters > 1) 706 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 707 708 fprintf(stdout, 709 "\t[K] hide kernel_symbols symbols. \t(%s)\n", 710 hide_kernel_symbols ? "yes" : "no"); 711 fprintf(stdout, 712 "\t[U] hide user symbols. \t(%s)\n", 713 hide_user_symbols ? "yes" : "no"); 714 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); 715 fprintf(stdout, "\t[qQ] quit.\n"); 716 } 717 718 static int key_mapped(int c) 719 { 720 switch (c) { 721 case 'd': 722 case 'e': 723 case 'f': 724 case 'z': 725 case 'q': 726 case 'Q': 727 case 'K': 728 case 'U': 729 case 'F': 730 case 's': 731 case 'S': 732 return 1; 733 case 'E': 734 case 'w': 735 return nr_counters > 1 ? 
1 : 0; 736 default: 737 break; 738 } 739 740 return 0; 741 } 742 743 static void handle_keypress(int c) 744 { 745 if (!key_mapped(c)) { 746 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 747 struct termios tc, save; 748 749 print_mapped_keys(); 750 fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); 751 fflush(stdout); 752 753 tcgetattr(0, &save); 754 tc = save; 755 tc.c_lflag &= ~(ICANON | ECHO); 756 tc.c_cc[VMIN] = 0; 757 tc.c_cc[VTIME] = 0; 758 tcsetattr(0, TCSANOW, &tc); 759 760 poll(&stdin_poll, 1, -1); 761 c = getc(stdin); 762 763 tcsetattr(0, TCSAFLUSH, &save); 764 if (!key_mapped(c)) 765 return; 766 } 767 768 switch (c) { 769 case 'd': 770 prompt_integer(&delay_secs, "Enter display delay"); 771 if (delay_secs < 1) 772 delay_secs = 1; 773 break; 774 case 'e': 775 prompt_integer(&print_entries, "Enter display entries (lines)"); 776 if (print_entries == 0) { 777 sig_winch_handler(SIGWINCH); 778 signal(SIGWINCH, sig_winch_handler); 779 } else 780 signal(SIGWINCH, SIG_DFL); 781 break; 782 case 'E': 783 if (nr_counters > 1) { 784 int i; 785 786 fprintf(stderr, "\nAvailable events:"); 787 for (i = 0; i < nr_counters; i++) 788 fprintf(stderr, "\n\t%d %s", i, event_name(i)); 789 790 prompt_integer(&sym_counter, "Enter details event counter"); 791 792 if (sym_counter >= nr_counters) { 793 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); 794 sym_counter = 0; 795 sleep(1); 796 } 797 } else sym_counter = 0; 798 break; 799 case 'f': 800 prompt_integer(&count_filter, "Enter display event count filter"); 801 break; 802 case 'F': 803 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 804 break; 805 case 'K': 806 hide_kernel_symbols = !hide_kernel_symbols; 807 break; 808 case 'q': 809 case 'Q': 810 printf("exiting.\n"); 811 if (dump_symtab) 812 dsos__fprintf(stderr); 813 exit(0); 814 case 's': 815 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 816 break; 817 case 'S': 818 if 
(!sym_filter_entry) 819 break; 820 else { 821 struct sym_entry *syme = sym_filter_entry; 822 823 pthread_mutex_lock(&syme->src->lock); 824 sym_filter_entry = NULL; 825 __zero_source_counters(syme); 826 pthread_mutex_unlock(&syme->src->lock); 827 } 828 break; 829 case 'U': 830 hide_user_symbols = !hide_user_symbols; 831 break; 832 case 'w': 833 display_weighted = ~display_weighted; 834 break; 835 case 'z': 836 zero = ~zero; 837 break; 838 default: 839 break; 840 } 841 } 842 843 static void *display_thread(void *arg __used) 844 { 845 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 846 struct termios tc, save; 847 int delay_msecs, c; 848 849 tcgetattr(0, &save); 850 tc = save; 851 tc.c_lflag &= ~(ICANON | ECHO); 852 tc.c_cc[VMIN] = 0; 853 tc.c_cc[VTIME] = 0; 854 855 repeat: 856 delay_msecs = delay_secs * 1000; 857 tcsetattr(0, TCSANOW, &tc); 858 /* trash return*/ 859 getc(stdin); 860 861 do { 862 print_sym_table(); 863 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 864 865 c = getc(stdin); 866 tcsetattr(0, TCSAFLUSH, &save); 867 868 handle_keypress(c); 869 goto repeat; 870 871 return NULL; 872 } 873 874 /* Tag samples to be skipped. */ 875 static const char *skip_symbols[] = { 876 "default_idle", 877 "cpu_idle", 878 "enter_idle", 879 "exit_idle", 880 "mwait_idle", 881 "mwait_idle_with_hints", 882 "poll_idle", 883 "ppc64_runlatch_off", 884 "pseries_dedicated_idle_sleep", 885 NULL 886 }; 887 888 static int symbol_filter(struct map *map, struct symbol *sym) 889 { 890 struct sym_entry *syme; 891 const char *name = sym->name; 892 int i; 893 894 /* 895 * ppc64 uses function descriptors and appends a '.' to the 896 * start of every instruction address. Remove it. 
897 */ 898 if (name[0] == '.') 899 name++; 900 901 if (!strcmp(name, "_text") || 902 !strcmp(name, "_etext") || 903 !strcmp(name, "_sinittext") || 904 !strncmp("init_module", name, 11) || 905 !strncmp("cleanup_module", name, 14) || 906 strstr(name, "_text_start") || 907 strstr(name, "_text_end")) 908 return 1; 909 910 syme = symbol__priv(sym); 911 syme->map = map; 912 syme->src = NULL; 913 914 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { 915 /* schedule initial sym_filter_entry setup */ 916 sym_filter_entry_sched = syme; 917 sym_filter = NULL; 918 } 919 920 for (i = 0; skip_symbols[i]; i++) { 921 if (!strcmp(skip_symbols[i], name)) { 922 syme->skip = 1; 923 break; 924 } 925 } 926 927 if (!syme->skip) 928 syme->name_len = strlen(sym->name); 929 930 return 0; 931 } 932 933 static void event__process_sample(const event_t *self, 934 struct perf_session *session, int counter) 935 { 936 u64 ip = self->ip.ip; 937 struct sym_entry *syme; 938 struct addr_location al; 939 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 940 941 ++samples; 942 943 switch (origin) { 944 case PERF_RECORD_MISC_USER: 945 ++userspace_samples; 946 if (hide_user_symbols) 947 return; 948 break; 949 case PERF_RECORD_MISC_KERNEL: 950 if (hide_kernel_symbols) 951 return; 952 break; 953 default: 954 return; 955 } 956 957 if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || 958 al.filtered) 959 return; 960 961 if (al.sym == NULL) { 962 /* 963 * As we do lazy loading of symtabs we only will know if the 964 * specified vmlinux file is invalid when we actually have a 965 * hit in kernel space and then try to load it. So if we get 966 * here and there are _no_ symbols in the DSO backing the 967 * kernel map, bail out. 
968 * 969 * We may never get here, for instance, if we use -K/ 970 * --hide-kernel-symbols, even if the user specifies an 971 * invalid --vmlinux ;-) 972 */ 973 if (al.map == session->vmlinux_maps[MAP__FUNCTION] && 974 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { 975 pr_err("The %s file can't be used\n", 976 symbol_conf.vmlinux_name); 977 exit(1); 978 } 979 980 return; 981 } 982 983 /* let's see, whether we need to install initial sym_filter_entry */ 984 if (sym_filter_entry_sched) { 985 sym_filter_entry = sym_filter_entry_sched; 986 sym_filter_entry_sched = NULL; 987 parse_source(sym_filter_entry); 988 } 989 990 syme = symbol__priv(al.sym); 991 if (!syme->skip) { 992 syme->count[counter]++; 993 syme->origin = origin; 994 record_precise_ip(syme, counter, ip); 995 pthread_mutex_lock(&active_symbols_lock); 996 if (list_empty(&syme->node) || !syme->node.next) 997 __list_insert_active_sym(syme); 998 pthread_mutex_unlock(&active_symbols_lock); 999 } 1000 } 1001 1002 static int event__process(event_t *event, struct perf_session *session) 1003 { 1004 switch (event->header.type) { 1005 case PERF_RECORD_COMM: 1006 event__process_comm(event, session); 1007 break; 1008 case PERF_RECORD_MMAP: 1009 event__process_mmap(event, session); 1010 break; 1011 case PERF_RECORD_FORK: 1012 case PERF_RECORD_EXIT: 1013 event__process_task(event, session); 1014 break; 1015 default: 1016 break; 1017 } 1018 1019 return 0; 1020 } 1021 1022 struct mmap_data { 1023 int counter; 1024 void *base; 1025 int mask; 1026 unsigned int prev; 1027 }; 1028 1029 static unsigned int mmap_read_head(struct mmap_data *md) 1030 { 1031 struct perf_event_mmap_page *pc = md->base; 1032 int head; 1033 1034 head = pc->data_head; 1035 rmb(); 1036 1037 return head; 1038 } 1039 1040 static void perf_session__mmap_read_counter(struct perf_session *self, 1041 struct mmap_data *md) 1042 { 1043 unsigned int head = mmap_read_head(md); 1044 unsigned int old = md->prev; 1045 unsigned char *data = md->base + page_size; 
1046 int diff; 1047 1048 /* 1049 * If we're further behind than half the buffer, there's a chance 1050 * the writer will bite our tail and mess up the samples under us. 1051 * 1052 * If we somehow ended up ahead of the head, we got messed up. 1053 * 1054 * In either case, truncate and restart at head. 1055 */ 1056 diff = head - old; 1057 if (diff > md->mask / 2 || diff < 0) { 1058 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); 1059 1060 /* 1061 * head points to a known good entry, start there. 1062 */ 1063 old = head; 1064 } 1065 1066 for (; old != head;) { 1067 event_t *event = (event_t *)&data[old & md->mask]; 1068 1069 event_t event_copy; 1070 1071 size_t size = event->header.size; 1072 1073 /* 1074 * Event straddles the mmap boundary -- header should always 1075 * be inside due to u64 alignment of output. 1076 */ 1077 if ((old & md->mask) + size != ((old + size) & md->mask)) { 1078 unsigned int offset = old; 1079 unsigned int len = min(sizeof(*event), size), cpy; 1080 void *dst = &event_copy; 1081 1082 do { 1083 cpy = min(md->mask + 1 - (offset & md->mask), len); 1084 memcpy(dst, &data[offset & md->mask], cpy); 1085 offset += cpy; 1086 dst += cpy; 1087 len -= cpy; 1088 } while (len); 1089 1090 event = &event_copy; 1091 } 1092 1093 if (event->header.type == PERF_RECORD_SAMPLE) 1094 event__process_sample(event, self, md->counter); 1095 else 1096 event__process(event, self); 1097 old += size; 1098 } 1099 1100 md->prev = old; 1101 } 1102 1103 static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; 1104 static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; 1105 1106 static void perf_session__mmap_read(struct perf_session *self) 1107 { 1108 int i, counter; 1109 1110 for (i = 0; i < nr_cpus; i++) { 1111 for (counter = 0; counter < nr_counters; counter++) 1112 perf_session__mmap_read_counter(self, &mmap_array[i][counter]); 1113 } 1114 } 1115 1116 int nr_poll; 1117 int group_fd; 1118 1119 static void start_counter(int i, int counter) 
1120 { 1121 struct perf_event_attr *attr; 1122 int cpu; 1123 1124 cpu = profile_cpu; 1125 if (target_pid == -1 && profile_cpu == -1) 1126 cpu = i; 1127 1128 attr = attrs + counter; 1129 1130 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 1131 1132 if (freq) { 1133 attr->sample_type |= PERF_SAMPLE_PERIOD; 1134 attr->freq = 1; 1135 attr->sample_freq = freq; 1136 } 1137 1138 attr->inherit = (cpu < 0) && inherit; 1139 attr->mmap = 1; 1140 1141 try_again: 1142 fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); 1143 1144 if (fd[i][counter] < 0) { 1145 int err = errno; 1146 1147 if (err == EPERM || err == EACCES) 1148 die("No permission - are you root?\n"); 1149 /* 1150 * If it's cycles then fall back to hrtimer 1151 * based cpu-clock-tick sw counter, which 1152 * is always available even if no PMU support: 1153 */ 1154 if (attr->type == PERF_TYPE_HARDWARE 1155 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 1156 1157 if (verbose) 1158 warning(" ... trying to fall back to cpu-clock-ticks\n"); 1159 1160 attr->type = PERF_TYPE_SOFTWARE; 1161 attr->config = PERF_COUNT_SW_CPU_CLOCK; 1162 goto try_again; 1163 } 1164 printf("\n"); 1165 error("perfcounter syscall returned with %d (%s)\n", 1166 fd[i][counter], strerror(err)); 1167 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 1168 exit(-1); 1169 } 1170 assert(fd[i][counter] >= 0); 1171 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); 1172 1173 /* 1174 * First counter acts as the group leader: 1175 */ 1176 if (group && group_fd == -1) 1177 group_fd = fd[i][counter]; 1178 1179 event_array[nr_poll].fd = fd[i][counter]; 1180 event_array[nr_poll].events = POLLIN; 1181 nr_poll++; 1182 1183 mmap_array[i][counter].counter = counter; 1184 mmap_array[i][counter].prev = 0; 1185 mmap_array[i][counter].mask = mmap_pages*page_size - 1; 1186 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, 1187 PROT_READ, MAP_SHARED, fd[i][counter], 0); 1188 if (mmap_array[i][counter].base == MAP_FAILED) 
1189 die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 1190 } 1191 1192 static int __cmd_top(void) 1193 { 1194 pthread_t thread; 1195 int i, counter; 1196 int ret; 1197 /* 1198 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this 1199 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. 1200 */ 1201 struct perf_session *session = perf_session__new(NULL, O_WRONLY, false); 1202 if (session == NULL) 1203 return -ENOMEM; 1204 1205 if (target_pid != -1) 1206 event__synthesize_thread(target_pid, event__process, session); 1207 else 1208 event__synthesize_threads(event__process, session); 1209 1210 for (i = 0; i < nr_cpus; i++) { 1211 group_fd = -1; 1212 for (counter = 0; counter < nr_counters; counter++) 1213 start_counter(i, counter); 1214 } 1215 1216 /* Wait for a minimal set of events before starting the snapshot */ 1217 poll(event_array, nr_poll, 100); 1218 1219 perf_session__mmap_read(session); 1220 1221 if (pthread_create(&thread, NULL, display_thread, NULL)) { 1222 printf("Could not create display thread.\n"); 1223 exit(-1); 1224 } 1225 1226 if (realtime_prio) { 1227 struct sched_param param; 1228 1229 param.sched_priority = realtime_prio; 1230 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1231 printf("Could not set realtime priority.\n"); 1232 exit(-1); 1233 } 1234 } 1235 1236 while (1) { 1237 int hits = samples; 1238 1239 perf_session__mmap_read(session); 1240 1241 if (hits == samples) 1242 ret = poll(event_array, nr_poll, 100); 1243 } 1244 1245 return 0; 1246 } 1247 1248 static const char * const top_usage[] = { 1249 "perf top [<options>]", 1250 NULL 1251 }; 1252 1253 static const struct option options[] = { 1254 OPT_CALLBACK('e', "event", NULL, "event", 1255 "event selector. 
use 'perf list' to list available events", 1256 parse_events), 1257 OPT_INTEGER('c', "count", &default_interval, 1258 "event period to sample"), 1259 OPT_INTEGER('p', "pid", &target_pid, 1260 "profile events on existing pid"), 1261 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1262 "system-wide collection from all CPUs"), 1263 OPT_INTEGER('C', "CPU", &profile_cpu, 1264 "CPU to profile on"), 1265 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 1266 "file", "vmlinux pathname"), 1267 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, 1268 "hide kernel symbols"), 1269 OPT_INTEGER('m', "mmap-pages", &mmap_pages, 1270 "number of mmap data pages"), 1271 OPT_INTEGER('r', "realtime", &realtime_prio, 1272 "collect data with this RT SCHED_FIFO priority"), 1273 OPT_INTEGER('d', "delay", &delay_secs, 1274 "number of seconds to delay between refreshes"), 1275 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 1276 "dump the symbol table used for profiling"), 1277 OPT_INTEGER('f', "count-filter", &count_filter, 1278 "only display functions with more events than this"), 1279 OPT_BOOLEAN('g', "group", &group, 1280 "put the counters into a counter group"), 1281 OPT_BOOLEAN('i', "inherit", &inherit, 1282 "child tasks inherit counters"), 1283 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 1284 "symbol to annotate"), 1285 OPT_BOOLEAN('z', "zero", &zero, 1286 "zero history across updates"), 1287 OPT_INTEGER('F', "freq", &freq, 1288 "profile at this frequency"), 1289 OPT_INTEGER('E', "entries", &print_entries, 1290 "display this many functions"), 1291 OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols, 1292 "hide user symbols"), 1293 OPT_BOOLEAN('v', "verbose", &verbose, 1294 "be more verbose (show counter open errors, etc)"), 1295 OPT_END() 1296 }; 1297 1298 int cmd_top(int argc, const char **argv, const char *prefix __used) 1299 { 1300 int counter; 1301 1302 page_size = sysconf(_SC_PAGE_SIZE); 1303 1304 argc = parse_options(argc, argv, options, top_usage, 
0); 1305 if (argc) 1306 usage_with_options(top_usage, options); 1307 1308 /* CPU and PID are mutually exclusive */ 1309 if (target_pid != -1 && profile_cpu != -1) { 1310 printf("WARNING: PID switch overriding CPU\n"); 1311 sleep(1); 1312 profile_cpu = -1; 1313 } 1314 1315 if (!nr_counters) 1316 nr_counters = 1; 1317 1318 symbol_conf.priv_size = (sizeof(struct sym_entry) + 1319 (nr_counters + 1) * sizeof(unsigned long)); 1320 1321 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); 1322 if (symbol__init() < 0) 1323 return -1; 1324 1325 if (delay_secs < 1) 1326 delay_secs = 1; 1327 1328 /* 1329 * User specified count overrides default frequency. 1330 */ 1331 if (default_interval) 1332 freq = 0; 1333 else if (freq) { 1334 default_interval = freq; 1335 } else { 1336 fprintf(stderr, "frequency and count are zero, aborting\n"); 1337 exit(EXIT_FAILURE); 1338 } 1339 1340 /* 1341 * Fill in the ones not specifically initialized via -c: 1342 */ 1343 for (counter = 0; counter < nr_counters; counter++) { 1344 if (attrs[counter].sample_period) 1345 continue; 1346 1347 attrs[counter].sample_period = default_interval; 1348 } 1349 1350 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 1351 assert(nr_cpus <= MAX_NR_CPUS); 1352 assert(nr_cpus >= 0); 1353 1354 if (target_pid != -1 || profile_cpu != -1) 1355 nr_cpus = 1; 1356 1357 get_term_dimensions(&winsize); 1358 if (print_entries == 0) { 1359 update_print_entries(&winsize); 1360 signal(SIGWINCH, sig_winch_handler); 1361 } 1362 1363 return __cmd_top(); 1364 } 1365