1 /* 2 * builtin-top.c 3 * 4 * Builtin top command: Display a continuously updated profile of 5 * any workload, CPU or specific PID. 6 * 7 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 8 * 9 * Improvements and fixes by: 10 * 11 * Arjan van de Ven <arjan@linux.intel.com> 12 * Yanmin Zhang <yanmin.zhang@intel.com> 13 * Wu Fengguang <fengguang.wu@intel.com> 14 * Mike Galbraith <efault@gmx.de> 15 * Paul Mackerras <paulus@samba.org> 16 * 17 * Released under the GPL v2. (and only v2, not any later version) 18 */ 19 #include "builtin.h" 20 21 #include "perf.h" 22 23 #include "util/color.h" 24 #include "util/session.h" 25 #include "util/symbol.h" 26 #include "util/thread.h" 27 #include "util/util.h" 28 #include <linux/rbtree.h> 29 #include "util/parse-options.h" 30 #include "util/parse-events.h" 31 #include "util/cpumap.h" 32 33 #include "util/debug.h" 34 35 #include <assert.h> 36 #include <fcntl.h> 37 38 #include <stdio.h> 39 #include <termios.h> 40 #include <unistd.h> 41 42 #include <errno.h> 43 #include <time.h> 44 #include <sched.h> 45 #include <pthread.h> 46 47 #include <sys/syscall.h> 48 #include <sys/ioctl.h> 49 #include <sys/poll.h> 50 #include <sys/prctl.h> 51 #include <sys/wait.h> 52 #include <sys/uio.h> 53 #include <sys/mman.h> 54 55 #include <linux/unistd.h> 56 #include <linux/types.h> 57 58 static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 59 60 static bool system_wide = false; 61 62 static int default_interval = 0; 63 64 static int count_filter = 5; 65 static int print_entries; 66 67 static int target_pid = -1; 68 static int target_tid = -1; 69 static pid_t *all_tids = NULL; 70 static int thread_num = 0; 71 static bool inherit = false; 72 static int profile_cpu = -1; 73 static int nr_cpus = 0; 74 static int realtime_prio = 0; 75 static bool group = false; 76 static unsigned int page_size; 77 static unsigned int mmap_pages = 16; 78 static int freq = 1000; /* 1 KHz */ 79 80 static int delay_secs = 2; 81 static bool zero = false; 82 static bool dump_symtab = false; 83 84 static bool hide_kernel_symbols = false; 85 static bool hide_user_symbols = false; 86 static struct winsize winsize; 87 88 /* 89 * Source 90 */ 91 92 struct source_line { 93 u64 eip; 94 unsigned long count[MAX_COUNTERS]; 95 char *line; 96 struct source_line *next; 97 }; 98 99 static const char *sym_filter = NULL; 100 struct sym_entry *sym_filter_entry = NULL; 101 struct sym_entry *sym_filter_entry_sched = NULL; 102 static int sym_pcnt_filter = 5; 103 static int sym_counter = 0; 104 static int display_weighted = -1; 105 static const char *cpu_list; 106 107 /* 108 * Symbols 109 */ 110 111 struct sym_entry_source { 112 struct source_line *source; 113 struct source_line *lines; 114 struct source_line **lines_tail; 115 pthread_mutex_t lock; 116 }; 117 118 struct sym_entry { 119 struct rb_node rb_node; 120 struct list_head node; 121 unsigned long snap_count; 122 double weight; 123 int skip; 124 u16 name_len; 125 u8 origin; 126 struct map *map; 127 struct sym_entry_source *src; 128 unsigned long count[0]; 129 }; 130 131 /* 132 * Source functions 133 */ 134 135 static inline struct symbol *sym_entry__symbol(struct sym_entry *self) 136 { 137 return ((void *)self) + symbol_conf.priv_size; 138 } 139 140 void get_term_dimensions(struct winsize *ws) 141 { 142 char *s = getenv("LINES"); 143 144 if (s != NULL) { 145 ws->ws_row = atoi(s); 146 s = getenv("COLUMNS"); 147 if (s != NULL) { 148 ws->ws_col = atoi(s); 149 if (ws->ws_row && ws->ws_col) 150 return; 151 } 152 } 153 #ifdef TIOCGWINSZ 154 if (ioctl(1, TIOCGWINSZ, ws) == 0 && 155 ws->ws_row && ws->ws_col) 156 return; 157 #endif 158 ws->ws_row = 25; 159 ws->ws_col = 80; 160 } 161 162 static void update_print_entries(struct winsize *ws) 163 { 164 print_entries = ws->ws_row; 165 166 if (print_entries > 9) 167 print_entries -= 9; 168 } 169 170 static void sig_winch_handler(int sig __used) 171 { 172 get_term_dimensions(&winsize); 173 update_print_entries(&winsize); 174 } 175 176 static int parse_source(struct sym_entry *syme) 177 { 178 struct symbol *sym; 179 struct sym_entry_source *source; 180 struct map *map; 181 FILE *file; 182 char command[PATH_MAX*2]; 183 const char *path; 184 u64 len; 185 186 if (!syme) 187 return -1; 188 189 sym = sym_entry__symbol(syme); 190 map = syme->map; 191 192 /* 193 * We can't annotate with just /proc/kallsyms 194 */ 195 if (map->dso->origin == DSO__ORIG_KERNEL) 196 return -1; 197 198 if (syme->src == NULL) { 199 syme->src = zalloc(sizeof(*source)); 200 if (syme->src == NULL) 201 return -1; 202 pthread_mutex_init(&syme->src->lock, NULL); 203 } 204 205 source = syme->src; 206 207 if (source->lines) { 208 pthread_mutex_lock(&source->lock); 209 goto out_assign; 210 } 211 path = map->dso->long_name; 212 213 len = sym->end - sym->start; 214 215 sprintf(command, 216 "objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s", 217 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start), 218 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path); 219 220 file = popen(command, "r"); 221 if (!file) 222 return -1; 223 224 pthread_mutex_lock(&source->lock); 225 source->lines_tail = &source->lines; 226 while (!feof(file)) { 227 struct source_line *src; 228 size_t dummy = 0; 229 char *c, *sep; 230 231 src = malloc(sizeof(struct source_line)); 232 assert(src != NULL); 233 memset(src, 0, sizeof(struct source_line)); 234 235 if (getline(&src->line, &dummy, file) < 0) 236 break; 237 if (!src->line) 238 break; 239 240 c = strchr(src->line, '\n'); 241 if (c) 242 *c = 0; 243 244 src->next = NULL; 245 *source->lines_tail = src; 246 source->lines_tail = &src->next; 247 248 src->eip = strtoull(src->line, &sep, 16); 249 if (*sep == ':') 250 src->eip = map__objdump_2ip(map, src->eip); 251 else /* this line has no ip info (e.g. source line) */ 252 src->eip = 0; 253 } 254 pclose(file); 255 out_assign: 256 sym_filter_entry = syme; 257 pthread_mutex_unlock(&source->lock); 258 return 0; 259 } 260 261 static void __zero_source_counters(struct sym_entry *syme) 262 { 263 int i; 264 struct source_line *line; 265 266 line = syme->src->lines; 267 while (line) { 268 for (i = 0; i < nr_counters; i++) 269 line->count[i] = 0; 270 line = line->next; 271 } 272 } 273 274 static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) 275 { 276 struct source_line *line; 277 278 if (syme != sym_filter_entry) 279 return; 280 281 if (pthread_mutex_trylock(&syme->src->lock)) 282 return; 283 284 if (syme->src == NULL || syme->src->source == NULL) 285 goto out_unlock; 286 287 for (line = syme->src->lines; line; line = line->next) { 288 /* skip lines without IP info */ 289 if (line->eip == 0) 290 continue; 291 if (line->eip == ip) { 292 line->count[counter]++; 293 break; 294 } 295 if (line->eip > ip) 296 break; 297 } 298 out_unlock: 299 pthread_mutex_unlock(&syme->src->lock); 300 } 301 302 #define PATTERN_LEN (BITS_PER_LONG / 4 + 2) 303 304 static void lookup_sym_source(struct sym_entry *syme) 305 { 306 struct symbol *symbol = sym_entry__symbol(syme); 307 struct source_line *line; 308 char pattern[PATTERN_LEN + 1]; 309 310 sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4, 311 map__rip_2objdump(syme->map, symbol->start)); 312 313 pthread_mutex_lock(&syme->src->lock); 314 for (line = syme->src->lines; line; line = line->next) { 315 if (memcmp(line->line, pattern, PATTERN_LEN) == 0) { 316 syme->src->source = line; 317 break; 318 } 319 } 320 pthread_mutex_unlock(&syme->src->lock); 321 } 322 323 static void show_lines(struct source_line *queue, int count, int total) 324 { 325 int i; 326 struct source_line *line; 327 328 line = queue; 329 for (i = 0; i < count; i++) { 330 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; 331 332 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); 333 line = line->next; 334 } 335 } 336 337 #define TRACE_COUNT 3 338 339 static void show_details(struct sym_entry *syme) 340 { 341 struct symbol *symbol; 342 struct source_line *line; 343 struct source_line *line_queue = NULL; 344 int displayed = 0; 345 int line_queue_count = 0, total = 0, more = 0; 346 347 if (!syme) 348 return; 349 350 if (!syme->src->source) 351 lookup_sym_source(syme); 352 353 if (!syme->src->source) 354 return; 355 356 symbol = sym_entry__symbol(syme); 357 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); 358 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 359 360 pthread_mutex_lock(&syme->src->lock); 361 line = syme->src->source; 362 while (line) { 363 total += line->count[sym_counter]; 364 line = line->next; 365 } 366 367 line = syme->src->source; 368 while (line) { 369 float pcnt = 0.0; 370 371 if (!line_queue_count) 372 line_queue = line; 373 line_queue_count++; 374 375 if (line->count[sym_counter]) 376 pcnt = 100.0 * line->count[sym_counter] / (float)total; 377 if (pcnt >= (float)sym_pcnt_filter) { 378 if (displayed <= print_entries) 379 show_lines(line_queue, line_queue_count, total); 380 else more++; 381 displayed += line_queue_count; 382 line_queue_count = 0; 383 line_queue = NULL; 384 } else if (line_queue_count > TRACE_COUNT) { 385 line_queue = line_queue->next; 386 line_queue_count--; 387 } 388 389 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; 390 line = line->next; 391 } 392 pthread_mutex_unlock(&syme->src->lock); 393 if (more) 394 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 395 } 396 397 /* 398 * Symbols will be added here in event__process_sample and will get out 399 * after decayed. 400 */ 401 static LIST_HEAD(active_symbols); 402 static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; 403 404 /* 405 * Ordering weight: count-1 * count-2 * ... / count-n 406 */ 407 static double sym_weight(const struct sym_entry *sym) 408 { 409 double weight = sym->snap_count; 410 int counter; 411 412 if (!display_weighted) 413 return weight; 414 415 for (counter = 1; counter < nr_counters-1; counter++) 416 weight *= sym->count[counter]; 417 418 weight /= (sym->count[counter] + 1); 419 420 return weight; 421 } 422 423 static long samples; 424 static long kernel_samples, us_samples; 425 static long exact_samples; 426 static long guest_us_samples, guest_kernel_samples; 427 static const char CONSOLE_CLEAR[] = "[H[2J"; 428 429 static void __list_insert_active_sym(struct sym_entry *syme) 430 { 431 list_add(&syme->node, &active_symbols); 432 } 433 434 static void list_remove_active_sym(struct sym_entry *syme) 435 { 436 pthread_mutex_lock(&active_symbols_lock); 437 list_del_init(&syme->node); 438 pthread_mutex_unlock(&active_symbols_lock); 439 } 440 441 static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) 442 { 443 struct rb_node **p = &tree->rb_node; 444 struct rb_node *parent = NULL; 445 struct sym_entry *iter; 446 447 while (*p != NULL) { 448 parent = *p; 449 iter = rb_entry(parent, struct sym_entry, rb_node); 450 451 if (se->weight > iter->weight) 452 p = &(*p)->rb_left; 453 else 454 p = &(*p)->rb_right; 455 } 456 457 rb_link_node(&se->rb_node, parent, p); 458 rb_insert_color(&se->rb_node, tree); 459 } 460 461 static void print_sym_table(void) 462 { 463 int printed = 0, j; 464 int counter, snap = !display_weighted ? sym_counter : 0; 465 float samples_per_sec = samples/delay_secs; 466 float ksamples_per_sec = kernel_samples/delay_secs; 467 float us_samples_per_sec = (us_samples)/delay_secs; 468 float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs; 469 float guest_us_samples_per_sec = (guest_us_samples)/delay_secs; 470 float esamples_percent = (100.0*exact_samples)/samples; 471 float sum_ksamples = 0.0; 472 struct sym_entry *syme, *n; 473 struct rb_root tmp = RB_ROOT; 474 struct rb_node *nd; 475 int sym_width = 0, dso_width = 0, dso_short_width = 0; 476 const int win_width = winsize.ws_col - 1; 477 478 samples = us_samples = kernel_samples = exact_samples = 0; 479 guest_kernel_samples = guest_us_samples = 0; 480 481 /* Sort the active symbols */ 482 pthread_mutex_lock(&active_symbols_lock); 483 syme = list_entry(active_symbols.next, struct sym_entry, node); 484 pthread_mutex_unlock(&active_symbols_lock); 485 486 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 487 syme->snap_count = syme->count[snap]; 488 if (syme->snap_count != 0) { 489 490 if ((hide_user_symbols && 491 syme->origin == PERF_RECORD_MISC_USER) || 492 (hide_kernel_symbols && 493 syme->origin == PERF_RECORD_MISC_KERNEL)) { 494 list_remove_active_sym(syme); 495 continue; 496 } 497 syme->weight = sym_weight(syme); 498 rb_insert_active_sym(&tmp, syme); 499 sum_ksamples += syme->snap_count; 500 501 for (j = 0; j < nr_counters; j++) 502 syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8; 503 } else 504 list_remove_active_sym(syme); 505 } 506 507 puts(CONSOLE_CLEAR); 508 509 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 510 if (!perf_guest) { 511 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%" 512 " exact: %4.1f%% [", 513 samples_per_sec, 514 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) / 515 samples_per_sec)), 516 esamples_percent); 517 } else { 518 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%" 519 " guest kernel:%4.1f%% guest us:%4.1f%%" 520 " exact: %4.1f%% [", 521 samples_per_sec, 522 100.0 - (100.0 * ((samples_per_sec-ksamples_per_sec) / 523 samples_per_sec)), 524 100.0 - (100.0 * ((samples_per_sec-us_samples_per_sec) / 525 samples_per_sec)), 526 100.0 - (100.0 * ((samples_per_sec - 527 guest_kernel_samples_per_sec) / 528 samples_per_sec)), 529 100.0 - (100.0 * ((samples_per_sec - 530 guest_us_samples_per_sec) / 531 samples_per_sec)), 532 esamples_percent); 533 } 534 535 if (nr_counters == 1 || !display_weighted) { 536 printf("%Ld", (u64)attrs[0].sample_period); 537 if (freq) 538 printf("Hz "); 539 else 540 printf(" "); 541 } 542 543 if (!display_weighted) 544 printf("%s", event_name(sym_counter)); 545 else for (counter = 0; counter < nr_counters; counter++) { 546 if (counter) 547 printf("/"); 548 549 printf("%s", event_name(counter)); 550 } 551 552 printf( "], "); 553 554 if (target_pid != -1) 555 printf(" (target_pid: %d", target_pid); 556 else if (target_tid != -1) 557 printf(" (target_tid: %d", target_tid); 558 else 559 printf(" (all"); 560 561 if (profile_cpu != -1) 562 printf(", cpu: %d)\n", profile_cpu); 563 else { 564 if (target_tid != -1) 565 printf(")\n"); 566 else 567 printf(", %d CPUs)\n", nr_cpus); 568 } 569 570 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 571 572 if (sym_filter_entry) { 573 show_details(sym_filter_entry); 574 return; 575 } 576 577 /* 578 * Find the longest symbol name that will be displayed 579 */ 580 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 581 syme = rb_entry(nd, struct sym_entry, rb_node); 582 if (++printed > print_entries || 583 (int)syme->snap_count < count_filter) 584 continue; 585 586 if (syme->map->dso->long_name_len > dso_width) 587 dso_width = syme->map->dso->long_name_len; 588 589 if (syme->map->dso->short_name_len > dso_short_width) 590 dso_short_width = syme->map->dso->short_name_len; 591 592 if (syme->name_len > sym_width) 593 sym_width = syme->name_len; 594 } 595 596 printed = 0; 597 598 if (sym_width + dso_width > winsize.ws_col - 29) { 599 dso_width = dso_short_width; 600 if (sym_width + dso_width > winsize.ws_col - 29) 601 sym_width = winsize.ws_col - dso_width - 29; 602 } 603 putchar('\n'); 604 if (nr_counters == 1) 605 printf(" samples pcnt"); 606 else 607 printf(" weight samples pcnt"); 608 609 if (verbose) 610 printf(" RIP "); 611 printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); 612 printf(" %s _______ _____", 613 nr_counters == 1 ? " " : "______"); 614 if (verbose) 615 printf(" ________________"); 616 printf(" %-*.*s", sym_width, sym_width, graph_line); 617 printf(" %-*.*s", dso_width, dso_width, graph_line); 618 puts("\n"); 619 620 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 621 struct symbol *sym; 622 double pcnt; 623 624 syme = rb_entry(nd, struct sym_entry, rb_node); 625 sym = sym_entry__symbol(syme); 626 if (++printed > print_entries || (int)syme->snap_count < count_filter) 627 continue; 628 629 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 630 sum_ksamples)); 631 632 if (nr_counters == 1 || !display_weighted) 633 printf("%20.2f ", syme->weight); 634 else 635 printf("%9.1f %10ld ", syme->weight, syme->snap_count); 636 637 percent_color_fprintf(stdout, "%4.1f%%", pcnt); 638 if (verbose) 639 printf(" %016llx", sym->start); 640 printf(" %-*.*s", sym_width, sym_width, sym->name); 641 printf(" %-*.*s\n", dso_width, dso_width, 642 dso_width >= syme->map->dso->long_name_len ? 643 syme->map->dso->long_name : 644 syme->map->dso->short_name); 645 } 646 } 647 648 static void prompt_integer(int *target, const char *msg) 649 { 650 char *buf = malloc(0), *p; 651 size_t dummy = 0; 652 int tmp; 653 654 fprintf(stdout, "\n%s: ", msg); 655 if (getline(&buf, &dummy, stdin) < 0) 656 return; 657 658 p = strchr(buf, '\n'); 659 if (p) 660 *p = 0; 661 662 p = buf; 663 while(*p) { 664 if (!isdigit(*p)) 665 goto out_free; 666 p++; 667 } 668 tmp = strtoul(buf, NULL, 10); 669 *target = tmp; 670 out_free: 671 free(buf); 672 } 673 674 static void prompt_percent(int *target, const char *msg) 675 { 676 int tmp = 0; 677 678 prompt_integer(&tmp, msg); 679 if (tmp >= 0 && tmp <= 100) 680 *target = tmp; 681 } 682 683 static void prompt_symbol(struct sym_entry **target, const char *msg) 684 { 685 char *buf = malloc(0), *p; 686 struct sym_entry *syme = *target, *n, *found = NULL; 687 size_t dummy = 0; 688 689 /* zero counters of active symbol */ 690 if (syme) { 691 pthread_mutex_lock(&syme->src->lock); 692 __zero_source_counters(syme); 693 *target = NULL; 694 pthread_mutex_unlock(&syme->src->lock); 695 } 696 697 fprintf(stdout, "\n%s: ", msg); 698 if (getline(&buf, &dummy, stdin) < 0) 699 goto out_free; 700 701 p = strchr(buf, '\n'); 702 if (p) 703 *p = 0; 704 705 pthread_mutex_lock(&active_symbols_lock); 706 syme = list_entry(active_symbols.next, struct sym_entry, node); 707 pthread_mutex_unlock(&active_symbols_lock); 708 709 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 710 struct symbol *sym = sym_entry__symbol(syme); 711 712 if (!strcmp(buf, sym->name)) { 713 found = syme; 714 break; 715 } 716 } 717 718 if (!found) { 719 fprintf(stderr, "Sorry, %s is not active.\n", buf); 720 sleep(1); 721 return; 722 } else 723 parse_source(found); 724 725 out_free: 726 free(buf); 727 } 728 729 static void print_mapped_keys(void) 730 { 731 char *name = NULL; 732 733 if (sym_filter_entry) { 734 struct symbol *sym = sym_entry__symbol(sym_filter_entry); 735 name = sym->name; 736 } 737 738 fprintf(stdout, "\nMapped keys:\n"); 739 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); 740 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 741 742 if (nr_counters > 1) 743 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); 744 745 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 746 747 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 748 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 749 fprintf(stdout, "\t[S] stop annotation.\n"); 750 751 if (nr_counters > 1) 752 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 753 754 fprintf(stdout, 755 "\t[K] hide kernel_symbols symbols. \t(%s)\n", 756 hide_kernel_symbols ? "yes" : "no"); 757 fprintf(stdout, 758 "\t[U] hide user symbols. \t(%s)\n", 759 hide_user_symbols ? "yes" : "no"); 760 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); 761 fprintf(stdout, "\t[qQ] quit.\n"); 762 } 763 764 static int key_mapped(int c) 765 { 766 switch (c) { 767 case 'd': 768 case 'e': 769 case 'f': 770 case 'z': 771 case 'q': 772 case 'Q': 773 case 'K': 774 case 'U': 775 case 'F': 776 case 's': 777 case 'S': 778 return 1; 779 case 'E': 780 case 'w': 781 return nr_counters > 1 ? 1 : 0; 782 default: 783 break; 784 } 785 786 return 0; 787 } 788 789 static void handle_keypress(struct perf_session *session, int c) 790 { 791 if (!key_mapped(c)) { 792 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 793 struct termios tc, save; 794 795 print_mapped_keys(); 796 fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); 797 fflush(stdout); 798 799 tcgetattr(0, &save); 800 tc = save; 801 tc.c_lflag &= ~(ICANON | ECHO); 802 tc.c_cc[VMIN] = 0; 803 tc.c_cc[VTIME] = 0; 804 tcsetattr(0, TCSANOW, &tc); 805 806 poll(&stdin_poll, 1, -1); 807 c = getc(stdin); 808 809 tcsetattr(0, TCSAFLUSH, &save); 810 if (!key_mapped(c)) 811 return; 812 } 813 814 switch (c) { 815 case 'd': 816 prompt_integer(&delay_secs, "Enter display delay"); 817 if (delay_secs < 1) 818 delay_secs = 1; 819 break; 820 case 'e': 821 prompt_integer(&print_entries, "Enter display entries (lines)"); 822 if (print_entries == 0) { 823 sig_winch_handler(SIGWINCH); 824 signal(SIGWINCH, sig_winch_handler); 825 } else 826 signal(SIGWINCH, SIG_DFL); 827 break; 828 case 'E': 829 if (nr_counters > 1) { 830 int i; 831 832 fprintf(stderr, "\nAvailable events:"); 833 for (i = 0; i < nr_counters; i++) 834 fprintf(stderr, "\n\t%d %s", i, event_name(i)); 835 836 prompt_integer(&sym_counter, "Enter details event counter"); 837 838 if (sym_counter >= nr_counters) { 839 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); 840 sym_counter = 0; 841 sleep(1); 842 } 843 } else sym_counter = 0; 844 break; 845 case 'f': 846 prompt_integer(&count_filter, "Enter display event count filter"); 847 break; 848 case 'F': 849 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 850 break; 851 case 'K': 852 hide_kernel_symbols = !hide_kernel_symbols; 853 break; 854 case 'q': 855 case 'Q': 856 printf("exiting.\n"); 857 if (dump_symtab) 858 perf_session__fprintf_dsos(session, stderr); 859 exit(0); 860 case 's': 861 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 862 break; 863 case 'S': 864 if (!sym_filter_entry) 865 break; 866 else { 867 struct sym_entry *syme = sym_filter_entry; 868 869 pthread_mutex_lock(&syme->src->lock); 870 sym_filter_entry = NULL; 871 __zero_source_counters(syme); 872 pthread_mutex_unlock(&syme->src->lock); 873 } 874 break; 875 case 'U': 876 hide_user_symbols = !hide_user_symbols; 877 break; 878 case 'w': 879 display_weighted = ~display_weighted; 880 break; 881 case 'z': 882 zero = !zero; 883 break; 884 default: 885 break; 886 } 887 } 888 889 static void *display_thread(void *arg __used) 890 { 891 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 892 struct termios tc, save; 893 int delay_msecs, c; 894 struct perf_session *session = (struct perf_session *) arg; 895 896 tcgetattr(0, &save); 897 tc = save; 898 tc.c_lflag &= ~(ICANON | ECHO); 899 tc.c_cc[VMIN] = 0; 900 tc.c_cc[VTIME] = 0; 901 902 repeat: 903 delay_msecs = delay_secs * 1000; 904 tcsetattr(0, TCSANOW, &tc); 905 /* trash return*/ 906 getc(stdin); 907 908 do { 909 print_sym_table(); 910 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 911 912 c = getc(stdin); 913 tcsetattr(0, TCSAFLUSH, &save); 914 915 handle_keypress(session, c); 916 goto repeat; 917 918 return NULL; 919 } 920 921 /* Tag samples to be skipped. */ 922 static const char *skip_symbols[] = { 923 "default_idle", 924 "cpu_idle", 925 "enter_idle", 926 "exit_idle", 927 "mwait_idle", 928 "mwait_idle_with_hints", 929 "poll_idle", 930 "ppc64_runlatch_off", 931 "pseries_dedicated_idle_sleep", 932 NULL 933 }; 934 935 static int symbol_filter(struct map *map, struct symbol *sym) 936 { 937 struct sym_entry *syme; 938 const char *name = sym->name; 939 int i; 940 941 /* 942 * ppc64 uses function descriptors and appends a '.' to the 943 * start of every instruction address. Remove it. 944 */ 945 if (name[0] == '.') 946 name++; 947 948 if (!strcmp(name, "_text") || 949 !strcmp(name, "_etext") || 950 !strcmp(name, "_sinittext") || 951 !strncmp("init_module", name, 11) || 952 !strncmp("cleanup_module", name, 14) || 953 strstr(name, "_text_start") || 954 strstr(name, "_text_end")) 955 return 1; 956 957 syme = symbol__priv(sym); 958 syme->map = map; 959 syme->src = NULL; 960 961 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { 962 /* schedule initial sym_filter_entry setup */ 963 sym_filter_entry_sched = syme; 964 sym_filter = NULL; 965 } 966 967 for (i = 0; skip_symbols[i]; i++) { 968 if (!strcmp(skip_symbols[i], name)) { 969 syme->skip = 1; 970 break; 971 } 972 } 973 974 if (!syme->skip) 975 syme->name_len = strlen(sym->name); 976 977 return 0; 978 } 979 980 static void event__process_sample(const event_t *self, 981 struct perf_session *session, int counter) 982 { 983 u64 ip = self->ip.ip; 984 struct sym_entry *syme; 985 struct addr_location al; 986 struct sample_data data; 987 struct machine *machine; 988 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 989 990 ++samples; 991 992 switch (origin) { 993 case PERF_RECORD_MISC_USER: 994 ++us_samples; 995 if (hide_user_symbols) 996 return; 997 machine = perf_session__find_host_machine(session); 998 break; 999 case PERF_RECORD_MISC_KERNEL: 1000 ++kernel_samples; 1001 if (hide_kernel_symbols) 1002 return; 1003 machine = perf_session__find_host_machine(session); 1004 break; 1005 case PERF_RECORD_MISC_GUEST_KERNEL: 1006 ++guest_kernel_samples; 1007 machine = perf_session__find_machine(session, self->ip.pid); 1008 break; 1009 case PERF_RECORD_MISC_GUEST_USER: 1010 ++guest_us_samples; 1011 /* 1012 * TODO: we don't process guest user from host side 1013 * except simple counting. 1014 */ 1015 return; 1016 default: 1017 return; 1018 } 1019 1020 if (!machine && perf_guest) { 1021 pr_err("Can't find guest [%d]'s kernel information\n", 1022 self->ip.pid); 1023 return; 1024 } 1025 1026 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 1027 exact_samples++; 1028 1029 if (event__preprocess_sample(self, session, &al, &data, 1030 symbol_filter) < 0 || 1031 al.filtered) 1032 return; 1033 1034 if (al.sym == NULL) { 1035 /* 1036 * As we do lazy loading of symtabs we only will know if the 1037 * specified vmlinux file is invalid when we actually have a 1038 * hit in kernel space and then try to load it. So if we get 1039 * here and there are _no_ symbols in the DSO backing the 1040 * kernel map, bail out. 1041 * 1042 * We may never get here, for instance, if we use -K/ 1043 * --hide-kernel-symbols, even if the user specifies an 1044 * invalid --vmlinux ;-) 1045 */ 1046 if (al.map == machine->vmlinux_maps[MAP__FUNCTION] && 1047 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { 1048 pr_err("The %s file can't be used\n", 1049 symbol_conf.vmlinux_name); 1050 exit(1); 1051 } 1052 1053 return; 1054 } 1055 1056 /* let's see, whether we need to install initial sym_filter_entry */ 1057 if (sym_filter_entry_sched) { 1058 sym_filter_entry = sym_filter_entry_sched; 1059 sym_filter_entry_sched = NULL; 1060 if (parse_source(sym_filter_entry) < 0) { 1061 struct symbol *sym = sym_entry__symbol(sym_filter_entry); 1062 1063 pr_err("Can't annotate %s", sym->name); 1064 if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) { 1065 pr_err(": No vmlinux file was found in the path:\n"); 1066 machine__fprintf_vmlinux_path(machine, stderr); 1067 } else 1068 pr_err(".\n"); 1069 exit(1); 1070 } 1071 } 1072 1073 syme = symbol__priv(al.sym); 1074 if (!syme->skip) { 1075 syme->count[counter]++; 1076 syme->origin = origin; 1077 record_precise_ip(syme, counter, ip); 1078 pthread_mutex_lock(&active_symbols_lock); 1079 if (list_empty(&syme->node) || !syme->node.next) 1080 __list_insert_active_sym(syme); 1081 pthread_mutex_unlock(&active_symbols_lock); 1082 } 1083 } 1084 1085 struct mmap_data { 1086 int counter; 1087 void *base; 1088 int mask; 1089 unsigned int prev; 1090 }; 1091 1092 static unsigned int mmap_read_head(struct mmap_data *md) 1093 { 1094 struct perf_event_mmap_page *pc = md->base; 1095 int head; 1096 1097 head = pc->data_head; 1098 rmb(); 1099 1100 return head; 1101 } 1102 1103 static void perf_session__mmap_read_counter(struct perf_session *self, 1104 struct mmap_data *md) 1105 { 1106 unsigned int head = mmap_read_head(md); 1107 unsigned int old = md->prev; 1108 unsigned char *data = md->base + page_size; 1109 int diff; 1110 1111 /* 1112 * If we're further behind than half the buffer, there's a chance 1113 * the writer will bite our tail and mess up the samples under us. 1114 * 1115 * If we somehow ended up ahead of the head, we got messed up. 1116 * 1117 * In either case, truncate and restart at head. 1118 */ 1119 diff = head - old; 1120 if (diff > md->mask / 2 || diff < 0) { 1121 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); 1122 1123 /* 1124 * head points to a known good entry, start there. 1125 */ 1126 old = head; 1127 } 1128 1129 for (; old != head;) { 1130 event_t *event = (event_t *)&data[old & md->mask]; 1131 1132 event_t event_copy; 1133 1134 size_t size = event->header.size; 1135 1136 /* 1137 * Event straddles the mmap boundary -- header should always 1138 * be inside due to u64 alignment of output. 1139 */ 1140 if ((old & md->mask) + size != ((old + size) & md->mask)) { 1141 unsigned int offset = old; 1142 unsigned int len = min(sizeof(*event), size), cpy; 1143 void *dst = &event_copy; 1144 1145 do { 1146 cpy = min(md->mask + 1 - (offset & md->mask), len); 1147 memcpy(dst, &data[offset & md->mask], cpy); 1148 offset += cpy; 1149 dst += cpy; 1150 len -= cpy; 1151 } while (len); 1152 1153 event = &event_copy; 1154 } 1155 1156 if (event->header.type == PERF_RECORD_SAMPLE) 1157 event__process_sample(event, self, md->counter); 1158 else 1159 event__process(event, self); 1160 old += size; 1161 } 1162 1163 md->prev = old; 1164 } 1165 1166 static struct pollfd *event_array; 1167 static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; 1168 1169 static void perf_session__mmap_read(struct perf_session *self) 1170 { 1171 int i, counter, thread_index; 1172 1173 for (i = 0; i < nr_cpus; i++) { 1174 for (counter = 0; counter < nr_counters; counter++) 1175 for (thread_index = 0; 1176 thread_index < thread_num; 1177 thread_index++) { 1178 perf_session__mmap_read_counter(self, 1179 &mmap_array[i][counter][thread_index]); 1180 } 1181 } 1182 } 1183 1184 int nr_poll; 1185 int group_fd; 1186 1187 static void start_counter(int i, int counter) 1188 { 1189 struct perf_event_attr *attr; 1190 int cpu; 1191 int thread_index; 1192 1193 cpu = profile_cpu; 1194 if (target_tid == -1 && profile_cpu == -1) 1195 cpu = cpumap[i]; 1196 1197 attr = attrs + counter; 1198 1199 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 1200 1201 if (freq) { 1202 attr->sample_type |= PERF_SAMPLE_PERIOD; 1203 attr->freq = 1; 1204 attr->sample_freq = freq; 1205 } 1206 1207 attr->inherit = (cpu < 0) && inherit; 1208 attr->mmap = 1; 1209 1210 for (thread_index = 0; thread_index < thread_num; thread_index++) { 1211 try_again: 1212 fd[i][counter][thread_index] = sys_perf_event_open(attr, 1213 all_tids[thread_index], cpu, group_fd, 0); 1214 1215 if (fd[i][counter][thread_index] < 0) { 1216 int err = errno; 1217 1218 if (err == EPERM || err == EACCES) 1219 die("No permission - are you root?\n"); 1220 /* 1221 * If it's cycles then fall back to hrtimer 1222 * based cpu-clock-tick sw counter, which 1223 * is always available even if no PMU support: 1224 */ 1225 if (attr->type == PERF_TYPE_HARDWARE 1226 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 1227 1228 if (verbose) 1229 warning(" ... trying to fall back to cpu-clock-ticks\n"); 1230 1231 attr->type = PERF_TYPE_SOFTWARE; 1232 attr->config = PERF_COUNT_SW_CPU_CLOCK; 1233 goto try_again; 1234 } 1235 printf("\n"); 1236 error("perfcounter syscall returned with %d (%s)\n", 1237 fd[i][counter][thread_index], strerror(err)); 1238 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 1239 exit(-1); 1240 } 1241 assert(fd[i][counter][thread_index] >= 0); 1242 fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK); 1243 1244 /* 1245 * First counter acts as the group leader: 1246 */ 1247 if (group && group_fd == -1) 1248 group_fd = fd[i][counter][thread_index]; 1249 1250 event_array[nr_poll].fd = fd[i][counter][thread_index]; 1251 event_array[nr_poll].events = POLLIN; 1252 nr_poll++; 1253 1254 mmap_array[i][counter][thread_index].counter = counter; 1255 mmap_array[i][counter][thread_index].prev = 0; 1256 mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1; 1257 mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, 1258 PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0); 1259 if (mmap_array[i][counter][thread_index].base == MAP_FAILED) 1260 die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 1261 } 1262 } 1263 1264 static int __cmd_top(void) 1265 { 1266 pthread_t thread; 1267 int i, counter; 1268 int ret; 1269 /* 1270 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this 1271 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. 1272 */ 1273 struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false); 1274 if (session == NULL) 1275 return -ENOMEM; 1276 1277 if (target_tid != -1) 1278 event__synthesize_thread(target_tid, event__process, session); 1279 else 1280 event__synthesize_threads(event__process, session); 1281 1282 for (i = 0; i < nr_cpus; i++) { 1283 group_fd = -1; 1284 for (counter = 0; counter < nr_counters; counter++) 1285 start_counter(i, counter); 1286 } 1287 1288 /* Wait for a minimal set of events before starting the snapshot */ 1289 poll(&event_array[0], nr_poll, 100); 1290 1291 perf_session__mmap_read(session); 1292 1293 if (pthread_create(&thread, NULL, display_thread, session)) { 1294 printf("Could not create display thread.\n"); 1295 exit(-1); 1296 } 1297 1298 if (realtime_prio) { 1299 struct sched_param param; 1300 1301 param.sched_priority = realtime_prio; 1302 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1303 printf("Could not set realtime priority.\n"); 1304 exit(-1); 1305 } 1306 } 1307 1308 while (1) { 1309 int hits = samples; 1310 1311 perf_session__mmap_read(session); 1312 1313 if (hits == samples) 1314 ret = poll(event_array, nr_poll, 100); 1315 } 1316 1317 return 0; 1318 } 1319 1320 static const char * const top_usage[] = { 1321 "perf top [<options>]", 1322 NULL 1323 }; 1324 1325 static const struct option options[] = { 1326 OPT_CALLBACK('e', "event", NULL, "event", 1327 "event selector. use 'perf list' to list available events", 1328 parse_events), 1329 OPT_INTEGER('c', "count", &default_interval, 1330 "event period to sample"), 1331 OPT_INTEGER('p', "pid", &target_pid, 1332 "profile events on existing process id"), 1333 OPT_INTEGER('t', "tid", &target_tid, 1334 "profile events on existing thread id"), 1335 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1336 "system-wide collection from all CPUs"), 1337 OPT_STRING('C', "cpu", &cpu_list, "cpu", 1338 "list of cpus to monitor"), 1339 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 1340 "file", "vmlinux pathname"), 1341 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, 1342 "hide kernel symbols"), 1343 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 1344 OPT_INTEGER('r', "realtime", &realtime_prio, 1345 "collect data with this RT SCHED_FIFO priority"), 1346 OPT_INTEGER('d', "delay", &delay_secs, 1347 "number of seconds to delay between refreshes"), 1348 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 1349 "dump the symbol table used for profiling"), 1350 OPT_INTEGER('f', "count-filter", &count_filter, 1351 "only display functions with more events than this"), 1352 OPT_BOOLEAN('g', "group", &group, 1353 "put the counters into a counter group"), 1354 OPT_BOOLEAN('i', "inherit", &inherit, 1355 "child tasks inherit counters"), 1356 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 1357 "symbol to annotate"), 1358 OPT_BOOLEAN('z', "zero", &zero, 1359 "zero history across updates"), 1360 OPT_INTEGER('F', "freq", &freq, 1361 "profile at this frequency"), 1362 OPT_INTEGER('E', "entries", &print_entries, 1363 "display this many functions"), 1364 OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols, 1365 "hide user symbols"), 1366 OPT_INCR('v', "verbose", &verbose, 1367 "be more verbose (show counter open errors, etc)"), 1368 OPT_END() 1369 }; 1370 1371 int cmd_top(int argc, const char **argv, const char *prefix __used) 1372 { 1373 int counter; 1374 int i,j; 1375 1376 page_size = sysconf(_SC_PAGE_SIZE); 1377 1378 argc = parse_options(argc, argv, options, top_usage, 0); 1379 if (argc) 1380 usage_with_options(top_usage, options); 1381 1382 if (target_pid != -1) { 1383 target_tid = target_pid; 1384 thread_num = find_all_tid(target_pid, &all_tids); 1385 if (thread_num <= 0) { 1386 fprintf(stderr, "Can't find all threads of pid %d\n", 1387 target_pid); 1388 usage_with_options(top_usage, options); 1389 } 1390 } else { 1391 all_tids=malloc(sizeof(pid_t)); 1392 if (!all_tids) 1393 return -ENOMEM; 1394 1395 all_tids[0] = target_tid; 1396 thread_num = 1; 1397 } 1398 1399 for (i = 0; i < MAX_NR_CPUS; i++) { 1400 for (j = 0; j < MAX_COUNTERS; j++) { 1401 fd[i][j] = malloc(sizeof(int)*thread_num); 1402 mmap_array[i][j] = zalloc( 1403 sizeof(struct mmap_data)*thread_num); 1404 if (!fd[i][j] || !mmap_array[i][j]) 1405 return -ENOMEM; 1406 } 1407 } 1408 event_array = malloc( 1409 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); 1410 if (!event_array) 1411 return -ENOMEM; 1412 1413 /* CPU and PID are mutually exclusive */ 1414 if (target_tid > 0 && cpu_list) { 1415 printf("WARNING: PID switch overriding CPU\n"); 1416 sleep(1); 1417 cpu_list = NULL; 1418 } 1419 1420 if (!nr_counters) 1421 nr_counters = 1; 1422 1423 symbol_conf.priv_size = (sizeof(struct sym_entry) + 1424 (nr_counters + 1) * sizeof(unsigned long)); 1425 1426 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); 1427 if (symbol__init() < 0) 1428 return -1; 1429 1430 if (delay_secs < 1) 1431 delay_secs = 1; 1432 1433 /* 1434 * User specified count overrides default frequency. 1435 */ 1436 if (default_interval) 1437 freq = 0; 1438 else if (freq) { 1439 default_interval = freq; 1440 } else { 1441 fprintf(stderr, "frequency and count are zero, aborting\n"); 1442 exit(EXIT_FAILURE); 1443 } 1444 1445 /* 1446 * Fill in the ones not specifically initialized via -c: 1447 */ 1448 for (counter = 0; counter < nr_counters; counter++) { 1449 if (attrs[counter].sample_period) 1450 continue; 1451 1452 attrs[counter].sample_period = default_interval; 1453 } 1454 1455 if (target_tid != -1) 1456 nr_cpus = 1; 1457 else 1458 nr_cpus = read_cpu_map(cpu_list); 1459 1460 if (nr_cpus < 1) 1461 usage_with_options(top_usage, options); 1462 1463 get_term_dimensions(&winsize); 1464 if (print_entries == 0) { 1465 update_print_entries(&winsize); 1466 signal(SIGWINCH, sig_winch_handler); 1467 } 1468 1469 return __cmd_top(); 1470 } 1471