1 /* 2 * builtin-top.c 3 * 4 * Builtin top command: Display a continuously updated profile of 5 * any workload, CPU or specific PID. 6 * 7 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 8 * 9 * Improvements and fixes by: 10 * 11 * Arjan van de Ven <arjan@linux.intel.com> 12 * Yanmin Zhang <yanmin.zhang@intel.com> 13 * Wu Fengguang <fengguang.wu@intel.com> 14 * Mike Galbraith <efault@gmx.de> 15 * Paul Mackerras <paulus@samba.org> 16 * 17 * Released under the GPL v2. (and only v2, not any later version) 18 */ 19 #include "builtin.h" 20 21 #include "perf.h" 22 23 #include "util/symbol.h" 24 #include "util/color.h" 25 #include "util/util.h" 26 #include <linux/rbtree.h> 27 #include "util/parse-options.h" 28 #include "util/parse-events.h" 29 30 #include "util/debug.h" 31 32 #include <assert.h> 33 #include <fcntl.h> 34 35 #include <stdio.h> 36 #include <termios.h> 37 #include <unistd.h> 38 39 #include <errno.h> 40 #include <time.h> 41 #include <sched.h> 42 #include <pthread.h> 43 44 #include <sys/syscall.h> 45 #include <sys/ioctl.h> 46 #include <sys/poll.h> 47 #include <sys/prctl.h> 48 #include <sys/wait.h> 49 #include <sys/uio.h> 50 #include <sys/mman.h> 51 52 #include <linux/unistd.h> 53 #include <linux/types.h> 54 55 static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 56 57 static int system_wide = 0; 58 59 static int default_interval = 100000; 60 61 static int count_filter = 5; 62 static int print_entries = 15; 63 64 static int target_pid = -1; 65 static int inherit = 0; 66 static int profile_cpu = -1; 67 static int nr_cpus = 0; 68 static unsigned int realtime_prio = 0; 69 static int group = 0; 70 static unsigned int page_size; 71 static unsigned int mmap_pages = 16; 72 static int freq = 0; 73 74 static int delay_secs = 2; 75 static int zero; 76 static int dump_symtab; 77 78 /* 79 * Source 80 */ 81 82 struct source_line { 83 u64 eip; 84 unsigned long count[MAX_COUNTERS]; 85 char *line; 86 struct source_line *next; 87 }; 88 89 static char *sym_filter = NULL; 90 struct sym_entry *sym_filter_entry = NULL; 91 static int sym_pcnt_filter = 5; 92 static int sym_counter = 0; 93 static int display_weighted = -1; 94 95 /* 96 * Symbols 97 */ 98 99 static u64 min_ip; 100 static u64 max_ip = -1ll; 101 102 struct sym_entry { 103 struct rb_node rb_node; 104 struct list_head node; 105 unsigned long count[MAX_COUNTERS]; 106 unsigned long snap_count; 107 double weight; 108 int skip; 109 struct source_line *source; 110 struct source_line *lines; 111 struct source_line **lines_tail; 112 pthread_mutex_t source_lock; 113 }; 114 115 /* 116 * Source functions 117 */ 118 119 static void parse_source(struct sym_entry *syme) 120 { 121 struct symbol *sym; 122 struct module *module; 123 struct section *section = NULL; 124 FILE *file; 125 char command[PATH_MAX*2]; 126 const char *path = vmlinux_name; 127 u64 start, end, len; 128 129 if (!syme) 130 return; 131 132 if (syme->lines) { 133 pthread_mutex_lock(&syme->source_lock); 134 goto out_assign; 135 } 136 137 sym = (struct symbol *)(syme + 1); 138 module = sym->module; 139 140 if (module) 141 path = module->path; 142 if (!path) 143 return; 144 145 start = sym->obj_start; 146 if (!start) 147 start = sym->start; 148 149 if (module) { 150 section = module->sections->find_section(module->sections, ".text"); 151 if (section) 152 start -= section->vma; 153 } 154 155 end = start + sym->end - sym->start + 1; 156 len = sym->end - sym->start; 157 158 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); 159 160 file = popen(command, "r"); 161 if (!file) 162 return; 163 164 pthread_mutex_lock(&syme->source_lock); 165 syme->lines_tail = &syme->lines; 166 while (!feof(file)) { 167 struct source_line *src; 168 size_t dummy = 0; 169 char *c; 170 171 src = malloc(sizeof(struct source_line)); 172 assert(src != NULL); 173 memset(src, 0, sizeof(struct source_line)); 174 175 if (getline(&src->line, &dummy, file) < 0) 176 break; 177 if (!src->line) 178 break; 179 180 c = strchr(src->line, '\n'); 181 if (c) 182 *c = 0; 183 184 src->next = NULL; 185 *syme->lines_tail = src; 186 syme->lines_tail = &src->next; 187 188 if (strlen(src->line)>8 && src->line[8] == ':') { 189 src->eip = strtoull(src->line, NULL, 16); 190 if (section) 191 src->eip += section->vma; 192 } 193 if (strlen(src->line)>8 && src->line[16] == ':') { 194 src->eip = strtoull(src->line, NULL, 16); 195 if (section) 196 src->eip += section->vma; 197 } 198 } 199 pclose(file); 200 out_assign: 201 sym_filter_entry = syme; 202 pthread_mutex_unlock(&syme->source_lock); 203 } 204 205 static void __zero_source_counters(struct sym_entry *syme) 206 { 207 int i; 208 struct source_line *line; 209 210 line = syme->lines; 211 while (line) { 212 for (i = 0; i < nr_counters; i++) 213 line->count[i] = 0; 214 line = line->next; 215 } 216 } 217 218 static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) 219 { 220 struct source_line *line; 221 222 if (syme != sym_filter_entry) 223 return; 224 225 if (pthread_mutex_trylock(&syme->source_lock)) 226 return; 227 228 if (!syme->source) 229 goto out_unlock; 230 231 for (line = syme->lines; line; line = line->next) { 232 if (line->eip == ip) { 233 line->count[counter]++; 234 break; 235 } 236 if (line->eip > ip) 237 break; 238 } 239 out_unlock: 240 pthread_mutex_unlock(&syme->source_lock); 241 } 242 243 static void lookup_sym_source(struct sym_entry *syme) 244 { 245 struct symbol *symbol = (struct symbol *)(syme + 1); 246 struct source_line *line; 247 char pattern[PATH_MAX]; 248 char *idx; 249 250 sprintf(pattern, "<%s>:", symbol->name); 251 252 if (symbol->module) { 253 idx = strstr(pattern, "\t"); 254 if (idx) 255 *idx = 0; 256 } 257 258 pthread_mutex_lock(&syme->source_lock); 259 for (line = syme->lines; line; line = line->next) { 260 if (strstr(line->line, pattern)) { 261 syme->source = line; 262 break; 263 } 264 } 265 pthread_mutex_unlock(&syme->source_lock); 266 } 267 268 static void show_lines(struct source_line *queue, int count, int total) 269 { 270 int i; 271 struct source_line *line; 272 273 line = queue; 274 for (i = 0; i < count; i++) { 275 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; 276 277 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); 278 line = line->next; 279 } 280 } 281 282 #define TRACE_COUNT 3 283 284 static void show_details(struct sym_entry *syme) 285 { 286 struct symbol *symbol; 287 struct source_line *line; 288 struct source_line *line_queue = NULL; 289 int displayed = 0; 290 int line_queue_count = 0, total = 0, more = 0; 291 292 if (!syme) 293 return; 294 295 if (!syme->source) 296 lookup_sym_source(syme); 297 298 if (!syme->source) 299 return; 300 301 symbol = (struct symbol *)(syme + 1); 302 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); 303 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 304 305 pthread_mutex_lock(&syme->source_lock); 306 line = syme->source; 307 while (line) { 308 total += line->count[sym_counter]; 309 line = line->next; 310 } 311 312 line = syme->source; 313 while (line) { 314 float pcnt = 0.0; 315 316 if (!line_queue_count) 317 line_queue = line; 318 line_queue_count++; 319 320 if (line->count[sym_counter]) 321 pcnt = 100.0 * line->count[sym_counter] / (float)total; 322 if (pcnt >= (float)sym_pcnt_filter) { 323 if (displayed <= print_entries) 324 show_lines(line_queue, line_queue_count, total); 325 else more++; 326 displayed += line_queue_count; 327 line_queue_count = 0; 328 line_queue = NULL; 329 } else if (line_queue_count > TRACE_COUNT) { 330 line_queue = line_queue->next; 331 line_queue_count--; 332 } 333 334 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; 335 line = line->next; 336 } 337 pthread_mutex_unlock(&syme->source_lock); 338 if (more) 339 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 340 } 341 342 /* 343 * Symbols will be added here in record_ip and will get out 344 * after decayed. 345 */ 346 static LIST_HEAD(active_symbols); 347 static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; 348 349 /* 350 * Ordering weight: count-1 * count-2 * ... / count-n 351 */ 352 static double sym_weight(const struct sym_entry *sym) 353 { 354 double weight = sym->snap_count; 355 int counter; 356 357 if (!display_weighted) 358 return weight; 359 360 for (counter = 1; counter < nr_counters-1; counter++) 361 weight *= sym->count[counter]; 362 363 weight /= (sym->count[counter] + 1); 364 365 return weight; 366 } 367 368 static long samples; 369 static long userspace_samples; 370 static const char CONSOLE_CLEAR[] = "[H[2J"; 371 372 static void __list_insert_active_sym(struct sym_entry *syme) 373 { 374 list_add(&syme->node, &active_symbols); 375 } 376 377 static void list_remove_active_sym(struct sym_entry *syme) 378 { 379 pthread_mutex_lock(&active_symbols_lock); 380 list_del_init(&syme->node); 381 pthread_mutex_unlock(&active_symbols_lock); 382 } 383 384 static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) 385 { 386 struct rb_node **p = &tree->rb_node; 387 struct rb_node *parent = NULL; 388 struct sym_entry *iter; 389 390 while (*p != NULL) { 391 parent = *p; 392 iter = rb_entry(parent, struct sym_entry, rb_node); 393 394 if (se->weight > iter->weight) 395 p = &(*p)->rb_left; 396 else 397 p = &(*p)->rb_right; 398 } 399 400 rb_link_node(&se->rb_node, parent, p); 401 rb_insert_color(&se->rb_node, tree); 402 } 403 404 static void print_sym_table(void) 405 { 406 int printed = 0, j; 407 int counter, snap = !display_weighted ? sym_counter : 0; 408 float samples_per_sec = samples/delay_secs; 409 float ksamples_per_sec = (samples-userspace_samples)/delay_secs; 410 float sum_ksamples = 0.0; 411 struct sym_entry *syme, *n; 412 struct rb_root tmp = RB_ROOT; 413 struct rb_node *nd; 414 415 samples = userspace_samples = 0; 416 417 /* Sort the active symbols */ 418 pthread_mutex_lock(&active_symbols_lock); 419 syme = list_entry(active_symbols.next, struct sym_entry, node); 420 pthread_mutex_unlock(&active_symbols_lock); 421 422 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 423 syme->snap_count = syme->count[snap]; 424 if (syme->snap_count != 0) { 425 syme->weight = sym_weight(syme); 426 rb_insert_active_sym(&tmp, syme); 427 sum_ksamples += syme->snap_count; 428 429 for (j = 0; j < nr_counters; j++) 430 syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8; 431 } else 432 list_remove_active_sym(syme); 433 } 434 435 puts(CONSOLE_CLEAR); 436 437 printf( 438 "------------------------------------------------------------------------------\n"); 439 printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", 440 samples_per_sec, 441 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 442 443 if (nr_counters == 1 || !display_weighted) { 444 printf("%Ld", (u64)attrs[0].sample_period); 445 if (freq) 446 printf("Hz "); 447 else 448 printf(" "); 449 } 450 451 if (!display_weighted) 452 printf("%s", event_name(sym_counter)); 453 else for (counter = 0; counter < nr_counters; counter++) { 454 if (counter) 455 printf("/"); 456 457 printf("%s", event_name(counter)); 458 } 459 460 printf( "], "); 461 462 if (target_pid != -1) 463 printf(" (target_pid: %d", target_pid); 464 else 465 printf(" (all"); 466 467 if (profile_cpu != -1) 468 printf(", cpu: %d)\n", profile_cpu); 469 else { 470 if (target_pid != -1) 471 printf(")\n"); 472 else 473 printf(", %d CPUs)\n", nr_cpus); 474 } 475 476 printf("------------------------------------------------------------------------------\n\n"); 477 478 if (sym_filter_entry) { 479 show_details(sym_filter_entry); 480 return; 481 } 482 483 if (nr_counters == 1) 484 printf(" samples pcnt"); 485 else 486 printf(" weight samples pcnt"); 487 488 if (verbose) 489 printf(" RIP "); 490 printf(" kernel function\n"); 491 printf(" %s _______ _____", 492 nr_counters == 1 ? " " : "______"); 493 if (verbose) 494 printf(" ________________"); 495 printf(" _______________\n\n"); 496 497 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 498 struct symbol *sym; 499 double pcnt; 500 501 syme = rb_entry(nd, struct sym_entry, rb_node); 502 sym = (struct symbol *)(syme + 1); 503 504 if (++printed > print_entries || (int)syme->snap_count < count_filter) 505 continue; 506 507 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 508 sum_ksamples)); 509 510 if (nr_counters == 1 || !display_weighted) 511 printf("%20.2f - ", syme->weight); 512 else 513 printf("%9.1f %10ld - ", syme->weight, syme->snap_count); 514 515 percent_color_fprintf(stdout, "%4.1f%%", pcnt); 516 if (verbose) 517 printf(" - %016llx", sym->start); 518 printf(" : %s", sym->name); 519 if (sym->module) 520 printf("\t[%s]", sym->module->name); 521 printf("\n"); 522 } 523 } 524 525 static void prompt_integer(int *target, const char *msg) 526 { 527 char *buf = malloc(0), *p; 528 size_t dummy = 0; 529 int tmp; 530 531 fprintf(stdout, "\n%s: ", msg); 532 if (getline(&buf, &dummy, stdin) < 0) 533 return; 534 535 p = strchr(buf, '\n'); 536 if (p) 537 *p = 0; 538 539 p = buf; 540 while(*p) { 541 if (!isdigit(*p)) 542 goto out_free; 543 p++; 544 } 545 tmp = strtoul(buf, NULL, 10); 546 *target = tmp; 547 out_free: 548 free(buf); 549 } 550 551 static void prompt_percent(int *target, const char *msg) 552 { 553 int tmp = 0; 554 555 prompt_integer(&tmp, msg); 556 if (tmp >= 0 && tmp <= 100) 557 *target = tmp; 558 } 559 560 static void prompt_symbol(struct sym_entry **target, const char *msg) 561 { 562 char *buf = malloc(0), *p; 563 struct sym_entry *syme = *target, *n, *found = NULL; 564 size_t dummy = 0; 565 566 /* zero counters of active symbol */ 567 if (syme) { 568 pthread_mutex_lock(&syme->source_lock); 569 __zero_source_counters(syme); 570 *target = NULL; 571 pthread_mutex_unlock(&syme->source_lock); 572 } 573 574 fprintf(stdout, "\n%s: ", msg); 575 if (getline(&buf, &dummy, stdin) < 0) 576 goto out_free; 577 578 p = strchr(buf, '\n'); 579 if (p) 580 *p = 0; 581 582 pthread_mutex_lock(&active_symbols_lock); 583 syme = list_entry(active_symbols.next, struct sym_entry, node); 584 pthread_mutex_unlock(&active_symbols_lock); 585 586 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 587 struct symbol *sym = (struct symbol *)(syme + 1); 588 589 if (!strcmp(buf, sym->name)) { 590 found = syme; 591 break; 592 } 593 } 594 595 if (!found) { 596 fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); 597 sleep(1); 598 return; 599 } else 600 parse_source(found); 601 602 out_free: 603 free(buf); 604 } 605 606 static void print_mapped_keys(void) 607 { 608 char *name = NULL; 609 610 if (sym_filter_entry) { 611 struct symbol *sym = (struct symbol *)(sym_filter_entry+1); 612 name = sym->name; 613 } 614 615 fprintf(stdout, "\nMapped keys:\n"); 616 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); 617 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 618 619 if (nr_counters > 1) 620 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); 621 622 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 623 624 if (vmlinux_name) { 625 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 626 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 627 fprintf(stdout, "\t[S] stop annotation.\n"); 628 } 629 630 if (nr_counters > 1) 631 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 632 633 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); 634 fprintf(stdout, "\t[qQ] quit.\n"); 635 } 636 637 static int key_mapped(int c) 638 { 639 switch (c) { 640 case 'd': 641 case 'e': 642 case 'f': 643 case 'z': 644 case 'q': 645 case 'Q': 646 return 1; 647 case 'E': 648 case 'w': 649 return nr_counters > 1 ? 1 : 0; 650 case 'F': 651 case 's': 652 case 'S': 653 return vmlinux_name ? 1 : 0; 654 default: 655 break; 656 } 657 658 return 0; 659 } 660 661 static void handle_keypress(int c) 662 { 663 if (!key_mapped(c)) { 664 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 665 struct termios tc, save; 666 667 print_mapped_keys(); 668 fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); 669 fflush(stdout); 670 671 tcgetattr(0, &save); 672 tc = save; 673 tc.c_lflag &= ~(ICANON | ECHO); 674 tc.c_cc[VMIN] = 0; 675 tc.c_cc[VTIME] = 0; 676 tcsetattr(0, TCSANOW, &tc); 677 678 poll(&stdin_poll, 1, -1); 679 c = getc(stdin); 680 681 tcsetattr(0, TCSAFLUSH, &save); 682 if (!key_mapped(c)) 683 return; 684 } 685 686 switch (c) { 687 case 'd': 688 prompt_integer(&delay_secs, "Enter display delay"); 689 if (delay_secs < 1) 690 delay_secs = 1; 691 break; 692 case 'e': 693 prompt_integer(&print_entries, "Enter display entries (lines)"); 694 break; 695 case 'E': 696 if (nr_counters > 1) { 697 int i; 698 699 fprintf(stderr, "\nAvailable events:"); 700 for (i = 0; i < nr_counters; i++) 701 fprintf(stderr, "\n\t%d %s", i, event_name(i)); 702 703 prompt_integer(&sym_counter, "Enter details event counter"); 704 705 if (sym_counter >= nr_counters) { 706 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); 707 sym_counter = 0; 708 sleep(1); 709 } 710 } else sym_counter = 0; 711 break; 712 case 'f': 713 prompt_integer(&count_filter, "Enter display event count filter"); 714 break; 715 case 'F': 716 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 717 break; 718 case 'q': 719 case 'Q': 720 printf("exiting.\n"); 721 exit(0); 722 case 's': 723 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 724 break; 725 case 'S': 726 if (!sym_filter_entry) 727 break; 728 else { 729 struct sym_entry *syme = sym_filter_entry; 730 731 pthread_mutex_lock(&syme->source_lock); 732 sym_filter_entry = NULL; 733 __zero_source_counters(syme); 734 pthread_mutex_unlock(&syme->source_lock); 735 } 736 break; 737 case 'w': 738 display_weighted = ~display_weighted; 739 break; 740 case 'z': 741 zero = ~zero; 742 break; 743 default: 744 break; 745 } 746 } 747 748 static void *display_thread(void *arg __used) 749 { 750 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 751 struct termios tc, save; 752 int delay_msecs, c; 753 754 tcgetattr(0, &save); 755 tc = save; 756 tc.c_lflag &= ~(ICANON | ECHO); 757 tc.c_cc[VMIN] = 0; 758 tc.c_cc[VTIME] = 0; 759 760 repeat: 761 delay_msecs = delay_secs * 1000; 762 tcsetattr(0, TCSANOW, &tc); 763 /* trash return*/ 764 getc(stdin); 765 766 do { 767 print_sym_table(); 768 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 769 770 c = getc(stdin); 771 tcsetattr(0, TCSAFLUSH, &save); 772 773 handle_keypress(c); 774 goto repeat; 775 776 return NULL; 777 } 778 779 /* Tag samples to be skipped. */ 780 static const char *skip_symbols[] = { 781 "default_idle", 782 "cpu_idle", 783 "enter_idle", 784 "exit_idle", 785 "mwait_idle", 786 "mwait_idle_with_hints", 787 "poll_idle", 788 "ppc64_runlatch_off", 789 "pseries_dedicated_idle_sleep", 790 NULL 791 }; 792 793 static int symbol_filter(struct dso *self, struct symbol *sym) 794 { 795 struct sym_entry *syme; 796 const char *name = sym->name; 797 int i; 798 799 /* 800 * ppc64 uses function descriptors and appends a '.' to the 801 * start of every instruction address. Remove it. 802 */ 803 if (name[0] == '.') 804 name++; 805 806 if (!strcmp(name, "_text") || 807 !strcmp(name, "_etext") || 808 !strcmp(name, "_sinittext") || 809 !strncmp("init_module", name, 11) || 810 !strncmp("cleanup_module", name, 14) || 811 strstr(name, "_text_start") || 812 strstr(name, "_text_end")) 813 return 1; 814 815 syme = dso__sym_priv(self, sym); 816 pthread_mutex_init(&syme->source_lock, NULL); 817 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) 818 sym_filter_entry = syme; 819 820 for (i = 0; skip_symbols[i]; i++) { 821 if (!strcmp(skip_symbols[i], name)) { 822 syme->skip = 1; 823 break; 824 } 825 } 826 827 return 0; 828 } 829 830 static int parse_symbols(void) 831 { 832 struct rb_node *node; 833 struct symbol *sym; 834 int use_modules = vmlinux_name ? 1 : 0; 835 836 kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); 837 if (kernel_dso == NULL) 838 return -1; 839 840 if (dso__load_kernel(kernel_dso, vmlinux_name, symbol_filter, verbose, use_modules) <= 0) 841 goto out_delete_dso; 842 843 node = rb_first(&kernel_dso->syms); 844 sym = rb_entry(node, struct symbol, rb_node); 845 min_ip = sym->start; 846 847 node = rb_last(&kernel_dso->syms); 848 sym = rb_entry(node, struct symbol, rb_node); 849 max_ip = sym->end; 850 851 if (dump_symtab) 852 dso__fprintf(kernel_dso, stderr); 853 854 return 0; 855 856 out_delete_dso: 857 dso__delete(kernel_dso); 858 kernel_dso = NULL; 859 return -1; 860 } 861 862 /* 863 * Binary search in the histogram table and record the hit: 864 */ 865 static void record_ip(u64 ip, int counter) 866 { 867 struct symbol *sym = dso__find_symbol(kernel_dso, ip); 868 869 if (sym != NULL) { 870 struct sym_entry *syme = dso__sym_priv(kernel_dso, sym); 871 872 if (!syme->skip) { 873 syme->count[counter]++; 874 record_precise_ip(syme, counter, ip); 875 pthread_mutex_lock(&active_symbols_lock); 876 if (list_empty(&syme->node) || !syme->node.next) 877 __list_insert_active_sym(syme); 878 pthread_mutex_unlock(&active_symbols_lock); 879 return; 880 } 881 } 882 883 samples--; 884 } 885 886 static void process_event(u64 ip, int counter, int user) 887 { 888 samples++; 889 890 if (user) { 891 userspace_samples++; 892 return; 893 } 894 895 record_ip(ip, counter); 896 } 897 898 struct mmap_data { 899 int counter; 900 void *base; 901 int mask; 902 unsigned int prev; 903 }; 904 905 static unsigned int mmap_read_head(struct mmap_data *md) 906 { 907 struct perf_event_mmap_page *pc = md->base; 908 int head; 909 910 head = pc->data_head; 911 rmb(); 912 913 return head; 914 } 915 916 struct timeval last_read, this_read; 917 918 static void mmap_read_counter(struct mmap_data *md) 919 { 920 unsigned int head = mmap_read_head(md); 921 unsigned int old = md->prev; 922 unsigned char *data = md->base + page_size; 923 int diff; 924 925 gettimeofday(&this_read, NULL); 926 927 /* 928 * If we're further behind than half the buffer, there's a chance 929 * the writer will bite our tail and mess up the samples under us. 930 * 931 * If we somehow ended up ahead of the head, we got messed up. 932 * 933 * In either case, truncate and restart at head. 934 */ 935 diff = head - old; 936 if (diff > md->mask / 2 || diff < 0) { 937 struct timeval iv; 938 unsigned long msecs; 939 940 timersub(&this_read, &last_read, &iv); 941 msecs = iv.tv_sec*1000 + iv.tv_usec/1000; 942 943 fprintf(stderr, "WARNING: failed to keep up with mmap data." 944 " Last read %lu msecs ago.\n", msecs); 945 946 /* 947 * head points to a known good entry, start there. 948 */ 949 old = head; 950 } 951 952 last_read = this_read; 953 954 for (; old != head;) { 955 event_t *event = (event_t *)&data[old & md->mask]; 956 957 event_t event_copy; 958 959 size_t size = event->header.size; 960 961 /* 962 * Event straddles the mmap boundary -- header should always 963 * be inside due to u64 alignment of output. 964 */ 965 if ((old & md->mask) + size != ((old + size) & md->mask)) { 966 unsigned int offset = old; 967 unsigned int len = min(sizeof(*event), size), cpy; 968 void *dst = &event_copy; 969 970 do { 971 cpy = min(md->mask + 1 - (offset & md->mask), len); 972 memcpy(dst, &data[offset & md->mask], cpy); 973 offset += cpy; 974 dst += cpy; 975 len -= cpy; 976 } while (len); 977 978 event = &event_copy; 979 } 980 981 old += size; 982 983 if (event->header.type == PERF_RECORD_SAMPLE) { 984 int user = 985 (event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER; 986 process_event(event->ip.ip, md->counter, user); 987 } 988 } 989 990 md->prev = old; 991 } 992 993 static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; 994 static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; 995 996 static void mmap_read(void) 997 { 998 int i, counter; 999 1000 for (i = 0; i < nr_cpus; i++) { 1001 for (counter = 0; counter < nr_counters; counter++) 1002 mmap_read_counter(&mmap_array[i][counter]); 1003 } 1004 } 1005 1006 int nr_poll; 1007 int group_fd; 1008 1009 static void start_counter(int i, int counter) 1010 { 1011 struct perf_event_attr *attr; 1012 int cpu; 1013 1014 cpu = profile_cpu; 1015 if (target_pid == -1 && profile_cpu == -1) 1016 cpu = i; 1017 1018 attr = attrs + counter; 1019 1020 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 1021 attr->freq = freq; 1022 attr->inherit = (cpu < 0) && inherit; 1023 1024 try_again: 1025 fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); 1026 1027 if (fd[i][counter] < 0) { 1028 int err = errno; 1029 1030 if (err == EPERM || err == EACCES) 1031 die("No permission - are you root?\n"); 1032 /* 1033 * If it's cycles then fall back to hrtimer 1034 * based cpu-clock-tick sw counter, which 1035 * is always available even if no PMU support: 1036 */ 1037 if (attr->type == PERF_TYPE_HARDWARE 1038 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 1039 1040 if (verbose) 1041 warning(" ... trying to fall back to cpu-clock-ticks\n"); 1042 1043 attr->type = PERF_TYPE_SOFTWARE; 1044 attr->config = PERF_COUNT_SW_CPU_CLOCK; 1045 goto try_again; 1046 } 1047 printf("\n"); 1048 error("perfcounter syscall returned with %d (%s)\n", 1049 fd[i][counter], strerror(err)); 1050 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 1051 exit(-1); 1052 } 1053 assert(fd[i][counter] >= 0); 1054 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); 1055 1056 /* 1057 * First counter acts as the group leader: 1058 */ 1059 if (group && group_fd == -1) 1060 group_fd = fd[i][counter]; 1061 1062 event_array[nr_poll].fd = fd[i][counter]; 1063 event_array[nr_poll].events = POLLIN; 1064 nr_poll++; 1065 1066 mmap_array[i][counter].counter = counter; 1067 mmap_array[i][counter].prev = 0; 1068 mmap_array[i][counter].mask = mmap_pages*page_size - 1; 1069 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, 1070 PROT_READ, MAP_SHARED, fd[i][counter], 0); 1071 if (mmap_array[i][counter].base == MAP_FAILED) 1072 die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 1073 } 1074 1075 static int __cmd_top(void) 1076 { 1077 pthread_t thread; 1078 int i, counter; 1079 int ret; 1080 1081 for (i = 0; i < nr_cpus; i++) { 1082 group_fd = -1; 1083 for (counter = 0; counter < nr_counters; counter++) 1084 start_counter(i, counter); 1085 } 1086 1087 /* Wait for a minimal set of events before starting the snapshot */ 1088 poll(event_array, nr_poll, 100); 1089 1090 mmap_read(); 1091 1092 if (pthread_create(&thread, NULL, display_thread, NULL)) { 1093 printf("Could not create display thread.\n"); 1094 exit(-1); 1095 } 1096 1097 if (realtime_prio) { 1098 struct sched_param param; 1099 1100 param.sched_priority = realtime_prio; 1101 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1102 printf("Could not set realtime priority.\n"); 1103 exit(-1); 1104 } 1105 } 1106 1107 while (1) { 1108 int hits = samples; 1109 1110 mmap_read(); 1111 1112 if (hits == samples) 1113 ret = poll(event_array, nr_poll, 100); 1114 } 1115 1116 return 0; 1117 } 1118 1119 static const char * const top_usage[] = { 1120 "perf top [<options>]", 1121 NULL 1122 }; 1123 1124 static const struct option options[] = { 1125 OPT_CALLBACK('e', "event", NULL, "event", 1126 "event selector. use 'perf list' to list available events", 1127 parse_events), 1128 OPT_INTEGER('c', "count", &default_interval, 1129 "event period to sample"), 1130 OPT_INTEGER('p', "pid", &target_pid, 1131 "profile events on existing pid"), 1132 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1133 "system-wide collection from all CPUs"), 1134 OPT_INTEGER('C', "CPU", &profile_cpu, 1135 "CPU to profile on"), 1136 OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), 1137 OPT_INTEGER('m', "mmap-pages", &mmap_pages, 1138 "number of mmap data pages"), 1139 OPT_INTEGER('r', "realtime", &realtime_prio, 1140 "collect data with this RT SCHED_FIFO priority"), 1141 OPT_INTEGER('d', "delay", &delay_secs, 1142 "number of seconds to delay between refreshes"), 1143 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 1144 "dump the symbol table used for profiling"), 1145 OPT_INTEGER('f', "count-filter", &count_filter, 1146 "only display functions with more events than this"), 1147 OPT_BOOLEAN('g', "group", &group, 1148 "put the counters into a counter group"), 1149 OPT_BOOLEAN('i', "inherit", &inherit, 1150 "child tasks inherit counters"), 1151 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 1152 "symbol to annotate - requires -k option"), 1153 OPT_BOOLEAN('z', "zero", &zero, 1154 "zero history across updates"), 1155 OPT_INTEGER('F', "freq", &freq, 1156 "profile at this frequency"), 1157 OPT_INTEGER('E', "entries", &print_entries, 1158 "display this many functions"), 1159 OPT_BOOLEAN('v', "verbose", &verbose, 1160 "be more verbose (show counter open errors, etc)"), 1161 OPT_END() 1162 }; 1163 1164 int cmd_top(int argc, const char **argv, const char *prefix __used) 1165 { 1166 int counter; 1167 1168 symbol__init(); 1169 1170 page_size = sysconf(_SC_PAGE_SIZE); 1171 1172 argc = parse_options(argc, argv, options, top_usage, 0); 1173 if (argc) 1174 usage_with_options(top_usage, options); 1175 1176 if (freq) { 1177 default_interval = freq; 1178 freq = 1; 1179 } 1180 1181 /* CPU and PID are mutually exclusive */ 1182 if (target_pid != -1 && profile_cpu != -1) { 1183 printf("WARNING: PID switch overriding CPU\n"); 1184 sleep(1); 1185 profile_cpu = -1; 1186 } 1187 1188 if (!nr_counters) 1189 nr_counters = 1; 1190 1191 if (delay_secs < 1) 1192 delay_secs = 1; 1193 1194 parse_symbols(); 1195 parse_source(sym_filter_entry); 1196 1197 /* 1198 * Fill in the ones not specifically initialized via -c: 1199 */ 1200 for (counter = 0; counter < nr_counters; counter++) { 1201 if (attrs[counter].sample_period) 1202 continue; 1203 1204 attrs[counter].sample_period = default_interval; 1205 } 1206 1207 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 1208 assert(nr_cpus <= MAX_NR_CPUS); 1209 assert(nr_cpus >= 0); 1210 1211 if (target_pid != -1 || profile_cpu != -1) 1212 nr_cpus = 1; 1213 1214 return __cmd_top(); 1215 } 1216