1 /* 2 * builtin-top.c 3 * 4 * Builtin top command: Display a continuously updated profile of 5 * any workload, CPU or specific PID. 6 * 7 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 8 * 9 * Improvements and fixes by: 10 * 11 * Arjan van de Ven <arjan@linux.intel.com> 12 * Yanmin Zhang <yanmin.zhang@intel.com> 13 * Wu Fengguang <fengguang.wu@intel.com> 14 * Mike Galbraith <efault@gmx.de> 15 * Paul Mackerras <paulus@samba.org> 16 * 17 * Released under the GPL v2. (and only v2, not any later version) 18 */ 19 #include "builtin.h" 20 21 #include "perf.h" 22 23 #include "util/symbol.h" 24 #include "util/color.h" 25 #include "util/util.h" 26 #include <linux/rbtree.h> 27 #include "util/parse-options.h" 28 #include "util/parse-events.h" 29 30 #include "util/debug.h" 31 32 #include <assert.h> 33 #include <fcntl.h> 34 35 #include <stdio.h> 36 #include <termios.h> 37 #include <unistd.h> 38 39 #include <errno.h> 40 #include <time.h> 41 #include <sched.h> 42 #include <pthread.h> 43 44 #include <sys/syscall.h> 45 #include <sys/ioctl.h> 46 #include <sys/poll.h> 47 #include <sys/prctl.h> 48 #include <sys/wait.h> 49 #include <sys/uio.h> 50 #include <sys/mman.h> 51 52 #include <linux/unistd.h> 53 #include <linux/types.h> 54 55 static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 56 57 static int system_wide = 0; 58 59 static int default_interval = 100000; 60 61 static int count_filter = 5; 62 static int print_entries = 15; 63 64 static int target_pid = -1; 65 static int inherit = 0; 66 static int profile_cpu = -1; 67 static int nr_cpus = 0; 68 static unsigned int realtime_prio = 0; 69 static int group = 0; 70 static unsigned int page_size; 71 static unsigned int mmap_pages = 16; 72 static int freq = 0; 73 74 static int delay_secs = 2; 75 static int zero; 76 static int dump_symtab; 77 78 /* 79 * Source 80 */ 81 82 struct source_line { 83 u64 eip; 84 unsigned long count[MAX_COUNTERS]; 85 char *line; 86 struct source_line *next; 87 }; 88 89 static char *sym_filter = NULL; 90 struct sym_entry *sym_filter_entry = NULL; 91 static int sym_pcnt_filter = 5; 92 static int sym_counter = 0; 93 static int display_weighted = -1; 94 95 /* 96 * Symbols 97 */ 98 99 static u64 min_ip; 100 static u64 max_ip = -1ll; 101 102 struct sym_entry { 103 struct rb_node rb_node; 104 struct list_head node; 105 unsigned long count[MAX_COUNTERS]; 106 unsigned long snap_count; 107 double weight; 108 int skip; 109 struct source_line *source; 110 struct source_line *lines; 111 struct source_line **lines_tail; 112 pthread_mutex_t source_lock; 113 }; 114 115 /* 116 * Source functions 117 */ 118 119 static void parse_source(struct sym_entry *syme) 120 { 121 struct symbol *sym; 122 struct module *module; 123 struct section *section = NULL; 124 FILE *file; 125 char command[PATH_MAX*2]; 126 const char *path = vmlinux_name; 127 u64 start, end, len; 128 129 if (!syme) 130 return; 131 132 if (syme->lines) { 133 pthread_mutex_lock(&syme->source_lock); 134 goto out_assign; 135 } 136 137 sym = (struct symbol *)(syme + 1); 138 module = sym->module; 139 140 if (module) 141 path = module->path; 142 if (!path) 143 return; 144 145 start = sym->obj_start; 146 if (!start) 147 start = sym->start; 148 149 if (module) { 150 section = module->sections->find_section(module->sections, ".text"); 151 if (section) 152 start -= section->vma; 153 } 154 155 end = start + sym->end - sym->start + 1; 156 len = sym->end - sym->start; 157 158 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); 159 160 file = popen(command, "r"); 161 if (!file) 162 return; 163 164 pthread_mutex_lock(&syme->source_lock); 165 syme->lines_tail = &syme->lines; 166 while (!feof(file)) { 167 struct source_line *src; 168 size_t dummy = 0; 169 char *c; 170 171 src = malloc(sizeof(struct source_line)); 172 assert(src != NULL); 173 memset(src, 0, sizeof(struct source_line)); 174 175 if (getline(&src->line, &dummy, file) < 0) 176 break; 177 if (!src->line) 178 break; 179 180 c = strchr(src->line, '\n'); 181 if (c) 182 *c = 0; 183 184 src->next = NULL; 185 *syme->lines_tail = src; 186 syme->lines_tail = &src->next; 187 188 if (strlen(src->line)>8 && src->line[8] == ':') { 189 src->eip = strtoull(src->line, NULL, 16); 190 if (section) 191 src->eip += section->vma; 192 } 193 if (strlen(src->line)>8 && src->line[16] == ':') { 194 src->eip = strtoull(src->line, NULL, 16); 195 if (section) 196 src->eip += section->vma; 197 } 198 } 199 pclose(file); 200 out_assign: 201 sym_filter_entry = syme; 202 pthread_mutex_unlock(&syme->source_lock); 203 } 204 205 static void __zero_source_counters(struct sym_entry *syme) 206 { 207 int i; 208 struct source_line *line; 209 210 line = syme->lines; 211 while (line) { 212 for (i = 0; i < nr_counters; i++) 213 line->count[i] = 0; 214 line = line->next; 215 } 216 } 217 218 static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) 219 { 220 struct source_line *line; 221 222 if (syme != sym_filter_entry) 223 return; 224 225 if (pthread_mutex_trylock(&syme->source_lock)) 226 return; 227 228 if (!syme->source) 229 goto out_unlock; 230 231 for (line = syme->lines; line; line = line->next) { 232 if (line->eip == ip) { 233 line->count[counter]++; 234 break; 235 } 236 if (line->eip > ip) 237 break; 238 } 239 out_unlock: 240 pthread_mutex_unlock(&syme->source_lock); 241 } 242 243 static void lookup_sym_source(struct sym_entry *syme) 244 { 245 struct symbol *symbol = (struct symbol *)(syme + 1); 246 struct source_line *line; 247 char pattern[PATH_MAX]; 248 char *idx; 249 250 sprintf(pattern, "<%s>:", symbol->name); 251 252 if (symbol->module) { 253 idx = strstr(pattern, "\t"); 254 if (idx) 255 *idx = 0; 256 } 257 258 pthread_mutex_lock(&syme->source_lock); 259 for (line = syme->lines; line; line = line->next) { 260 if (strstr(line->line, pattern)) { 261 syme->source = line; 262 break; 263 } 264 } 265 pthread_mutex_unlock(&syme->source_lock); 266 } 267 268 static void show_lines(struct source_line *queue, int count, int total) 269 { 270 int i; 271 struct source_line *line; 272 273 line = queue; 274 for (i = 0; i < count; i++) { 275 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; 276 277 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); 278 line = line->next; 279 } 280 } 281 282 #define TRACE_COUNT 3 283 284 static void show_details(struct sym_entry *syme) 285 { 286 struct symbol *symbol; 287 struct source_line *line; 288 struct source_line *line_queue = NULL; 289 int displayed = 0; 290 int line_queue_count = 0, total = 0, more = 0; 291 292 if (!syme) 293 return; 294 295 if (!syme->source) 296 lookup_sym_source(syme); 297 298 if (!syme->source) 299 return; 300 301 symbol = (struct symbol *)(syme + 1); 302 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); 303 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 304 305 pthread_mutex_lock(&syme->source_lock); 306 line = syme->source; 307 while (line) { 308 total += line->count[sym_counter]; 309 line = line->next; 310 } 311 312 line = syme->source; 313 while (line) { 314 float pcnt = 0.0; 315 316 if (!line_queue_count) 317 line_queue = line; 318 line_queue_count++; 319 320 if (line->count[sym_counter]) 321 pcnt = 100.0 * line->count[sym_counter] / (float)total; 322 if (pcnt >= (float)sym_pcnt_filter) { 323 if (displayed <= print_entries) 324 show_lines(line_queue, line_queue_count, total); 325 else more++; 326 displayed += line_queue_count; 327 line_queue_count = 0; 328 line_queue = NULL; 329 } else if (line_queue_count > TRACE_COUNT) { 330 line_queue = line_queue->next; 331 line_queue_count--; 332 } 333 334 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; 335 line = line->next; 336 } 337 pthread_mutex_unlock(&syme->source_lock); 338 if (more) 339 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 340 } 341 342 /* 343 * Symbols will be added here in record_ip and will get out 344 * after decayed. 345 */ 346 static LIST_HEAD(active_symbols); 347 static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; 348 349 /* 350 * Ordering weight: count-1 * count-2 * ... / count-n 351 */ 352 static double sym_weight(const struct sym_entry *sym) 353 { 354 double weight = sym->snap_count; 355 int counter; 356 357 if (!display_weighted) 358 return weight; 359 360 for (counter = 1; counter < nr_counters-1; counter++) 361 weight *= sym->count[counter]; 362 363 weight /= (sym->count[counter] + 1); 364 365 return weight; 366 } 367 368 static long samples; 369 static long userspace_samples; 370 static const char CONSOLE_CLEAR[] = "[H[2J"; 371 372 static void __list_insert_active_sym(struct sym_entry *syme) 373 { 374 list_add(&syme->node, &active_symbols); 375 } 376 377 static void list_remove_active_sym(struct sym_entry *syme) 378 { 379 pthread_mutex_lock(&active_symbols_lock); 380 list_del_init(&syme->node); 381 pthread_mutex_unlock(&active_symbols_lock); 382 } 383 384 static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) 385 { 386 struct rb_node **p = &tree->rb_node; 387 struct rb_node *parent = NULL; 388 struct sym_entry *iter; 389 390 while (*p != NULL) { 391 parent = *p; 392 iter = rb_entry(parent, struct sym_entry, rb_node); 393 394 if (se->weight > iter->weight) 395 p = &(*p)->rb_left; 396 else 397 p = &(*p)->rb_right; 398 } 399 400 rb_link_node(&se->rb_node, parent, p); 401 rb_insert_color(&se->rb_node, tree); 402 } 403 404 static void print_sym_table(void) 405 { 406 int printed = 0, j; 407 int counter, snap = !display_weighted ? sym_counter : 0; 408 float samples_per_sec = samples/delay_secs; 409 float ksamples_per_sec = (samples-userspace_samples)/delay_secs; 410 float sum_ksamples = 0.0; 411 struct sym_entry *syme, *n; 412 struct rb_root tmp = RB_ROOT; 413 struct rb_node *nd; 414 415 samples = userspace_samples = 0; 416 417 /* Sort the active symbols */ 418 pthread_mutex_lock(&active_symbols_lock); 419 syme = list_entry(active_symbols.next, struct sym_entry, node); 420 pthread_mutex_unlock(&active_symbols_lock); 421 422 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 423 syme->snap_count = syme->count[snap]; 424 if (syme->snap_count != 0) { 425 syme->weight = sym_weight(syme); 426 rb_insert_active_sym(&tmp, syme); 427 sum_ksamples += syme->snap_count; 428 429 for (j = 0; j < nr_counters; j++) 430 syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8; 431 } else 432 list_remove_active_sym(syme); 433 } 434 435 puts(CONSOLE_CLEAR); 436 437 printf( 438 "------------------------------------------------------------------------------\n"); 439 printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", 440 samples_per_sec, 441 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 442 443 if (nr_counters == 1 || !display_weighted) { 444 printf("%Ld", (u64)attrs[0].sample_period); 445 if (freq) 446 printf("Hz "); 447 else 448 printf(" "); 449 } 450 451 if (!display_weighted) 452 printf("%s", event_name(sym_counter)); 453 else for (counter = 0; counter < nr_counters; counter++) { 454 if (counter) 455 printf("/"); 456 457 printf("%s", event_name(counter)); 458 } 459 460 printf( "], "); 461 462 if (target_pid != -1) 463 printf(" (target_pid: %d", target_pid); 464 else 465 printf(" (all"); 466 467 if (profile_cpu != -1) 468 printf(", cpu: %d)\n", profile_cpu); 469 else { 470 if (target_pid != -1) 471 printf(")\n"); 472 else 473 printf(", %d CPUs)\n", nr_cpus); 474 } 475 476 printf("------------------------------------------------------------------------------\n\n"); 477 478 if (sym_filter_entry) { 479 show_details(sym_filter_entry); 480 return; 481 } 482 483 if (nr_counters == 1) 484 printf(" samples pcnt"); 485 else 486 printf(" weight samples pcnt"); 487 488 if (verbose) 489 printf(" RIP "); 490 printf(" kernel function\n"); 491 printf(" %s _______ _____", 492 nr_counters == 1 ? " " : "______"); 493 if (verbose) 494 printf(" ________________"); 495 printf(" _______________\n\n"); 496 497 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 498 struct symbol *sym; 499 double pcnt; 500 501 syme = rb_entry(nd, struct sym_entry, rb_node); 502 sym = (struct symbol *)(syme + 1); 503 504 if (++printed > print_entries || (int)syme->snap_count < count_filter) 505 continue; 506 507 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 508 sum_ksamples)); 509 510 if (nr_counters == 1 || !display_weighted) 511 printf("%20.2f - ", syme->weight); 512 else 513 printf("%9.1f %10ld - ", syme->weight, syme->snap_count); 514 515 percent_color_fprintf(stdout, "%4.1f%%", pcnt); 516 if (verbose) 517 printf(" - %016llx", sym->start); 518 printf(" : %s", sym->name); 519 if (sym->module) 520 printf("\t[%s]", sym->module->name); 521 printf("\n"); 522 } 523 } 524 525 static void prompt_integer(int *target, const char *msg) 526 { 527 char *buf = malloc(0), *p; 528 size_t dummy = 0; 529 int tmp; 530 531 fprintf(stdout, "\n%s: ", msg); 532 if (getline(&buf, &dummy, stdin) < 0) 533 return; 534 535 p = strchr(buf, '\n'); 536 if (p) 537 *p = 0; 538 539 p = buf; 540 while(*p) { 541 if (!isdigit(*p)) 542 goto out_free; 543 p++; 544 } 545 tmp = strtoul(buf, NULL, 10); 546 *target = tmp; 547 out_free: 548 free(buf); 549 } 550 551 static void prompt_percent(int *target, const char *msg) 552 { 553 int tmp = 0; 554 555 prompt_integer(&tmp, msg); 556 if (tmp >= 0 && tmp <= 100) 557 *target = tmp; 558 } 559 560 static void prompt_symbol(struct sym_entry **target, const char *msg) 561 { 562 char *buf = malloc(0), *p; 563 struct sym_entry *syme = *target, *n, *found = NULL; 564 size_t dummy = 0; 565 566 /* zero counters of active symbol */ 567 if (syme) { 568 pthread_mutex_lock(&syme->source_lock); 569 __zero_source_counters(syme); 570 *target = NULL; 571 pthread_mutex_unlock(&syme->source_lock); 572 } 573 574 fprintf(stdout, "\n%s: ", msg); 575 if (getline(&buf, &dummy, stdin) < 0) 576 goto out_free; 577 578 p = strchr(buf, '\n'); 579 if (p) 580 *p = 0; 581 582 pthread_mutex_lock(&active_symbols_lock); 583 syme = list_entry(active_symbols.next, struct sym_entry, node); 584 pthread_mutex_unlock(&active_symbols_lock); 585 586 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 587 struct symbol *sym = (struct symbol *)(syme + 1); 588 589 if (!strcmp(buf, sym->name)) { 590 found = syme; 591 break; 592 } 593 } 594 595 if (!found) { 596 fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); 597 sleep(1); 598 return; 599 } else 600 parse_source(found); 601 602 out_free: 603 free(buf); 604 } 605 606 static void print_mapped_keys(void) 607 { 608 char *name = NULL; 609 610 if (sym_filter_entry) { 611 struct symbol *sym = (struct symbol *)(sym_filter_entry+1); 612 name = sym->name; 613 } 614 615 fprintf(stdout, "\nMapped keys:\n"); 616 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); 617 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 618 619 if (nr_counters > 1) 620 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); 621 622 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 623 624 if (vmlinux_name) { 625 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 626 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 627 fprintf(stdout, "\t[S] stop annotation.\n"); 628 } 629 630 if (nr_counters > 1) 631 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 632 633 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); 634 fprintf(stdout, "\t[qQ] quit.\n"); 635 } 636 637 static int key_mapped(int c) 638 { 639 switch (c) { 640 case 'd': 641 case 'e': 642 case 'f': 643 case 'z': 644 case 'q': 645 case 'Q': 646 return 1; 647 case 'E': 648 case 'w': 649 return nr_counters > 1 ? 1 : 0; 650 case 'F': 651 case 's': 652 case 'S': 653 return vmlinux_name ? 1 : 0; 654 default: 655 break; 656 } 657 658 return 0; 659 } 660 661 static void handle_keypress(int c) 662 { 663 if (!key_mapped(c)) { 664 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 665 struct termios tc, save; 666 667 print_mapped_keys(); 668 fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); 669 fflush(stdout); 670 671 tcgetattr(0, &save); 672 tc = save; 673 tc.c_lflag &= ~(ICANON | ECHO); 674 tc.c_cc[VMIN] = 0; 675 tc.c_cc[VTIME] = 0; 676 tcsetattr(0, TCSANOW, &tc); 677 678 poll(&stdin_poll, 1, -1); 679 c = getc(stdin); 680 681 tcsetattr(0, TCSAFLUSH, &save); 682 if (!key_mapped(c)) 683 return; 684 } 685 686 switch (c) { 687 case 'd': 688 prompt_integer(&delay_secs, "Enter display delay"); 689 break; 690 case 'e': 691 prompt_integer(&print_entries, "Enter display entries (lines)"); 692 break; 693 case 'E': 694 if (nr_counters > 1) { 695 int i; 696 697 fprintf(stderr, "\nAvailable events:"); 698 for (i = 0; i < nr_counters; i++) 699 fprintf(stderr, "\n\t%d %s", i, event_name(i)); 700 701 prompt_integer(&sym_counter, "Enter details event counter"); 702 703 if (sym_counter >= nr_counters) { 704 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); 705 sym_counter = 0; 706 sleep(1); 707 } 708 } else sym_counter = 0; 709 break; 710 case 'f': 711 prompt_integer(&count_filter, "Enter display event count filter"); 712 break; 713 case 'F': 714 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 715 break; 716 case 'q': 717 case 'Q': 718 printf("exiting.\n"); 719 exit(0); 720 case 's': 721 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 722 break; 723 case 'S': 724 if (!sym_filter_entry) 725 break; 726 else { 727 struct sym_entry *syme = sym_filter_entry; 728 729 pthread_mutex_lock(&syme->source_lock); 730 sym_filter_entry = NULL; 731 __zero_source_counters(syme); 732 pthread_mutex_unlock(&syme->source_lock); 733 } 734 break; 735 case 'w': 736 display_weighted = ~display_weighted; 737 break; 738 case 'z': 739 zero = ~zero; 740 break; 741 default: 742 break; 743 } 744 } 745 746 static void *display_thread(void *arg __used) 747 { 748 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 749 struct termios tc, save; 750 int delay_msecs, c; 751 752 tcgetattr(0, &save); 753 tc = save; 754 tc.c_lflag &= ~(ICANON | ECHO); 755 tc.c_cc[VMIN] = 0; 756 tc.c_cc[VTIME] = 0; 757 758 repeat: 759 delay_msecs = delay_secs * 1000; 760 tcsetattr(0, TCSANOW, &tc); 761 /* trash return*/ 762 getc(stdin); 763 764 do { 765 print_sym_table(); 766 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 767 768 c = getc(stdin); 769 tcsetattr(0, TCSAFLUSH, &save); 770 771 handle_keypress(c); 772 goto repeat; 773 774 return NULL; 775 } 776 777 /* Tag samples to be skipped. */ 778 static const char *skip_symbols[] = { 779 "default_idle", 780 "cpu_idle", 781 "enter_idle", 782 "exit_idle", 783 "mwait_idle", 784 "mwait_idle_with_hints", 785 "ppc64_runlatch_off", 786 "pseries_dedicated_idle_sleep", 787 NULL 788 }; 789 790 static int symbol_filter(struct dso *self, struct symbol *sym) 791 { 792 struct sym_entry *syme; 793 const char *name = sym->name; 794 int i; 795 796 /* 797 * ppc64 uses function descriptors and appends a '.' to the 798 * start of every instruction address. Remove it. 799 */ 800 if (name[0] == '.') 801 name++; 802 803 if (!strcmp(name, "_text") || 804 !strcmp(name, "_etext") || 805 !strcmp(name, "_sinittext") || 806 !strncmp("init_module", name, 11) || 807 !strncmp("cleanup_module", name, 14) || 808 strstr(name, "_text_start") || 809 strstr(name, "_text_end")) 810 return 1; 811 812 syme = dso__sym_priv(self, sym); 813 pthread_mutex_init(&syme->source_lock, NULL); 814 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) 815 sym_filter_entry = syme; 816 817 for (i = 0; skip_symbols[i]; i++) { 818 if (!strcmp(skip_symbols[i], name)) { 819 syme->skip = 1; 820 break; 821 } 822 } 823 824 return 0; 825 } 826 827 static int parse_symbols(void) 828 { 829 struct rb_node *node; 830 struct symbol *sym; 831 int use_modules = vmlinux_name ? 1 : 0; 832 833 kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); 834 if (kernel_dso == NULL) 835 return -1; 836 837 if (dso__load_kernel(kernel_dso, vmlinux_name, symbol_filter, verbose, use_modules) <= 0) 838 goto out_delete_dso; 839 840 node = rb_first(&kernel_dso->syms); 841 sym = rb_entry(node, struct symbol, rb_node); 842 min_ip = sym->start; 843 844 node = rb_last(&kernel_dso->syms); 845 sym = rb_entry(node, struct symbol, rb_node); 846 max_ip = sym->end; 847 848 if (dump_symtab) 849 dso__fprintf(kernel_dso, stderr); 850 851 return 0; 852 853 out_delete_dso: 854 dso__delete(kernel_dso); 855 kernel_dso = NULL; 856 return -1; 857 } 858 859 /* 860 * Binary search in the histogram table and record the hit: 861 */ 862 static void record_ip(u64 ip, int counter) 863 { 864 struct symbol *sym = dso__find_symbol(kernel_dso, ip); 865 866 if (sym != NULL) { 867 struct sym_entry *syme = dso__sym_priv(kernel_dso, sym); 868 869 if (!syme->skip) { 870 syme->count[counter]++; 871 record_precise_ip(syme, counter, ip); 872 pthread_mutex_lock(&active_symbols_lock); 873 if (list_empty(&syme->node) || !syme->node.next) 874 __list_insert_active_sym(syme); 875 pthread_mutex_unlock(&active_symbols_lock); 876 return; 877 } 878 } 879 880 samples--; 881 } 882 883 static void process_event(u64 ip, int counter, int user) 884 { 885 samples++; 886 887 if (user) { 888 userspace_samples++; 889 return; 890 } 891 892 record_ip(ip, counter); 893 } 894 895 struct mmap_data { 896 int counter; 897 void *base; 898 int mask; 899 unsigned int prev; 900 }; 901 902 static unsigned int mmap_read_head(struct mmap_data *md) 903 { 904 struct perf_event_mmap_page *pc = md->base; 905 int head; 906 907 head = pc->data_head; 908 rmb(); 909 910 return head; 911 } 912 913 struct timeval last_read, this_read; 914 915 static void mmap_read_counter(struct mmap_data *md) 916 { 917 unsigned int head = mmap_read_head(md); 918 unsigned int old = md->prev; 919 unsigned char *data = md->base + page_size; 920 int diff; 921 922 gettimeofday(&this_read, NULL); 923 924 /* 925 * If we're further behind than half the buffer, there's a chance 926 * the writer will bite our tail and mess up the samples under us. 927 * 928 * If we somehow ended up ahead of the head, we got messed up. 929 * 930 * In either case, truncate and restart at head. 931 */ 932 diff = head - old; 933 if (diff > md->mask / 2 || diff < 0) { 934 struct timeval iv; 935 unsigned long msecs; 936 937 timersub(&this_read, &last_read, &iv); 938 msecs = iv.tv_sec*1000 + iv.tv_usec/1000; 939 940 fprintf(stderr, "WARNING: failed to keep up with mmap data." 941 " Last read %lu msecs ago.\n", msecs); 942 943 /* 944 * head points to a known good entry, start there. 945 */ 946 old = head; 947 } 948 949 last_read = this_read; 950 951 for (; old != head;) { 952 event_t *event = (event_t *)&data[old & md->mask]; 953 954 event_t event_copy; 955 956 size_t size = event->header.size; 957 958 /* 959 * Event straddles the mmap boundary -- header should always 960 * be inside due to u64 alignment of output. 961 */ 962 if ((old & md->mask) + size != ((old + size) & md->mask)) { 963 unsigned int offset = old; 964 unsigned int len = min(sizeof(*event), size), cpy; 965 void *dst = &event_copy; 966 967 do { 968 cpy = min(md->mask + 1 - (offset & md->mask), len); 969 memcpy(dst, &data[offset & md->mask], cpy); 970 offset += cpy; 971 dst += cpy; 972 len -= cpy; 973 } while (len); 974 975 event = &event_copy; 976 } 977 978 old += size; 979 980 if (event->header.type == PERF_RECORD_SAMPLE) { 981 int user = 982 (event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER; 983 process_event(event->ip.ip, md->counter, user); 984 } 985 } 986 987 md->prev = old; 988 } 989 990 static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; 991 static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; 992 993 static void mmap_read(void) 994 { 995 int i, counter; 996 997 for (i = 0; i < nr_cpus; i++) { 998 for (counter = 0; counter < nr_counters; counter++) 999 mmap_read_counter(&mmap_array[i][counter]); 1000 } 1001 } 1002 1003 int nr_poll; 1004 int group_fd; 1005 1006 static void start_counter(int i, int counter) 1007 { 1008 struct perf_event_attr *attr; 1009 int cpu; 1010 1011 cpu = profile_cpu; 1012 if (target_pid == -1 && profile_cpu == -1) 1013 cpu = i; 1014 1015 attr = attrs + counter; 1016 1017 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 1018 attr->freq = freq; 1019 attr->inherit = (cpu < 0) && inherit; 1020 1021 try_again: 1022 fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); 1023 1024 if (fd[i][counter] < 0) { 1025 int err = errno; 1026 1027 if (err == EPERM) 1028 die("No permission - are you root?\n"); 1029 /* 1030 * If it's cycles then fall back to hrtimer 1031 * based cpu-clock-tick sw counter, which 1032 * is always available even if no PMU support: 1033 */ 1034 if (attr->type == PERF_TYPE_HARDWARE 1035 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 1036 1037 if (verbose) 1038 warning(" ... trying to fall back to cpu-clock-ticks\n"); 1039 1040 attr->type = PERF_TYPE_SOFTWARE; 1041 attr->config = PERF_COUNT_SW_CPU_CLOCK; 1042 goto try_again; 1043 } 1044 printf("\n"); 1045 error("perfcounter syscall returned with %d (%s)\n", 1046 fd[i][counter], strerror(err)); 1047 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 1048 exit(-1); 1049 } 1050 assert(fd[i][counter] >= 0); 1051 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); 1052 1053 /* 1054 * First counter acts as the group leader: 1055 */ 1056 if (group && group_fd == -1) 1057 group_fd = fd[i][counter]; 1058 1059 event_array[nr_poll].fd = fd[i][counter]; 1060 event_array[nr_poll].events = POLLIN; 1061 nr_poll++; 1062 1063 mmap_array[i][counter].counter = counter; 1064 mmap_array[i][counter].prev = 0; 1065 mmap_array[i][counter].mask = mmap_pages*page_size - 1; 1066 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, 1067 PROT_READ, MAP_SHARED, fd[i][counter], 0); 1068 if (mmap_array[i][counter].base == MAP_FAILED) 1069 die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 1070 } 1071 1072 static int __cmd_top(void) 1073 { 1074 pthread_t thread; 1075 int i, counter; 1076 int ret; 1077 1078 for (i = 0; i < nr_cpus; i++) { 1079 group_fd = -1; 1080 for (counter = 0; counter < nr_counters; counter++) 1081 start_counter(i, counter); 1082 } 1083 1084 /* Wait for a minimal set of events before starting the snapshot */ 1085 poll(event_array, nr_poll, 100); 1086 1087 mmap_read(); 1088 1089 if (pthread_create(&thread, NULL, display_thread, NULL)) { 1090 printf("Could not create display thread.\n"); 1091 exit(-1); 1092 } 1093 1094 if (realtime_prio) { 1095 struct sched_param param; 1096 1097 param.sched_priority = realtime_prio; 1098 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1099 printf("Could not set realtime priority.\n"); 1100 exit(-1); 1101 } 1102 } 1103 1104 while (1) { 1105 int hits = samples; 1106 1107 mmap_read(); 1108 1109 if (hits == samples) 1110 ret = poll(event_array, nr_poll, 100); 1111 } 1112 1113 return 0; 1114 } 1115 1116 static const char * const top_usage[] = { 1117 "perf top [<options>]", 1118 NULL 1119 }; 1120 1121 static const struct option options[] = { 1122 OPT_CALLBACK('e', "event", NULL, "event", 1123 "event selector. use 'perf list' to list available events", 1124 parse_events), 1125 OPT_INTEGER('c', "count", &default_interval, 1126 "event period to sample"), 1127 OPT_INTEGER('p', "pid", &target_pid, 1128 "profile events on existing pid"), 1129 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1130 "system-wide collection from all CPUs"), 1131 OPT_INTEGER('C', "CPU", &profile_cpu, 1132 "CPU to profile on"), 1133 OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), 1134 OPT_INTEGER('m', "mmap-pages", &mmap_pages, 1135 "number of mmap data pages"), 1136 OPT_INTEGER('r', "realtime", &realtime_prio, 1137 "collect data with this RT SCHED_FIFO priority"), 1138 OPT_INTEGER('d', "delay", &delay_secs, 1139 "number of seconds to delay between refreshes"), 1140 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 1141 "dump the symbol table used for profiling"), 1142 OPT_INTEGER('f', "count-filter", &count_filter, 1143 "only display functions with more events than this"), 1144 OPT_BOOLEAN('g', "group", &group, 1145 "put the counters into a counter group"), 1146 OPT_BOOLEAN('i', "inherit", &inherit, 1147 "child tasks inherit counters"), 1148 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 1149 "symbol to annotate - requires -k option"), 1150 OPT_BOOLEAN('z', "zero", &zero, 1151 "zero history across updates"), 1152 OPT_INTEGER('F', "freq", &freq, 1153 "profile at this frequency"), 1154 OPT_INTEGER('E', "entries", &print_entries, 1155 "display this many functions"), 1156 OPT_BOOLEAN('v', "verbose", &verbose, 1157 "be more verbose (show counter open errors, etc)"), 1158 OPT_END() 1159 }; 1160 1161 int cmd_top(int argc, const char **argv, const char *prefix __used) 1162 { 1163 int counter; 1164 1165 symbol__init(); 1166 1167 page_size = sysconf(_SC_PAGE_SIZE); 1168 1169 argc = parse_options(argc, argv, options, top_usage, 0); 1170 if (argc) 1171 usage_with_options(top_usage, options); 1172 1173 if (freq) { 1174 default_interval = freq; 1175 freq = 1; 1176 } 1177 1178 /* CPU and PID are mutually exclusive */ 1179 if (target_pid != -1 && profile_cpu != -1) { 1180 printf("WARNING: PID switch overriding CPU\n"); 1181 sleep(1); 1182 profile_cpu = -1; 1183 } 1184 1185 if (!nr_counters) 1186 nr_counters = 1; 1187 1188 if (delay_secs < 1) 1189 delay_secs = 1; 1190 1191 parse_symbols(); 1192 parse_source(sym_filter_entry); 1193 1194 /* 1195 * Fill in the ones not specifically initialized via -c: 1196 */ 1197 for (counter = 0; counter < nr_counters; counter++) { 1198 if (attrs[counter].sample_period) 1199 continue; 1200 1201 attrs[counter].sample_period = default_interval; 1202 } 1203 1204 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 1205 assert(nr_cpus <= MAX_NR_CPUS); 1206 assert(nr_cpus >= 0); 1207 1208 if (target_pid != -1 || profile_cpu != -1) 1209 nr_cpus = 1; 1210 1211 return __cmd_top(); 1212 } 1213