/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "builtin.h"

#include "perf.h"

#include "util/symbol.h"
#include "util/color.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
#include "util/parse-events.h"

#include <assert.h>
#include <fcntl.h>

#include <stdio.h>
#include <termios.h>
#include <unistd.h>

#include <errno.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

/* Counter file descriptors, indexed [cpu slot][counter]; filled by start_counter(). */
static int			fd[MAX_NR_CPUS][MAX_COUNTERS];

/* -a: system-wide collection from all CPUs. */
static int			system_wide			=  0;

/* -c: sample period given to every counter that has none of its own. */
static int			default_interval		= 100000;

/* -f: rows whose sample count is below this are not printed. */
static int			count_filter			=  5;
/* -E: number of rows (functions or annotated lines) to display. */
static int			print_entries			= 15;

/* -p: pid to profile, -1 when no pid was requested. */
static int			target_pid			= -1;
/* -i: child tasks inherit counters. */
static int			inherit				=  0;
/* -C: CPU to profile on, -1 when no CPU was requested. */
static int			profile_cpu			= -1;
/* Number of cpu slots opened; set in cmd_top(). */
static int			nr_cpus				=  0;
/* -r: SCHED_FIFO priority for the collecting thread, 0 leaves scheduling alone. */
static unsigned int		realtime_prio			=  0;
/* -g: put the counters into a counter group. */
static int			group				=  0;
/* System page size, read with sysconf() in cmd_top(). */
static unsigned int		page_size;
/* -m: number of mmap data pages per counter. */
static unsigned int		mmap_pages			= 16;
/* -F: profile at this frequency; cmd_top() reduces it to a 0/1 flag. */
static int			freq				=  0;
/* -v: be more verbose. */
static int			verbose				=  0;
/* -k: vmlinux pathname; also enables the annotation keys. */
static char			*vmlinux			=  NULL;

/* -d: seconds between display refreshes. */
static int			delay_secs			=  2;
/* -z: zero history across updates instead of decaying it. */
static int			zero;
/* -D: dump the symbol table used for profiling. */
static int			dump_symtab;

/*
 * Source
 */

/* One line of objdump output for the annotated symbol, kept in a singly linked list. */
struct source_line {
	u64			eip;			/* address parsed from the line, 0 if the line has none */
	unsigned long		count[MAX_COUNTERS];	/* samples that hit eip, per counter */
	char			*line;			/* text of the line, newline stripped */
	struct source_line	*next;
};

static char *sym_filter = NULL; 90 struct sym_entry *sym_filter_entry = NULL; 91 static int sym_pcnt_filter = 5; 92 static int sym_counter = 0; 93 static int display_weighted = -1; 94 95 /* 96 * Symbols 97 */ 98 99 static u64 min_ip; 100 static u64 max_ip = -1ll; 101 102 struct sym_entry { 103 struct rb_node rb_node; 104 struct list_head node; 105 unsigned long count[MAX_COUNTERS]; 106 unsigned long snap_count; 107 double weight; 108 int skip; 109 struct source_line *source; 110 struct source_line *lines; 111 struct source_line **lines_tail; 112 pthread_mutex_t source_lock; 113 }; 114 115 /* 116 * Source functions 117 */ 118 119 static void parse_source(struct sym_entry *syme) 120 { 121 struct symbol *sym; 122 struct module *module; 123 struct section *section = NULL; 124 FILE *file; 125 char command[PATH_MAX*2], *path = vmlinux; 126 u64 start, end, len; 127 128 if (!syme) 129 return; 130 131 if (syme->lines) { 132 pthread_mutex_lock(&syme->source_lock); 133 goto out_assign; 134 } 135 136 sym = (struct symbol *)(syme + 1); 137 module = sym->module; 138 139 if (module) 140 path = module->path; 141 if (!path) 142 return; 143 144 start = sym->obj_start; 145 if (!start) 146 start = sym->start; 147 148 if (module) { 149 section = module->sections->find_section(module->sections, ".text"); 150 if (section) 151 start -= section->vma; 152 } 153 154 end = start + sym->end - sym->start + 1; 155 len = sym->end - sym->start; 156 157 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); 158 159 file = popen(command, "r"); 160 if (!file) 161 return; 162 163 pthread_mutex_lock(&syme->source_lock); 164 syme->lines_tail = &syme->lines; 165 while (!feof(file)) { 166 struct source_line *src; 167 size_t dummy = 0; 168 char *c; 169 170 src = malloc(sizeof(struct source_line)); 171 assert(src != NULL); 172 memset(src, 0, sizeof(struct source_line)); 173 174 if (getline(&src->line, &dummy, file) < 0) 175 break; 176 if (!src->line) 177 
break; 178 179 c = strchr(src->line, '\n'); 180 if (c) 181 *c = 0; 182 183 src->next = NULL; 184 *syme->lines_tail = src; 185 syme->lines_tail = &src->next; 186 187 if (strlen(src->line)>8 && src->line[8] == ':') { 188 src->eip = strtoull(src->line, NULL, 16); 189 if (section) 190 src->eip += section->vma; 191 } 192 if (strlen(src->line)>8 && src->line[16] == ':') { 193 src->eip = strtoull(src->line, NULL, 16); 194 if (section) 195 src->eip += section->vma; 196 } 197 } 198 pclose(file); 199 out_assign: 200 sym_filter_entry = syme; 201 pthread_mutex_unlock(&syme->source_lock); 202 } 203 204 static void __zero_source_counters(struct sym_entry *syme) 205 { 206 int i; 207 struct source_line *line; 208 209 line = syme->lines; 210 while (line) { 211 for (i = 0; i < nr_counters; i++) 212 line->count[i] = 0; 213 line = line->next; 214 } 215 } 216 217 static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) 218 { 219 struct source_line *line; 220 221 if (syme != sym_filter_entry) 222 return; 223 224 if (pthread_mutex_trylock(&syme->source_lock)) 225 return; 226 227 if (!syme->source) 228 goto out_unlock; 229 230 for (line = syme->lines; line; line = line->next) { 231 if (line->eip == ip) { 232 line->count[counter]++; 233 break; 234 } 235 if (line->eip > ip) 236 break; 237 } 238 out_unlock: 239 pthread_mutex_unlock(&syme->source_lock); 240 } 241 242 static void lookup_sym_source(struct sym_entry *syme) 243 { 244 struct symbol *symbol = (struct symbol *)(syme + 1); 245 struct source_line *line; 246 char pattern[PATH_MAX]; 247 char *idx; 248 249 sprintf(pattern, "<%s>:", symbol->name); 250 251 if (symbol->module) { 252 idx = strstr(pattern, "\t"); 253 if (idx) 254 *idx = 0; 255 } 256 257 pthread_mutex_lock(&syme->source_lock); 258 for (line = syme->lines; line; line = line->next) { 259 if (strstr(line->line, pattern)) { 260 syme->source = line; 261 break; 262 } 263 } 264 pthread_mutex_unlock(&syme->source_lock); 265 } 266 267 static void show_lines(struct 
source_line *queue, int count, int total) 268 { 269 int i; 270 struct source_line *line; 271 272 line = queue; 273 for (i = 0; i < count; i++) { 274 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; 275 276 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); 277 line = line->next; 278 } 279 } 280 281 #define TRACE_COUNT 3 282 283 static void show_details(struct sym_entry *syme) 284 { 285 struct symbol *symbol; 286 struct source_line *line; 287 struct source_line *line_queue = NULL; 288 int displayed = 0; 289 int line_queue_count = 0, total = 0, more = 0; 290 291 if (!syme) 292 return; 293 294 if (!syme->source) 295 lookup_sym_source(syme); 296 297 if (!syme->source) 298 return; 299 300 symbol = (struct symbol *)(syme + 1); 301 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); 302 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 303 304 pthread_mutex_lock(&syme->source_lock); 305 line = syme->source; 306 while (line) { 307 total += line->count[sym_counter]; 308 line = line->next; 309 } 310 311 line = syme->source; 312 while (line) { 313 float pcnt = 0.0; 314 315 if (!line_queue_count) 316 line_queue = line; 317 line_queue_count++; 318 319 if (line->count[sym_counter]) 320 pcnt = 100.0 * line->count[sym_counter] / (float)total; 321 if (pcnt >= (float)sym_pcnt_filter) { 322 if (displayed <= print_entries) 323 show_lines(line_queue, line_queue_count, total); 324 else more++; 325 displayed += line_queue_count; 326 line_queue_count = 0; 327 line_queue = NULL; 328 } else if (line_queue_count > TRACE_COUNT) { 329 line_queue = line_queue->next; 330 line_queue_count--; 331 } 332 333 line->count[sym_counter] = zero ? 
0 : line->count[sym_counter] * 7 / 8; 334 line = line->next; 335 } 336 pthread_mutex_unlock(&syme->source_lock); 337 if (more) 338 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 339 } 340 341 struct dso *kernel_dso; 342 343 /* 344 * Symbols will be added here in record_ip and will get out 345 * after decayed. 346 */ 347 static LIST_HEAD(active_symbols); 348 static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; 349 350 /* 351 * Ordering weight: count-1 * count-2 * ... / count-n 352 */ 353 static double sym_weight(const struct sym_entry *sym) 354 { 355 double weight = sym->snap_count; 356 int counter; 357 358 if (!display_weighted) 359 return weight; 360 361 for (counter = 1; counter < nr_counters-1; counter++) 362 weight *= sym->count[counter]; 363 364 weight /= (sym->count[counter] + 1); 365 366 return weight; 367 } 368 369 static long samples; 370 static long userspace_samples; 371 static const char CONSOLE_CLEAR[] = "[H[2J"; 372 373 static void __list_insert_active_sym(struct sym_entry *syme) 374 { 375 list_add(&syme->node, &active_symbols); 376 } 377 378 static void list_remove_active_sym(struct sym_entry *syme) 379 { 380 pthread_mutex_lock(&active_symbols_lock); 381 list_del_init(&syme->node); 382 pthread_mutex_unlock(&active_symbols_lock); 383 } 384 385 static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) 386 { 387 struct rb_node **p = &tree->rb_node; 388 struct rb_node *parent = NULL; 389 struct sym_entry *iter; 390 391 while (*p != NULL) { 392 parent = *p; 393 iter = rb_entry(parent, struct sym_entry, rb_node); 394 395 if (se->weight > iter->weight) 396 p = &(*p)->rb_left; 397 else 398 p = &(*p)->rb_right; 399 } 400 401 rb_link_node(&se->rb_node, parent, p); 402 rb_insert_color(&se->rb_node, tree); 403 } 404 405 static void print_sym_table(void) 406 { 407 int printed = 0, j; 408 int counter, snap = !display_weighted ? 
sym_counter : 0; 409 float samples_per_sec = samples/delay_secs; 410 float ksamples_per_sec = (samples-userspace_samples)/delay_secs; 411 float sum_ksamples = 0.0; 412 struct sym_entry *syme, *n; 413 struct rb_root tmp = RB_ROOT; 414 struct rb_node *nd; 415 416 samples = userspace_samples = 0; 417 418 /* Sort the active symbols */ 419 pthread_mutex_lock(&active_symbols_lock); 420 syme = list_entry(active_symbols.next, struct sym_entry, node); 421 pthread_mutex_unlock(&active_symbols_lock); 422 423 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 424 syme->snap_count = syme->count[snap]; 425 if (syme->snap_count != 0) { 426 syme->weight = sym_weight(syme); 427 rb_insert_active_sym(&tmp, syme); 428 sum_ksamples += syme->snap_count; 429 430 for (j = 0; j < nr_counters; j++) 431 syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8; 432 } else 433 list_remove_active_sym(syme); 434 } 435 436 puts(CONSOLE_CLEAR); 437 438 printf( 439 "------------------------------------------------------------------------------\n"); 440 printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", 441 samples_per_sec, 442 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 443 444 if (nr_counters == 1 || !display_weighted) { 445 printf("%Ld", (u64)attrs[0].sample_period); 446 if (freq) 447 printf("Hz "); 448 else 449 printf(" "); 450 } 451 452 if (!display_weighted) 453 printf("%s", event_name(sym_counter)); 454 else for (counter = 0; counter < nr_counters; counter++) { 455 if (counter) 456 printf("/"); 457 458 printf("%s", event_name(counter)); 459 } 460 461 printf( "], "); 462 463 if (target_pid != -1) 464 printf(" (target_pid: %d", target_pid); 465 else 466 printf(" (all"); 467 468 if (profile_cpu != -1) 469 printf(", cpu: %d)\n", profile_cpu); 470 else { 471 if (target_pid != -1) 472 printf(")\n"); 473 else 474 printf(", %d CPUs)\n", nr_cpus); 475 } 476 477 printf("------------------------------------------------------------------------------\n\n"); 478 479 if 
(sym_filter_entry) { 480 show_details(sym_filter_entry); 481 return; 482 } 483 484 if (nr_counters == 1) 485 printf(" samples pcnt"); 486 else 487 printf(" weight samples pcnt"); 488 489 printf(" RIP kernel function\n" 490 " ______ _______ _____ ________________ _______________\n\n" 491 ); 492 493 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 494 struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); 495 struct symbol *sym = (struct symbol *)(syme + 1); 496 double pcnt; 497 498 if (++printed > print_entries || (int)syme->snap_count < count_filter) 499 continue; 500 501 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 502 sum_ksamples)); 503 504 if (nr_counters == 1 || !display_weighted) 505 printf("%20.2f - ", syme->weight); 506 else 507 printf("%9.1f %10ld - ", syme->weight, syme->snap_count); 508 509 percent_color_fprintf(stdout, "%4.1f%%", pcnt); 510 printf(" - %016llx : %s", sym->start, sym->name); 511 if (sym->module) 512 printf("\t[%s]", sym->module->name); 513 printf("\n"); 514 } 515 } 516 517 static void prompt_integer(int *target, const char *msg) 518 { 519 char *buf = malloc(0), *p; 520 size_t dummy = 0; 521 int tmp; 522 523 fprintf(stdout, "\n%s: ", msg); 524 if (getline(&buf, &dummy, stdin) < 0) 525 return; 526 527 p = strchr(buf, '\n'); 528 if (p) 529 *p = 0; 530 531 p = buf; 532 while(*p) { 533 if (!isdigit(*p)) 534 goto out_free; 535 p++; 536 } 537 tmp = strtoul(buf, NULL, 10); 538 *target = tmp; 539 out_free: 540 free(buf); 541 } 542 543 static void prompt_percent(int *target, const char *msg) 544 { 545 int tmp = 0; 546 547 prompt_integer(&tmp, msg); 548 if (tmp >= 0 && tmp <= 100) 549 *target = tmp; 550 } 551 552 static void prompt_symbol(struct sym_entry **target, const char *msg) 553 { 554 char *buf = malloc(0), *p; 555 struct sym_entry *syme = *target, *n, *found = NULL; 556 size_t dummy = 0; 557 558 /* zero counters of active symbol */ 559 if (syme) { 560 pthread_mutex_lock(&syme->source_lock); 561 
__zero_source_counters(syme); 562 *target = NULL; 563 pthread_mutex_unlock(&syme->source_lock); 564 } 565 566 fprintf(stdout, "\n%s: ", msg); 567 if (getline(&buf, &dummy, stdin) < 0) 568 goto out_free; 569 570 p = strchr(buf, '\n'); 571 if (p) 572 *p = 0; 573 574 pthread_mutex_lock(&active_symbols_lock); 575 syme = list_entry(active_symbols.next, struct sym_entry, node); 576 pthread_mutex_unlock(&active_symbols_lock); 577 578 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 579 struct symbol *sym = (struct symbol *)(syme + 1); 580 581 if (!strcmp(buf, sym->name)) { 582 found = syme; 583 break; 584 } 585 } 586 587 if (!found) { 588 fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); 589 sleep(1); 590 return; 591 } else 592 parse_source(found); 593 594 out_free: 595 free(buf); 596 } 597 598 static void print_mapped_keys(void) 599 { 600 char *name = NULL; 601 602 if (sym_filter_entry) { 603 struct symbol *sym = (struct symbol *)(sym_filter_entry+1); 604 name = sym->name; 605 } 606 607 fprintf(stdout, "\nMapped keys:\n"); 608 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); 609 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 610 611 if (nr_counters > 1) 612 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); 613 614 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 615 616 if (vmlinux) { 617 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 618 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 619 fprintf(stdout, "\t[S] stop annotation.\n"); 620 } 621 622 if (nr_counters > 1) 623 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 624 625 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 
1 : 0); 626 fprintf(stdout, "\t[qQ] quit.\n"); 627 } 628 629 static int key_mapped(int c) 630 { 631 switch (c) { 632 case 'd': 633 case 'e': 634 case 'f': 635 case 'z': 636 case 'q': 637 case 'Q': 638 return 1; 639 case 'E': 640 case 'w': 641 return nr_counters > 1 ? 1 : 0; 642 case 'F': 643 case 's': 644 case 'S': 645 return vmlinux ? 1 : 0; 646 } 647 648 return 0; 649 } 650 651 static void handle_keypress(int c) 652 { 653 if (!key_mapped(c)) { 654 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 655 struct termios tc, save; 656 657 print_mapped_keys(); 658 fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); 659 fflush(stdout); 660 661 tcgetattr(0, &save); 662 tc = save; 663 tc.c_lflag &= ~(ICANON | ECHO); 664 tc.c_cc[VMIN] = 0; 665 tc.c_cc[VTIME] = 0; 666 tcsetattr(0, TCSANOW, &tc); 667 668 poll(&stdin_poll, 1, -1); 669 c = getc(stdin); 670 671 tcsetattr(0, TCSAFLUSH, &save); 672 if (!key_mapped(c)) 673 return; 674 } 675 676 switch (c) { 677 case 'd': 678 prompt_integer(&delay_secs, "Enter display delay"); 679 break; 680 case 'e': 681 prompt_integer(&print_entries, "Enter display entries (lines)"); 682 break; 683 case 'E': 684 if (nr_counters > 1) { 685 int i; 686 687 fprintf(stderr, "\nAvailable events:"); 688 for (i = 0; i < nr_counters; i++) 689 fprintf(stderr, "\n\t%d %s", i, event_name(i)); 690 691 prompt_integer(&sym_counter, "Enter details event counter"); 692 693 if (sym_counter >= nr_counters) { 694 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); 695 sym_counter = 0; 696 sleep(1); 697 } 698 } else sym_counter = 0; 699 break; 700 case 'f': 701 prompt_integer(&count_filter, "Enter display event count filter"); 702 break; 703 case 'F': 704 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 705 break; 706 case 'q': 707 case 'Q': 708 printf("exiting.\n"); 709 exit(0); 710 case 's': 711 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 712 break; 713 case 'S': 714 if 
(!sym_filter_entry) 715 break; 716 else { 717 struct sym_entry *syme = sym_filter_entry; 718 719 pthread_mutex_lock(&syme->source_lock); 720 sym_filter_entry = NULL; 721 __zero_source_counters(syme); 722 pthread_mutex_unlock(&syme->source_lock); 723 } 724 break; 725 case 'w': 726 display_weighted = ~display_weighted; 727 break; 728 case 'z': 729 zero = ~zero; 730 break; 731 } 732 } 733 734 static void *display_thread(void *arg __used) 735 { 736 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 737 struct termios tc, save; 738 int delay_msecs, c; 739 740 tcgetattr(0, &save); 741 tc = save; 742 tc.c_lflag &= ~(ICANON | ECHO); 743 tc.c_cc[VMIN] = 0; 744 tc.c_cc[VTIME] = 0; 745 746 repeat: 747 delay_msecs = delay_secs * 1000; 748 tcsetattr(0, TCSANOW, &tc); 749 /* trash return*/ 750 getc(stdin); 751 752 do { 753 print_sym_table(); 754 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 755 756 c = getc(stdin); 757 tcsetattr(0, TCSAFLUSH, &save); 758 759 handle_keypress(c); 760 goto repeat; 761 762 return NULL; 763 } 764 765 /* Tag samples to be skipped. */ 766 static const char *skip_symbols[] = { 767 "default_idle", 768 "cpu_idle", 769 "enter_idle", 770 "exit_idle", 771 "mwait_idle", 772 "mwait_idle_with_hints", 773 "ppc64_runlatch_off", 774 "pseries_dedicated_idle_sleep", 775 NULL 776 }; 777 778 static int symbol_filter(struct dso *self, struct symbol *sym) 779 { 780 struct sym_entry *syme; 781 const char *name = sym->name; 782 int i; 783 784 /* 785 * ppc64 uses function descriptors and appends a '.' to the 786 * start of every instruction address. Remove it. 
787 */ 788 if (name[0] == '.') 789 name++; 790 791 if (!strcmp(name, "_text") || 792 !strcmp(name, "_etext") || 793 !strcmp(name, "_sinittext") || 794 !strncmp("init_module", name, 11) || 795 !strncmp("cleanup_module", name, 14) || 796 strstr(name, "_text_start") || 797 strstr(name, "_text_end")) 798 return 1; 799 800 syme = dso__sym_priv(self, sym); 801 pthread_mutex_init(&syme->source_lock, NULL); 802 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) 803 sym_filter_entry = syme; 804 805 for (i = 0; skip_symbols[i]; i++) { 806 if (!strcmp(skip_symbols[i], name)) { 807 syme->skip = 1; 808 break; 809 } 810 } 811 812 return 0; 813 } 814 815 static int parse_symbols(void) 816 { 817 struct rb_node *node; 818 struct symbol *sym; 819 int modules = vmlinux ? 1 : 0; 820 821 kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); 822 if (kernel_dso == NULL) 823 return -1; 824 825 if (dso__load_kernel(kernel_dso, vmlinux, symbol_filter, verbose, modules) <= 0) 826 goto out_delete_dso; 827 828 node = rb_first(&kernel_dso->syms); 829 sym = rb_entry(node, struct symbol, rb_node); 830 min_ip = sym->start; 831 832 node = rb_last(&kernel_dso->syms); 833 sym = rb_entry(node, struct symbol, rb_node); 834 max_ip = sym->end; 835 836 if (dump_symtab) 837 dso__fprintf(kernel_dso, stderr); 838 839 return 0; 840 841 out_delete_dso: 842 dso__delete(kernel_dso); 843 kernel_dso = NULL; 844 return -1; 845 } 846 847 /* 848 * Binary search in the histogram table and record the hit: 849 */ 850 static void record_ip(u64 ip, int counter) 851 { 852 struct symbol *sym = dso__find_symbol(kernel_dso, ip); 853 854 if (sym != NULL) { 855 struct sym_entry *syme = dso__sym_priv(kernel_dso, sym); 856 857 if (!syme->skip) { 858 syme->count[counter]++; 859 record_precise_ip(syme, counter, ip); 860 pthread_mutex_lock(&active_symbols_lock); 861 if (list_empty(&syme->node) || !syme->node.next) 862 __list_insert_active_sym(syme); 863 pthread_mutex_unlock(&active_symbols_lock); 864 return; 
865 } 866 } 867 868 samples--; 869 } 870 871 static void process_event(u64 ip, int counter, int user) 872 { 873 samples++; 874 875 if (user) { 876 userspace_samples++; 877 return; 878 } 879 880 record_ip(ip, counter); 881 } 882 883 struct mmap_data { 884 int counter; 885 void *base; 886 int mask; 887 unsigned int prev; 888 }; 889 890 static unsigned int mmap_read_head(struct mmap_data *md) 891 { 892 struct perf_counter_mmap_page *pc = md->base; 893 int head; 894 895 head = pc->data_head; 896 rmb(); 897 898 return head; 899 } 900 901 struct timeval last_read, this_read; 902 903 static void mmap_read_counter(struct mmap_data *md) 904 { 905 unsigned int head = mmap_read_head(md); 906 unsigned int old = md->prev; 907 unsigned char *data = md->base + page_size; 908 int diff; 909 910 gettimeofday(&this_read, NULL); 911 912 /* 913 * If we're further behind than half the buffer, there's a chance 914 * the writer will bite our tail and mess up the samples under us. 915 * 916 * If we somehow ended up ahead of the head, we got messed up. 917 * 918 * In either case, truncate and restart at head. 919 */ 920 diff = head - old; 921 if (diff > md->mask / 2 || diff < 0) { 922 struct timeval iv; 923 unsigned long msecs; 924 925 timersub(&this_read, &last_read, &iv); 926 msecs = iv.tv_sec*1000 + iv.tv_usec/1000; 927 928 fprintf(stderr, "WARNING: failed to keep up with mmap data." 929 " Last read %lu msecs ago.\n", msecs); 930 931 /* 932 * head points to a known good entry, start there. 
933 */ 934 old = head; 935 } 936 937 last_read = this_read; 938 939 for (; old != head;) { 940 struct ip_event { 941 struct perf_event_header header; 942 u64 ip; 943 u32 pid, target_pid; 944 }; 945 struct mmap_event { 946 struct perf_event_header header; 947 u32 pid, target_pid; 948 u64 start; 949 u64 len; 950 u64 pgoff; 951 char filename[PATH_MAX]; 952 }; 953 954 typedef union event_union { 955 struct perf_event_header header; 956 struct ip_event ip; 957 struct mmap_event mmap; 958 } event_t; 959 960 event_t *event = (event_t *)&data[old & md->mask]; 961 962 event_t event_copy; 963 964 size_t size = event->header.size; 965 966 /* 967 * Event straddles the mmap boundary -- header should always 968 * be inside due to u64 alignment of output. 969 */ 970 if ((old & md->mask) + size != ((old + size) & md->mask)) { 971 unsigned int offset = old; 972 unsigned int len = min(sizeof(*event), size), cpy; 973 void *dst = &event_copy; 974 975 do { 976 cpy = min(md->mask + 1 - (offset & md->mask), len); 977 memcpy(dst, &data[offset & md->mask], cpy); 978 offset += cpy; 979 dst += cpy; 980 len -= cpy; 981 } while (len); 982 983 event = &event_copy; 984 } 985 986 old += size; 987 988 if (event->header.type == PERF_EVENT_SAMPLE) { 989 int user = 990 (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; 991 process_event(event->ip.ip, md->counter, user); 992 } 993 } 994 995 md->prev = old; 996 } 997 998 static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; 999 static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; 1000 1001 static void mmap_read(void) 1002 { 1003 int i, counter; 1004 1005 for (i = 0; i < nr_cpus; i++) { 1006 for (counter = 0; counter < nr_counters; counter++) 1007 mmap_read_counter(&mmap_array[i][counter]); 1008 } 1009 } 1010 1011 int nr_poll; 1012 int group_fd; 1013 1014 static void start_counter(int i, int counter) 1015 { 1016 struct perf_counter_attr *attr; 1017 int cpu; 1018 1019 cpu = profile_cpu; 1020 if (target_pid == 
-1 && profile_cpu == -1) 1021 cpu = i; 1022 1023 attr = attrs + counter; 1024 1025 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 1026 attr->freq = freq; 1027 attr->inherit = (cpu < 0) && inherit; 1028 1029 try_again: 1030 fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0); 1031 1032 if (fd[i][counter] < 0) { 1033 int err = errno; 1034 1035 if (err == EPERM) 1036 die("No permission - are you root?\n"); 1037 /* 1038 * If it's cycles then fall back to hrtimer 1039 * based cpu-clock-tick sw counter, which 1040 * is always available even if no PMU support: 1041 */ 1042 if (attr->type == PERF_TYPE_HARDWARE 1043 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 1044 1045 if (verbose) 1046 warning(" ... trying to fall back to cpu-clock-ticks\n"); 1047 1048 attr->type = PERF_TYPE_SOFTWARE; 1049 attr->config = PERF_COUNT_SW_CPU_CLOCK; 1050 goto try_again; 1051 } 1052 printf("\n"); 1053 error("perfcounter syscall returned with %d (%s)\n", 1054 fd[i][counter], strerror(err)); 1055 die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); 1056 exit(-1); 1057 } 1058 assert(fd[i][counter] >= 0); 1059 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); 1060 1061 /* 1062 * First counter acts as the group leader: 1063 */ 1064 if (group && group_fd == -1) 1065 group_fd = fd[i][counter]; 1066 1067 event_array[nr_poll].fd = fd[i][counter]; 1068 event_array[nr_poll].events = POLLIN; 1069 nr_poll++; 1070 1071 mmap_array[i][counter].counter = counter; 1072 mmap_array[i][counter].prev = 0; 1073 mmap_array[i][counter].mask = mmap_pages*page_size - 1; 1074 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, 1075 PROT_READ, MAP_SHARED, fd[i][counter], 0); 1076 if (mmap_array[i][counter].base == MAP_FAILED) 1077 die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 1078 } 1079 1080 static int __cmd_top(void) 1081 { 1082 pthread_t thread; 1083 int i, counter; 1084 int ret; 1085 1086 for (i = 0; i < nr_cpus; i++) { 1087 group_fd = -1; 1088 for 
(counter = 0; counter < nr_counters; counter++) 1089 start_counter(i, counter); 1090 } 1091 1092 /* Wait for a minimal set of events before starting the snapshot */ 1093 poll(event_array, nr_poll, 100); 1094 1095 mmap_read(); 1096 1097 if (pthread_create(&thread, NULL, display_thread, NULL)) { 1098 printf("Could not create display thread.\n"); 1099 exit(-1); 1100 } 1101 1102 if (realtime_prio) { 1103 struct sched_param param; 1104 1105 param.sched_priority = realtime_prio; 1106 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1107 printf("Could not set realtime priority.\n"); 1108 exit(-1); 1109 } 1110 } 1111 1112 while (1) { 1113 int hits = samples; 1114 1115 mmap_read(); 1116 1117 if (hits == samples) 1118 ret = poll(event_array, nr_poll, 100); 1119 } 1120 1121 return 0; 1122 } 1123 1124 static const char * const top_usage[] = { 1125 "perf top [<options>]", 1126 NULL 1127 }; 1128 1129 static const struct option options[] = { 1130 OPT_CALLBACK('e', "event", NULL, "event", 1131 "event selector. 
use 'perf list' to list available events", 1132 parse_events), 1133 OPT_INTEGER('c', "count", &default_interval, 1134 "event period to sample"), 1135 OPT_INTEGER('p', "pid", &target_pid, 1136 "profile events on existing pid"), 1137 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1138 "system-wide collection from all CPUs"), 1139 OPT_INTEGER('C', "CPU", &profile_cpu, 1140 "CPU to profile on"), 1141 OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), 1142 OPT_INTEGER('m', "mmap-pages", &mmap_pages, 1143 "number of mmap data pages"), 1144 OPT_INTEGER('r', "realtime", &realtime_prio, 1145 "collect data with this RT SCHED_FIFO priority"), 1146 OPT_INTEGER('d', "delay", &delay_secs, 1147 "number of seconds to delay between refreshes"), 1148 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 1149 "dump the symbol table used for profiling"), 1150 OPT_INTEGER('f', "count-filter", &count_filter, 1151 "only display functions with more events than this"), 1152 OPT_BOOLEAN('g', "group", &group, 1153 "put the counters into a counter group"), 1154 OPT_BOOLEAN('i', "inherit", &inherit, 1155 "child tasks inherit counters"), 1156 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 1157 "symbol to annotate - requires -k option"), 1158 OPT_BOOLEAN('z', "zero", &zero, 1159 "zero history across updates"), 1160 OPT_INTEGER('F', "freq", &freq, 1161 "profile at this frequency"), 1162 OPT_INTEGER('E', "entries", &print_entries, 1163 "display this many functions"), 1164 OPT_BOOLEAN('v', "verbose", &verbose, 1165 "be more verbose (show counter open errors, etc)"), 1166 OPT_END() 1167 }; 1168 1169 int cmd_top(int argc, const char **argv, const char *prefix __used) 1170 { 1171 int counter; 1172 1173 symbol__init(); 1174 1175 page_size = sysconf(_SC_PAGE_SIZE); 1176 1177 argc = parse_options(argc, argv, options, top_usage, 0); 1178 if (argc) 1179 usage_with_options(top_usage, options); 1180 1181 if (freq) { 1182 default_interval = freq; 1183 freq = 1; 1184 } 1185 1186 /* CPU and PID 
are mutually exclusive */ 1187 if (target_pid != -1 && profile_cpu != -1) { 1188 printf("WARNING: PID switch overriding CPU\n"); 1189 sleep(1); 1190 profile_cpu = -1; 1191 } 1192 1193 if (!nr_counters) 1194 nr_counters = 1; 1195 1196 if (delay_secs < 1) 1197 delay_secs = 1; 1198 1199 parse_symbols(); 1200 parse_source(sym_filter_entry); 1201 1202 /* 1203 * Fill in the ones not specifically initialized via -c: 1204 */ 1205 for (counter = 0; counter < nr_counters; counter++) { 1206 if (attrs[counter].sample_period) 1207 continue; 1208 1209 attrs[counter].sample_period = default_interval; 1210 } 1211 1212 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 1213 assert(nr_cpus <= MAX_NR_CPUS); 1214 assert(nr_cpus >= 0); 1215 1216 if (target_pid != -1 || profile_cpu != -1) 1217 nr_cpus = 1; 1218 1219 return __cmd_top(); 1220 } 1221