xref: /linux/tools/perf/builtin-top.c (revision 800c5eb7b5eba6cb2a32738d763fd59f0fbcdde4)
1 /*
2  * builtin-top.c
3  *
4  * Builtin top command: Display a continuously updated profile of
5  * any workload, CPU or specific PID.
6  *
7  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
8  *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Improvements and fixes by:
11  *
12  *   Arjan van de Ven <arjan@linux.intel.com>
13  *   Yanmin Zhang <yanmin.zhang@intel.com>
14  *   Wu Fengguang <fengguang.wu@intel.com>
15  *   Mike Galbraith <efault@gmx.de>
16  *   Paul Mackerras <paulus@samba.org>
17  *
18  * Released under the GPL v2. (and only v2, not any later version)
19  */
20 #include "builtin.h"
21 
22 #include "perf.h"
23 
24 #include "util/annotate.h"
25 #include "util/cache.h"
26 #include "util/color.h"
27 #include "util/evlist.h"
28 #include "util/evsel.h"
29 #include "util/session.h"
30 #include "util/symbol.h"
31 #include "util/thread.h"
32 #include "util/thread_map.h"
33 #include "util/top.h"
34 #include "util/util.h"
35 #include <linux/rbtree.h>
36 #include "util/parse-options.h"
37 #include "util/parse-events.h"
38 #include "util/cpumap.h"
39 #include "util/xyarray.h"
40 #include "util/sort.h"
41 
42 #include "util/debug.h"
43 
44 #include <assert.h>
45 #include <fcntl.h>
46 
47 #include <stdio.h>
48 #include <termios.h>
49 #include <unistd.h>
50 #include <inttypes.h>
51 
52 #include <errno.h>
53 #include <time.h>
54 #include <sched.h>
55 
56 #include <sys/syscall.h>
57 #include <sys/ioctl.h>
58 #include <sys/poll.h>
59 #include <sys/prctl.h>
60 #include <sys/wait.h>
61 #include <sys/uio.h>
62 #include <sys/mman.h>
63 
64 #include <linux/unistd.h>
65 #include <linux/types.h>
66 
67 
/*
 * Parse one terminal dimension taken from the environment.
 *
 * Returns the leading decimal number of @s when it is positive and fits in
 * an unsigned short (the type of the struct winsize fields), 0 otherwise.
 * Trailing characters after the number are tolerated, as atoi() did.
 */
static unsigned short parse_term_dimension(const char *s)
{
	char *end;
	long val;
	unsigned short dim;

	if (s == NULL)
		return 0;

	val = strtol(s, &end, 10);
	if (end == s || val <= 0)
		return 0;

	/* Reject values that would silently wrap when stored in ws_row/ws_col. */
	dim = (unsigned short)val;
	if ((long)dim != val)
		return 0;

	return dim;
}

/*
 * Fill @ws with the terminal dimensions.
 *
 * Sources, in order of preference:
 *   1. the LINES and COLUMNS environment variables, when both hold a usable
 *      positive value;
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when available and it
 *      reports non-zero rows and columns;
 *   3. a fixed 25x80 fallback.
 */
void get_term_dimensions(struct winsize *ws)
{
	unsigned short rows = parse_term_dimension(getenv("LINES"));
	unsigned short cols = parse_term_dimension(getenv("COLUMNS"));

	if (rows && cols) {
		ws->ws_row = rows;
		ws->ws_col = cols;
		return;
	}
#ifdef TIOCGWINSZ
	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
	    ws->ws_row && ws->ws_col)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}
89 
90 static void perf_top__update_print_entries(struct perf_top *top)
91 {
92 	top->print_entries = top->winsize.ws_row;
93 
94 	if (top->print_entries > 9)
95 		top->print_entries -= 9;
96 }
97 
98 static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
99 {
100 	struct perf_top *top = arg;
101 
102 	get_term_dimensions(&top->winsize);
103 	perf_top__update_print_entries(top);
104 }
105 
106 static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
107 {
108 	struct symbol *sym;
109 	struct annotation *notes;
110 	struct map *map;
111 	int err = -1;
112 
113 	if (!he || !he->ms.sym)
114 		return -1;
115 
116 	sym = he->ms.sym;
117 	map = he->ms.map;
118 
119 	/*
120 	 * We can't annotate with just /proc/kallsyms
121 	 */
122 	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
123 		pr_err("Can't annotate %s: No vmlinux file was found in the "
124 		       "path\n", sym->name);
125 		sleep(1);
126 		return -1;
127 	}
128 
129 	notes = symbol__annotation(sym);
130 	if (notes->src != NULL) {
131 		pthread_mutex_lock(&notes->lock);
132 		goto out_assign;
133 	}
134 
135 	pthread_mutex_lock(&notes->lock);
136 
137 	if (symbol__alloc_hist(sym) < 0) {
138 		pthread_mutex_unlock(&notes->lock);
139 		pr_err("Not enough memory for annotating '%s' symbol!\n",
140 		       sym->name);
141 		sleep(1);
142 		return err;
143 	}
144 
145 	err = symbol__annotate(sym, map, 0);
146 	if (err == 0) {
147 out_assign:
148 		top->sym_filter_entry = he;
149 	}
150 
151 	pthread_mutex_unlock(&notes->lock);
152 	return err;
153 }
154 
155 static void __zero_source_counters(struct hist_entry *he)
156 {
157 	struct symbol *sym = he->ms.sym;
158 	symbol__annotate_zero_histograms(sym);
159 }
160 
161 static void perf_top__record_precise_ip(struct perf_top *top,
162 					struct hist_entry *he,
163 					int counter, u64 ip)
164 {
165 	struct annotation *notes;
166 	struct symbol *sym;
167 
168 	if (he == NULL || he->ms.sym == NULL ||
169 	    ((top->sym_filter_entry == NULL ||
170 	      top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
171 		return;
172 
173 	sym = he->ms.sym;
174 	notes = symbol__annotation(sym);
175 
176 	if (pthread_mutex_trylock(&notes->lock))
177 		return;
178 
179 	if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
180 		pthread_mutex_unlock(&notes->lock);
181 		pr_err("Not enough memory for annotating '%s' symbol!\n",
182 		       sym->name);
183 		sleep(1);
184 		return;
185 	}
186 
187 	ip = he->ms.map->map_ip(he->ms.map, ip);
188 	symbol__inc_addr_samples(sym, he->ms.map, counter, ip);
189 
190 	pthread_mutex_unlock(&notes->lock);
191 }
192 
193 static void perf_top__show_details(struct perf_top *top)
194 {
195 	struct hist_entry *he = top->sym_filter_entry;
196 	struct annotation *notes;
197 	struct symbol *symbol;
198 	int more;
199 
200 	if (!he)
201 		return;
202 
203 	symbol = he->ms.sym;
204 	notes = symbol__annotation(symbol);
205 
206 	pthread_mutex_lock(&notes->lock);
207 
208 	if (notes->src == NULL)
209 		goto out_unlock;
210 
211 	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
212 	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
213 
214 	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
215 				       0, top->sym_pcnt_filter, top->print_entries, 4);
216 	if (top->zero)
217 		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
218 	else
219 		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
220 	if (more != 0)
221 		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
222 out_unlock:
223 	pthread_mutex_unlock(&notes->lock);
224 }
225 
/*
 * ANSI escape sequence written before each refresh: "ESC [ H" moves the
 * cursor to the top-left corner and "ESC [ 2 J" erases the whole display.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";
227 
228 static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
229 						     struct addr_location *al,
230 						     struct perf_sample *sample)
231 {
232 	struct hist_entry *he;
233 
234 	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
235 	if (he == NULL)
236 		return NULL;
237 
238 	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
239 	return he;
240 }
241 
242 static void perf_top__print_sym_table(struct perf_top *top)
243 {
244 	char bf[160];
245 	int printed = 0;
246 	const int win_width = top->winsize.ws_col - 1;
247 
248 	puts(CONSOLE_CLEAR);
249 
250 	perf_top__header_snprintf(top, bf, sizeof(bf));
251 	printf("%s\n", bf);
252 
253 	perf_top__reset_sample_counters(top);
254 
255 	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
256 
257 	if (top->sym_evsel->hists.stats.nr_lost_warned !=
258 	    top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
259 		top->sym_evsel->hists.stats.nr_lost_warned =
260 			top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
261 		color_fprintf(stdout, PERF_COLOR_RED,
262 			      "WARNING: LOST %d chunks, Check IO/CPU overload",
263 			      top->sym_evsel->hists.stats.nr_lost_warned);
264 		++printed;
265 	}
266 
267 	if (top->sym_filter_entry) {
268 		perf_top__show_details(top);
269 		return;
270 	}
271 
272 	hists__collapse_resort_threaded(&top->sym_evsel->hists);
273 	hists__output_resort_threaded(&top->sym_evsel->hists);
274 	hists__decay_entries_threaded(&top->sym_evsel->hists,
275 				      top->hide_user_symbols,
276 				      top->hide_kernel_symbols);
277 	hists__output_recalc_col_len(&top->sym_evsel->hists,
278 				     top->winsize.ws_row - 3);
279 	putchar('\n');
280 	hists__fprintf(&top->sym_evsel->hists, NULL, false, false,
281 		       top->winsize.ws_row - 4 - printed, win_width, stdout);
282 }
283 
/*
 * Print "@msg: " on stdout, read one line from stdin and, if it consists
 * solely of decimal digits and the value fits in an int, store it in
 * @target.
 *
 * @target is left untouched on read failure/EOF, when the line contains any
 * non-digit character, or when the number is out of range. An empty line is
 * accepted and stores 0.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t len = 0;
	unsigned long val;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	/* getline() may allocate buf even when it fails, so always free it. */
	if (getline(&buf, &len, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* <ctype.h> functions need an unsigned char value. */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	val = strtoul(buf, NULL, 10);
	tmp = (int)val;
	/* Refuse values that do not survive the conversion to int. */
	if (tmp < 0 || (unsigned long)tmp != val)
		goto out_free;

	*target = tmp;
out_free:
	free(buf);
}
309 
/*
 * Prompt for an integer with @msg and store it in @target only when it lies
 * in the 0..100 range; otherwise @target keeps its previous value.
 */
static void prompt_percent(int *target, const char *msg)
{
	int pct = 0;

	prompt_integer(&pct, msg);

	if (pct < 0 || pct > 100)
		return;

	*target = pct;
}
318 
319 static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
320 {
321 	char *buf = malloc(0), *p;
322 	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
323 	struct rb_node *next;
324 	size_t dummy = 0;
325 
326 	/* zero counters of active symbol */
327 	if (syme) {
328 		__zero_source_counters(syme);
329 		top->sym_filter_entry = NULL;
330 	}
331 
332 	fprintf(stdout, "\n%s: ", msg);
333 	if (getline(&buf, &dummy, stdin) < 0)
334 		goto out_free;
335 
336 	p = strchr(buf, '\n');
337 	if (p)
338 		*p = 0;
339 
340 	next = rb_first(&top->sym_evsel->hists.entries);
341 	while (next) {
342 		n = rb_entry(next, struct hist_entry, rb_node);
343 		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
344 			found = n;
345 			break;
346 		}
347 		next = rb_next(&n->rb_node);
348 	}
349 
350 	if (!found) {
351 		fprintf(stderr, "Sorry, %s is not active.\n", buf);
352 		sleep(1);
353 	} else
354 		perf_top__parse_source(top, found);
355 
356 out_free:
357 	free(buf);
358 }
359 
360 static void perf_top__print_mapped_keys(struct perf_top *top)
361 {
362 	char *name = NULL;
363 
364 	if (top->sym_filter_entry) {
365 		struct symbol *sym = top->sym_filter_entry->ms.sym;
366 		name = sym->name;
367 	}
368 
369 	fprintf(stdout, "\nMapped keys:\n");
370 	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
371 	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
372 
373 	if (top->evlist->nr_entries > 1)
374 		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel));
375 
376 	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
377 
378 	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
379 	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
380 	fprintf(stdout, "\t[S]     stop annotation.\n");
381 
382 	fprintf(stdout,
383 		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
384 		top->hide_kernel_symbols ? "yes" : "no");
385 	fprintf(stdout,
386 		"\t[U]     hide user symbols.               \t(%s)\n",
387 		top->hide_user_symbols ? "yes" : "no");
388 	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
389 	fprintf(stdout, "\t[qQ]    quit.\n");
390 }
391 
392 static int perf_top__key_mapped(struct perf_top *top, int c)
393 {
394 	switch (c) {
395 		case 'd':
396 		case 'e':
397 		case 'f':
398 		case 'z':
399 		case 'q':
400 		case 'Q':
401 		case 'K':
402 		case 'U':
403 		case 'F':
404 		case 's':
405 		case 'S':
406 			return 1;
407 		case 'E':
408 			return top->evlist->nr_entries > 1 ? 1 : 0;
409 		default:
410 			break;
411 	}
412 
413 	return 0;
414 }
415 
416 static void perf_top__handle_keypress(struct perf_top *top, int c)
417 {
418 	if (!perf_top__key_mapped(top, c)) {
419 		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
420 		struct termios tc, save;
421 
422 		perf_top__print_mapped_keys(top);
423 		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
424 		fflush(stdout);
425 
426 		tcgetattr(0, &save);
427 		tc = save;
428 		tc.c_lflag &= ~(ICANON | ECHO);
429 		tc.c_cc[VMIN] = 0;
430 		tc.c_cc[VTIME] = 0;
431 		tcsetattr(0, TCSANOW, &tc);
432 
433 		poll(&stdin_poll, 1, -1);
434 		c = getc(stdin);
435 
436 		tcsetattr(0, TCSAFLUSH, &save);
437 		if (!perf_top__key_mapped(top, c))
438 			return;
439 	}
440 
441 	switch (c) {
442 		case 'd':
443 			prompt_integer(&top->delay_secs, "Enter display delay");
444 			if (top->delay_secs < 1)
445 				top->delay_secs = 1;
446 			break;
447 		case 'e':
448 			prompt_integer(&top->print_entries, "Enter display entries (lines)");
449 			if (top->print_entries == 0) {
450 				struct sigaction act = {
451 					.sa_sigaction = perf_top__sig_winch,
452 					.sa_flags     = SA_SIGINFO,
453 				};
454 				perf_top__sig_winch(SIGWINCH, NULL, top);
455 				sigaction(SIGWINCH, &act, NULL);
456 			} else
457 				signal(SIGWINCH, SIG_DFL);
458 			break;
459 		case 'E':
460 			if (top->evlist->nr_entries > 1) {
461 				/* Select 0 as the default event: */
462 				int counter = 0;
463 
464 				fprintf(stderr, "\nAvailable events:");
465 
466 				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
467 					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
468 
469 				prompt_integer(&counter, "Enter details event counter");
470 
471 				if (counter >= top->evlist->nr_entries) {
472 					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
473 					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
474 					sleep(1);
475 					break;
476 				}
477 				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
478 					if (top->sym_evsel->idx == counter)
479 						break;
480 			} else
481 				top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
482 			break;
483 		case 'f':
484 			prompt_integer(&top->count_filter, "Enter display event count filter");
485 			break;
486 		case 'F':
487 			prompt_percent(&top->sym_pcnt_filter,
488 				       "Enter details display event filter (percent)");
489 			break;
490 		case 'K':
491 			top->hide_kernel_symbols = !top->hide_kernel_symbols;
492 			break;
493 		case 'q':
494 		case 'Q':
495 			printf("exiting.\n");
496 			if (top->dump_symtab)
497 				perf_session__fprintf_dsos(top->session, stderr);
498 			exit(0);
499 		case 's':
500 			perf_top__prompt_symbol(top, "Enter details symbol");
501 			break;
502 		case 'S':
503 			if (!top->sym_filter_entry)
504 				break;
505 			else {
506 				struct hist_entry *syme = top->sym_filter_entry;
507 
508 				top->sym_filter_entry = NULL;
509 				__zero_source_counters(syme);
510 			}
511 			break;
512 		case 'U':
513 			top->hide_user_symbols = !top->hide_user_symbols;
514 			break;
515 		case 'z':
516 			top->zero = !top->zero;
517 			break;
518 		default:
519 			break;
520 	}
521 }
522 
523 static void perf_top__sort_new_samples(void *arg)
524 {
525 	struct perf_top *t = arg;
526 	perf_top__reset_sample_counters(t);
527 
528 	if (t->evlist->selected != NULL)
529 		t->sym_evsel = t->evlist->selected;
530 
531 	hists__collapse_resort_threaded(&t->sym_evsel->hists);
532 	hists__output_resort_threaded(&t->sym_evsel->hists);
533 	hists__decay_entries_threaded(&t->sym_evsel->hists,
534 				      t->hide_user_symbols,
535 				      t->hide_kernel_symbols);
536 }
537 
538 static void *display_thread_tui(void *arg)
539 {
540 	struct perf_top *top = arg;
541 	const char *help = "For a higher level overview, try: perf top --sort comm,dso";
542 
543 	perf_top__sort_new_samples(top);
544 	perf_evlist__tui_browse_hists(top->evlist, help,
545 				      perf_top__sort_new_samples,
546 				      top, top->delay_secs);
547 
548 	exit_browser(0);
549 	exit(0);
550 	return NULL;
551 }
552 
553 static void *display_thread(void *arg)
554 {
555 	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
556 	struct termios tc, save;
557 	struct perf_top *top = arg;
558 	int delay_msecs, c;
559 
560 	tcgetattr(0, &save);
561 	tc = save;
562 	tc.c_lflag &= ~(ICANON | ECHO);
563 	tc.c_cc[VMIN] = 0;
564 	tc.c_cc[VTIME] = 0;
565 
566 	pthread__unblock_sigwinch();
567 repeat:
568 	delay_msecs = top->delay_secs * 1000;
569 	tcsetattr(0, TCSANOW, &tc);
570 	/* trash return*/
571 	getc(stdin);
572 
573 	while (1) {
574 		perf_top__print_sym_table(top);
575 		/*
576 		 * Either timeout expired or we got an EINTR due to SIGWINCH,
577 		 * refresh screen in both cases.
578 		 */
579 		switch (poll(&stdin_poll, 1, delay_msecs)) {
580 		case 0:
581 			continue;
582 		case -1:
583 			if (errno == EINTR)
584 				continue;
585 			/* Fall trhu */
586 		default:
587 			goto process_hotkey;
588 		}
589 	}
590 process_hotkey:
591 	c = getc(stdin);
592 	tcsetattr(0, TCSAFLUSH, &save);
593 
594 	perf_top__handle_keypress(top, c);
595 	goto repeat;
596 
597 	return NULL;
598 }
599 
/*
 * Tag samples to be skipped.
 *
 * NULL-terminated list of symbol names: symbol_filter() sets sym->ignore on
 * any symbol whose name matches one of these exactly, and
 * perf_event__process_sample() does not account samples that hit an ignored
 * symbol.
 */
static const char *skip_symbols[] = {
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};
614 
615 static int symbol_filter(struct map *map __used, struct symbol *sym)
616 {
617 	const char *name = sym->name;
618 	int i;
619 
620 	/*
621 	 * ppc64 uses function descriptors and appends a '.' to the
622 	 * start of every instruction address. Remove it.
623 	 */
624 	if (name[0] == '.')
625 		name++;
626 
627 	if (!strcmp(name, "_text") ||
628 	    !strcmp(name, "_etext") ||
629 	    !strcmp(name, "_sinittext") ||
630 	    !strncmp("init_module", name, 11) ||
631 	    !strncmp("cleanup_module", name, 14) ||
632 	    strstr(name, "_text_start") ||
633 	    strstr(name, "_text_end"))
634 		return 1;
635 
636 	for (i = 0; skip_symbols[i]; i++) {
637 		if (!strcmp(skip_symbols[i], name)) {
638 			sym->ignore = true;
639 			break;
640 		}
641 	}
642 
643 	return 0;
644 }
645 
646 static void perf_event__process_sample(struct perf_tool *tool,
647 				       const union perf_event *event,
648 				       struct perf_evsel *evsel,
649 				       struct perf_sample *sample,
650 				       struct machine *machine)
651 {
652 	struct perf_top *top = container_of(tool, struct perf_top, tool);
653 	struct symbol *parent = NULL;
654 	u64 ip = event->ip.ip;
655 	struct addr_location al;
656 	int err;
657 
658 	if (!machine && perf_guest) {
659 		pr_err("Can't find guest [%d]'s kernel information\n",
660 			event->ip.pid);
661 		return;
662 	}
663 
664 	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
665 		top->exact_samples++;
666 
667 	if (perf_event__preprocess_sample(event, machine, &al, sample,
668 					  symbol_filter) < 0 ||
669 	    al.filtered)
670 		return;
671 
672 	if (!top->kptr_restrict_warned &&
673 	    symbol_conf.kptr_restrict &&
674 	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
675 		ui__warning(
676 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
677 "Check /proc/sys/kernel/kptr_restrict.\n\n"
678 "Kernel%s samples will not be resolved.\n",
679 			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
680 			  " modules" : "");
681 		if (use_browser <= 0)
682 			sleep(5);
683 		top->kptr_restrict_warned = true;
684 	}
685 
686 	if (al.sym == NULL) {
687 		const char *msg = "Kernel samples will not be resolved.\n";
688 		/*
689 		 * As we do lazy loading of symtabs we only will know if the
690 		 * specified vmlinux file is invalid when we actually have a
691 		 * hit in kernel space and then try to load it. So if we get
692 		 * here and there are _no_ symbols in the DSO backing the
693 		 * kernel map, bail out.
694 		 *
695 		 * We may never get here, for instance, if we use -K/
696 		 * --hide-kernel-symbols, even if the user specifies an
697 		 * invalid --vmlinux ;-)
698 		 */
699 		if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
700 		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
701 		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
702 			if (symbol_conf.vmlinux_name) {
703 				ui__warning("The %s file can't be used.\n%s",
704 					    symbol_conf.vmlinux_name, msg);
705 			} else {
706 				ui__warning("A vmlinux file was not found.\n%s",
707 					    msg);
708 			}
709 
710 			if (use_browser <= 0)
711 				sleep(5);
712 			top->vmlinux_warned = true;
713 		}
714 	}
715 
716 	if (al.sym == NULL || !al.sym->ignore) {
717 		struct hist_entry *he;
718 
719 		if ((sort__has_parent || symbol_conf.use_callchain) &&
720 		    sample->callchain) {
721 			err = machine__resolve_callchain(machine, evsel, al.thread,
722 							 sample->callchain, &parent);
723 			if (err)
724 				return;
725 		}
726 
727 		he = perf_evsel__add_hist_entry(evsel, &al, sample);
728 		if (he == NULL) {
729 			pr_err("Problem incrementing symbol period, skipping event\n");
730 			return;
731 		}
732 
733 		if (symbol_conf.use_callchain) {
734 			err = callchain_append(he->callchain, &evsel->hists.callchain_cursor,
735 					       sample->period);
736 			if (err)
737 				return;
738 		}
739 
740 		if (top->sort_has_symbols)
741 			perf_top__record_precise_ip(top, he, evsel->idx, ip);
742 	}
743 
744 	return;
745 }
746 
747 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
748 {
749 	struct perf_sample sample;
750 	struct perf_evsel *evsel;
751 	struct perf_session *session = top->session;
752 	union perf_event *event;
753 	struct machine *machine;
754 	u8 origin;
755 	int ret;
756 
757 	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
758 		ret = perf_session__parse_sample(session, event, &sample);
759 		if (ret) {
760 			pr_err("Can't parse sample, err = %d\n", ret);
761 			continue;
762 		}
763 
764 		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
765 		assert(evsel != NULL);
766 
767 		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
768 
769 		if (event->header.type == PERF_RECORD_SAMPLE)
770 			++top->samples;
771 
772 		switch (origin) {
773 		case PERF_RECORD_MISC_USER:
774 			++top->us_samples;
775 			if (top->hide_user_symbols)
776 				continue;
777 			machine = perf_session__find_host_machine(session);
778 			break;
779 		case PERF_RECORD_MISC_KERNEL:
780 			++top->kernel_samples;
781 			if (top->hide_kernel_symbols)
782 				continue;
783 			machine = perf_session__find_host_machine(session);
784 			break;
785 		case PERF_RECORD_MISC_GUEST_KERNEL:
786 			++top->guest_kernel_samples;
787 			machine = perf_session__find_machine(session, event->ip.pid);
788 			break;
789 		case PERF_RECORD_MISC_GUEST_USER:
790 			++top->guest_us_samples;
791 			/*
792 			 * TODO: we don't process guest user from host side
793 			 * except simple counting.
794 			 */
795 			/* Fall thru */
796 		default:
797 			continue;
798 		}
799 
800 
801 		if (event->header.type == PERF_RECORD_SAMPLE) {
802 			perf_event__process_sample(&top->tool, event, evsel,
803 						   &sample, machine);
804 		} else if (event->header.type < PERF_RECORD_MAX) {
805 			hists__inc_nr_events(&evsel->hists, event->header.type);
806 			perf_event__process(&top->tool, event, &sample, machine);
807 		} else
808 			++session->hists.stats.nr_unknown_events;
809 	}
810 }
811 
812 static void perf_top__mmap_read(struct perf_top *top)
813 {
814 	int i;
815 
816 	for (i = 0; i < top->evlist->nr_mmaps; i++)
817 		perf_top__mmap_read_idx(top, i);
818 }
819 
820 static void perf_top__start_counters(struct perf_top *top)
821 {
822 	struct perf_evsel *counter, *first;
823 	struct perf_evlist *evlist = top->evlist;
824 
825 	first = list_entry(evlist->entries.next, struct perf_evsel, node);
826 
827 	list_for_each_entry(counter, &evlist->entries, node) {
828 		struct perf_event_attr *attr = &counter->attr;
829 		struct xyarray *group_fd = NULL;
830 
831 		if (top->group && counter != first)
832 			group_fd = first->fd;
833 
834 		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
835 
836 		if (top->freq) {
837 			attr->sample_type |= PERF_SAMPLE_PERIOD;
838 			attr->freq	  = 1;
839 			attr->sample_freq = top->freq;
840 		}
841 
842 		if (evlist->nr_entries > 1) {
843 			attr->sample_type |= PERF_SAMPLE_ID;
844 			attr->read_format |= PERF_FORMAT_ID;
845 		}
846 
847 		if (symbol_conf.use_callchain)
848 			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
849 
850 		attr->mmap = 1;
851 		attr->comm = 1;
852 		attr->inherit = top->inherit;
853 retry_sample_id:
854 		attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
855 try_again:
856 		if (perf_evsel__open(counter, top->evlist->cpus,
857 				     top->evlist->threads, top->group,
858 				     group_fd) < 0) {
859 			int err = errno;
860 
861 			if (err == EPERM || err == EACCES) {
862 				ui__error_paranoid();
863 				goto out_err;
864 			} else if (err == EINVAL && top->sample_id_all_avail) {
865 				/*
866 				 * Old kernel, no attr->sample_id_type_all field
867 				 */
868 				top->sample_id_all_avail = false;
869 				goto retry_sample_id;
870 			}
871 			/*
872 			 * If it's cycles then fall back to hrtimer
873 			 * based cpu-clock-tick sw counter, which
874 			 * is always available even if no PMU support:
875 			 */
876 			if (attr->type == PERF_TYPE_HARDWARE &&
877 			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
878 				if (verbose)
879 					ui__warning("Cycles event not supported,\n"
880 						    "trying to fall back to cpu-clock-ticks\n");
881 
882 				attr->type = PERF_TYPE_SOFTWARE;
883 				attr->config = PERF_COUNT_SW_CPU_CLOCK;
884 				goto try_again;
885 			}
886 
887 			if (err == ENOENT) {
888 				ui__warning("The %s event is not supported.\n",
889 					    event_name(counter));
890 				goto out_err;
891 			} else if (err == EMFILE) {
892 				ui__warning("Too many events are opened.\n"
893 					    "Try again after reducing the number of events\n");
894 				goto out_err;
895 			}
896 
897 			ui__warning("The sys_perf_event_open() syscall "
898 				    "returned with %d (%s).  /bin/dmesg "
899 				    "may provide additional information.\n"
900 				    "No CONFIG_PERF_EVENTS=y kernel support "
901 				    "configured?\n", err, strerror(err));
902 			goto out_err;
903 		}
904 	}
905 
906 	if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
907 		ui__warning("Failed to mmap with %d (%s)\n",
908 			    errno, strerror(errno));
909 		goto out_err;
910 	}
911 
912 	return;
913 
914 out_err:
915 	exit_browser(0);
916 	exit(0);
917 }
918 
919 static int perf_top__setup_sample_type(struct perf_top *top)
920 {
921 	if (!top->sort_has_symbols) {
922 		if (symbol_conf.use_callchain) {
923 			ui__warning("Selected -g but \"sym\" not present in --sort/-s.");
924 			return -EINVAL;
925 		}
926 	} else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
927 		if (callchain_register_param(&callchain_param) < 0) {
928 			ui__warning("Can't register callchain params.\n");
929 			return -EINVAL;
930 		}
931 	}
932 
933 	return 0;
934 }
935 
936 static int __cmd_top(struct perf_top *top)
937 {
938 	pthread_t thread;
939 	int ret;
940 	/*
941 	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
942 	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
943 	 */
944 	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
945 	if (top->session == NULL)
946 		return -ENOMEM;
947 
948 	ret = perf_top__setup_sample_type(top);
949 	if (ret)
950 		goto out_delete;
951 
952 	if (top->target_tid != -1)
953 		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
954 						  perf_event__process,
955 						  &top->session->host_machine);
956 	else
957 		perf_event__synthesize_threads(&top->tool, perf_event__process,
958 					       &top->session->host_machine);
959 	perf_top__start_counters(top);
960 	top->session->evlist = top->evlist;
961 	perf_session__update_sample_type(top->session);
962 
963 	/* Wait for a minimal set of events before starting the snapshot */
964 	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
965 
966 	perf_top__mmap_read(top);
967 
968 	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
969 							    display_thread), top)) {
970 		printf("Could not create display thread.\n");
971 		exit(-1);
972 	}
973 
974 	if (top->realtime_prio) {
975 		struct sched_param param;
976 
977 		param.sched_priority = top->realtime_prio;
978 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
979 			printf("Could not set realtime priority.\n");
980 			exit(-1);
981 		}
982 	}
983 
984 	while (1) {
985 		u64 hits = top->samples;
986 
987 		perf_top__mmap_read(top);
988 
989 		if (hits == top->samples)
990 			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
991 	}
992 
993 out_delete:
994 	perf_session__delete(top->session);
995 	top->session = NULL;
996 
997 	return 0;
998 }
999 
/*
 * Option callback for the call-graph option.
 *
 * @opt->value is the struct perf_top being configured, @arg the option
 * argument (may be NULL) and @unset is non-zero for the negated form.
 *
 * @arg is a comma separated list: output mode, then optionally the minimum
 * percentage, an optional print limit and the call chain order
 * ("caller"/"callee"). It is tokenized in place with strtok(), i.e. the
 * string is modified.
 *
 * Returns 0 on success and -1 on a malformed argument or when the callchain
 * parameters cannot be registered.
 */
static int
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
	struct perf_top *top = (struct perf_top *)opt->value;
	char *tok, *tok2;
	char *endptr;

	/*
	 * --no-call-graph
	 */
	if (unset) {
		top->dont_use_callchains = true;
		return 0;
	}

	symbol_conf.use_callchain = true;

	/* No argument: callchains are enabled with the current parameters. */
	if (!arg)
		return 0;

	tok = strtok((char *)arg, ",");
	if (!tok)
		return -1;

	/*
	 * get the output mode
	 *
	 * NOTE(review): the comparison length is strlen(arg), taken after
	 * strtok() has cut the string at the first ','; when arg does not
	 * start with ',' that is the token length, so an abbreviation appears
	 * to match the first mode it is a prefix of - confirm this is intended.
	 */
	if (!strncmp(tok, "graph", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_ABS;

	else if (!strncmp(tok, "flat", strlen(arg)))
		callchain_param.mode = CHAIN_FLAT;

	else if (!strncmp(tok, "fractal", strlen(arg)))
		callchain_param.mode = CHAIN_GRAPH_REL;

	else if (!strncmp(tok, "none", strlen(arg))) {
		/* "none" switches callchains back off; nothing to register. */
		callchain_param.mode = CHAIN_NONE;
		symbol_conf.use_callchain = false;

		return 0;
	} else
		return -1;

	/* get the min percentage */
	tok = strtok(NULL, ",");
	if (!tok)
		goto setup;

	callchain_param.min_percent = strtod(tok, &endptr);
	if (tok == endptr)
		return -1;

	/* get the print limit */
	tok2 = strtok(NULL, ",");
	if (!tok2)
		goto setup;

	/* A token starting with 'c' is taken as the order, not as a limit. */
	if (tok2[0] != 'c') {
		callchain_param.print_limit = strtod(tok2, &endptr);
		tok2 = strtok(NULL, ",");
		if (!tok2)
			goto setup;
	}

	/* get the call chain order */
	if (!strcmp(tok2, "caller"))
		callchain_param.order = ORDER_CALLER;
	else if (!strcmp(tok2, "callee"))
		callchain_param.order = ORDER_CALLEE;
	else
		return -1;
setup:
	if (callchain_register_param(&callchain_param) < 0) {
		fprintf(stderr, "Can't register callchain params\n");
		return -1;
	}
	return 0;
}
1077 
/* NULL-terminated list of usage lines for 'perf top'. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL
};
1082 
1083 int cmd_top(int argc, const char **argv, const char *prefix __used)
1084 {
1085 	struct perf_evsel *pos;
1086 	int status = -ENOMEM;
1087 	struct perf_top top = {
1088 		.count_filter	     = 5,
1089 		.delay_secs	     = 2,
1090 		.target_pid	     = -1,
1091 		.target_tid	     = -1,
1092 		.freq		     = 1000, /* 1 KHz */
1093 		.sample_id_all_avail = true,
1094 		.mmap_pages	     = 128,
1095 		.sym_pcnt_filter     = 5,
1096 	};
1097 	char callchain_default_opt[] = "fractal,0.5,callee";
1098 	const struct option options[] = {
1099 	OPT_CALLBACK('e', "event", &top.evlist, "event",
1100 		     "event selector. use 'perf list' to list available events",
1101 		     parse_events_option),
1102 	OPT_INTEGER('c', "count", &top.default_interval,
1103 		    "event period to sample"),
1104 	OPT_INTEGER('p', "pid", &top.target_pid,
1105 		    "profile events on existing process id"),
1106 	OPT_INTEGER('t', "tid", &top.target_tid,
1107 		    "profile events on existing thread id"),
1108 	OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
1109 			    "system-wide collection from all CPUs"),
1110 	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
1111 		    "list of cpus to monitor"),
1112 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1113 		   "file", "vmlinux pathname"),
1114 	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
1115 		    "hide kernel symbols"),
1116 	OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
1117 	OPT_INTEGER('r', "realtime", &top.realtime_prio,
1118 		    "collect data with this RT SCHED_FIFO priority"),
1119 	OPT_INTEGER('d', "delay", &top.delay_secs,
1120 		    "number of seconds to delay between refreshes"),
1121 	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
1122 			    "dump the symbol table used for profiling"),
1123 	OPT_INTEGER('f', "count-filter", &top.count_filter,
1124 		    "only display functions with more events than this"),
1125 	OPT_BOOLEAN('g', "group", &top.group,
1126 			    "put the counters into a counter group"),
1127 	OPT_BOOLEAN('i', "inherit", &top.inherit,
1128 		    "child tasks inherit counters"),
1129 	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
1130 		    "symbol to annotate"),
1131 	OPT_BOOLEAN('z', "zero", &top.zero,
1132 		    "zero history across updates"),
1133 	OPT_INTEGER('F', "freq", &top.freq,
1134 		    "profile at this frequency"),
1135 	OPT_INTEGER('E', "entries", &top.print_entries,
1136 		    "display this many functions"),
1137 	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
1138 		    "hide user symbols"),
1139 	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
1140 	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
1141 	OPT_INCR('v', "verbose", &verbose,
1142 		    "be more verbose (show counter open errors, etc)"),
1143 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
1144 		   "sort by key(s): pid, comm, dso, symbol, parent"),
1145 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
1146 		    "Show a column with the number of samples"),
1147 	OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
1148 		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
1149 		     "Default: fractal,0.5,callee", &parse_callchain_opt,
1150 		     callchain_default_opt),
1151 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
1152 		    "Show a column with the sum of periods"),
1153 	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
1154 		   "only consider symbols in these dsos"),
1155 	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
1156 		   "only consider symbols in these comms"),
1157 	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
1158 		   "only consider these symbols"),
1159 	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
1160 		    "Interleave source code with assembly code (default)"),
1161 	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
1162 		    "Display raw encoding of assembly instructions (default)"),
1163 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
1164 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
1165 	OPT_END()
1166 	};
1167 
1168 	top.evlist = perf_evlist__new(NULL, NULL);
1169 	if (top.evlist == NULL)
1170 		return -ENOMEM;
1171 
1172 	symbol_conf.exclude_other = false;
1173 
1174 	argc = parse_options(argc, argv, options, top_usage, 0);
1175 	if (argc)
1176 		usage_with_options(top_usage, options);
1177 
1178 	if (sort_order == default_sort_order)
1179 		sort_order = "dso,symbol";
1180 
1181 	setup_sorting(top_usage, options);
1182 
1183 	if (top.use_stdio)
1184 		use_browser = 0;
1185 	else if (top.use_tui)
1186 		use_browser = 1;
1187 
1188 	setup_browser(false);
1189 
1190 	/* CPU and PID are mutually exclusive */
1191 	if (top.target_tid > 0 && top.cpu_list) {
1192 		printf("WARNING: PID switch overriding CPU\n");
1193 		sleep(1);
1194 		top.cpu_list = NULL;
1195 	}
1196 
1197 	if (top.target_pid != -1)
1198 		top.target_tid = top.target_pid;
1199 
1200 	if (perf_evlist__create_maps(top.evlist, top.target_pid,
1201 				     top.target_tid, top.cpu_list) < 0)
1202 		usage_with_options(top_usage, options);
1203 
1204 	if (!top.evlist->nr_entries &&
1205 	    perf_evlist__add_default(top.evlist) < 0) {
1206 		pr_err("Not enough memory for event selector list\n");
1207 		return -ENOMEM;
1208 	}
1209 
1210 	symbol_conf.nr_events = top.evlist->nr_entries;
1211 
1212 	if (top.delay_secs < 1)
1213 		top.delay_secs = 1;
1214 
1215 	/*
1216 	 * User specified count overrides default frequency.
1217 	 */
1218 	if (top.default_interval)
1219 		top.freq = 0;
1220 	else if (top.freq) {
1221 		top.default_interval = top.freq;
1222 	} else {
1223 		fprintf(stderr, "frequency and count are zero, aborting\n");
1224 		exit(EXIT_FAILURE);
1225 	}
1226 
1227 	list_for_each_entry(pos, &top.evlist->entries, node) {
1228 		/*
1229 		 * Fill in the ones not specifically initialized via -c:
1230 		 */
1231 		if (!pos->attr.sample_period)
1232 			pos->attr.sample_period = top.default_interval;
1233 	}
1234 
1235 	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
1236 
1237 	symbol_conf.priv_size = sizeof(struct annotation);
1238 
1239 	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
1240 	if (symbol__init() < 0)
1241 		return -1;
1242 
1243 	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
1244 	sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
1245 	sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
1246 
1247 	/*
1248 	 * Avoid annotation data structures overhead when symbols aren't on the
1249 	 * sort list.
1250 	 */
1251 	top.sort_has_symbols = sort_sym.list.next != NULL;
1252 
1253 	get_term_dimensions(&top.winsize);
1254 	if (top.print_entries == 0) {
1255 		struct sigaction act = {
1256 			.sa_sigaction = perf_top__sig_winch,
1257 			.sa_flags     = SA_SIGINFO,
1258 		};
1259 		perf_top__update_print_entries(&top);
1260 		sigaction(SIGWINCH, &act, NULL);
1261 	}
1262 
1263 	status = __cmd_top(&top);
1264 
1265 	perf_evlist__delete(top.evlist);
1266 
1267 	return status;
1268 }
1269