xref: /linux/tools/perf/builtin-top.c (revision 046fd8206d820b71e7870f7b894b46f8a15ae974)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * builtin-top.c
4  *
5  * Builtin top command: Display a continuously updated profile of
6  * any workload, CPU or specific PID.
7  *
8  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
9  *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Improvements and fixes by:
12  *
13  *   Arjan van de Ven <arjan@linux.intel.com>
14  *   Yanmin Zhang <yanmin.zhang@intel.com>
15  *   Wu Fengguang <fengguang.wu@intel.com>
16  *   Mike Galbraith <efault@gmx.de>
17  *   Paul Mackerras <paulus@samba.org>
18  */
19 #include "builtin.h"
20 
21 #include "perf.h"
22 
23 #include "util/annotate.h"
24 #include "util/bpf-event.h"
25 #include "util/cgroup.h"
26 #include "util/config.h"
27 #include "util/color.h"
28 #include "util/dso.h"
29 #include "util/evlist.h"
30 #include "util/evsel.h"
31 #include "util/evsel_config.h"
32 #include "util/event.h"
33 #include "util/machine.h"
34 #include "util/map.h"
35 #include "util/mmap.h"
36 #include "util/session.h"
37 #include "util/thread.h"
38 #include "util/stat.h"
39 #include "util/symbol.h"
40 #include "util/synthetic-events.h"
41 #include "util/top.h"
42 #include "util/util.h"
43 #include <linux/rbtree.h>
44 #include <subcmd/parse-options.h>
45 #include "util/parse-events.h"
46 #include "util/callchain.h"
47 #include "util/cpumap.h"
48 #include "util/sort.h"
49 #include "util/string2.h"
50 #include "util/term.h"
51 #include "util/intlist.h"
52 #include "util/parse-branch-options.h"
53 #include "arch/common.h"
54 #include "ui/ui.h"
55 
56 #include "util/debug.h"
57 #include "util/ordered-events.h"
58 #include "util/pfm.h"
59 #include "dwarf-regs.h"
60 
61 #include <assert.h>
62 #include <elf.h>
63 #include <fcntl.h>
64 
65 #include <stdio.h>
66 #include <termios.h>
67 #include <unistd.h>
68 #include <inttypes.h>
69 
70 #include <errno.h>
71 #include <time.h>
72 #include <sched.h>
73 #include <signal.h>
74 
75 #include <sys/syscall.h>
76 #include <sys/ioctl.h>
77 #include <poll.h>
78 #include <sys/prctl.h>
79 #include <sys/wait.h>
80 #include <sys/uio.h>
81 #include <sys/utsname.h>
82 #include <sys/mman.h>
83 
84 #include <linux/stringify.h>
85 #include <linux/time64.h>
86 #include <linux/types.h>
87 #include <linux/err.h>
88 
89 #include <linux/ctype.h>
90 #include <perf/mmap.h>
91 
/* Set from signal handlers / the UI thread to request shutdown of all loops. */
92 static volatile sig_atomic_t done;
/* Set by the SIGWINCH handler; terminal size is re-read in the display loop. */
93 static volatile sig_atomic_t resize;
94 
/* Number of terminal rows consumed by the header above the symbol table. */
95 #define HEADER_LINE_NR  5
96 
/* Recompute how many symbol-table rows fit below the header. */
97 static void perf_top__update_print_entries(struct perf_top *top)
98 {
99 	top->print_entries = top->winsize.ws_row - HEADER_LINE_NR;
100 }
101 
/*
 * SIGWINCH handler: only sets a flag (async-signal-safe); the actual
 * resize work is done later by perf_top__resize() from thread context.
 */
102 static void winch_sig(int sig __maybe_unused)
103 {
104 	resize = 1;
105 }
106 
/* Re-read the terminal dimensions and adjust the number of printed entries. */
107 static void perf_top__resize(struct perf_top *top)
108 {
109 	get_term_dimensions(&top->winsize);
110 	perf_top__update_print_entries(top);
111 }
112 
/*
 * Prepare annotation for @he's symbol so the detail view ('s' key) can show
 * per-instruction samples.  On success @he becomes top->sym_filter_entry.
 * Returns 0 on success, a negative error otherwise.
 */
113 static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
114 {
115 	struct evsel *evsel;
116 	struct symbol *sym;
117 	struct annotation *notes;
118 	struct map *map;
119 	struct dso *dso;
120 	int err = -1;
121 
122 	if (!he || !he->ms.sym)
123 		return -1;
124 
125 	evsel = hists_to_evsel(he->hists);
126 
127 	sym = he->ms.sym;
128 	map = he->ms.map;
129 	dso = map__dso(map);
130 
131 	/*
132 	 * We can't annotate with just /proc/kallsyms
133 	 */
134 	if (dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) {
135 		pr_err("Can't annotate %s: No vmlinux file was found in the "
136 		       "path\n", sym->name);
		/* give the user a chance to read the message in --stdio mode */
137 		sleep(1);
138 		return -1;
139 	}
140 
141 	notes = symbol__annotation(sym);
	/* serialize against perf_top__record_precise_ip() updating histograms */
142 	annotation__lock(notes);
143 
144 	if (!symbol__hists(sym, top->evlist->core.nr_entries)) {
145 		annotation__unlock(notes);
146 		pr_err("Not enough memory for annotating '%s' symbol!\n",
147 		       sym->name);
148 		sleep(1);
149 		return err;
150 	}
151 
152 	err = symbol__annotate(&he->ms, evsel, NULL);
153 	if (err == 0) {
154 		top->sym_filter_entry = he;
155 	} else {
156 		char msg[BUFSIZ];
157 		symbol__strerror_disassemble(&he->ms, err, msg, sizeof(msg));
158 		pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
159 	}
160 
161 	annotation__unlock(notes);
162 	return err;
163 }
164 
165 static void __zero_source_counters(struct hist_entry *he)
166 {
167 	struct symbol *sym = he->ms.sym;
168 	symbol__annotate_zero_histograms(sym);
169 }
170 
/*
 * Warn (once per map, see map__set_erange_warned()) that a sample address
 * fell outside the boundaries of the map/symbol it resolved to, including
 * enough environment info (arch, kernel, tool version) for a bug report.
 */
171 static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
172 {
173 	struct utsname uts;
174 	int err = uname(&uts);
175 	struct dso *dso = map__dso(map);
176 
177 	ui__warning("Out of bounds address found:\n\n"
178 		    "Addr:   %" PRIx64 "\n"
179 		    "DSO:    %s %c\n"
180 		    "Map:    %" PRIx64 "-%" PRIx64 "\n"
181 		    "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
182 		    "Arch:   %s\n"
183 		    "Kernel: %s\n"
184 		    "Tools:  %s\n\n"
185 		    "Not all samples will be on the annotation output.\n\n"
186 		    "Please report to linux-kernel@vger.kernel.org\n",
187 		    ip, dso__long_name(dso), dso__symtab_origin(dso),
188 		    map__start(map), map__end(map), sym->start, sym->end,
189 		    sym->binding == STB_GLOBAL ? 'g' :
190 		    sym->binding == STB_LOCAL  ? 'l' : 'w', sym->name,
191 		    err ? "[unknown]" : uts.machine,
192 		    err ? "[unknown]" : uts.release, perf_version_string);
	/* in --stdio mode the warning scrolls away, give time to read it */
193 	if (use_browser <= 0)
194 		sleep(5);
195 
196 	map__set_erange_warned(map);
197 }
198 
/*
 * Account @ip into the annotation histogram of @he's symbol.  Called with
 * he->hists->lock held (see EXCLUSIVE_LOCKS_REQUIRED below); that lock is
 * temporarily dropped on the error path before sleeping/warning.
 */
199 static void perf_top__record_precise_ip(struct perf_top *top,
200 					struct hist_entry *he,
201 					struct perf_sample *sample,
202 					struct evsel *evsel, u64 ip)
203 	EXCLUSIVE_LOCKS_REQUIRED(he->hists->lock)
204 {
205 	struct annotation *notes;
206 	struct symbol *sym = he->ms.sym;
207 	int err = 0;
208 
	/* in --stdio mode only the filtered symbol's histogram is maintained */
209 	if (sym == NULL || (use_browser == 0 &&
210 			    (top->sym_filter_entry == NULL ||
211 			     top->sym_filter_entry->ms.sym != sym)))
212 		return;
213 
214 	notes = symbol__annotation(sym);
215 
	/* best effort: skip this sample rather than block the fast path */
216 	if (!annotation__trylock(notes))
217 		return;
218 
219 	err = hist_entry__inc_addr_samples(he, sample, evsel, ip);
220 
221 	annotation__unlock(notes);
222 
223 	if (unlikely(err)) {
224 		/*
225 		 * This function is now called with he->hists->lock held.
226 		 * Release it before going to sleep.
227 		 */
228 		mutex_unlock(&he->hists->lock);
229 
230 		if (err == -ERANGE && !map__erange_warned(he->ms.map))
231 			ui__warn_map_erange(he->ms.map, sym, ip);
232 		else if (err == -ENOMEM) {
233 			pr_err("Not enough memory for annotating '%s' symbol!\n",
234 			       sym->name);
235 			sleep(1);
236 		}
237 
238 		mutex_lock(&he->hists->lock);
239 	}
240 }
241 
/*
 * --stdio detail view: print the annotated source/disassembly of the
 * currently filtered symbol (top->sym_filter_entry), then zero or decay
 * its histogram depending on the 'z' toggle.
 */
242 static void perf_top__show_details(struct perf_top *top)
243 {
244 	struct hist_entry *he = top->sym_filter_entry;
245 	struct evsel *evsel;
246 	struct annotation *notes;
247 	struct symbol *symbol;
248 	int more;
249 
250 	if (!he)
251 		return;
252 
253 	evsel = hists_to_evsel(he->hists);
254 
255 	symbol = he->ms.sym;
256 	notes = symbol__annotation(symbol);
257 
258 	annotation__lock(notes);
259 
260 	symbol__calc_percent(symbol, evsel);
261 
	/* nothing annotated yet (perf_top__parse_source() failed or pending) */
262 	if (notes->src == NULL)
263 		goto out_unlock;
264 
265 	printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name);
266 	printf("  Events  Pcnt (>=%d%%)\n", annotate_opts.min_pcnt);
267 
268 	more = hist_entry__annotate_printf(he, top->sym_evsel);
269 
270 	if (top->evlist->enabled) {
271 		if (top->zero)
272 			symbol__annotate_zero_histogram(symbol, top->sym_evsel);
273 		else
274 			symbol__annotate_decay_histogram(symbol, top->sym_evsel);
275 	}
276 	if (more != 0)
277 		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
278 out_unlock:
279 	annotation__unlock(notes);
280 }
281 
/*
 * Decay/zero, collapse and output-resort the hists of every event so the
 * next screen refresh shows up-to-date, correctly ordered entries.  Also
 * links member events' entries to their group leader when --group is used.
 */
282 static void perf_top__resort_hists(struct perf_top *t)
283 {
284 	struct evlist *evlist = t->evlist;
285 	struct evsel *pos;
286 
287 	evlist__for_each_entry(evlist, pos) {
288 		struct hists *hists = evsel__hists(pos);
289 
290 		/*
291 		 * unlink existing entries so that they can be linked
292 		 * in a correct order in hists__match() below.
293 		 */
294 		hists__unlink(hists);
295 
296 		if (evlist->enabled) {
297 			if (t->zero) {
298 				hists__delete_entries(hists);
299 			} else {
300 				hists__decay_entries(hists, t->hide_user_symbols,
301 						     t->hide_kernel_symbols);
302 			}
303 		}
304 
305 		hists__collapse_resort(hists, NULL);
306 
307 		/* Non-group events are considered as leader */
308 		if (symbol_conf.event_group && !evsel__is_group_leader(pos)) {
309 			struct hists *leader_hists = evsel__hists(evsel__leader(pos));
310 
311 			hists__match(leader_hists, hists);
312 			hists__link(leader_hists, hists);
313 		}
314 	}
315 
316 	evlist__for_each_entry(evlist, pos) {
317 		evsel__output_resort(pos, NULL);
318 	}
319 }
320 
/*
 * One --stdio refresh: clear the screen, print the header, warn about lost
 * events (non-overwrite mode only), then either show the filtered symbol's
 * details or the resorted symbol table.
 */
321 static void perf_top__print_sym_table(struct perf_top *top)
322 {
323 	char bf[160];
324 	int printed = 0;
325 	const int win_width = top->winsize.ws_col - 1;
326 	struct evsel *evsel = top->sym_evsel;
327 	struct hists *hists = evsel__hists(evsel);
328 
329 	puts(CONSOLE_CLEAR);
330 
331 	perf_top__header_snprintf(top, bf, sizeof(bf));
332 	printf("%s\n", bf);
333 
334 	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
335 
	/* warn only when the lost-event count changed since the last warning */
336 	if (!top->record_opts.overwrite &&
337 	    (top->evlist->stats.nr_lost_warned !=
338 	     top->evlist->stats.nr_events[PERF_RECORD_LOST])) {
339 		top->evlist->stats.nr_lost_warned =
340 			      top->evlist->stats.nr_events[PERF_RECORD_LOST];
341 		color_fprintf(stdout, PERF_COLOR_RED,
342 			      "WARNING: LOST %d chunks, Check IO/CPU overload",
343 			      top->evlist->stats.nr_lost_warned);
344 		++printed;
345 	}
346 
347 	if (top->sym_filter_entry) {
348 		perf_top__show_details(top);
349 		return;
350 	}
351 
352 	perf_top__resort_hists(top);
353 
354 	hists__output_recalc_col_len(hists, top->print_entries - printed);
355 	putchar('\n');
356 	hists__fprintf(hists, false, top->print_entries - printed, win_width,
357 		       top->min_percent, stdout, !symbol_conf.use_callchain);
358 }
359 
/*
 * Prompt the user with @msg and read a non-negative decimal integer from
 * stdin into @*target.  On empty/invalid input *target is left untouched.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;	/* getline() may have allocated even on failure */

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	p = buf;
	while (*p) {
		/* cast: isdigit() on a negative plain char is undefined */
		if (!isdigit((unsigned char)*p))
			goto out_free;
		p++;
	}
	tmp = strtoul(buf, NULL, 10);
	*target = tmp;
out_free:
	free(buf);
}
385 
/* Like prompt_integer(), but only accept values in the 0..100 range. */
static void prompt_percent(int *target, const char *msg)
{
	int val = 0;

	prompt_integer(&val, msg);
	if (val < 0 || val > 100)
		return;		/* keep the previous setting */
	*target = val;
}
394 
/*
 * 's' key handler: read a symbol name from stdin, look it up among the
 * active hist entries and make it the annotation filter entry.  Any
 * previously filtered symbol has its counters zeroed first.
 */
395 static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
396 {
397 	char *buf = NULL, *p;
398 	struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
399 	struct hists *hists = evsel__hists(top->sym_evsel);
400 	struct rb_node *next;
401 	size_t dummy = 0;
402 
403 	/* zero counters of active symbol */
404 	if (syme) {
405 		__zero_source_counters(syme);
406 		top->sym_filter_entry = NULL;
407 	}
408 
409 	fprintf(stdout, "\n%s: ", msg);
410 	if (getline(&buf, &dummy, stdin) < 0)
411 		goto out_free;
412 
	/* strip the trailing newline getline() keeps */
413 	p = strchr(buf, '\n');
414 	if (p)
415 		*p = 0;
416 
	/* linear scan of the sorted entries for an exact name match */
417 	next = rb_first_cached(&hists->entries);
418 	while (next) {
419 		n = rb_entry(next, struct hist_entry, rb_node);
420 		if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
421 			found = n;
422 			break;
423 		}
424 		next = rb_next(&n->rb_node);
425 	}
426 
427 	if (!found) {
428 		fprintf(stderr, "Sorry, %s is not active.\n", buf);
429 		sleep(1);
430 	} else
431 		perf_top__parse_source(top, found);
432 
433 out_free:
434 	free(buf);
435 }
436 
/* Print the interactive key bindings help, with each option's current value. */
437 static void perf_top__print_mapped_keys(struct perf_top *top)
438 {
439 	char *name = NULL;
440 
441 	if (top->sym_filter_entry) {
442 		struct symbol *sym = top->sym_filter_entry->ms.sym;
443 		name = sym->name;
444 	}
445 
446 	fprintf(stdout, "\nMapped keys:\n");
447 	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
448 	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
449 
	/* [E] only makes sense with more than one event to pick from */
450 	if (top->evlist->core.nr_entries > 1)
451 		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", evsel__name(top->sym_evsel));
452 
453 	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
454 
455 	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", annotate_opts.min_pcnt);
456 	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
457 	fprintf(stdout, "\t[S]     stop annotation.\n");
458 
459 	fprintf(stdout,
460 		"\t[K]     hide kernel symbols.             \t(%s)\n",
461 		top->hide_kernel_symbols ? "yes" : "no");
462 	fprintf(stdout,
463 		"\t[U]     hide user symbols.               \t(%s)\n",
464 		top->hide_user_symbols ? "yes" : "no");
465 	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
466 	fprintf(stdout, "\t[qQ]    quit.\n");
467 }
468 
469 static int perf_top__key_mapped(struct perf_top *top, int c)
470 {
471 	switch (c) {
472 		case 'd':
473 		case 'e':
474 		case 'f':
475 		case 'z':
476 		case 'q':
477 		case 'Q':
478 		case 'K':
479 		case 'U':
480 		case 'F':
481 		case 's':
482 		case 'S':
483 			return 1;
484 		case 'E':
485 			return top->evlist->core.nr_entries > 1 ? 1 : 0;
486 		default:
487 			break;
488 	}
489 
490 	return 0;
491 }
492 
/*
 * Dispatch one interactive keypress.  Unmapped keys first show the help and
 * read a second key.  Returns true to keep running, false on 'q'/'Q'.
 */
493 static bool perf_top__handle_keypress(struct perf_top *top, int c)
494 {
495 	bool ret = true;
496 
497 	if (!perf_top__key_mapped(top, c)) {
498 		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
499 		struct termios save;
500 
501 		perf_top__print_mapped_keys(top);
502 		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
503 		fflush(stdout);
504 
		/* raw, no-echo input for the single-key selection */
505 		set_term_quiet_input(&save);
506 
507 		poll(&stdin_poll, 1, -1);
508 		c = getc(stdin);
509 
510 		tcsetattr(0, TCSAFLUSH, &save);
511 		if (!perf_top__key_mapped(top, c))
512 			return ret;
513 	}
514 
515 	switch (c) {
516 		case 'd':
517 			prompt_integer(&top->delay_secs, "Enter display delay");
518 			if (top->delay_secs < 1)
519 				top->delay_secs = 1;
520 			break;
521 		case 'e':
522 			prompt_integer(&top->print_entries, "Enter display entries (lines)");
			/* 0 means "auto": track the window size via SIGWINCH */
523 			if (top->print_entries == 0) {
524 				perf_top__resize(top);
525 				signal(SIGWINCH, winch_sig);
526 			} else {
527 				signal(SIGWINCH, SIG_DFL);
528 			}
529 			break;
530 		case 'E':
531 			if (top->evlist->core.nr_entries > 1) {
532 				/* Select 0 as the default event: */
533 				int counter = 0;
534 
535 				fprintf(stderr, "\nAvailable events:");
536 
537 				evlist__for_each_entry(top->evlist, top->sym_evsel)
538 					fprintf(stderr, "\n\t%d %s", top->sym_evsel->core.idx, evsel__name(top->sym_evsel));
539 
540 				prompt_integer(&counter, "Enter details event counter");
541 
542 				if (counter >= top->evlist->core.nr_entries) {
543 					top->sym_evsel = evlist__first(top->evlist);
544 					fprintf(stderr, "Sorry, no such event, using %s.\n", evsel__name(top->sym_evsel));
545 					sleep(1);
546 					break;
547 				}
				/* leave top->sym_evsel on the chosen index */
548 				evlist__for_each_entry(top->evlist, top->sym_evsel)
549 					if (top->sym_evsel->core.idx == counter)
550 						break;
551 			} else
552 				top->sym_evsel = evlist__first(top->evlist);
553 			break;
554 		case 'f':
555 			prompt_integer(&top->count_filter, "Enter display event count filter");
556 			break;
557 		case 'F':
558 			prompt_percent(&annotate_opts.min_pcnt,
559 				       "Enter details display event filter (percent)");
560 			break;
561 		case 'K':
562 			top->hide_kernel_symbols = !top->hide_kernel_symbols;
563 			break;
564 		case 'q':
565 		case 'Q':
566 			printf("exiting.\n");
567 			if (top->dump_symtab)
568 				perf_session__fprintf_dsos(top->session, stderr);
569 			ret = false;
570 			break;
571 		case 's':
572 			perf_top__prompt_symbol(top, "Enter details symbol");
573 			break;
574 		case 'S':
575 			if (!top->sym_filter_entry)
576 				break;
577 			else {
578 				struct hist_entry *syme = top->sym_filter_entry;
579 
580 				top->sym_filter_entry = NULL;
581 				__zero_source_counters(syme);
582 			}
583 			break;
584 		case 'U':
585 			top->hide_user_symbols = !top->hide_user_symbols;
586 			break;
587 		case 'z':
588 			top->zero = !top->zero;
589 			break;
590 		default:
591 			break;
592 	}
593 
594 	return ret;
595 }
596 
/*
 * TUI refresh timer callback: adopt the event currently selected in the
 * browser, resort the hists and warn if the reader fell behind the ring
 * buffer (lost or dropped events since the last refresh).
 */
597 static void perf_top__sort_new_samples(void *arg)
598 {
599 	struct perf_top *t = arg;
600 
601 	if (t->evlist->selected != NULL)
602 		t->sym_evsel = t->evlist->selected;
603 
604 	perf_top__resort_hists(t);
605 
606 	if (t->lost || t->drop)
607 		pr_warning("Too slow to read ring buffer (change period (-c/-F) or limit CPUs (-C)\n");
608 }
609 
/* Request shutdown of both the session machinery and all top threads. */
610 static void stop_top(void)
611 {
612 	session_done = 1;
613 	done = 1;
614 }
615 
/*
 * TUI display thread body: runs the hists browser, resorting samples on
 * each timer tick.  K_RELOAD from the browser restarts with zeroed hists.
 */
616 static void *display_thread_tui(void *arg)
617 {
618 	struct evsel *pos;
619 	struct perf_top *top = arg;
620 	const char *help = "For a higher level overview, try: perf top --sort comm,dso";
621 	struct hist_browser_timer hbt = {
622 		.timer		= perf_top__sort_new_samples,
623 		.arg		= top,
624 		.refresh	= top->delay_secs,
625 	};
626 	int ret;
627 
628 	/* In order to read symbols from other namespaces perf to  needs to call
629 	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
630 	 * unshare(2) the fs so that we may continue to setns into namespaces
631 	 * that we're observing.
632 	 */
633 	unshare(CLONE_FS);
634 
635 	prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0);
636 
637 repeat:
638 	perf_top__sort_new_samples(top);
639 
640 	/*
641 	 * Initialize the uid_filter_str, in the future the TUI will allow
642 	 * Zooming in/out UIDs. For now just use whatever the user passed
643 	 * via --uid.
644 	 */
645 	evlist__for_each_entry(top->evlist, pos) {
646 		struct hists *hists = evsel__hists(pos);
647 		hists->uid_filter_str = top->uid_str;
648 	}
649 
650 	ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
651 				       perf_session__env(top->session),
652 				       !top->record_opts.overwrite);
	/* user asked for a reload: zero everything and re-enter the browser */
653 	if (ret == K_RELOAD) {
654 		top->zero = true;
655 		goto repeat;
656 	} else
657 		stop_top();
658 
659 	return NULL;
660 }
661 
/* SIGINT/SIGQUIT/SIGTERM handler: request a clean shutdown. */
662 static void display_sig(int sig __maybe_unused)
663 {
664 	stop_top();
665 }
666 
667 static void display_setup_sig(void)
668 {
669 	signal(SIGSEGV, sighandler_dump_stack);
670 	signal(SIGFPE, sighandler_dump_stack);
671 	signal(SIGINT,  display_sig);
672 	signal(SIGQUIT, display_sig);
673 	signal(SIGTERM, display_sig);
674 }
675 
/*
 * --stdio display thread body: repaint the symbol table every delay_secs
 * seconds, handling keypresses in between.  Terminal is kept in quiet
 * (raw, no-echo) mode while waiting and restored around key handling.
 */
676 static void *display_thread(void *arg)
677 {
678 	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
679 	struct termios save;
680 	struct perf_top *top = arg;
681 	int delay_msecs, c;
682 
683 	/* In order to read symbols from other namespaces perf to  needs to call
684 	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
685 	 * unshare(2) the fs so that we may continue to setns into namespaces
686 	 * that we're observing.
687 	 */
688 	unshare(CLONE_FS);
689 
690 	prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0);
691 
692 	display_setup_sig();
693 	pthread__unblock_sigwinch();
694 repeat:
	/* re-read delay_secs: the 'd' key may have changed it */
695 	delay_msecs = top->delay_secs * MSEC_PER_SEC;
696 	set_term_quiet_input(&save);
697 	/* trash return*/
698 	clearerr(stdin);
699 	if (poll(&stdin_poll, 1, 0) > 0)
700 		getc(stdin);
701 
702 	while (!done) {
703 		perf_top__print_sym_table(top);
704 		/*
705 		 * Either timeout expired or we got an EINTR due to SIGWINCH,
706 		 * refresh screen in both cases.
707 		 */
708 		switch (poll(&stdin_poll, 1, delay_msecs)) {
709 		case 0:
710 			continue;
711 		case -1:
712 			if (errno == EINTR)
713 				continue;
714 			fallthrough;
715 		default:
716 			c = getc(stdin);
717 			tcsetattr(0, TCSAFLUSH, &save);
718 
719 			if (perf_top__handle_keypress(top, c))
720 				goto repeat;
721 			stop_top();
722 		}
723 	}
724 
	/* restore the terminal before exiting the thread */
725 	tcsetattr(0, TCSAFLUSH, &save);
726 	return NULL;
727 }
728 
/*
 * Per-entry callback of hist_entry_iter: feed the sample into the symbol's
 * annotation histogram (only once per sample, hence @single) and account
 * branch-stack cycles.  Called with iter->he->hists->lock held.
 */
729 static int hist_iter__top_callback(struct hist_entry_iter *iter,
730 				   struct addr_location *al, bool single,
731 				   void *arg)
732 	EXCLUSIVE_LOCKS_REQUIRED(iter->he->hists->lock)
733 {
734 	struct perf_top *top = arg;
735 	struct evsel *evsel = iter->evsel;
736 
737 	if (perf_hpp_list.sym && single)
738 		perf_top__record_precise_ip(top, iter->he, iter->sample, evsel, al->addr);
739 
740 	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
741 			     !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY),
742 			     NULL, evsel);
743 	return 0;
744 }
745 
/*
 * Process one PERF_RECORD_SAMPLE: resolve it to a thread/map/symbol, emit
 * one-time warnings for unresolvable setups (guest machines, kptr_restrict,
 * missing vmlinux) and add the sample to the event's hists.
 */
746 static void perf_event__process_sample(const struct perf_tool *tool,
747 				       const union perf_event *event,
748 				       struct evsel *evsel,
749 				       struct perf_sample *sample,
750 				       struct machine *machine)
751 {
752 	struct perf_top *top = container_of(tool, struct perf_top, tool);
753 	struct addr_location al;
754 
	/* guest sample but we have no machine for it: warn once per guest pid */
755 	if (!machine && perf_guest) {
756 		static struct intlist *seen;
757 
758 		if (!seen)
759 			seen = intlist__new(NULL);
760 
761 		if (!intlist__has_entry(seen, sample->pid)) {
762 			pr_err("Can't find guest [%d]'s kernel information\n",
763 				sample->pid);
764 			intlist__add(seen, sample->pid);
765 		}
766 		return;
767 	}
768 
769 	if (!machine) {
770 		pr_err("%u unprocessable samples recorded.\r",
771 		       top->session->evlist->stats.nr_unprocessable_samples++);
772 		return;
773 	}
774 
775 	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
776 		top->exact_samples++;
777 
778 	addr_location__init(&al);
779 	if (machine__resolve(machine, &al, sample) < 0)
780 		goto out;
781 
782 	if (top->stitch_lbr)
783 		thread__set_lbr_stitch_enable(al.thread, true);
784 
	/* kernel addresses hidden by kptr_restrict: warn once per machine */
785 	if (!machine->kptr_restrict_warned &&
786 	    symbol_conf.kptr_restrict &&
787 	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
788 		if (!evlist__exclude_kernel(top->session->evlist)) {
789 			ui__warning(
790 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
791 "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
792 "Kernel%s samples will not be resolved.\n",
793 			  al.map && map__has_symbols(al.map) ?
794 			  " modules" : "");
795 			if (use_browser <= 0)
796 				sleep(5);
797 		}
798 		machine->kptr_restrict_warned = true;
799 	}
800 
801 	if (al.sym == NULL && al.map != NULL) {
802 		const char *msg = "Kernel samples will not be resolved.\n";
803 		/*
804 		 * As we do lazy loading of symtabs we only will know if the
805 		 * specified vmlinux file is invalid when we actually have a
806 		 * hit in kernel space and then try to load it. So if we get
807 		 * here and there are _no_ symbols in the DSO backing the
808 		 * kernel map, bail out.
809 		 *
810 		 * We may never get here, for instance, if we use -K/
811 		 * --hide-kernel-symbols, even if the user specifies an
812 		 * invalid --vmlinux ;-)
813 		 */
814 		if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
815 		    __map__is_kernel(al.map) && !map__has_symbols(al.map)) {
816 			if (symbol_conf.vmlinux_name) {
817 				char serr[256];
818 
819 				dso__strerror_load(map__dso(al.map), serr, sizeof(serr));
820 				ui__warning("The %s file can't be used: %s\n%s",
821 					    symbol_conf.vmlinux_name, serr, msg);
822 			} else {
823 				ui__warning("A vmlinux file was not found.\n%s",
824 					    msg);
825 			}
826 
827 			if (use_browser <= 0)
828 				sleep(5);
829 			top->vmlinux_warned = true;
830 		}
831 	}
832 
	/* skip samples in symbols marked idle (e.g. cpu_idle) */
833 	if (al.sym == NULL || !al.sym->idle) {
834 		struct hists *hists = evsel__hists(evsel);
835 		struct hist_entry_iter iter = {
836 			.evsel		= evsel,
837 			.sample 	= sample,
838 			.add_entry_cb 	= hist_iter__top_callback,
839 		};
840 
841 		if (symbol_conf.cumulate_callchain)
842 			iter.ops = &hist_iter_cumulative;
843 		else
844 			iter.ops = &hist_iter_normal;
845 
846 		mutex_lock(&hists->lock);
847 
848 		if (hist_entry_iter__add(&iter, &al, top->max_stack, top) < 0)
849 			pr_err("Problem incrementing symbol period, skipping event\n");
850 
851 		mutex_unlock(&hists->lock);
852 	}
853 
854 out:
855 	addr_location__exit(&al);
856 }
857 
/* Account a PERF_RECORD_LOST event in the per-refresh and total counters. */
858 static void
859 perf_top__process_lost(struct perf_top *top, union perf_event *event,
860 		       struct evsel *evsel)
861 {
862 	top->lost += event->lost.lost;
863 	top->lost_total += event->lost.lost;
864 	evsel->evlist->stats.total_lost += event->lost.lost;
865 }
866 
/* Account a PERF_RECORD_LOST_SAMPLES event, same bookkeeping as above. */
867 static void
868 perf_top__process_lost_samples(struct perf_top *top,
869 			       union perf_event *event,
870 			       struct evsel *evsel)
871 {
872 	top->lost += event->lost_samples.lost;
873 	top->lost_total += event->lost_samples.lost;
874 	evsel->evlist->stats.total_lost_samples += event->lost_samples.lost;
875 }
876 
/* Timestamp of the most recently read event; used by should_drop(). */
877 static u64 last_timestamp;
878 
/*
 * Drain one mmap'ed ring buffer: parse each event's timestamp and queue it
 * on the active ordered-events queue for the processing thread.
 */
879 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
880 {
881 	struct record_opts *opts = &top->record_opts;
882 	struct evlist *evlist = top->evlist;
883 	struct mmap *md;
884 	union perf_event *event;
885 
886 	md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
887 	if (perf_mmap__read_init(&md->core) < 0)
888 		return;
889 
890 	while ((event = perf_mmap__read_event(&md->core)) != NULL) {
891 		int ret;
892 
893 		ret = evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
		/* -1 means no timestamp for this event type, still queue it */
894 		if (ret && ret != -1)
895 			break;
896 
897 		ret = ordered_events__queue(top->qe.in, event, last_timestamp, 0, NULL);
898 		if (ret)
899 			break;
900 
901 		perf_mmap__consume(&md->core);
902 
		/* processing thread asked for a queue rotation: ack it */
903 		if (top->qe.rotate) {
904 			mutex_lock(&top->qe.mutex);
905 			top->qe.rotate = false;
906 			cond_signal(&top->qe.cond);
907 			mutex_unlock(&top->qe.mutex);
908 		}
909 	}
910 
911 	perf_mmap__read_done(&md->core);
912 }
913 
/*
 * Drain all ring buffers once.  In overwrite mode the maps are paused
 * (DATA_PENDING) around the read and resumed (EMPTY -> RUNNING) afterwards.
 */
914 static void perf_top__mmap_read(struct perf_top *top)
915 {
916 	bool overwrite = top->record_opts.overwrite;
917 	struct evlist *evlist = top->evlist;
918 	int i;
919 
920 	if (overwrite)
921 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
922 
923 	for (i = 0; i < top->evlist->core.nr_mmaps; i++)
924 		perf_top__mmap_read_idx(top, i);
925 
926 	if (overwrite) {
927 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
928 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
929 	}
930 }
931 
932 /*
933  * Check per-event overwrite term.
934  * perf top should support consistent term for all events.
935  * - All events don't have per-event term
936  *   E.g. "cpu/cpu-cycles/,cpu/instructions/"
937  *   Nothing change, return 0.
938  * - All events have same per-event term
939  *   E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/
940  *   Using the per-event setting to replace the opts->overwrite if
941  *   they are different, then return 0.
942  * - Events have different per-event term
943  *   E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
944  *   Return -1
945  * - Some of the event set per-event term, but some not.
946  *   E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
947  *   Return -1
948  */
949 static int perf_top__overwrite_check(struct perf_top *top)
950 {
951 	struct record_opts *opts = &top->record_opts;
952 	struct evlist *evlist = top->evlist;
953 	struct evsel_config_term *term;
954 	struct list_head *config_terms;
955 	struct evsel *evsel;
956 	int set, overwrite = -1;
957 
958 	evlist__for_each_entry(evlist, evsel) {
959 		set = -1;
960 		config_terms = &evsel->config_terms;
961 		list_for_each_entry(term, config_terms, list) {
962 			if (term->type == EVSEL__CONFIG_TERM_OVERWRITE)
963 				set = term->val.overwrite ? 1 : 0;
964 		}
965 
966 		/* no term for current and previous event (likely) */
967 		if ((overwrite < 0) && (set < 0))
968 			continue;
969 
970 		/* has term for both current and previous event, compare */
971 		if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
972 			return -1;
973 
974 		/* no term for current event but has term for previous one */
975 		if ((overwrite >= 0) && (set < 0))
976 			return -1;
977 
978 		/* has term for current event */
979 		if ((overwrite < 0) && (set >= 0)) {
980 			/* if it's first event, set overwrite */
981 			if (evsel == evlist__first(evlist))
982 				overwrite = set;
983 			else
984 				return -1;
985 		}
986 	}
987 
988 	if ((overwrite >= 0) && (opts->overwrite != overwrite))
989 		opts->overwrite = overwrite;
990 
991 	return 0;
992 }
993 
/*
 * If opening the first event failed in overwrite mode, switch the whole
 * evlist to non-overwrite (forward) mode so open can be retried.
 * Returns 1 when a fallback was performed, 0 otherwise.
 */
994 static int perf_top_overwrite_fallback(struct perf_top *top,
995 				       struct evsel *evsel)
996 {
997 	struct record_opts *opts = &top->record_opts;
998 	struct evlist *evlist = top->evlist;
999 	struct evsel *counter;
1000 
1001 	if (!opts->overwrite)
1002 		return 0;
1003 
1004 	/* only fall back when first event fails */
1005 	if (evsel != evlist__first(evlist))
1006 		return 0;
1007 
1008 	evlist__for_each_entry(evlist, counter)
1009 		counter->core.attr.write_backward = false;
1010 	opts->overwrite = false;
1011 	pr_debug2("fall back to non-overwrite mode\n");
1012 	return 1;
1013 }
1014 
/*
 * Configure, open, filter and mmap all events.  Handles the overwrite-mode
 * fallback and generic evsel fallbacks before giving up on open errors.
 * Returns 0 on success, -1 on any failure (after printing a UI error).
 */
1015 static int perf_top__start_counters(struct perf_top *top)
1016 {
1017 	char msg[BUFSIZ];
1018 	struct evsel *counter;
1019 	struct evlist *evlist = top->evlist;
1020 	struct record_opts *opts = &top->record_opts;
1021 
1022 	if (perf_top__overwrite_check(top)) {
1023 		ui__error("perf top only support consistent per-event "
1024 			  "overwrite setting for all events\n");
1025 		goto out_err;
1026 	}
1027 
1028 	evlist__config(evlist, opts, &callchain_param);
1029 
1030 	evlist__for_each_entry(evlist, counter) {
1031 try_again:
1032 		if (evsel__open(counter, counter->core.cpus,
1033 				counter->core.threads) < 0) {
1034 
1035 			/*
1036 			 * Specially handle overwrite fall back.
1037 			 * Because perf top is the only tool which has
1038 			 * overwrite mode by default, support
1039 			 * both overwrite and non-overwrite mode, and
1040 			 * require consistent mode for all events.
1041 			 *
1042 			 * May move it to generic code with more tools
1043 			 * have similar attribute.
1044 			 */
1045 			if (perf_missing_features.write_backward &&
1046 			    perf_top_overwrite_fallback(top, counter))
1047 				goto try_again;
1048 
			/* e.g. drop precise_ip or fall back to software events */
1049 			if (evsel__fallback(counter, &opts->target, errno, msg, sizeof(msg))) {
1050 				if (verbose > 0)
1051 					ui__warning("%s\n", msg);
1052 				goto try_again;
1053 			}
1054 
1055 			evsel__open_strerror(counter, &opts->target, errno, msg, sizeof(msg));
1056 			ui__error("%s\n", msg);
1057 			goto out_err;
1058 		}
1059 	}
1060 
1061 	if (evlist__apply_filters(evlist, &counter, &opts->target)) {
1062 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1063 			counter->filter ?: "BPF", evsel__name(counter), errno,
1064 			str_error_r(errno, msg, sizeof(msg)));
1065 		goto out_err;
1066 	}
1067 
1068 	if (evlist__mmap(evlist, opts->mmap_pages) < 0) {
1069 		ui__error("Failed to mmap with %d (%s)\n",
1070 			    errno, str_error_r(errno, msg, sizeof(msg)));
1071 		goto out_err;
1072 	}
1073 
1074 	return 0;
1075 
1076 out_err:
1077 	return -1;
1078 }
1079 
1080 static int callchain_param__setup_sample_type(struct callchain_param *callchain)
1081 {
1082 	if (callchain->mode != CHAIN_NONE) {
1083 		if (callchain_register_param(callchain) < 0) {
1084 			ui__error("Can't register callchain params.\n");
1085 			return -EINVAL;
1086 		}
1087 	}
1088 
1089 	return 0;
1090 }
1091 
1092 static struct ordered_events *rotate_queues(struct perf_top *top)
1093 {
1094 	struct ordered_events *in = top->qe.in;
1095 
1096 	if (top->qe.in == &top->qe.data[1])
1097 		top->qe.in = &top->qe.data[0];
1098 	else
1099 		top->qe.in = &top->qe.data[1];
1100 
1101 	return in;
1102 }
1103 
/*
 * Event-processing thread body: whenever the active queue has events,
 * rotate the queues (handshaking with the reader via qe.rotate/qe.cond so
 * it stops writing to the queue being flushed) and flush the old one.
 */
1104 static void *process_thread(void *arg)
1105 {
1106 	struct perf_top *top = arg;
1107 
1108 	while (!done) {
1109 		struct ordered_events *out, *in = top->qe.in;
1110 
1111 		if (!in->nr_events) {
1112 			usleep(100);
1113 			continue;
1114 		}
1115 
1116 		out = rotate_queues(top);
1117 
1118 		mutex_lock(&top->qe.mutex);
1119 		top->qe.rotate = true;
		/* wait for the reader to notice the rotation */
1120 		cond_wait(&top->qe.cond, &top->qe.mutex);
1121 		mutex_unlock(&top->qe.mutex);
1122 
1123 		if (ordered_events__flush(out, OE_FLUSH__TOP))
1124 			pr_err("failed to process events\n");
1125 	}
1126 
1127 	return NULL;
1128 }
1129 
1130 /*
1131  * Allow only 'top->delay_secs' seconds behind samples.
1132  */
1133 static int should_drop(struct ordered_event *qevent, struct perf_top *top)
1134 {
1135 	union perf_event *event = qevent->event;
1136 	u64 delay_timestamp;
1137 
1138 	if (event->header.type != PERF_RECORD_SAMPLE)
1139 		return false;
1140 
1141 	delay_timestamp = qevent->timestamp + top->delay_secs * NSEC_PER_SEC;
1142 	return delay_timestamp < last_timestamp;
1143 }
1144 
/*
 * Ordered-events delivery callback: parse one queued event and route it to
 * the sample / lost / generic machine handlers, maintaining the per-mode
 * sample counters shown in the UI. Returns 0 on success (including events
 * deliberately skipped), negative on parse failure.
 */
static int deliver_event(struct ordered_events *qe,
			 struct ordered_event *qevent)
{
	struct perf_top *top = qe->data;
	struct evlist *evlist = top->evlist;
	struct perf_session *session = top->session;
	union perf_event *event = qevent->event;
	struct perf_sample sample;
	struct evsel *evsel;
	struct machine *machine;
	int ret = -1;

	/* Too far behind the newest sample: account it and move on. */
	if (should_drop(qevent, top)) {
		top->drop++;
		top->drop_total++;
		return 0;
	}

	perf_sample__init(&sample, /*all=*/false);
	ret = evlist__parse_sample(evlist, event, &sample);
	if (ret) {
		pr_err("Can't parse sample, err = %d\n", ret);
		goto next_event;
	}

	evsel = evlist__id2evsel(session->evlist, sample.id);
	assert(evsel != NULL);

	if (event->header.type == PERF_RECORD_SAMPLE) {
		/* Skip events the user filtered out with --switch-on/off. */
		if (evswitch__discard(&top->evswitch, evsel)) {
			ret = 0;
			goto next_event;
		}
		++top->samples;
	}

	/* Pick the machine the event belongs to; bump per-mode counters. */
	switch (sample.cpumode) {
	case PERF_RECORD_MISC_USER:
		++top->us_samples;
		if (top->hide_user_symbols)
			goto next_event;
		machine = &session->machines.host;
		break;
	case PERF_RECORD_MISC_KERNEL:
		++top->kernel_samples;
		if (top->hide_kernel_symbols)
			goto next_event;
		machine = &session->machines.host;
		break;
	case PERF_RECORD_MISC_GUEST_KERNEL:
		++top->guest_kernel_samples;
		machine = perf_session__find_machine(session,
						     sample.pid);
		break;
	case PERF_RECORD_MISC_GUEST_USER:
		++top->guest_us_samples;
		/*
		 * TODO: we don't process guest user from host side
		 * except simple counting.
		 */
		goto next_event;
	default:
		/* Samples with an unknown cpumode are ignored. */
		if (event->header.type == PERF_RECORD_SAMPLE)
			goto next_event;
		machine = &session->machines.host;
		break;
	}

	if (event->header.type == PERF_RECORD_SAMPLE) {
		perf_event__process_sample(&top->tool, event, evsel,
					   &sample, machine);
	} else if (event->header.type == PERF_RECORD_LOST) {
		perf_top__process_lost(top, event, evsel);
	} else if (event->header.type == PERF_RECORD_LOST_SAMPLES) {
		perf_top__process_lost_samples(top, event, evsel);
	} else if (event->header.type < PERF_RECORD_MAX) {
		events_stats__inc(&session->evlist->stats, event->header.type);
		machine__process_event(machine, event, &sample);
	} else
		++session->evlist->stats.nr_unknown_events;

	ret = 0;
next_event:
	perf_sample__exit(&sample);
	return ret;
}
1231 
1232 static void init_process_thread(struct perf_top *top)
1233 {
1234 	ordered_events__init(&top->qe.data[0], deliver_event, top);
1235 	ordered_events__init(&top->qe.data[1], deliver_event, top);
1236 	ordered_events__set_copy_on_queue(&top->qe.data[0], true);
1237 	ordered_events__set_copy_on_queue(&top->qe.data[1], true);
1238 	top->qe.in = &top->qe.data[0];
1239 	mutex_init(&top->qe.mutex);
1240 	cond_init(&top->qe.cond);
1241 }
1242 
1243 static void exit_process_thread(struct perf_top *top)
1244 {
1245 	ordered_events__free(&top->qe.data[0]);
1246 	ordered_events__free(&top->qe.data[1]);
1247 	mutex_destroy(&top->qe.mutex);
1248 	cond_destroy(&top->qe.cond);
1249 }
1250 
1251 static int __cmd_top(struct perf_top *top)
1252 {
1253 	struct record_opts *opts = &top->record_opts;
1254 	pthread_t thread, thread_process;
1255 	int ret;
1256 
1257 	if (!annotate_opts.objdump_path) {
1258 		ret = perf_env__lookup_objdump(perf_session__env(top->session),
1259 					       &annotate_opts.objdump_path);
1260 		if (ret)
1261 			return ret;
1262 	}
1263 
1264 	ret = callchain_param__setup_sample_type(&callchain_param);
1265 	if (ret)
1266 		return ret;
1267 
1268 	if (perf_session__register_idle_thread(top->session) < 0)
1269 		return ret;
1270 
1271 	if (top->nr_threads_synthesize > 1)
1272 		perf_set_multithreaded();
1273 
1274 	init_process_thread(top);
1275 
1276 	if (opts->record_namespaces)
1277 		top->tool.namespace_events = true;
1278 	if (opts->record_cgroup) {
1279 #ifdef HAVE_FILE_HANDLE
1280 		top->tool.cgroup_events = true;
1281 #else
1282 		pr_err("cgroup tracking is not supported.\n");
1283 		return -1;
1284 #endif
1285 	}
1286 
1287 	ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
1288 						&top->session->machines.host,
1289 						&top->record_opts);
1290 	if (ret < 0)
1291 		pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n");
1292 
1293 	ret = perf_event__synthesize_cgroups(&top->tool, perf_event__process,
1294 					     &top->session->machines.host);
1295 	if (ret < 0)
1296 		pr_debug("Couldn't synthesize cgroup events.\n");
1297 
1298 	machine__synthesize_threads(&top->session->machines.host, &opts->target,
1299 				    top->evlist->core.threads, true, false,
1300 				    top->nr_threads_synthesize);
1301 
1302 	perf_set_multithreaded();
1303 
1304 	if (perf_hpp_list.socket) {
1305 		ret = perf_env__read_cpu_topology_map(perf_session__env(top->session));
1306 		if (ret < 0) {
1307 			char errbuf[BUFSIZ];
1308 			const char *err = str_error_r(-ret, errbuf, sizeof(errbuf));
1309 
1310 			ui__error("Could not read the CPU topology map: %s\n", err);
1311 			return ret;
1312 		}
1313 	}
1314 
1315 	/*
1316 	 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
1317 	 * and hybrid_merge is false.
1318 	 */
1319 	evlist__uniquify_evsel_names(top->evlist, &stat_config);
1320 	ret = perf_top__start_counters(top);
1321 	if (ret)
1322 		return ret;
1323 
1324 	top->session->evlist = top->evlist;
1325 	perf_session__set_id_hdr_size(top->session);
1326 
1327 	/*
1328 	 * When perf is starting the traced process, all the events (apart from
1329 	 * group members) have enable_on_exec=1 set, so don't spoil it by
1330 	 * prematurely enabling them.
1331 	 *
1332 	 * XXX 'top' still doesn't start workloads like record, trace, but should,
1333 	 * so leave the check here.
1334 	 */
1335         if (!target__none(&opts->target))
1336 		evlist__enable(top->evlist);
1337 
1338 	ret = -1;
1339 	if (pthread_create(&thread_process, NULL, process_thread, top)) {
1340 		ui__error("Could not create process thread.\n");
1341 		return ret;
1342 	}
1343 
1344 	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
1345 							    display_thread), top)) {
1346 		ui__error("Could not create display thread.\n");
1347 		goto out_join_thread;
1348 	}
1349 
1350 	if (top->realtime_prio) {
1351 		struct sched_param param;
1352 
1353 		param.sched_priority = top->realtime_prio;
1354 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1355 			ui__error("Could not set realtime priority.\n");
1356 			goto out_join;
1357 		}
1358 	}
1359 
1360 	/* Wait for a minimal set of events before starting the snapshot */
1361 	evlist__poll(top->evlist, 100);
1362 
1363 	perf_top__mmap_read(top);
1364 
1365 	while (!done) {
1366 		u64 hits = top->samples;
1367 
1368 		perf_top__mmap_read(top);
1369 
1370 		if (opts->overwrite || (hits == top->samples))
1371 			ret = evlist__poll(top->evlist, 100);
1372 
1373 		if (resize) {
1374 			perf_top__resize(top);
1375 			resize = 0;
1376 		}
1377 	}
1378 
1379 	ret = 0;
1380 out_join:
1381 	pthread_join(thread, NULL);
1382 out_join_thread:
1383 	cond_signal(&top->qe.cond);
1384 	pthread_join(thread_process, NULL);
1385 	perf_set_singlethreaded();
1386 	exit_process_thread(top);
1387 	return ret;
1388 }
1389 
1390 static int
1391 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1392 {
1393 	struct callchain_param *callchain = opt->value;
1394 
1395 	callchain->enabled = !unset;
1396 	callchain->record_mode = CALLCHAIN_FP;
1397 
1398 	/*
1399 	 * --no-call-graph
1400 	 */
1401 	if (unset) {
1402 		symbol_conf.use_callchain = false;
1403 		callchain->record_mode = CALLCHAIN_NONE;
1404 		return 0;
1405 	}
1406 
1407 	return parse_callchain_top_opt(arg);
1408 }
1409 
1410 static int
1411 callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset)
1412 {
1413 	struct callchain_param *callchain = opt->value;
1414 
1415 	/*
1416 	 * The -g option only sets the callchain if not already configured by
1417 	 * .perfconfig. It does, however, enable it.
1418 	 */
1419 	if (callchain->record_mode != CALLCHAIN_NONE) {
1420 		callchain->enabled = true;
1421 		return 0;
1422 	}
1423 
1424 	return parse_callchain_opt(opt, EM_HOST != EM_S390 ? "fp" : "dwarf", unset);
1425 }
1426 
1427 
1428 static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
1429 {
1430 	if (!strcmp(var, "top.call-graph")) {
1431 		var = "call-graph.record-mode";
1432 		return perf_default_config(var, value, cb);
1433 	}
1434 	if (!strcmp(var, "top.children")) {
1435 		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
1436 		return 0;
1437 	}
1438 
1439 	return 0;
1440 }
1441 
1442 static int
1443 parse_percent_limit(const struct option *opt, const char *arg,
1444 		    int unset __maybe_unused)
1445 {
1446 	struct perf_top *top = opt->value;
1447 
1448 	top->min_percent = strtof(arg, NULL);
1449 	return 0;
1450 }
1451 
/*
 * 'perf top' entry point: parse options, build the evlist, create the
 * session and thread/CPU maps, set up sorting/annotation/symbols, start
 * the BPF side-band thread and finally run the profiling loop in
 * __cmd_top(). Returns 0 on success, negative on error.
 */
int cmd_top(int argc, const char **argv)
{
	static const char top_callchain_help[] = CALLCHAIN_RECORD_HELP CALLCHAIN_REPORT_HELP
		"\n\t\t\t\tDefault: fp,graph,0.5,caller,function";
	char errbuf[BUFSIZ];
	/* Defaults; most fields can be overridden on the command line. */
	struct perf_top top = {
		.count_filter	     = 5,
		.delay_secs	     = 2,
		.record_opts = {
			.mmap_pages	= UINT_MAX,
			.user_freq	= UINT_MAX,
			.user_interval	= ULLONG_MAX,
			.freq		= 4000, /* 4 KHz */
			.target		= {
				.uses_mmap   = true,
			},
			/*
			 * FIXME: This will lose PERF_RECORD_MMAP and other metadata
			 * when we pause, fix that and reenable. Probably using a
			 * separate evlist with a dummy event, i.e. a non-overwrite
			 * ring buffer just for metadata events, while PERF_RECORD_SAMPLE
			 * stays in overwrite mode. -acme
			 * */
			.overwrite	= 0,
			.sample_time	= true,
			.sample_time_set = true,
		},
		.max_stack	     = sysctl__max_stack(),
		.nr_threads_synthesize = UINT_MAX,
	};
	struct parse_events_option_args parse_events_option_args = {
		.evlistp = &top.evlist,
	};
	bool branch_call_mode = false;
	struct record_opts *opts = &top.record_opts;
	struct target *target = &opts->target;
	const char *disassembler_style = NULL, *objdump_path = NULL, *addr2line_path = NULL;
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &top.evlist, "filter",
		     "event filter", parse_filter),
	OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
	OPT_STRING('p', "pid", &target->pid, "pid",
		    "profile events on existing process id"),
	OPT_STRING('t', "tid", &target->tid, "tid",
		    "profile events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
		    "don't load vmlinux even if found"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
		    "hide kernel symbols"),
	OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages",
		     "number of mmap data pages", evlist__parse_mmap_pages),
	OPT_INTEGER('r', "realtime", &top.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_INTEGER('d', "delay", &top.delay_secs,
		    "number of seconds to delay between refreshes"),
	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
			    "dump the symbol table used for profiling"),
	OPT_INTEGER('f', "count-filter", &top.count_filter,
		    "only display functions with more events than this"),
	OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
		    "symbol to annotate"),
	OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
	OPT_CALLBACK('F', "freq", &top.record_opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_INTEGER('E', "entries", &top.print_entries,
		    "display this many functions"),
	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
		    "hide user symbols"),
#ifdef HAVE_SLANG_SUPPORT
	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
#endif
	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
		   " Please refer the man page for the complete list."),
	OPT_STRING(0, "fields", &field_order, "key[,keys...]",
		   "output field(s): overhead, period, sample plus all of sort keys"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording and display",
			   &callchain_opt),
	OPT_CALLBACK(0, "call-graph", &callchain_param,
		     "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
		     top_callchain_help, &parse_callchain_opt),
	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
		    "Accumulate callchains of children and show total overhead as well"),
	OPT_INTEGER(0, "max-stack", &top.max_stack,
		    "Set the maximum stack depth when parsing the callchain. "
		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
		   "ignore callees of these functions in call graphs",
		   report_parse_ignore_callees_opt),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
		   "only consider symbols in these comms"),
	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
		   "only consider these symbols"),
	OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src,
		    "Interleave source code with assembly code (default)"),
	OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw,
		    "Display raw encoding of assembly instructions (default)"),
	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
		    "Enable kernel symbol demangling"),
	OPT_BOOLEAN(0, "no-bpf-event", &top.record_opts.no_bpf_event, "do not record bpf events"),
	OPT_STRING(0, "objdump", &objdump_path, "path",
		    "objdump binary to use for disassembly and annotations"),
	OPT_STRING(0, "addr2line", &addr2line_path, "path",
		   "addr2line binary to use for line numbers"),
	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
	OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix",
		    "Add prefix to source file path names in programs (with --prefix-strip)"),
	OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N",
		    "Strip first N entries of source file path name in programs (with --prefix)"),
	OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"),
	OPT_CALLBACK(0, "percent-limit", &top, "percent",
		     "Don't show entries under that percent", parse_percent_limit),
	OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
		     "How to display percentage of filtered entries", parse_filter_percentage),
	OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
		   "width[,width...]",
		   "don't try to adjust column width, use these fixed values"),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),
	OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
		    "add last branch records to call history"),
	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
		    "Show raw trace event output (do not use print fmt or plugins)"),
	OPT_BOOLEAN('H', "hierarchy", &symbol_conf.report_hierarchy,
		    "Show entries in a hierarchy"),
	OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
		    "Use a backward ring buffer, default: no"),
	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
	OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
			"number of thread to run event synthesize"),
	OPT_CALLBACK('G', "cgroup", &top.evlist, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "all-cgroups", &opts->record_cgroup,
		    "Record cgroup events"),
	OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx,
		    "Sort the output by the event at the index n in group. "
		    "If n is invalid, sort by the first event. "
		    "WARNING: should be used on grouped events."),
	OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr,
		    "Enable LBR callgraph stitching approach"),
#ifdef HAVE_LIBPFM
	OPT_CALLBACK(0, "pfm-events", &top.evlist, "event",
		"libpfm4 event selector. use 'perf list' to list available events",
		parse_libpfm_events_option),
#endif
	OPTS_EVSWITCH(&top.evswitch),
	OPT_END()
	};
	const char * const top_usage[] = {
		"perf top [<options>]",
		NULL
	};
	int status = hists__init();
	struct perf_env host_env;

	if (status < 0)
		return status;

	annotation_options__init();

	/* Annotation display defaults for top. */
	annotate_opts.min_pcnt = 5;
	annotate_opts.context  = 4;

	top.evlist = evlist__new();
	if (top.evlist == NULL)
		return -ENOMEM;

	perf_env__init(&host_env);
	status = perf_config(perf_top_config, &top);
	if (status)
		goto out_delete_evlist;
	/*
	 * Since the per arch annotation init routine may need the cpuid, read
	 * it here, since we are not getting this from the perf.data header.
	 */
	status = perf_env__set_cmdline(&host_env, argc, argv);
	if (status)
		goto out_delete_evlist;

	status = perf_env__read_cpuid(&host_env);
	if (status) {
		/*
		 * Some arches do not provide a get_cpuid(), so just use pr_debug, otherwise
		 * warn the user explicitly.
		 */
		eprintf(status == ENOSYS ? 1 : 0, verbose,
			"Couldn't read the cpuid for this machine: %s\n",
			str_error_r(errno, errbuf, sizeof(errbuf)));
	}

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/* The OPT_STRING targets are stack locals; dup into the globals. */
	if (disassembler_style) {
		annotate_opts.disassembler_style = strdup(disassembler_style);
		if (!annotate_opts.disassembler_style) {
			status = -ENOMEM;
			goto out_delete_evlist;
		}
	}
	if (objdump_path) {
		annotate_opts.objdump_path = strdup(objdump_path);
		if (!annotate_opts.objdump_path) {
			status = -ENOMEM;
			goto out_delete_evlist;
		}
	}
	if (addr2line_path) {
		symbol_conf.addr2line_path = strdup(addr2line_path);
		if (!symbol_conf.addr2line_path) {
			status = -ENOMEM;
			goto out_delete_evlist;
		}
	}

	status = symbol__validate_sym_arguments();
	if (status)
		goto out_delete_evlist;

	if (annotate_check_args() < 0)
		goto out_delete_evlist;

	/* Invalid target combinations only warn; profiling continues. */
	status = target__validate(target);
	if (status) {
		target__strerror(target, status, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	if (target__none(target))
		target->system_wide = true;

	/* No -e given: fall back to the default event set. */
	if (!top.evlist->core.nr_entries) {
		struct evlist *def_evlist = evlist__new_default(target, callchain_param.enabled);

		if (!def_evlist)
			goto out_delete_evlist;

		evlist__splice_list_tail(top.evlist, &def_evlist->core.entries);
		evlist__delete(def_evlist);
	}

	status = evswitch__init(&top.evswitch, top.evlist, stderr);
	if (status)
		goto out_delete_evlist;

	if (symbol_conf.report_hierarchy) {
		/* disable incompatible options */
		symbol_conf.event_group = false;
		symbol_conf.cumulate_callchain = false;

		if (field_order) {
			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
			parse_options_usage(top_usage, options, "fields", 0);
			parse_options_usage(NULL, options, "hierarchy", 0);
			goto out_delete_evlist;
		}
	}

	if (top.stitch_lbr && !(callchain_param.record_mode == CALLCHAIN_LBR)) {
		pr_err("Error: --stitch-lbr must be used with --call-graph lbr\n");
		goto out_delete_evlist;
	}

	if (nr_cgroups > 0 && opts->record_cgroup) {
		pr_err("--cgroup and --all-cgroups cannot be used together\n");
		goto out_delete_evlist;
	}

	/* --branch-history implies branch stack sampling + callchains. */
	if (branch_call_mode) {
		if (!opts->branch_stack)
			opts->branch_stack = PERF_SAMPLE_BRANCH_ANY;
		symbol_conf.use_callchain = true;
		callchain_param.key = CCKEY_ADDRESS;
		callchain_param.branch_callstack = true;
		callchain_param.enabled = true;
		if (callchain_param.record_mode == CALLCHAIN_NONE)
			callchain_param.record_mode = CALLCHAIN_FP;
		callchain_register_param(&callchain_param);
		if (!sort_order)
			sort_order = "srcline,symbol,dso";
	}

	if (opts->branch_stack && callchain_param.enabled)
		symbol_conf.show_branchflag_count = true;

	if (opts->branch_stack) {
		status = perf_env__read_core_pmu_caps(&host_env);
		if (status) {
			pr_err("PMU capability data is not available\n");
			goto out_delete_evlist;
		}
	}

	sort__mode = SORT_MODE__TOP;
	/* display thread wants entries to be collapsed in a different tree */
	perf_hpp_list.need_collapse = 1;

	if (top.use_stdio)
		use_browser = 0;
#ifdef HAVE_SLANG_SUPPORT
	else if (top.use_tui)
		use_browser = 1;
#endif

	setup_browser(false);

	/* Live session: no perf.data file backs it. */
	top.session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL,
					  /*trace_event_repipe=*/false,
					  &host_env);
	if (IS_ERR(top.session)) {
		status = PTR_ERR(top.session);
		top.session = NULL;
		goto out_delete_evlist;
	}
	top.evlist->session = top.session;

	if (setup_sorting(top.evlist, perf_session__env(top.session)) < 0) {
		if (sort_order)
			parse_options_usage(top_usage, options, "s", 1);
		if (field_order)
			parse_options_usage(sort_order ? NULL : top_usage,
					    options, "fields", 0);
		goto out_delete_evlist;
	}

	if (top.uid_str) {
		uid_t uid = parse_uid(top.uid_str);

		if (uid == UINT_MAX) {
			ui__error("Invalid User: %s", top.uid_str);
			status = -EINVAL;
			goto out_delete_evlist;
		}
		status = parse_uid_filter(top.evlist, uid);
		if (status)
			goto out_delete_evlist;
	}

	if (evlist__create_maps(top.evlist, target) < 0) {
		ui__error("Couldn't create thread/CPU maps: %s\n",
			  errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
		status = -errno;
		goto out_delete_evlist;
	}

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	if (record_opts__config(opts)) {
		status = -EINVAL;
		goto out_delete_evlist;
	}

	top.sym_evsel = evlist__first(top.evlist);

	if (!callchain_param.enabled) {
		symbol_conf.cumulate_callchain = false;
		perf_hpp__cancel_cumulate(top.evlist);
	}

	if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
		callchain_param.order = ORDER_CALLER;

	status = symbol__annotation_init();
	if (status < 0)
		goto out_delete_evlist;

	annotation_config__init();

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	status = symbol__init(NULL);
	if (status < 0)
		goto out_delete_evlist;

	sort__setup_elide(stdout);

	get_term_dimensions(&top.winsize);
	if (top.print_entries == 0) {
		perf_top__update_print_entries(&top);
		signal(SIGWINCH, winch_sig);
	}

	if (!evlist__needs_bpf_sb_event(top.evlist))
		top.record_opts.no_bpf_event = true;

#ifdef HAVE_LIBBPF_SUPPORT
	/* Side-band evlist to track BPF program load/unload while running. */
	if (!top.record_opts.no_bpf_event) {
		top.sb_evlist = evlist__new();

		if (top.sb_evlist == NULL) {
			pr_err("Couldn't create side band evlist.\n.");
			status = -EINVAL;
			goto out_delete_evlist;
		}

		if (evlist__add_bpf_sb_event(top.sb_evlist, &host_env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
			status = -EINVAL;
			goto out_delete_evlist;
		}
	}
#endif

	if (evlist__start_sb_thread(top.sb_evlist, target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	status = __cmd_top(&top);

	if (!opts->no_bpf_event)
		evlist__stop_sb_thread(top.sb_evlist);

out_delete_evlist:
	evlist__delete(top.evlist);
	perf_session__delete(top.session);
	annotation_options__exit();
	perf_env__exit(&host_env);

	return status;
}
1907