Lines Matching +full:per +full:- +full:cpu +full:- +full:cluster
1 // SPDX-License-Identifier: GPL-2.0-only
3 * builtin-stat.c
6 * overview about any workload, CPU or specific PID.
16 1708.761321 task-clock # 11.037 CPUs utilized
17 41,190 context-switches # 0.024 M/sec
18 6,735 CPU-migrations # 0.004 M/sec
19 17,318 page-faults # 0.010 M/sec
21 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle
22 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle
23 2,603,501,247 instructions # 0.50 insns per cycle
24 # 1.48 stalled cycles per insn
26 6,388,934 branch-misses # 1.32% of all branches
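(Note: in the sample output above, the derived "1.48 stalled cycles per insn" value is simply the frontend stall count divided by the instruction count: 3,856,436,920 / 2,603,501,247 ≈ 1.48.)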
31 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
45 #include <subcmd/parse-options.h>
46 #include "util/parse-events.h"
65 #include "util/synthetic-events.h"
67 #include "util/time-utils.h"
74 #include "util/intel-tpebs.h"
100 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
117 static volatile sig_atomic_t child_pid = -1;
123 static int big_num_opt = -1;
164 .ctl_fd = -1,
165 .ctl_fd_ack = -1,
171 bool node, socket, die, cluster, cache, core, thread, no_aggr;
179 if (opt_mode->node)
181 if (opt_mode->socket)
183 if (opt_mode->die)
185 if (opt_mode->cluster)
187 if (opt_mode->cache)
189 if (opt_mode->core)
191 if (opt_mode->thread)
193 if (opt_mode->no_aggr)
208 if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus))
225 cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
226 pr_warning(" %s: %s\n", leader->name, buf);
227 cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
228 pr_warning(" %s: %s\n", evsel->name, buf);
238 r->tv_sec = a->tv_sec - b->tv_sec;
239 if (a->tv_nsec < b->tv_nsec) {
240 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
241 r->tv_sec--;
243 r->tv_nsec = a->tv_nsec - b->tv_nsec;
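The interval timing above is a plain timespec subtraction with a nanosecond borrow; a self-contained sketch of the same arithmetic (the helper name below is made up for illustration):

    #include <time.h>

    #define NSEC_PER_SEC 1000000000L

    /* Subtract b from a, borrowing one second when b has more nanoseconds;
     * mirrors the borrow logic in the fragment above. */
    static void timespec_sub(const struct timespec *a, const struct timespec *b,
                             struct timespec *r)
    {
            r->tv_sec = a->tv_sec - b->tv_sec;
            if (a->tv_nsec < b->tv_nsec) {
                    r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
                    r->tv_sec--;
            } else {
                    r->tv_nsec = a->tv_nsec - b->tv_nsec;
            }
    }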
258 if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
260 return -1;
263 perf_stat.bytes_written += event->header.size;
277 #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
283 struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx);
285 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
302 perf_counts(counter->counts, cpu_map_idx, thread);
304 start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread);
309 count->ena = count->run = *start_time + val;
310 count->val = val;
318 * do not aggregate counts across CPUs in system-wide mode
322 int nthreads = perf_thread_map__nr(evsel_list->core.threads);
325 if (!counter->supported)
326 return -ENOENT;
331 count = perf_counts(counter->counts, cpu_map_idx, thread);
335 * (via evsel__read_counter()) and sets their count->loaded.
337 if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) &&
339 counter->counts->scaled = -1;
340 perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0;
341 perf_counts(counter->counts, cpu_map_idx, thread)->run = 0;
342 return -1;
345 perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false);
350 return -1;
359 cpu_map_idx).cpu,
360 count->val, count->ena, count->run);
378 return -1;
388 if (!counter->err)
389 counter->err = read_counter_cpu(counter, evlist_cpu_itr.cpu_map_idx);
418 return -1;
428 if (counter->err)
429 pr_debug("failed to read counter %s\n", counter->name);
430 if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
431 pr_warning("failed to process counter %s\n", counter->name);
432 counter->err = 0;
465 if (interval_count && !(--(*times)))
497 * If we don't have a tracee (attaching to task or cpu), counters may
519 workload_exec_errno = info->si_value.sival_int;
524 return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
536 for (i = 0; i < threads->nr; i++) {
540 threads->map[i].pid);
581 tts -= time_diff.tv_sec * MSEC_PER_SEC +
609 child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
647 counter->supported = false;
650 * cpu event had a problem and needs to be reexamined.
652 counter->errored = true;
655 !(counter->core.leader->nr_members > 1))
662 evsel_list->core.threads &&
663 evsel_list->core.threads->err_thread != -1) {
665 * For global --per-thread case, skip current
668 if (!thread_map__remove(evsel_list->core.threads,
669 evsel_list->core.threads->err_thread)) {
670 evsel_list->core.threads->err_thread = -1;
673 } else if (counter->skippable) {
677 counter->supported = false;
678 counter->errored = true;
687 counter->supported = false;
688 counter->errored = true;
691 !(counter->core.leader->nr_members > 1))
698 if (child_pid != -1)
726 return -1;
728 child_pid = evsel_list->workload.pid;
731 if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) {
733 err = -1;
740 counter->reset_group = false;
742 err = -1;
761 if (counter->reset_group || counter->errored)
778 counter->weak_group) {
780 assert(counter->reset_group);
787 err = -1;
798 counter->supported = true;
811 if (!counter->reset_group && !counter->errored)
814 perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
820 if (!counter->reset_group)
829 err = -1;
839 counter->supported = true;
846 if (!counter->supported) {
847 perf_evsel__free_fd(&counter->core);
851 l = strlen(counter->unit);
857 err = -1;
864 counter->filter, evsel__name(counter), errno,
866 return -1;
894 err = -1;
907 err = -1;
920 if (child_pid != -1) {
929 err = -1;
944 stat_config.walltime_run[run_idx] = t1 - t0;
950 update_stats(&walltime_nsecs_stats, t1 - t0);
956 update_stats(&walltime_nsecs_stats, t1 - t0);
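Each run's wall time feeds a running-statistics accumulator; a minimal Welford-style sketch of that kind of update (hypothetical struct and helper names, not necessarily perf's exact update_stats() internals):

    #include <math.h>

    /* Running mean/variance in one pass (Welford's method). */
    struct running_stats {
            unsigned long long n;
            double mean;
            double m2;      /* sum of squared deviations from the mean */
    };

    static void running_stats_update(struct running_stats *s, double val)
    {
            double delta = val - s->mean;

            s->n++;
            s->mean += delta / s->n;
            s->m2 += delta * (val - s->mean);
    }

    static double running_stats_stddev(const struct running_stats *s)
    {
            return s->n > 1 ? sqrt(s->m2 / (s->n - 1)) : 0.0;
    }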
987 * Returns -1 for fatal errors which signifies to not continue
990 * Returns < -1 error codes when stat record is used. These
1031 static volatile sig_atomic_t signr = -1;
1035 if ((child_pid == -1) || stat_config.interval)
1045 child_pid = -1;
1062 if (child_pid != -1)
1067 if (signr == -1)
1108 return -ENOMEM;
1114 return -ENOMEM;
1123 struct perf_stat_config *config = opt->value;
1125 return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close);
1132 pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
1133 return -1;
1144 struct evlist *evlist = *(struct evlist **)opt->value;
1146 if (!list_empty(&evlist->core.entries)) {
1148 return -1;
1153 fprintf(stderr, "--cputype %s is not supported!\n", str);
1154 return -1;
1156 parse_events_option_args.pmu_filter = pmu->name;
1166 struct opt_aggr_mode *opt_aggr_mode = (struct opt_aggr_mode *)opt->value;
1167 u32 *aggr_level = (u32 *)opt->data;
1185 pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
1188 return -EINVAL;
1193 pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
1196 return -EINVAL;
1202 return -EINVAL;
1205 opt_aggr_mode->cache = true;
1212 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
1213 * Cache instance ID is the first CPU reported in the shared_cpu_list file.
1215 static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
1221 * If the map contains no CPU, fall back to the current CPU as
1222 * the ID; otherwise use the first online CPU of the cache
1223 * domain as the ID.
1225 id = perf_cpu_map__min(cpu_map).cpu;
1226 if (id == -1)
1227 id = cpu.cpu;
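As the comment describes, the cache instance ID is the first CPU named in the shared_cpu_list sysfs file; a rough standalone illustration of that idea (the helper name and error handling are invented, this is not the perf helper itself):

    #include <stdio.h>

    /* Return the first CPU listed in
     * /sys/devices/system/cpu/cpu<cpu>/cache/index<index>/shared_cpu_list
     * (e.g. the "0" in "0-3,8-11"); fall back to the querying CPU if the
     * file cannot be read. */
    static int cache_instance_id(int cpu, int index)
    {
            char path[256];
            int id = cpu;
            FILE *f;

            snprintf(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_list",
                     cpu, index);
            f = fopen(path, "r");
            if (!f)
                    return id;
            if (fscanf(f, "%d", &id) != 1)
                    id = cpu;
            fclose(f);
            return id;
    }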
1236 * cpu__get_cache_details - Returns 0 if successful in populating the
1238 * /sys/devices/system/cpu/cpuX/cache/indexY/level, whereas the cache instance ID
1239 * is the first CPU reported by
1240 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
1242 static int cpu__get_cache_details(struct perf_cpu cpu, struct perf_cache *cache)
1249 cache->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
1250 cache->cache = -1;
1252 ret = build_caches_for_cpu(cpu.cpu, caches, &caches_cnt);
1266 return -1;
1280 cache->cache_lvl = caches[max_level_index].level;
1281 cache->cache = cpu__get_cache_id_from_map(cpu, caches[max_level_index].map);
1290 cache->cache_lvl = cache_level;
1291 cache->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
1308 * aggr_cpu_id__cache - Create an aggr_cpu_id with cache instance ID, cache
1310 * die and socket for cpu. The function signature is compatible with
1313 static struct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data)
1319 id = aggr_cpu_id__die(cpu, data);
1323 ret = cpu__get_cache_details(cpu, &cache);
1335 [AGGR_CLUSTER] = "cluster",
1346 struct perf_cpu cpu)
1348 return aggr_cpu_id__socket(cpu, /*data=*/NULL);
1352 struct perf_cpu cpu)
1354 return aggr_cpu_id__die(cpu, /*data=*/NULL);
1358 struct perf_cpu cpu)
1360 return aggr_cpu_id__cache(cpu, /*data=*/NULL);
1364 struct perf_cpu cpu)
1366 return aggr_cpu_id__cluster(cpu, /*data=*/NULL);
1370 struct perf_cpu cpu)
1372 return aggr_cpu_id__core(cpu, /*data=*/NULL);
1376 struct perf_cpu cpu)
1378 return aggr_cpu_id__node(cpu, /*data=*/NULL);
1382 struct perf_cpu cpu)
1384 return aggr_cpu_id__global(cpu, /*data=*/NULL);
1388 struct perf_cpu cpu)
1390 return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
1394 aggr_get_id_t get_id, struct perf_cpu cpu)
1398 /* per-process mode - should use global aggr mode */
1399 if (cpu.cpu == -1)
1400 return get_id(config, cpu);
1402 if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
1403 config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
1405 id = config->cpus_aggr_map->map[cpu.cpu];
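The fragment above memoizes the aggregation ID per CPU so the topology lookup only runs once per CPU; a compact sketch of that caching pattern with stand-in types (not perf's real structures):

    /* 'aggr_id' stands in for aggr_cpu_id; an id of -1 marks an empty slot. */
    struct aggr_id { int id; };

    typedef struct aggr_id (*get_id_fn)(int cpu);

    static struct aggr_id cached_aggr_id(struct aggr_id *cache, int ncpus,
                                         int cpu, get_id_fn get_id)
    {
            if (cpu < 0 || cpu >= ncpus)    /* per-process mode: nothing to cache */
                    return get_id(cpu);
            if (cache[cpu].id == -1)        /* first lookup for this CPU */
                    cache[cpu] = get_id(cpu);
            return cache[cpu];
    }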
1410 struct perf_cpu cpu)
1412 return perf_stat__get_aggr(config, perf_stat__get_socket, cpu);
1416 struct perf_cpu cpu)
1418 return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
1422 struct perf_cpu cpu)
1424 return perf_stat__get_aggr(config, perf_stat__get_cluster, cpu);
1428 struct perf_cpu cpu)
1430 return perf_stat__get_aggr(config, perf_stat__get_cache_id, cpu);
1434 struct perf_cpu cpu)
1436 return perf_stat__get_aggr(config, perf_stat__get_core, cpu);
1440 struct perf_cpu cpu)
1442 return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
1446 struct perf_cpu cpu)
1448 return perf_stat__get_aggr(config, perf_stat__get_global, cpu);
1452 struct perf_cpu cpu)
1454 return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu);
1518 stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
1522 return -1;
1528 nr = perf_thread_map__nr(evsel_list->core.threads);
1531 return -ENOMEM;
1537 stat_config.aggr_map->map[s] = id;
1543 * The evsel_list->cpus is the base we operate on,
1544 * taking the highest cpu number to be the size of
1547 if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
1548 nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
1552 return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
1568 static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data)
1573 if (cpu.cpu != -1)
1574 id.socket = env->cpu[cpu.cpu].socket_id;
1579 static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data)
1584 if (cpu.cpu != -1) {
1590 id.socket = env->cpu[cpu.cpu].socket_id;
1591 id.die = env->cpu[cpu.cpu].die_id;
1597 static void perf_env__get_cache_id_for_cpu(struct perf_cpu cpu, struct perf_env *env,
1601 int caches_cnt = env->caches_cnt;
1602 struct cpu_cache_level *caches = env->caches;
1604 id->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
1605 id->cache = -1;
1610 for (i = caches_cnt - 1; i > -1; --i) {
1616 * the cpu in the map. Since building the map is expensive, do
1623 map_contains_cpu = perf_cpu_map__idx(cpu_map, cpu);
1626 if (map_contains_cpu != -1) {
1627 id->cache_lvl = caches[i].level;
1628 id->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
1634 static struct aggr_cpu_id perf_env__get_cache_aggr_by_cpu(struct perf_cpu cpu,
1640 if (cpu.cpu != -1) {
1643 id.socket = env->cpu[cpu.cpu].socket_id;
1644 id.die = env->cpu[cpu.cpu].die_id;
1645 perf_env__get_cache_id_for_cpu(cpu, env, cache_level, &id);
1651 static struct aggr_cpu_id perf_env__get_cluster_aggr_by_cpu(struct perf_cpu cpu,
1657 if (cpu.cpu != -1) {
1658 id.socket = env->cpu[cpu.cpu].socket_id;
1659 id.die = env->cpu[cpu.cpu].die_id;
1660 id.cluster = env->cpu[cpu.cpu].cluster_id;
1666 static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
1671 if (cpu.cpu != -1) {
1673 * core_id is relative to socket, die and cluster, we need a
1674 * global id. So we set socket, die id, cluster id and core id.
1676 id.socket = env->cpu[cpu.cpu].socket_id;
1677 id.die = env->cpu[cpu.cpu].die_id;
1678 id.cluster = env->cpu[cpu.cpu].cluster_id;
1679 id.core = env->cpu[cpu.cpu].core_id;
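Because core_id is only unique within its socket, die and cluster, a globally unique key has to carry all four fields; a small illustrative sketch (field and helper names are made up):

    /* Globally unique core identity: core_id alone would collide across
     * sockets, dies and clusters. */
    struct core_key {
            int socket;
            int die;
            int cluster;
            int core;
    };

    static int core_key_equal(const struct core_key *a, const struct core_key *b)
    {
            return a->socket == b->socket && a->die == b->die &&
                   a->cluster == b->cluster && a->core == b->core;
    }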
1685 static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data)
1690 if (cpu.cpu != -1) {
1696 id.socket = env->cpu[cpu.cpu].socket_id;
1697 id.die = env->cpu[cpu.cpu].die_id;
1698 id.core = env->cpu[cpu.cpu].core_id;
1699 id.cpu = cpu;
1705 static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
1709 id.node = perf_env__numa_node(data, cpu);
1713 static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused,
1718 /* it always aggregates to CPU 0 */
1719 id.cpu = (struct perf_cpu){ .cpu = 0 };
1724 struct perf_cpu cpu)
1726 return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1729 struct perf_cpu cpu)
1731 return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1735 struct perf_cpu cpu)
1737 return perf_env__get_cluster_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1741 struct perf_cpu cpu)
1743 return perf_env__get_cache_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1747 struct perf_cpu cpu)
1749 return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1753 struct perf_cpu cpu)
1755 return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1759 struct perf_cpu cpu)
1761 return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1765 struct perf_cpu cpu)
1767 return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1826 struct perf_env *env = &st->session->header.env;
1831 int nr = perf_thread_map__nr(evsel_list->core.threads);
1835 return -ENOMEM;
1841 stat_config.aggr_map->map[s] = id;
1849 stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
1853 return -1;
1861 * if -d/--detailed, -d -d or -d -d -d is used:
1872 return -ENOMEM;
1881 /* Handle -T as -M transaction. Once platform specific metrics
1888 ret = -1;
1907 ret = -1;
1914 ret = -1;
1922 ret = -1;
1950 ret = -1;
1954 pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
1955 ret = -1;
1963 "Please print the result regularly, e.g. -I1000\n");
1975 ret = -1;
1983 if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) {
1986 ret = parse_events(evlist, "cpu-clock", &err);
1988 ret = parse_events(evlist, "task-clock", &err);
1993 "context-switches,"
1994 "cpu-migrations,"
1995 "page-faults,"
1998 "stalled-cycles-frontend,"
1999 "stalled-cycles-backend,"
2001 "branch-misses",
2014 ret = -ENOMEM;
2025 ret = -1;
2030 evsel->default_metricgroup = true;
2032 evlist__splice_list_tail(evlist, &metric_evlist->core.entries);
2041 * Detailed stats (-d), covering the L1 and last level data
2045 "L1-dcache-loads,"
2046 "L1-dcache-load-misses,"
2047 "LLC-loads,"
2048 "LLC-load-misses",
2053 * Very detailed stats (-d -d), covering the instruction cache
2057 "L1-icache-loads,"
2058 "L1-icache-load-misses,"
2059 "dTLB-loads,"
2060 "dTLB-load-misses,"
2061 "iTLB-loads,"
2062 "iTLB-load-misses",
2067 * Very, very detailed stats (-d -d -d), adding prefetch events:
2070 "L1-dcache-prefetches,"
2071 "L1-dcache-prefetch-misses",
2078 * Make at least one event non-skippable so fatal errors are visible.
2079 * 'cycles' always used to be default and non-skippable, so use that.
2082 evsel->skippable = true;
2086 evlist__splice_list_tail(evsel_list, &evlist->core.entries);
2101 perf_header__set_feat(&session->header, feat);
2103 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
2104 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
2105 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
2106 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
2107 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
2121 data->path = output_name;
2124 pr_err("Cannot use -r option with perf stat record.\n");
2125 return -1;
2136 session->evlist = evsel_list;
2145 struct perf_record_stat_round *stat_round = &event->stat_round;
2147 const char **argv = session->header.env.cmdline_argv;
2148 int argc = session->header.env.nr_cmdline;
2152 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
2153 update_stats(&walltime_nsecs_stats, stat_round->time);
2155 if (stat_config.interval && stat_round->time) {
2156 tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
2157 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
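(For example, a stat_round time of 2,500,000,000 ns splits into tv_sec = 2 and tv_nsec = 500,000,000.)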
2169 const struct perf_tool *tool = session->tool;
2172 perf_event__read_stat_config(&stat_config, &event->stat_config);
2174 if (perf_cpu_map__is_empty(st->cpus)) {
2175 if (st->aggr_mode != AGGR_UNSET)
2177 } else if (st->aggr_mode != AGGR_UNSET) {
2178 stat_config.aggr_mode = st->aggr_mode;
2187 int nr_aggr = stat_config.aggr_map->nr;
2189 if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) {
2191 return -1;
2199 if (!st->cpus || !st->threads)
2202 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
2203 return -EINVAL;
2205 perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
2208 return -ENOMEM;
2210 st->maps_allocated = true;
2218 const struct perf_tool *tool = session->tool;
2221 if (st->threads) {
2226 st->threads = thread_map__new_event(&event->thread_map);
2227 if (!st->threads)
2228 return -ENOMEM;
2237 const struct perf_tool *tool = session->tool;
2241 if (st->cpus) {
2242 pr_warning("Extra cpu map event, ignoring.\n");
2246 cpus = cpu_map__new_data(&event->cpu_map.data);
2248 return -ENOMEM;
2250 st->cpus = cpus;
2269 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
2270 "aggregate counts per processor socket", AGGR_SOCKET),
2271 OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
2272 "aggregate counts per processor die", AGGR_DIE),
2273 OPT_SET_UINT(0, "per-cluster", &perf_stat.aggr_mode,
2274 "aggregate counts perf processor cluster", AGGR_CLUSTER),
2275 OPT_CALLBACK_OPTARG(0, "per-cache", &perf_stat.aggr_mode, &perf_stat.aggr_level,
2279 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
2280 "aggregate counts per physical processor core", AGGR_CORE),
2281 OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
2282 "aggregate counts per numa node", AGGR_NODE),
2283 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
2284 "disable CPU count aggregation", AGGR_NONE),
2294 input_name = "-";
2318 evsel_list = session->evlist;
2331 * Make system wide (-a) the default target if
2335 * - there's no workload specified
2336 * - there is workload specified but all requested
2348 if (!counter->core.requires_cpu &&
2354 if (evsel_list->core.nr_entries)
2370 OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
2377 OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
2379 OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
2381 OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
2384 OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2385 "system-wide collection from all CPUs"),
2387 "Use --no-scale to disable counter scaling for multiplexing"),
2393 "display details about each run (only with -r option)"),
2395 "null run - dont start any counters"),
2397 "detailed run - start a lot of events"),
2400 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
2403 OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2404 "list of cpus to monitor in system-wide"),
2405 OPT_BOOLEAN('A', "no-aggr", &opt_mode.no_aggr,
2407 OPT_BOOLEAN(0, "no-merge", &opt_mode.no_aggr,
2408 "disable aggregation the same as -A or -no-aggr"),
2409 OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
2411 OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
2413 OPT_BOOLEAN('j', "json-output", &stat_config.json_output,
2417 OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
2421 OPT_INTEGER(0, "log-fd", &output_fd,
2427 OPT_UINTEGER('I', "interval-print", &stat_config.interval,
2430 OPT_INTEGER(0, "interval-count", &stat_config.times,
2432 OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
2436 OPT_BOOLEAN(0, "per-socket", &opt_mode.socket,
2437 "aggregate counts per processor socket"),
2438 OPT_BOOLEAN(0, "per-die", &opt_mode.die, "aggregate counts per processor die"),
2439 OPT_BOOLEAN(0, "per-cluster", &opt_mode.cluster,
2440 "aggregate counts per processor cluster"),
2441 OPT_CALLBACK_OPTARG(0, "per-cache", &opt_mode, &stat_config.aggr_level,
2444 OPT_BOOLEAN(0, "per-core", &opt_mode.core,
2445 "aggregate counts per physical processor core"),
2446 OPT_BOOLEAN(0, "per-thread", &opt_mode.thread, "aggregate counts per thread"),
2447 OPT_BOOLEAN(0, "per-node", &opt_mode.node, "aggregate counts per numa node"),
2449 "ms to wait before starting measurement after program start (-1: start with events disabled)"),
2450 OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
2452 OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
2454 OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
2456 OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold,
2459 "measure top-down statistics"),
2461 OPT_BOOLEAN(0, "record-tpebs", &tpebs_recording,
2464 OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
2465 "Set the metrics level for the top-down statistics (0: max level)"),
2466 OPT_BOOLEAN(0, "smi-cost", &smi_cost,
2471 OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
2474 OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
2477 OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
2483 OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
2487 OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
2488 "Only enable events on applying cpu with this type "
2492 OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
2496 OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
2497 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
2498 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
2499 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
2510 int status = -EINVAL, run_idx, err;
2521 return -ENOMEM;
2525 /* String-parsing callback-based options would segfault when negated */
2546 return -1;
2554 * For record command the -o is already taken care of.
2556 if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
2560 fprintf(stderr, "cannot use both --output and --log-fd\n");
2562 parse_options_usage(NULL, stat_options, "log-fd", 0);
2567 fprintf(stderr, "--metric-only is not supported with --per-thread\n");
2572 fprintf(stderr, "--metric-only is not supported with -r\n");
2578 * Current CSV and metric-only JSON output doesn't display the
2585 fprintf(stderr, "--table is only supported with -r\n");
2592 fprintf(stderr, "argument to --log-fd must be a > 0\n");
2593 parse_options_usage(stat_usage, stat_options, "log-fd", 0);
2604 return -1;
2615 return -errno;
2620 fprintf(stderr, "--interval-clear does not work with output\n");
2622 parse_options_usage(NULL, stat_options, "log-fd", 0);
2623 parse_options_usage(NULL, stat_options, "interval-clear", 0);
2624 return -1;
2630 * let the spreadsheet do the pretty-printing
2633 /* User explicitly passed -B? */
2635 fprintf(stderr, "-B option not supported with -x\n");
2641 } else if (big_num_opt == 0) /* User passed --no-big-num */
2672 pr_err("failed to setup -r option");
2680 fprintf(stderr, "The --per-thread option is only "
2681 "available when monitoring via -p -t -a "
2682 "options or only --per-thread.\n");
2690 * no_aggr, cgroup are for system-wide only
2691 * --per-thread is aggregated per thread, we don't mix it with cpu mode
2697 fprintf(stderr, "both cgroup and no-aggregation "
2698 "modes only available in system-wide mode\n");
2703 parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
2727 status = -ENOMEM;
2734 * knowing the target is system-wide.
2759 pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
2761 parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
2768 "for-each-cgroup", 0);
2795 thread_map__read_comms(evsel_list->core.threads);
2804 pr_err("interval-count option should be used together with "
2805 "interval-print.\n");
2806 parse_options_usage(stat_usage, stat_options, "interval-count", 0);
2822 pr_err("timeout option is not supported with interval-print.\n");
2840 * by attr->sample_type != 0, and we can't run it on
2846 * We don't want to block the signals - that would cause
2847 * child tasks to inherit that and Ctrl-C would not work.
2848 * What we want is for Ctrl-C to work in the exec()-ed
2861 /* Enable ignoring missing threads when -p option is defined. */
2862 evlist__first(evsel_list)->ignore_missing_thread = target.pid;
2873 if (status == -1)
2882 if (!forever && status != -1 && (!interval || stat_config.summary)) {
2901 * tools remain -acme
2907 &perf_stat.session->machines.host);
2919 perf_stat.session->header.data_size += perf_stat.bytes_written;