Lines Matching +full:per +full:- +full:cpu +full:- +full:cluster
1 // SPDX-License-Identifier: GPL-2.0-only
3 * builtin-stat.c
6 * overview about any workload, CPU or specific PID.
16 1708.761321 task-clock # 11.037 CPUs utilized
17 41,190 context-switches # 0.024 M/sec
18 6,735 CPU-migrations # 0.004 M/sec
19 17,318 page-faults # 0.010 M/sec
21 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle
22 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle
23 2,603,501,247 instructions # 0.50 insns per cycle
24 # 1.48 stalled cycles per insn
26 6,388,934 branch-misses # 1.32% of all branches
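(Note: in the sample output above, the derived "1.48 stalled cycles per insn" value is simply the frontend stall count divided by the instruction count: 3,856,436,920 / 2,603,501,247 ≈ 1.48.)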
31 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
45 #include <subcmd/parse-options.h>
46 #include "util/parse-events.h"
65 #include "util/synthetic-events.h"
67 #include "util/time-utils.h"
74 #include "util/intel-tpebs.h"
100 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
117 static volatile sig_atomic_t child_pid = -1;
123 static int big_num_opt = -1;
164 .ctl_fd = -1,
165 .ctl_fd_ack = -1,
171 bool node, socket, die, cluster, cache, core, thread, no_aggr;
179 if (opt_mode->node)
181 if (opt_mode->socket)
183 if (opt_mode->die)
185 if (opt_mode->cluster)
187 if (opt_mode->cache)
189 if (opt_mode->core)
191 if (opt_mode->thread)
193 if (opt_mode->no_aggr)
208 if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus))
225 cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
226 pr_warning(" %s: %s\n", leader->name, buf);
227 cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
228 pr_warning(" %s: %s\n", evsel->name, buf);
238 r->tv_sec = a->tv_sec - b->tv_sec;
239 if (a->tv_nsec < b->tv_nsec) {
240 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
241 r->tv_sec--;
243 r->tv_nsec = a->tv_nsec - b->tv_nsec;
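The interval timing above is a plain timespec subtraction with a nanosecond borrow; a self-contained sketch of the same arithmetic (the helper name below is made up for illustration):

    #include <time.h>

    #define NSEC_PER_SEC 1000000000L

    /* Subtract b from a, borrowing one second when b has more nanoseconds;
     * mirrors the borrow logic in the fragment above. */
    static void timespec_sub(const struct timespec *a, const struct timespec *b,
                             struct timespec *r)
    {
            r->tv_sec = a->tv_sec - b->tv_sec;
            if (a->tv_nsec < b->tv_nsec) {
                    r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
                    r->tv_sec--;
            } else {
                    r->tv_nsec = a->tv_nsec - b->tv_nsec;
            }
    }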
258 if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
260 return -1;
263 perf_stat.bytes_written += event->header.size;
277 #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
283 struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx);
285 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
302 perf_counts(counter->counts, cpu_map_idx, thread);
304 start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread);
309 count->ena = count->run = *start_time + val;
310 count->val = val;
318 * do not aggregate counts across CPUs in system-wide mode
322 int nthreads = perf_thread_map__nr(evsel_list->core.threads);
325 if (!counter->supported)
326 return -ENOENT;
331 count = perf_counts(counter->counts, cpu_map_idx, thread);
335 * (via evsel__read_counter()) and sets their count->loaded.
337 if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) &&
339 counter->counts->scaled = -1;
340 perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0;
341 perf_counts(counter->counts, cpu_map_idx, thread)->run = 0;
342 return -1;
345 perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false);
350 return -1;
359 cpu_map_idx).cpu,
360 count->val, count->ena, count->run);
378 return -1;
388 if (!counter->err)
389 counter->err = read_counter_cpu(counter, evlist_cpu_itr.cpu_map_idx);
418 return -1;
428 if (counter->err)
429 pr_debug("failed to read counter %s\n", counter->name);
430 if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
431 pr_warning("failed to process counter %s\n", counter->name);
432 counter->err = 0;
465 if (interval_count && !(--(*times)))
497 * If we don't have a tracee (attaching to task or cpu), counters may
519 workload_exec_errno = info->si_value.sival_int;
524 return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
536 for (i = 0; i < threads->nr; i++) {
540 threads->map[i].pid);
581 tts -= time_diff.tv_sec * MSEC_PER_SEC +
609 child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
647 counter->supported = false;
650 * cpu event had a problem and needs to be reexamined.
652 counter->errored = true;
655 !(counter->core.leader->nr_members > 1))
662 evsel_list->core.threads &&
663 evsel_list->core.threads->err_thread != -1) {
665 * For global --per-thread case, skip current
668 if (!thread_map__remove(evsel_list->core.threads,
669 evsel_list->core.threads->err_thread)) {
670 evsel_list->core.threads->err_thread = -1;
673 } else if (counter->skippable) {
677 counter->supported = false;
678 counter->errored = true;
687 counter->supported = false;
688 counter->errored = true;
691 !(counter->core.leader->nr_members > 1))
698 if (child_pid != -1)
726 return -1;
728 child_pid = evsel_list->workload.pid;
731 if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) {
733 err = -1;
740 counter->reset_group = false;
742 err = -1;
761 if (counter->reset_group || counter->errored)
778 counter->weak_group) {
780 assert(counter->reset_group);
787 err = -1;
798 counter->supported = true;
811 if (!counter->reset_group && !counter->errored)
814 perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
820 if (!counter->reset_group)
829 err = -1;
839 counter->supported = true;
846 if (!counter->supported) {
847 perf_evsel__free_fd(&counter->core);
851 l = strlen(counter->unit);
857 err = -1;
864 counter->filter, evsel__name(counter), errno,
866 return -1;
894 err = -1;
907 err = -1;
920 if (child_pid != -1) {
929 err = -1;
944 stat_config.walltime_run[run_idx] = t1 - t0;
950 update_stats(&walltime_nsecs_stats, t1 - t0);
956 update_stats(&walltime_nsecs_stats, t1 - t0);
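Each run's wall time feeds a running-statistics accumulator; a minimal Welford-style sketch of that kind of update (hypothetical struct and helper names, not necessarily perf's exact update_stats() internals):

    #include <math.h>

    /* Running mean/variance in one pass (Welford's method). */
    struct running_stats {
            unsigned long long n;
            double mean;
            double m2;      /* sum of squared deviations from the mean */
    };

    static void running_stats_update(struct running_stats *s, double val)
    {
            double delta = val - s->mean;

            s->n++;
            s->mean += delta / s->n;
            s->m2 += delta * (val - s->mean);
    }

    static double running_stats_stddev(const struct running_stats *s)
    {
            return s->n > 1 ? sqrt(s->m2 / (s->n - 1)) : 0.0;
    }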
987 * Returns -1 for fatal errors which signifies to not continue
990 * Returns < -1 error codes when stat record is used. These
1031 static volatile sig_atomic_t signr = -1;
1035 if ((child_pid == -1) || stat_config.interval)
1045 child_pid = -1;
1062 if (child_pid != -1)
1067 if (signr == -1)
1108 return -ENOMEM;
1114 return -ENOMEM;
1123 struct perf_stat_config *config = opt->value;
1125 return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close);
1132 pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
1133 return -1;
1144 struct evlist *evlist = *(struct evlist **)opt->value;
1146 if (!list_empty(&evlist->core.entries)) {
1148 return -1;
1153 fprintf(stderr, "--cputype %s is not supported!\n", str);
1154 return -1;
1156 parse_events_option_args.pmu_filter = pmu->name;
1166 struct opt_aggr_mode *opt_aggr_mode = (struct opt_aggr_mode *)opt->value;
1167 u32 *aggr_level = (u32 *)opt->data;
1185 pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
1188 return -EINVAL;
1193 pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
1196 return -EINVAL;
1202 return -EINVAL;
1205 opt_aggr_mode->cache = true;
1212 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
1213 * Cache instance ID is the first CPU reported in the shared_cpu_list file.
1215 static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
1221 * If the map contains no CPU, fall back to the current CPU as
1222 * the ID; otherwise use the first online CPU of the cache
1223 * domain as the ID.
1225 id = perf_cpu_map__min(cpu_map).cpu;
1226 if (id == -1)
1227 id = cpu.cpu;
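As the comment describes, the cache instance ID is the first CPU named in the shared_cpu_list sysfs file; a rough standalone illustration of that idea (the helper name and error handling are invented, this is not the perf helper itself):

    #include <stdio.h>

    /* Return the first CPU listed in
     * /sys/devices/system/cpu/cpu<cpu>/cache/index<index>/shared_cpu_list
     * (e.g. the "0" in "0-3,8-11"); fall back to the querying CPU if the
     * file cannot be read. */
    static int cache_instance_id(int cpu, int index)
    {
            char path[256];
            int id = cpu;
            FILE *f;

            snprintf(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_list",
                     cpu, index);
            f = fopen(path, "r");
            if (!f)
                    return id;
            if (fscanf(f, "%d", &id) != 1)
                    id = cpu;
            fclose(f);
            return id;
    }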
1236 * cpu__get_cache_details - Returns 0 if successful in populating the
1238 * /sys/devices/system/cpu/cpuX/cache/indexY/level, whereas the cache instance ID
1239 * is the first CPU reported by
1240 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
1242 static int cpu__get_cache_details(struct perf_cpu cpu, struct perf_cache *cache)
1249 cache->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
1250 cache->cache = -1;
1252 ret = build_caches_for_cpu(cpu.cpu, caches, &caches_cnt);
1266 return -1;
1280 cache->cache_lvl = caches[max_level_index].level;
1281 cache->cache = cpu__get_cache_id_from_map(cpu, caches[max_level_index].map);
1290 cache->cache_lvl = cache_level;
1291 cache->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
1308 * aggr_cpu_id__cache - Create an aggr_cpu_id with cache instance ID, cache
1310 * die and socket for cpu. The function signature is compatible with
1313 static struct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data)
1319 id = aggr_cpu_id__die(cpu, data);
1323 ret = cpu__get_cache_details(cpu, &cache);
1335 [AGGR_CLUSTER] = "cluster",
1346 struct perf_cpu cpu)
1348 return aggr_cpu_id__socket(cpu, /*data=*/NULL);
1352 struct perf_cpu cpu)
1354 return aggr_cpu_id__die(cpu, /*data=*/NULL);
1358 struct perf_cpu cpu)
1360 return aggr_cpu_id__cache(cpu, /*data=*/NULL);
1364 struct perf_cpu cpu)
1366 return aggr_cpu_id__cluster(cpu, /*data=*/NULL);
1370 struct perf_cpu cpu)
1372 return aggr_cpu_id__core(cpu, /*data=*/NULL);
1376 struct perf_cpu cpu)
1378 return aggr_cpu_id__node(cpu, /*data=*/NULL);
1382 struct perf_cpu cpu)
1384 return aggr_cpu_id__global(cpu, /*data=*/NULL);
1388 struct perf_cpu cpu)
1390 return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
1394 aggr_get_id_t get_id, struct perf_cpu cpu)
1398 /* per-process mode - should use global aggr mode */
1399 if (cpu.cpu == -1)
1400 return get_id(config, cpu);
1402 if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
1403 config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
1405 id = config->cpus_aggr_map->map[cpu.cpu];
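The fragment above memoizes the aggregation ID per CPU so the topology lookup only runs once per CPU; a compact sketch of that caching pattern with stand-in types (not perf's real structures):

    /* 'aggr_id' stands in for aggr_cpu_id; an id of -1 marks an empty slot. */
    struct aggr_id { int id; };

    typedef struct aggr_id (*get_id_fn)(int cpu);

    static struct aggr_id cached_aggr_id(struct aggr_id *cache, int ncpus,
                                         int cpu, get_id_fn get_id)
    {
            if (cpu < 0 || cpu >= ncpus)    /* per-process mode: nothing to cache */
                    return get_id(cpu);
            if (cache[cpu].id == -1)        /* first lookup for this CPU */
                    cache[cpu] = get_id(cpu);
            return cache[cpu];
    }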
1410 struct perf_cpu cpu)
1412 return perf_stat__get_aggr(config, perf_stat__get_socket, cpu);
1416 struct perf_cpu cpu)
1418 return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
1422 struct perf_cpu cpu)
1424 return perf_stat__get_aggr(config, perf_stat__get_cluster, cpu);
1428 struct perf_cpu cpu)
1430 return perf_stat__get_aggr(config, perf_stat__get_cache_id, cpu);
1434 struct perf_cpu cpu)
1436 return perf_stat__get_aggr(config, perf_stat__get_core, cpu);
1440 struct perf_cpu cpu)
1442 return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
1446 struct perf_cpu cpu)
1448 return perf_stat__get_aggr(config, perf_stat__get_global, cpu);
1452 struct perf_cpu cpu)
1454 return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu);
1518 stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
1522 return -1;
1528 nr = perf_thread_map__nr(evsel_list->core.threads);
1531 return -ENOMEM;
1537 stat_config.aggr_map->map[s] = id;
1543 * The evsel_list->cpus is the base we operate on,
1544 * taking the highest cpu number to be the size of
1547 if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
1548 nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
1552 return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
1568 static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data)
1573 if (cpu.cpu != -1)
1574 id.socket = env->cpu[cpu.cpu].socket_id;
1579 static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data)
1584 if (cpu.cpu != -1) {
1590 id.socket = env->cpu[cpu.cpu].socket_id;
1591 id.die = env->cpu[cpu.cpu].die_id;
1597 static void perf_env__get_cache_id_for_cpu(struct perf_cpu cpu, struct perf_env *env,
1601 int caches_cnt = env->caches_cnt;
1602 struct cpu_cache_level *caches = env->caches;
1604 id->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
1605 id->cache = -1;
1610 for (i = caches_cnt - 1; i > -1; --i) {
1616 * the cpu in the map. Since building the map is expensive, do
1623 map_contains_cpu = perf_cpu_map__idx(cpu_map, cpu);
1626 if (map_contains_cpu != -1) {
1627 id->cache_lvl = caches[i].level;
1628 id->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
1634 static struct aggr_cpu_id perf_env__get_cache_aggr_by_cpu(struct perf_cpu cpu,
1640 if (cpu.cpu != -1) {
1643 id.socket = env->cpu[cpu.cpu].socket_id;
1644 id.die = env->cpu[cpu.cpu].die_id;
1645 perf_env__get_cache_id_for_cpu(cpu, env, cache_level, &id);
1651 static struct aggr_cpu_id perf_env__get_cluster_aggr_by_cpu(struct perf_cpu cpu,
1657 if (cpu.cpu != -1) {
1658 id.socket = env->cpu[cpu.cpu].socket_id;
1659 id.die = env->cpu[cpu.cpu].die_id;
1660 id.cluster = env->cpu[cpu.cpu].cluster_id;
1666 static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
1671 if (cpu.cpu != -1) {
1673 * core_id is relative to socket, die and cluster, we need a
1674 * global id. So we set socket, die id, cluster id and core id.
1676 id.socket = env->cpu[cpu.cpu].socket_id;
1677 id.die = env->cpu[cpu.cpu].die_id;
1678 id.cluster = env->cpu[cpu.cpu].cluster_id;
1679 id.core = env->cpu[cpu.cpu].core_id;
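Because core_id is only unique within its socket, die and cluster, a globally unique key has to carry all four fields; a small illustrative sketch (field and helper names are made up):

    /* Globally unique core identity: core_id alone would collide across
     * sockets, dies and clusters. */
    struct core_key {
            int socket;
            int die;
            int cluster;
            int core;
    };

    static int core_key_equal(const struct core_key *a, const struct core_key *b)
    {
            return a->socket == b->socket && a->die == b->die &&
                   a->cluster == b->cluster && a->core == b->core;
    }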
1685 static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data)
1690 if (cpu.cpu != -1) {
1696 id.socket = env->cpu[cpu.cpu].socket_id;
1697 id.die = env->cpu[cpu.cpu].die_id;
1698 id.core = env->cpu[cpu.cpu].core_id;
1699 id.cpu = cpu;
1705 static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
1709 id.node = perf_env__numa_node(data, cpu);
1713 static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused,
1718 /* it always aggregates to CPU 0 */
1719 id.cpu = (struct perf_cpu){ .cpu = 0 };
1724 struct perf_cpu cpu)
1726 return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1729 struct perf_cpu cpu)
1731 return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1735 struct perf_cpu cpu)
1737 return perf_env__get_cluster_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1741 struct perf_cpu cpu)
1743 return perf_env__get_cache_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1747 struct perf_cpu cpu)
1749 return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1753 struct perf_cpu cpu)
1755 return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1759 struct perf_cpu cpu)
1761 return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1765 struct perf_cpu cpu)
1767 return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env);
1826 struct perf_env *env = &st->session->header.env;
1831 int nr = perf_thread_map__nr(evsel_list->core.threads);
1835 return -ENOMEM;
1841 stat_config.aggr_map->map[s] = id;
1849 stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
1853 return -1;
1861 * if -d/--detailed, -d -d or -d -d -d is used:
1872 return -ENOMEM;
1881 /* Handle -T as -M transaction. Once platform specific metrics
1888 ret = -1;
1907 ret = -1;
1914 ret = -1;
1922 ret = -1;
1950 ret = -1;
1954 pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
1955 ret = -1;
1963 "Please print the result regularly, e.g. -I1000\n");
1975 ret = -1;
1983 if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) {
1986 ret = parse_events(evlist, "cpu-clock", &err);
1988 ret = parse_events(evlist, "task-clock", &err);
1993 "context-switches,"
1994 "cpu-migrations,"
1995 "page-faults,"
1998 "stalled-cycles-frontend,"
1999 "stalled-cycles-backend,"
2001 "branch-misses",
2014 ret = -ENOMEM;
2025 ret = -1;
2030 evsel->default_metricgroup = true;
2032 evlist__splice_list_tail(evlist, &metric_evlist->core.entries);
2041 * Detailed stats (-d), covering the L1 and last level data
2045 "L1-dcache-loads,"
2046 "L1-dcache-load-misses,"
2047 "LLC-loads,"
2048 "LLC-load-misses",
2053 * Very detailed stats (-d -d), covering the instruction cache
2057 "L1-icache-loads,"
2058 "L1-icache-load-misses,"
2059 "dTLB-loads,"
2060 "dTLB-load-misses,"
2061 "iTLB-loads,"
2062 "iTLB-load-misses",
2067 * Very, very detailed stats (-d -d -d), adding prefetch events:
2070 "L1-dcache-prefetches,"
2071 "L1-dcache-prefetch-misses",
2078 * Make at least one event non-skippable so fatal errors are visible.
2079 * 'cycles' always used to be default and non-skippable, so use that.
2082 evsel->skippable = true;
2086 evlist__splice_list_tail(evsel_list, &evlist->core.entries);
2101 perf_header__set_feat(&session->header, feat);
2103 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
2104 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
2105 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
2106 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
2107 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
2121 data->path = output_name;
2124 pr_err("Cannot use -r option with perf stat record.\n");
2125 return -1;
2136 session->evlist = evsel_list;
2145 struct perf_record_stat_round *stat_round = &event->stat_round;
2147 const char **argv = session->header.env.cmdline_argv;
2148 int argc = session->header.env.nr_cmdline;
2152 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
2153 update_stats(&walltime_nsecs_stats, stat_round->time);
2155 if (stat_config.interval && stat_round->time) {
2156 tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
2157 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
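(For example, a stat_round time of 2,500,000,000 ns splits into tv_sec = 2 and tv_nsec = 500,000,000.)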
2169 const struct perf_tool *tool = session->tool;
2172 perf_event__read_stat_config(&stat_config, &event->stat_config);
2174 if (perf_cpu_map__is_empty(st->cpus)) {
2175 if (st->aggr_mode != AGGR_UNSET)
2177 } else if (st->aggr_mode != AGGR_UNSET) {
2178 stat_config.aggr_mode = st->aggr_mode;
2187 int nr_aggr = stat_config.aggr_map->nr;
2189 if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) {
2191 return -1;
2199 if (!st->cpus || !st->threads)
2202 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
2203 return -EINVAL;
2205 perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
2208 return -ENOMEM;
2210 st->maps_allocated = true;
2218 const struct perf_tool *tool = session->tool;
2221 if (st->threads) {
2226 st->threads = thread_map__new_event(&event->thread_map);
2227 if (!st->threads)
2228 return -ENOMEM;
2237 const struct perf_tool *tool = session->tool;
2241 if (st->cpus) {
2242 pr_warning("Extra cpu map event, ignoring.\n");
2246 cpus = cpu_map__new_data(&event->cpu_map.data);
2248 return -ENOMEM;
2250 st->cpus = cpus;
2269 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
2270 "aggregate counts per processor socket", AGGR_SOCKET),
2271 OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
2272 "aggregate counts per processor die", AGGR_DIE),
2273 OPT_SET_UINT(0, "per-cluster", &perf_stat.aggr_mode,
2274 "aggregate counts perf processor cluster", AGGR_CLUSTER),
2275 OPT_CALLBACK_OPTARG(0, "per-cache", &perf_stat.aggr_mode, &perf_stat.aggr_level,
2279 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
2280 "aggregate counts per physical processor core", AGGR_CORE),
2281 OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
2282 "aggregate counts per numa node", AGGR_NODE),
2283 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
2284 "disable CPU count aggregation", AGGR_NONE),
2294 input_name = "-";
2318 evsel_list = session->evlist;
2331 * Make system wide (-a) the default target if
2335 * - there's no workload specified
2336 * - there is workload specified but all requested
2348 if (!counter->core.requires_cpu &&
2354 if (evsel_list->core.nr_entries)
2370 OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
2377 OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
2379 OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
2381 OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
2384 OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2385 "system-wide collection from all CPUs"),
2387 "Use --no-scale to disable counter scaling for multiplexing"),
2393 "display details about each run (only with -r option)"),
2395 "null run - dont start any counters"),
2397 "detailed run - start a lot of events"),
2400 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
2403 OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2404 "list of cpus to monitor in system-wide"),
2405 OPT_BOOLEAN('A', "no-aggr", &opt_mode.no_aggr,
2407 OPT_BOOLEAN(0, "no-merge", &opt_mode.no_aggr,
2408 "disable aggregation the same as -A or -no-aggr"),
2409 OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
2411 OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
2413 OPT_BOOLEAN('j', "json-output", &stat_config.json_output,
2417 OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
2421 OPT_INTEGER(0, "log-fd", &output_fd,
2427 OPT_UINTEGER('I', "interval-print", &stat_config.interval,
2430 OPT_INTEGER(0, "interval-count", &stat_config.times,
2432 OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
2436 OPT_BOOLEAN(0, "per-socket", &opt_mode.socket,
2437 "aggregate counts per processor socket"),
2438 OPT_BOOLEAN(0, "per-die", &opt_mode.die, "aggregate counts per processor die"),
2439 OPT_BOOLEAN(0, "per-cluster", &opt_mode.cluster,
2440 "aggregate counts per processor cluster"),
2441 OPT_CALLBACK_OPTARG(0, "per-cache", &opt_mode, &stat_config.aggr_level,
2444 OPT_BOOLEAN(0, "per-core", &opt_mode.core,
2445 "aggregate counts per physical processor core"),
2446 OPT_BOOLEAN(0, "per-thread", &opt_mode.thread, "aggregate counts per thread"),
2447 OPT_BOOLEAN(0, "per-node", &opt_mode.node, "aggregate counts per numa node"),
2449 "ms to wait before starting measurement after program start (-1: start with events disabled)"),
2450 OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
2452 OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
2454 OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
2456 OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold,
2459 "measure top-down statistics"),
2461 OPT_BOOLEAN(0, "record-tpebs", &tpebs_recording,
2464 OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
2465 "Set the metrics level for the top-down statistics (0: max level)"),
2466 OPT_BOOLEAN(0, "smi-cost", &smi_cost,
2471 OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
2474 OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
2477 OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
2483 OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
2487 OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
2488 "Only enable events on applying cpu with this type "
2492 OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
2496 OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
2497 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
2498 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
2499 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
2510 int status = -EINVAL, run_idx, err;
2521 return -ENOMEM;
2525 /* String-parsing callback-based options would segfault when negated */
2546 return -1;
2554 * For record command the -o is already taken care of.
2556 if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
2560 fprintf(stderr, "cannot use both --output and --log-fd\n");
2562 parse_options_usage(NULL, stat_options, "log-fd", 0);
2567 fprintf(stderr, "--metric-only is not supported with --per-thread\n");
2572 fprintf(stderr, "--metric-only is not supported with -r\n");
2578 * Current CSV and metric-only JSON output doesn't display the
2585 fprintf(stderr, "--table is only supported with -r\n");
2592 fprintf(stderr, "argument to --log-fd must be a > 0\n");
2593 parse_options_usage(stat_usage, stat_options, "log-fd", 0);
2604 return -1;
2615 return -errno;
2620 fprintf(stderr, "--interval-clear does not work with output\n");
2622 parse_options_usage(NULL, stat_options, "log-fd", 0);
2623 parse_options_usage(NULL, stat_options, "interval-clear", 0);
2624 return -1;
2630 * let the spreadsheet do the pretty-printing
2633 /* User explicitly passed -B? */
2635 fprintf(stderr, "-B option not supported with -x\n");
2641 } else if (big_num_opt == 0) /* User passed --no-big-num */
2672 pr_err("failed to setup -r option");
2680 fprintf(stderr, "The --per-thread option is only "
2681 "available when monitoring via -p -t -a "
2682 "options or only --per-thread.\n");
2690 * no_aggr, cgroup are for system-wide only
2691 * --per-thread is aggregated per thread, we don't mix it with cpu mode
2697 fprintf(stderr, "both cgroup and no-aggregation "
2698 "modes only available in system-wide mode\n");
2703 parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
2727 status = -ENOMEM;
2734 * knowing the target is system-wide.
2759 pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
2761 parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
2768 "for-each-cgroup", 0);
2795 thread_map__read_comms(evsel_list->core.threads);
2804 pr_err("interval-count option should be used together with "
2805 "interval-print.\n");
2806 parse_options_usage(stat_usage, stat_options, "interval-count", 0);
2822 pr_err("timeout option is not supported with interval-print.\n");
2840 * by attr->sample_type != 0, and we can't run it on
2846 * We don't want to block the signals - that would cause
2847 * child tasks to inherit that and Ctrl-C would not work.
2848 * What we want is for Ctrl-C to work in the exec()-ed
2861 /* Enable ignoring missing threads when -p option is defined. */
2862 evlist__first(evsel_list)->ignore_missing_thread = target.pid;
2873 if (status == -1)
2882 if (!forever && status != -1 && (!interval || stat_config.summary)) {
2901 * tools remain -acme
2907 &perf_stat.session->machines.host);
2919 perf_stat.session->header.data_size += perf_stat.bytes_written;