1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * builtin-stat.c 4 * 5 * Builtin stat command: Give a precise performance counters summary 6 * overview about any workload, CPU or specific PID. 7 * 8 * Sample output: 9 10 $ perf stat ./hackbench 10 11 12 Time: 0.118 13 14 Performance counter stats for './hackbench 10': 15 16 1708.761321 task-clock # 11.037 CPUs utilized 17 41,190 context-switches # 0.024 M/sec 18 6,735 CPU-migrations # 0.004 M/sec 19 17,318 page-faults # 0.010 M/sec 20 5,205,202,243 cycles # 3.046 GHz 21 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 22 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 23 2,603,501,247 instructions # 0.50 insns per cycle 24 # 1.48 stalled cycles per insn 25 484,357,498 branches # 283.455 M/sec 26 6,388,934 branch-misses # 1.32% of all branches 27 28 0.154822978 seconds time elapsed 29 30 * 31 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 32 * 33 * Improvements and fixes by: 34 * 35 * Arjan van de Ven <arjan@linux.intel.com> 36 * Yanmin Zhang <yanmin.zhang@intel.com> 37 * Wu Fengguang <fengguang.wu@intel.com> 38 * Mike Galbraith <efault@gmx.de> 39 * Paul Mackerras <paulus@samba.org> 40 * Jaswinder Singh Rajput <jaswinder@kernel.org> 41 */ 42 43 #include "builtin.h" 44 #include "util/cgroup.h" 45 #include <subcmd/parse-options.h> 46 #include "util/parse-events.h" 47 #include "util/pmus.h" 48 #include "util/pmu.h" 49 #include "util/tool_pmu.h" 50 #include "util/event.h" 51 #include "util/evlist.h" 52 #include "util/evsel.h" 53 #include "util/debug.h" 54 #include "util/color.h" 55 #include "util/stat.h" 56 #include "util/header.h" 57 #include "util/cpumap.h" 58 #include "util/thread_map.h" 59 #include "util/counts.h" 60 #include "util/topdown.h" 61 #include "util/session.h" 62 #include "util/tool.h" 63 #include "util/string2.h" 64 #include "util/metricgroup.h" 65 #include "util/synthetic-events.h" 66 #include "util/target.h" 67 #include "util/time-utils.h" 68 #include "util/top.h" 69 #include "util/affinity.h" 70 #include "util/pfm.h" 71 #include "util/bpf_counter.h" 72 #include "util/iostat.h" 73 #include "util/util.h" 74 #include "util/intel-tpebs.h" 75 #include "asm/bug.h" 76 77 #include <linux/list_sort.h> 78 #include <linux/time64.h> 79 #include <linux/zalloc.h> 80 #include <api/fs/fs.h> 81 #include <errno.h> 82 #include <signal.h> 83 #include <stdlib.h> 84 #include <sys/prctl.h> 85 #include <inttypes.h> 86 #include <locale.h> 87 #include <math.h> 88 #include <sys/types.h> 89 #include <sys/stat.h> 90 #include <sys/wait.h> 91 #include <unistd.h> 92 #include <sys/time.h> 93 #include <sys/resource.h> 94 #include <linux/err.h> 95 96 #include <linux/ctype.h> 97 #include <perf/evlist.h> 98 #include <internal/threadmap.h> 99 100 #ifdef HAVE_BPF_SKEL 101 #include "util/bpf_skel/bperf_cgroup.h" 102 #endif 103 104 #define DEFAULT_SEPARATOR " " 105 #define FREEZE_ON_SMI_PATH "bus/event_source/devices/cpu/freeze_on_smi" 106 107 static void print_counters(struct timespec *ts, int argc, const char **argv); 108 109 static struct evlist *evsel_list; 110 static struct parse_events_option_args parse_events_option_args = { 111 .evlistp = &evsel_list, 112 }; 113 114 static bool all_counters_use_bpf = true; 115 116 static struct target target; 117 118 static volatile sig_atomic_t child_pid = -1; 119 static int detailed_run = 0; 120 static bool transaction_run; 121 static bool topdown_run = false; 122 static bool smi_cost = false; 123 static bool smi_reset = false; 124 static int big_num_opt = -1; 
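/*
 * Optional shell commands run via system() immediately before and after each
 * measured run (see run_perf_stat()).
 */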
125 static const char *pre_cmd = NULL; 126 static const char *post_cmd = NULL; 127 static bool sync_run = false; 128 static bool forever = false; 129 static bool force_metric_only = false; 130 static struct timespec ref_time; 131 static bool append_file; 132 static bool interval_count; 133 static const char *output_name; 134 static int output_fd; 135 static char *metrics; 136 137 struct perf_stat { 138 bool record; 139 struct perf_data data; 140 struct perf_session *session; 141 u64 bytes_written; 142 struct perf_tool tool; 143 bool maps_allocated; 144 struct perf_cpu_map *cpus; 145 struct perf_thread_map *threads; 146 enum aggr_mode aggr_mode; 147 u32 aggr_level; 148 }; 149 150 static struct perf_stat perf_stat; 151 #define STAT_RECORD perf_stat.record 152 153 static volatile sig_atomic_t done = 0; 154 155 /* Options set from the command line. */ 156 struct opt_aggr_mode { 157 bool node, socket, die, cluster, cache, core, thread, no_aggr; 158 }; 159 160 /* Turn command line option into most generic aggregation mode setting. */ 161 static enum aggr_mode opt_aggr_mode_to_aggr_mode(struct opt_aggr_mode *opt_mode) 162 { 163 enum aggr_mode mode = AGGR_GLOBAL; 164 165 if (opt_mode->node) 166 mode = AGGR_NODE; 167 if (opt_mode->socket) 168 mode = AGGR_SOCKET; 169 if (opt_mode->die) 170 mode = AGGR_DIE; 171 if (opt_mode->cluster) 172 mode = AGGR_CLUSTER; 173 if (opt_mode->cache) 174 mode = AGGR_CACHE; 175 if (opt_mode->core) 176 mode = AGGR_CORE; 177 if (opt_mode->thread) 178 mode = AGGR_THREAD; 179 if (opt_mode->no_aggr) 180 mode = AGGR_NONE; 181 return mode; 182 } 183 184 static void evlist__check_cpu_maps(struct evlist *evlist) 185 { 186 struct evsel *evsel, *warned_leader = NULL; 187 188 evlist__for_each_entry(evlist, evsel) { 189 struct evsel *leader = evsel__leader(evsel); 190 191 /* Check that leader matches cpus with each member. */ 192 if (leader == evsel) 193 continue; 194 if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus)) 195 continue; 196 197 /* If there's mismatch disable the group and warn user. 
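		 * Only the mismatching member is removed from the group
		 * (evsel__remove_from_group()); the leader and the matching
		 * members keep counting as a group.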
*/ 198 if (warned_leader != leader) { 199 char buf[200]; 200 201 pr_warning("WARNING: grouped events cpus do not match.\n" 202 "Events with CPUs not matching the leader will " 203 "be removed from the group.\n"); 204 evsel__group_desc(leader, buf, sizeof(buf)); 205 pr_warning(" %s\n", buf); 206 warned_leader = leader; 207 } 208 if (verbose > 0) { 209 char buf[200]; 210 211 cpu_map__snprint(leader->core.cpus, buf, sizeof(buf)); 212 pr_warning(" %s: %s\n", leader->name, buf); 213 cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf)); 214 pr_warning(" %s: %s\n", evsel->name, buf); 215 } 216 217 evsel__remove_from_group(evsel, leader); 218 } 219 } 220 221 static inline void diff_timespec(struct timespec *r, struct timespec *a, 222 struct timespec *b) 223 { 224 r->tv_sec = a->tv_sec - b->tv_sec; 225 if (a->tv_nsec < b->tv_nsec) { 226 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; 227 r->tv_sec--; 228 } else { 229 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 230 } 231 } 232 233 static void perf_stat__reset_stats(void) 234 { 235 evlist__reset_stats(evsel_list); 236 perf_stat__reset_shadow_stats(); 237 } 238 239 static int process_synthesized_event(const struct perf_tool *tool __maybe_unused, 240 union perf_event *event, 241 struct perf_sample *sample __maybe_unused, 242 struct machine *machine __maybe_unused) 243 { 244 if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) { 245 pr_err("failed to write perf data, error: %m\n"); 246 return -1; 247 } 248 249 perf_stat.bytes_written += event->header.size; 250 return 0; 251 } 252 253 static int write_stat_round_event(u64 tm, u64 type) 254 { 255 return perf_event__synthesize_stat_round(NULL, tm, type, 256 process_synthesized_event, 257 NULL); 258 } 259 260 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 261 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 262 263 #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) 264 265 static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread, 266 struct perf_counts_values *count) 267 { 268 struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread); 269 struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx); 270 271 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 272 process_synthesized_event, NULL); 273 } 274 275 static int read_single_counter(struct evsel *counter, int cpu_map_idx, int thread) 276 { 277 int err = evsel__read_counter(counter, cpu_map_idx, thread); 278 279 /* 280 * Reading user and system time will fail when the process 281 * terminates. Use the wait4 values in that case. 
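	 * ru_stats is filled from the wait4() rusage via update_rusage_stats(),
	 * so the user/system time tool events still report the child's final
	 * values.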
282 */ 283 if (err && cpu_map_idx == 0 && 284 (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME || 285 evsel__tool_event(counter) == TOOL_PMU__EVENT_SYSTEM_TIME)) { 286 u64 val, *start_time; 287 struct perf_counts_values *count = 288 perf_counts(counter->counts, cpu_map_idx, thread); 289 290 start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread); 291 if (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME) 292 val = ru_stats.ru_utime_usec_stat.mean; 293 else 294 val = ru_stats.ru_stime_usec_stat.mean; 295 count->ena = count->run = *start_time + val; 296 count->val = val; 297 return 0; 298 } 299 return err; 300 } 301 302 /* 303 * Read out the results of a single counter: 304 * do not aggregate counts across CPUs in system-wide mode 305 */ 306 static int read_counter_cpu(struct evsel *counter, int cpu_map_idx) 307 { 308 int nthreads = perf_thread_map__nr(evsel_list->core.threads); 309 int thread; 310 311 if (!counter->supported) 312 return -ENOENT; 313 314 for (thread = 0; thread < nthreads; thread++) { 315 struct perf_counts_values *count; 316 317 count = perf_counts(counter->counts, cpu_map_idx, thread); 318 319 /* 320 * The leader's group read loads data into its group members 321 * (via evsel__read_counter()) and sets their count->loaded. 322 */ 323 if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) && 324 read_single_counter(counter, cpu_map_idx, thread)) { 325 counter->counts->scaled = -1; 326 perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0; 327 perf_counts(counter->counts, cpu_map_idx, thread)->run = 0; 328 return -1; 329 } 330 331 perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false); 332 333 if (STAT_RECORD) { 334 if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) { 335 pr_err("failed to write stat event\n"); 336 return -1; 337 } 338 } 339 340 if (verbose > 1) { 341 fprintf(stat_config.output, 342 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 343 evsel__name(counter), 344 perf_cpu_map__cpu(evsel__cpus(counter), 345 cpu_map_idx).cpu, 346 count->val, count->ena, count->run); 347 } 348 } 349 350 return 0; 351 } 352 353 static int read_affinity_counters(void) 354 { 355 struct evlist_cpu_iterator evlist_cpu_itr; 356 struct affinity saved_affinity, *affinity; 357 358 if (all_counters_use_bpf) 359 return 0; 360 361 if (!target__has_cpu(&target) || target__has_per_thread(&target)) 362 affinity = NULL; 363 else if (affinity__setup(&saved_affinity) < 0) 364 return -1; 365 else 366 affinity = &saved_affinity; 367 368 evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { 369 struct evsel *counter = evlist_cpu_itr.evsel; 370 371 if (evsel__is_bpf(counter)) 372 continue; 373 374 if (!counter->err) 375 counter->err = read_counter_cpu(counter, evlist_cpu_itr.cpu_map_idx); 376 } 377 if (affinity) 378 affinity__cleanup(&saved_affinity); 379 380 return 0; 381 } 382 383 static int read_bpf_map_counters(void) 384 { 385 struct evsel *counter; 386 int err; 387 388 evlist__for_each_entry(evsel_list, counter) { 389 if (!evsel__is_bpf(counter)) 390 continue; 391 392 err = bpf_counter__read(counter); 393 if (err) 394 return err; 395 } 396 return 0; 397 } 398 399 static int read_counters(void) 400 { 401 if (!stat_config.stop_read_counter) { 402 if (read_bpf_map_counters() || 403 read_affinity_counters()) 404 return -1; 405 } 406 return 0; 407 } 408 409 static void process_counters(void) 410 { 411 struct evsel *counter; 412 413 evlist__for_each_entry(evsel_list, counter) { 414 if (counter->err) 415 pr_debug("failed to 
read counter %s\n", counter->name); 416 if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter)) 417 pr_warning("failed to process counter %s\n", counter->name); 418 counter->err = 0; 419 } 420 421 perf_stat_merge_counters(&stat_config, evsel_list); 422 perf_stat_process_percore(&stat_config, evsel_list); 423 } 424 425 static void process_interval(void) 426 { 427 struct timespec ts, rs; 428 429 clock_gettime(CLOCK_MONOTONIC, &ts); 430 diff_timespec(&rs, &ts, &ref_time); 431 432 evlist__reset_aggr_stats(evsel_list); 433 434 if (read_counters() == 0) 435 process_counters(); 436 437 if (STAT_RECORD) { 438 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) 439 pr_err("failed to write stat round event\n"); 440 } 441 442 init_stats(&walltime_nsecs_stats); 443 update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL); 444 print_counters(&rs, 0, NULL); 445 } 446 447 static bool handle_interval(unsigned int interval, int *times) 448 { 449 if (interval) { 450 process_interval(); 451 if (interval_count && !(--(*times))) 452 return true; 453 } 454 return false; 455 } 456 457 static int enable_counters(void) 458 { 459 struct evsel *evsel; 460 int err; 461 462 evlist__for_each_entry(evsel_list, evsel) { 463 if (!evsel__is_bpf(evsel)) 464 continue; 465 466 err = bpf_counter__enable(evsel); 467 if (err) 468 return err; 469 } 470 471 if (!target__enable_on_exec(&target)) { 472 if (!all_counters_use_bpf) 473 evlist__enable(evsel_list); 474 } 475 return 0; 476 } 477 478 static void disable_counters(void) 479 { 480 struct evsel *counter; 481 482 /* 483 * If we don't have tracee (attaching to task or cpu), counters may 484 * still be running. To get accurate group ratios, we must stop groups 485 * from counting before reading their constituent counters. 486 */ 487 if (!target__none(&target)) { 488 evlist__for_each_entry(evsel_list, counter) 489 bpf_counter__disable(counter); 490 if (!all_counters_use_bpf) 491 evlist__disable(evsel_list); 492 } 493 } 494 495 static volatile sig_atomic_t workload_exec_errno; 496 497 /* 498 * evlist__prepare_workload will send a SIGUSR1 499 * if the fork fails, since we asked by setting its 500 * want_signal to true. 
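 * The handler stores the errno carried in the siginfo payload in
 * workload_exec_errno, which __run_perf_stat() checks to report why the
 * exec failed.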
501 */ 502 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 503 void *ucontext __maybe_unused) 504 { 505 workload_exec_errno = info->si_value.sival_int; 506 } 507 508 static bool evsel__should_store_id(struct evsel *counter) 509 { 510 return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID; 511 } 512 513 static bool is_target_alive(struct target *_target, 514 struct perf_thread_map *threads) 515 { 516 struct stat st; 517 int i; 518 519 if (!target__has_task(_target)) 520 return true; 521 522 for (i = 0; i < threads->nr; i++) { 523 char path[PATH_MAX]; 524 525 scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(), 526 threads->map[i].pid); 527 528 if (!stat(path, &st)) 529 return true; 530 } 531 532 return false; 533 } 534 535 static void process_evlist(struct evlist *evlist, unsigned int interval) 536 { 537 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 538 539 if (evlist__ctlfd_process(evlist, &cmd) > 0) { 540 switch (cmd) { 541 case EVLIST_CTL_CMD_ENABLE: 542 fallthrough; 543 case EVLIST_CTL_CMD_DISABLE: 544 if (interval) 545 process_interval(); 546 break; 547 case EVLIST_CTL_CMD_SNAPSHOT: 548 case EVLIST_CTL_CMD_ACK: 549 case EVLIST_CTL_CMD_UNSUPPORTED: 550 case EVLIST_CTL_CMD_EVLIST: 551 case EVLIST_CTL_CMD_STOP: 552 case EVLIST_CTL_CMD_PING: 553 default: 554 break; 555 } 556 } 557 } 558 559 static void compute_tts(struct timespec *time_start, struct timespec *time_stop, 560 int *time_to_sleep) 561 { 562 int tts = *time_to_sleep; 563 struct timespec time_diff; 564 565 diff_timespec(&time_diff, time_stop, time_start); 566 567 tts -= time_diff.tv_sec * MSEC_PER_SEC + 568 time_diff.tv_nsec / NSEC_PER_MSEC; 569 570 if (tts < 0) 571 tts = 0; 572 573 *time_to_sleep = tts; 574 } 575 576 static int dispatch_events(bool forks, int timeout, int interval, int *times) 577 { 578 int child_exited = 0, status = 0; 579 int time_to_sleep, sleep_time; 580 struct timespec time_start, time_stop; 581 582 if (interval) 583 sleep_time = interval; 584 else if (timeout) 585 sleep_time = timeout; 586 else 587 sleep_time = 1000; 588 589 time_to_sleep = sleep_time; 590 591 while (!done) { 592 if (forks) 593 child_exited = waitpid(child_pid, &status, WNOHANG); 594 else 595 child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0; 596 597 if (child_exited) 598 break; 599 600 clock_gettime(CLOCK_MONOTONIC, &time_start); 601 if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */ 602 if (timeout || handle_interval(interval, times)) 603 break; 604 time_to_sleep = sleep_time; 605 } else { /* fd revent */ 606 process_evlist(evsel_list, interval); 607 clock_gettime(CLOCK_MONOTONIC, &time_stop); 608 compute_tts(&time_start, &time_stop, &time_to_sleep); 609 } 610 } 611 612 return status; 613 } 614 615 enum counter_recovery { 616 COUNTER_SKIP, 617 COUNTER_RETRY, 618 }; 619 620 static enum counter_recovery stat_handle_error(struct evsel *counter, int err) 621 { 622 char msg[BUFSIZ]; 623 624 assert(!counter->supported); 625 626 /* 627 * PPC returns ENXIO for HW counters until 2.6.37 628 * (behavior changed with commit b0a873e). 
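	 * These errno values are treated as "event not supported": warn in
	 * verbose mode and skip the counter rather than aborting the run.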
629 */ 630 if (err == EINVAL || err == ENOSYS || err == ENOENT || err == ENXIO) { 631 if (verbose > 0) { 632 evsel__open_strerror(counter, &target, err, msg, sizeof(msg)); 633 ui__warning("%s event is not supported by the kernel.\n%s\n", 634 evsel__name(counter), msg); 635 } 636 return COUNTER_SKIP; 637 } 638 if (evsel__fallback(counter, &target, err, msg, sizeof(msg))) { 639 if (verbose > 0) 640 ui__warning("%s\n", msg); 641 counter->supported = true; 642 return COUNTER_RETRY; 643 } 644 if (target__has_per_thread(&target) && err != EOPNOTSUPP && 645 evsel_list->core.threads && evsel_list->core.threads->err_thread != -1) { 646 /* 647 * For global --per-thread case, skip current 648 * error thread. 649 */ 650 if (!thread_map__remove(evsel_list->core.threads, 651 evsel_list->core.threads->err_thread)) { 652 evsel_list->core.threads->err_thread = -1; 653 counter->supported = true; 654 return COUNTER_RETRY; 655 } 656 } 657 if (verbose > 0) { 658 evsel__open_strerror(counter, &target, err, msg, sizeof(msg)); 659 ui__warning(err == EOPNOTSUPP 660 ? "%s event is not supported by the kernel.\n%s\n" 661 : "skipping event %s that kernel failed to open.\n%s\n", 662 evsel__name(counter), msg); 663 } 664 return COUNTER_SKIP; 665 } 666 667 static int create_perf_stat_counter(struct evsel *evsel, 668 struct perf_stat_config *config, 669 int cpu_map_idx) 670 { 671 struct perf_event_attr *attr = &evsel->core.attr; 672 struct evsel *leader = evsel__leader(evsel); 673 674 /* Reset supported flag as creating a stat counter is retried. */ 675 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 676 PERF_FORMAT_TOTAL_TIME_RUNNING; 677 678 /* 679 * The event is part of non trivial group, let's enable 680 * the group read (for leader) and ID retrieval for all 681 * members. 682 */ 683 if (leader->core.nr_members > 1) 684 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 685 686 attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list); 687 688 /* 689 * Some events get initialized with sample_(period/type) set, 690 * like tracepoints. Clear it up for counting. 691 */ 692 attr->sample_period = 0; 693 694 if (config->identifier) 695 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 696 697 if (config->all_user) { 698 attr->exclude_kernel = 1; 699 attr->exclude_user = 0; 700 } 701 702 if (config->all_kernel) { 703 attr->exclude_kernel = 0; 704 attr->exclude_user = 1; 705 } 706 707 /* 708 * Disabling all counters initially, they will be enabled 709 * either manually by us or by kernel via enable_on_exec 710 * set later. 711 */ 712 if (evsel__is_group_leader(evsel)) { 713 attr->disabled = 1; 714 715 if (target__enable_on_exec(&target)) 716 attr->enable_on_exec = 1; 717 } 718 719 return evsel__open_per_cpu_and_thread(evsel, evsel__cpus(evsel), cpu_map_idx, 720 evsel->core.threads); 721 } 722 723 static int __run_perf_stat(int argc, const char **argv, int run_idx) 724 { 725 int interval = stat_config.interval; 726 int times = stat_config.times; 727 int timeout = stat_config.timeout; 728 char msg[BUFSIZ]; 729 unsigned long long t0, t1; 730 struct evsel *counter; 731 size_t l; 732 int status = 0; 733 const bool forks = (argc > 0); 734 bool is_pipe = STAT_RECORD ? 
perf_stat.data.is_pipe : false; 735 struct evlist_cpu_iterator evlist_cpu_itr; 736 struct affinity saved_affinity, *affinity = NULL; 737 int err, open_err = 0; 738 bool second_pass = false, has_supported_counters; 739 740 if (forks) { 741 if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) { 742 perror("failed to prepare workload"); 743 return -1; 744 } 745 child_pid = evsel_list->workload.pid; 746 } 747 748 if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) { 749 if (affinity__setup(&saved_affinity) < 0) { 750 err = -1; 751 goto err_out; 752 } 753 affinity = &saved_affinity; 754 } 755 756 evlist__for_each_entry(evsel_list, counter) { 757 counter->reset_group = false; 758 if (bpf_counter__load(counter, &target)) { 759 err = -1; 760 goto err_out; 761 } 762 if (!(evsel__is_bperf(counter))) 763 all_counters_use_bpf = false; 764 } 765 766 evlist__reset_aggr_stats(evsel_list); 767 768 evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { 769 counter = evlist_cpu_itr.evsel; 770 771 /* 772 * bperf calls evsel__open_per_cpu() in bperf__load(), so 773 * no need to call it again here. 774 */ 775 if (target.use_bpf) 776 break; 777 778 if (counter->reset_group || !counter->supported) 779 continue; 780 if (evsel__is_bperf(counter)) 781 continue; 782 783 while (true) { 784 if (create_perf_stat_counter(counter, &stat_config, 785 evlist_cpu_itr.cpu_map_idx) == 0) 786 break; 787 788 open_err = errno; 789 /* 790 * Weak group failed. We cannot just undo this here 791 * because earlier CPUs might be in group mode, and the kernel 792 * doesn't support mixing group and non group reads. Defer 793 * it to later. 794 * Don't close here because we're in the wrong affinity. 795 */ 796 if ((open_err == EINVAL || open_err == EBADF) && 797 evsel__leader(counter) != counter && 798 counter->weak_group) { 799 evlist__reset_weak_group(evsel_list, counter, false); 800 assert(counter->reset_group); 801 counter->supported = true; 802 second_pass = true; 803 break; 804 } 805 806 if (stat_handle_error(counter, open_err) != COUNTER_RETRY) 807 break; 808 } 809 } 810 811 if (second_pass) { 812 /* 813 * Now redo all the weak group after closing them, 814 * and also close errored counters. 
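		 * Two passes are needed because the kernel does not allow
		 * mixing group and non-group reads on the same events: close
		 * everything that must change first, then reopen the counters
		 * flagged with reset_group.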
815 */ 816 817 /* First close errored or weak retry */ 818 evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { 819 counter = evlist_cpu_itr.evsel; 820 821 if (!counter->reset_group && counter->supported) 822 continue; 823 824 perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx); 825 } 826 /* Now reopen weak */ 827 evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { 828 counter = evlist_cpu_itr.evsel; 829 830 if (!counter->reset_group) 831 continue; 832 833 while (true) { 834 pr_debug2("reopening weak %s\n", evsel__name(counter)); 835 if (create_perf_stat_counter(counter, &stat_config, 836 evlist_cpu_itr.cpu_map_idx) == 0) 837 break; 838 839 open_err = errno; 840 if (stat_handle_error(counter, open_err) != COUNTER_RETRY) 841 break; 842 } 843 } 844 } 845 affinity__cleanup(affinity); 846 affinity = NULL; 847 848 has_supported_counters = false; 849 evlist__for_each_entry(evsel_list, counter) { 850 if (!counter->supported) { 851 perf_evsel__free_fd(&counter->core); 852 continue; 853 } 854 has_supported_counters = true; 855 856 l = strlen(counter->unit); 857 if (l > stat_config.unit_width) 858 stat_config.unit_width = l; 859 860 if (evsel__should_store_id(counter) && 861 evsel__store_ids(counter, evsel_list)) { 862 err = -1; 863 goto err_out; 864 } 865 } 866 if (!has_supported_counters) { 867 evsel__open_strerror(evlist__first(evsel_list), &target, open_err, 868 msg, sizeof(msg)); 869 ui__error("No supported events found.\n%s\n", msg); 870 871 if (child_pid != -1) 872 kill(child_pid, SIGTERM); 873 err = -1; 874 goto err_out; 875 } 876 877 if (evlist__apply_filters(evsel_list, &counter, &target)) { 878 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 879 counter->filter, evsel__name(counter), errno, 880 str_error_r(errno, msg, sizeof(msg))); 881 return -1; 882 } 883 884 if (STAT_RECORD) { 885 int fd = perf_data__fd(&perf_stat.data); 886 887 if (is_pipe) { 888 err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); 889 } else { 890 err = perf_session__write_header(perf_stat.session, evsel_list, 891 fd, false); 892 } 893 894 if (err < 0) 895 goto err_out; 896 897 err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list, 898 process_synthesized_event, is_pipe); 899 if (err < 0) 900 goto err_out; 901 902 } 903 904 if (target.initial_delay) { 905 pr_info(EVLIST_DISABLED_MSG); 906 } else { 907 err = enable_counters(); 908 if (err) { 909 err = -1; 910 goto err_out; 911 } 912 } 913 914 /* Exec the command, if any */ 915 if (forks) 916 evlist__start_workload(evsel_list); 917 918 if (target.initial_delay > 0) { 919 usleep(target.initial_delay * USEC_PER_MSEC); 920 err = enable_counters(); 921 if (err) { 922 err = -1; 923 goto err_out; 924 } 925 926 pr_info(EVLIST_ENABLED_MSG); 927 } 928 929 t0 = rdclock(); 930 clock_gettime(CLOCK_MONOTONIC, &ref_time); 931 932 if (forks) { 933 if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) 934 status = dispatch_events(forks, timeout, interval, ×); 935 if (child_pid != -1) { 936 if (timeout) 937 kill(child_pid, SIGTERM); 938 wait4(child_pid, &status, 0, &stat_config.ru_data); 939 } 940 941 if (workload_exec_errno) { 942 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 943 pr_err("Workload failed: %s\n", emsg); 944 err = -1; 945 goto err_out; 946 } 947 948 if (WIFSIGNALED(status)) 949 psignal(WTERMSIG(status), argv[0]); 950 } else { 951 status = dispatch_events(forks, timeout, interval, ×); 952 } 953 954 disable_counters(); 955 956 t1 = rdclock(); 957 958 if 
(stat_config.walltime_run_table) 959 stat_config.walltime_run[run_idx] = t1 - t0; 960 961 if (interval && stat_config.summary) { 962 stat_config.interval = 0; 963 stat_config.stop_read_counter = true; 964 init_stats(&walltime_nsecs_stats); 965 update_stats(&walltime_nsecs_stats, t1 - t0); 966 967 evlist__copy_prev_raw_counts(evsel_list); 968 evlist__reset_prev_raw_counts(evsel_list); 969 evlist__reset_aggr_stats(evsel_list); 970 } else { 971 update_stats(&walltime_nsecs_stats, t1 - t0); 972 update_rusage_stats(&ru_stats, &stat_config.ru_data); 973 } 974 975 /* 976 * Closing a group leader splits the group, and as we only disable 977 * group leaders, results in remaining events becoming enabled. To 978 * avoid arbitrary skew, we must read all counters before closing any 979 * group leaders. 980 */ 981 if (read_counters() == 0) 982 process_counters(); 983 984 /* 985 * We need to keep evsel_list alive, because it's processed 986 * later the evsel_list will be closed after. 987 */ 988 if (!STAT_RECORD) 989 evlist__close(evsel_list); 990 991 return WEXITSTATUS(status); 992 993 err_out: 994 if (forks) 995 evlist__cancel_workload(evsel_list); 996 997 affinity__cleanup(affinity); 998 return err; 999 } 1000 1001 /* 1002 * Returns -1 for fatal errors which signifies to not continue 1003 * when in repeat mode. 1004 * 1005 * Returns < -1 error codes when stat record is used. These 1006 * result in the stat information being displayed, but writing 1007 * to the file fails and is non fatal. 1008 */ 1009 static int run_perf_stat(int argc, const char **argv, int run_idx) 1010 { 1011 int ret; 1012 1013 if (pre_cmd) { 1014 ret = system(pre_cmd); 1015 if (ret) 1016 return ret; 1017 } 1018 1019 if (sync_run) 1020 sync(); 1021 1022 ret = __run_perf_stat(argc, argv, run_idx); 1023 if (ret) 1024 return ret; 1025 1026 if (post_cmd) { 1027 ret = system(post_cmd); 1028 if (ret) 1029 return ret; 1030 } 1031 1032 return ret; 1033 } 1034 1035 static void print_counters(struct timespec *ts, int argc, const char **argv) 1036 { 1037 /* Do not print anything if we record to the pipe. */ 1038 if (STAT_RECORD && perf_stat.data.is_pipe) 1039 return; 1040 if (quiet) 1041 return; 1042 1043 evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); 1044 } 1045 1046 static volatile sig_atomic_t signr = -1; 1047 1048 static void skip_signal(int signo) 1049 { 1050 if ((child_pid == -1) || stat_config.interval) 1051 done = 1; 1052 1053 signr = signo; 1054 /* 1055 * render child_pid harmless 1056 * won't send SIGTERM to a random 1057 * process in case of race condition 1058 * and fast PID recycling 1059 */ 1060 child_pid = -1; 1061 } 1062 1063 static void sig_atexit(void) 1064 { 1065 sigset_t set, oset; 1066 1067 /* 1068 * avoid race condition with SIGCHLD handler 1069 * in skip_signal() which is modifying child_pid 1070 * goal is to avoid send SIGTERM to a random 1071 * process 1072 */ 1073 sigemptyset(&set); 1074 sigaddset(&set, SIGCHLD); 1075 sigprocmask(SIG_BLOCK, &set, &oset); 1076 1077 if (child_pid != -1) 1078 kill(child_pid, SIGTERM); 1079 1080 sigprocmask(SIG_SETMASK, &oset, NULL); 1081 1082 if (signr == -1) 1083 return; 1084 1085 signal(signr, SIG_DFL); 1086 kill(getpid(), signr); 1087 } 1088 1089 static int stat__set_big_num(const struct option *opt __maybe_unused, 1090 const char *s __maybe_unused, int unset) 1091 { 1092 big_num_opt = unset ? 
0 : 1; 1093 perf_stat__set_big_num(!unset); 1094 return 0; 1095 } 1096 1097 static int enable_metric_only(const struct option *opt __maybe_unused, 1098 const char *s __maybe_unused, int unset) 1099 { 1100 force_metric_only = true; 1101 stat_config.metric_only = !unset; 1102 return 0; 1103 } 1104 1105 static int append_metric_groups(const struct option *opt __maybe_unused, 1106 const char *str, 1107 int unset __maybe_unused) 1108 { 1109 if (metrics) { 1110 char *tmp; 1111 1112 if (asprintf(&tmp, "%s,%s", metrics, str) < 0) 1113 return -ENOMEM; 1114 free(metrics); 1115 metrics = tmp; 1116 } else { 1117 metrics = strdup(str); 1118 if (!metrics) 1119 return -ENOMEM; 1120 } 1121 return 0; 1122 } 1123 1124 static int parse_control_option(const struct option *opt, 1125 const char *str, 1126 int unset __maybe_unused) 1127 { 1128 struct perf_stat_config *config = opt->value; 1129 1130 return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close); 1131 } 1132 1133 static int parse_stat_cgroups(const struct option *opt, 1134 const char *str, int unset) 1135 { 1136 if (stat_config.cgroup_list) { 1137 pr_err("--cgroup and --for-each-cgroup cannot be used together\n"); 1138 return -1; 1139 } 1140 1141 return parse_cgroups(opt, str, unset); 1142 } 1143 1144 static int parse_cputype(const struct option *opt, 1145 const char *str, 1146 int unset __maybe_unused) 1147 { 1148 const struct perf_pmu *pmu; 1149 struct evlist *evlist = *(struct evlist **)opt->value; 1150 1151 if (!list_empty(&evlist->core.entries)) { 1152 fprintf(stderr, "Must define cputype before events/metrics\n"); 1153 return -1; 1154 } 1155 1156 pmu = perf_pmus__pmu_for_pmu_filter(str); 1157 if (!pmu) { 1158 fprintf(stderr, "--cputype %s is not supported!\n", str); 1159 return -1; 1160 } 1161 parse_events_option_args.pmu_filter = pmu->name; 1162 1163 return 0; 1164 } 1165 1166 static int parse_cache_level(const struct option *opt, 1167 const char *str, 1168 int unset __maybe_unused) 1169 { 1170 int level; 1171 struct opt_aggr_mode *opt_aggr_mode = (struct opt_aggr_mode *)opt->value; 1172 u32 *aggr_level = (u32 *)opt->data; 1173 1174 /* 1175 * If no string is specified, aggregate based on the topology of 1176 * Last Level Cache (LLC). Since the LLC level can change from 1177 * architecture to architecture, set level greater than 1178 * MAX_CACHE_LVL which will be interpreted as LLC. 1179 */ 1180 if (str == NULL) { 1181 level = MAX_CACHE_LVL + 1; 1182 goto out; 1183 } 1184 1185 /* 1186 * The format to specify cache level is LX or lX where X is the 1187 * cache level. 1188 */ 1189 if (strlen(str) != 2 || (str[0] != 'l' && str[0] != 'L')) { 1190 pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n", 1191 MAX_CACHE_LVL, 1192 MAX_CACHE_LVL); 1193 return -EINVAL; 1194 } 1195 1196 level = atoi(&str[1]); 1197 if (level < 1) { 1198 pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n", 1199 MAX_CACHE_LVL, 1200 MAX_CACHE_LVL); 1201 return -EINVAL; 1202 } 1203 1204 if (level > MAX_CACHE_LVL) { 1205 pr_err("perf only supports max cache level of %d.\n" 1206 "Consider increasing MAX_CACHE_LVL\n", MAX_CACHE_LVL); 1207 return -EINVAL; 1208 } 1209 out: 1210 opt_aggr_mode->cache = true; 1211 *aggr_level = level; 1212 return 0; 1213 } 1214 1215 /** 1216 * Calculate the cache instance ID from the map in 1217 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list 1218 * Cache instance ID is the first CPU reported in the shared_cpu_list file. 
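 * Using the lowest numbered online CPU in the map yields an ID that is
 * shared by all CPUs in the same cache domain.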
 */
static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
{
	int id;
	struct perf_cpu_map *cpu_map = perf_cpu_map__new(map);

	/*
	 * If the map contains no CPU, consider the current CPU to
	 * be the first online CPU in the cache domain else use the
	 * first online CPU of the cache domain as the ID.
	 */
	id = perf_cpu_map__min(cpu_map).cpu;
	if (id == -1)
		id = cpu.cpu;

	/* Free the perf_cpu_map used to find the cache ID */
	perf_cpu_map__put(cpu_map);

	return id;
}

/**
 * cpu__get_cache_details - Returns 0 if successful in populating the
 * cache level and cache id. Cache level is read from
 * /sys/devices/system/cpu/cpuX/cache/indexY/level whereas cache instance ID
 * is the first CPU reported by
 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
 */
static int cpu__get_cache_details(struct perf_cpu cpu, struct perf_cache *cache)
{
	int ret = 0;
	u32 cache_level = stat_config.aggr_level;
	struct cpu_cache_level caches[MAX_CACHE_LVL];
	u32 i = 0, caches_cnt = 0;

	cache->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
	cache->cache = -1;

	ret = build_caches_for_cpu(cpu.cpu, caches, &caches_cnt);
	if (ret) {
		/*
		 * If caches_cnt is not 0, cpu_cache_level data
		 * was allocated when building the topology.
		 * Free the allocated data before returning.
		 */
		if (caches_cnt)
			goto free_caches;

		return ret;
	}

	if (!caches_cnt)
		return -1;

	/*
	 * Save the data for the highest level if no
	 * level was specified by the user.
	 */
	if (cache_level > MAX_CACHE_LVL) {
		int max_level_index = 0;

		for (i = 1; i < caches_cnt; ++i) {
			if (caches[i].level > caches[max_level_index].level)
				max_level_index = i;
		}

		cache->cache_lvl = caches[max_level_index].level;
		cache->cache = cpu__get_cache_id_from_map(cpu, caches[max_level_index].map);

		/* Reset i to 0 to free entire caches[] */
		i = 0;
		goto free_caches;
	}

	for (i = 0; i < caches_cnt; ++i) {
		if (caches[i].level == cache_level) {
			cache->cache_lvl = cache_level;
			cache->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
		}

		cpu_cache_level__free(&caches[i]);
	}

free_caches:
	/*
	 * Free all the allocated cpu_cache_level data.
	 */
	while (i < caches_cnt)
		cpu_cache_level__free(&caches[i++]);

	return ret;
}

/**
 * aggr_cpu_id__cache - Create an aggr_cpu_id with cache instance ID, cache
 * level, die and socket populated with the cache instance ID, cache level,
 * die and socket for cpu. The function signature is compatible with
 * aggr_cpu_id_get_t.
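 * If the cache details cannot be resolved the die-level id is returned
 * unchanged, so such CPUs fall back to die aggregation.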
1317 */ 1318 static struct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data) 1319 { 1320 int ret; 1321 struct aggr_cpu_id id; 1322 struct perf_cache cache; 1323 1324 id = aggr_cpu_id__die(cpu, data); 1325 if (aggr_cpu_id__is_empty(&id)) 1326 return id; 1327 1328 ret = cpu__get_cache_details(cpu, &cache); 1329 if (ret) 1330 return id; 1331 1332 id.cache_lvl = cache.cache_lvl; 1333 id.cache = cache.cache; 1334 return id; 1335 } 1336 1337 static const char *const aggr_mode__string[] = { 1338 [AGGR_CORE] = "core", 1339 [AGGR_CACHE] = "cache", 1340 [AGGR_CLUSTER] = "cluster", 1341 [AGGR_DIE] = "die", 1342 [AGGR_GLOBAL] = "global", 1343 [AGGR_NODE] = "node", 1344 [AGGR_NONE] = "none", 1345 [AGGR_SOCKET] = "socket", 1346 [AGGR_THREAD] = "thread", 1347 [AGGR_UNSET] = "unset", 1348 }; 1349 1350 static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, 1351 struct perf_cpu cpu) 1352 { 1353 return aggr_cpu_id__socket(cpu, /*data=*/NULL); 1354 } 1355 1356 static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, 1357 struct perf_cpu cpu) 1358 { 1359 return aggr_cpu_id__die(cpu, /*data=*/NULL); 1360 } 1361 1362 static struct aggr_cpu_id perf_stat__get_cache_id(struct perf_stat_config *config __maybe_unused, 1363 struct perf_cpu cpu) 1364 { 1365 return aggr_cpu_id__cache(cpu, /*data=*/NULL); 1366 } 1367 1368 static struct aggr_cpu_id perf_stat__get_cluster(struct perf_stat_config *config __maybe_unused, 1369 struct perf_cpu cpu) 1370 { 1371 return aggr_cpu_id__cluster(cpu, /*data=*/NULL); 1372 } 1373 1374 static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, 1375 struct perf_cpu cpu) 1376 { 1377 return aggr_cpu_id__core(cpu, /*data=*/NULL); 1378 } 1379 1380 static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, 1381 struct perf_cpu cpu) 1382 { 1383 return aggr_cpu_id__node(cpu, /*data=*/NULL); 1384 } 1385 1386 static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config __maybe_unused, 1387 struct perf_cpu cpu) 1388 { 1389 return aggr_cpu_id__global(cpu, /*data=*/NULL); 1390 } 1391 1392 static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __maybe_unused, 1393 struct perf_cpu cpu) 1394 { 1395 return aggr_cpu_id__cpu(cpu, /*data=*/NULL); 1396 } 1397 1398 static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, 1399 aggr_get_id_t get_id, struct perf_cpu cpu) 1400 { 1401 struct aggr_cpu_id id; 1402 1403 /* per-process mode - should use global aggr mode */ 1404 if (cpu.cpu == -1 || cpu.cpu >= config->cpus_aggr_map->nr) 1405 return get_id(config, cpu); 1406 1407 if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) 1408 config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu); 1409 1410 id = config->cpus_aggr_map->map[cpu.cpu]; 1411 return id; 1412 } 1413 1414 static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config, 1415 struct perf_cpu cpu) 1416 { 1417 return perf_stat__get_aggr(config, perf_stat__get_socket, cpu); 1418 } 1419 1420 static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config, 1421 struct perf_cpu cpu) 1422 { 1423 return perf_stat__get_aggr(config, perf_stat__get_die, cpu); 1424 } 1425 1426 static struct aggr_cpu_id perf_stat__get_cluster_cached(struct perf_stat_config *config, 1427 struct perf_cpu cpu) 1428 { 1429 return perf_stat__get_aggr(config, perf_stat__get_cluster, cpu); 1430 } 1431 1432 
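/*
 * Like the cached lookups above, the helpers below go through
 * perf_stat__get_aggr(), which memoizes the id in config->cpus_aggr_map so
 * each CPU's aggregation id is computed at most once.
 */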
static struct aggr_cpu_id perf_stat__get_cache_id_cached(struct perf_stat_config *config, 1433 struct perf_cpu cpu) 1434 { 1435 return perf_stat__get_aggr(config, perf_stat__get_cache_id, cpu); 1436 } 1437 1438 static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config, 1439 struct perf_cpu cpu) 1440 { 1441 return perf_stat__get_aggr(config, perf_stat__get_core, cpu); 1442 } 1443 1444 static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config, 1445 struct perf_cpu cpu) 1446 { 1447 return perf_stat__get_aggr(config, perf_stat__get_node, cpu); 1448 } 1449 1450 static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *config, 1451 struct perf_cpu cpu) 1452 { 1453 return perf_stat__get_aggr(config, perf_stat__get_global, cpu); 1454 } 1455 1456 static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *config, 1457 struct perf_cpu cpu) 1458 { 1459 return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu); 1460 } 1461 1462 static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) 1463 { 1464 switch (aggr_mode) { 1465 case AGGR_SOCKET: 1466 return aggr_cpu_id__socket; 1467 case AGGR_DIE: 1468 return aggr_cpu_id__die; 1469 case AGGR_CLUSTER: 1470 return aggr_cpu_id__cluster; 1471 case AGGR_CACHE: 1472 return aggr_cpu_id__cache; 1473 case AGGR_CORE: 1474 return aggr_cpu_id__core; 1475 case AGGR_NODE: 1476 return aggr_cpu_id__node; 1477 case AGGR_NONE: 1478 return aggr_cpu_id__cpu; 1479 case AGGR_GLOBAL: 1480 return aggr_cpu_id__global; 1481 case AGGR_THREAD: 1482 case AGGR_UNSET: 1483 case AGGR_MAX: 1484 default: 1485 return NULL; 1486 } 1487 } 1488 1489 static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) 1490 { 1491 switch (aggr_mode) { 1492 case AGGR_SOCKET: 1493 return perf_stat__get_socket_cached; 1494 case AGGR_DIE: 1495 return perf_stat__get_die_cached; 1496 case AGGR_CLUSTER: 1497 return perf_stat__get_cluster_cached; 1498 case AGGR_CACHE: 1499 return perf_stat__get_cache_id_cached; 1500 case AGGR_CORE: 1501 return perf_stat__get_core_cached; 1502 case AGGR_NODE: 1503 return perf_stat__get_node_cached; 1504 case AGGR_NONE: 1505 return perf_stat__get_cpu_cached; 1506 case AGGR_GLOBAL: 1507 return perf_stat__get_global_cached; 1508 case AGGR_THREAD: 1509 case AGGR_UNSET: 1510 case AGGR_MAX: 1511 default: 1512 return NULL; 1513 } 1514 } 1515 1516 static int perf_stat_init_aggr_mode(void) 1517 { 1518 int nr; 1519 aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode); 1520 1521 if (get_id) { 1522 bool needs_sort = stat_config.aggr_mode != AGGR_NONE; 1523 stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, 1524 get_id, /*data=*/NULL, needs_sort); 1525 if (!stat_config.aggr_map) { 1526 pr_err("cannot build %s map\n", aggr_mode__string[stat_config.aggr_mode]); 1527 return -1; 1528 } 1529 stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode); 1530 } 1531 1532 if (stat_config.aggr_mode == AGGR_THREAD) { 1533 nr = perf_thread_map__nr(evsel_list->core.threads); 1534 stat_config.aggr_map = cpu_aggr_map__empty_new(nr); 1535 if (stat_config.aggr_map == NULL) 1536 return -ENOMEM; 1537 1538 for (int s = 0; s < nr; s++) { 1539 struct aggr_cpu_id id = aggr_cpu_id__empty(); 1540 1541 id.thread_idx = s; 1542 stat_config.aggr_map->map[s] = id; 1543 } 1544 return 0; 1545 } 1546 1547 /* 1548 * The evsel_list->cpus is the base we operate on, 1549 * taking the highest cpu number to be the size of 1550 * the aggregation translate cpumap. 
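	 * cpus_aggr_map is indexed directly by cpu number, hence the "+ 1" on
	 * the maximum cpu below.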
 */
	nr = perf_cpu_map__max(evsel_list->core.all_cpus).cpu + 1;
	stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr);
	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}

static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
{
	free(map);
}

static void perf_stat__exit_aggr_mode(void)
{
	cpu_aggr_map__delete(stat_config.aggr_map);
	cpu_aggr_map__delete(stat_config.cpus_aggr_map);
	stat_config.aggr_map = NULL;
	stat_config.cpus_aggr_map = NULL;
}

static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
	struct perf_env *env = data;
	struct aggr_cpu_id id = aggr_cpu_id__empty();

	if (cpu.cpu != -1)
		id.socket = env->cpu[cpu.cpu].socket_id;

	return id;
}

static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
	struct perf_env *env = data;
	struct aggr_cpu_id id = aggr_cpu_id__empty();

	if (cpu.cpu != -1) {
		/*
		 * die_id is relative to socket, so start
		 * with the socket ID and then add die to
		 * make a unique ID.
		 */
		id.socket = env->cpu[cpu.cpu].socket_id;
		id.die = env->cpu[cpu.cpu].die_id;
	}

	return id;
}

static void perf_env__get_cache_id_for_cpu(struct perf_cpu cpu, struct perf_env *env,
					   u32 cache_level, struct aggr_cpu_id *id)
{
	int i;
	int caches_cnt = env->caches_cnt;
	struct cpu_cache_level *caches = env->caches;

	id->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
	id->cache = -1;

	if (!caches_cnt)
		return;

	for (i = caches_cnt - 1; i > -1; --i) {
		struct perf_cpu_map *cpu_map;
		int map_contains_cpu;

		/*
		 * If the user has not specified a level, find the first level
		 * with the cpu in the map. Since building the map is expensive,
		 * do this only if levels match.
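		 * (When a level was requested, the cpu map is only built for
		 * cache entries at that level.)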
1620 */ 1621 if (cache_level <= MAX_CACHE_LVL && caches[i].level != cache_level) 1622 continue; 1623 1624 cpu_map = perf_cpu_map__new(caches[i].map); 1625 map_contains_cpu = perf_cpu_map__idx(cpu_map, cpu); 1626 perf_cpu_map__put(cpu_map); 1627 1628 if (map_contains_cpu != -1) { 1629 id->cache_lvl = caches[i].level; 1630 id->cache = cpu__get_cache_id_from_map(cpu, caches[i].map); 1631 return; 1632 } 1633 } 1634 } 1635 1636 static struct aggr_cpu_id perf_env__get_cache_aggr_by_cpu(struct perf_cpu cpu, 1637 void *data) 1638 { 1639 struct perf_env *env = data; 1640 struct aggr_cpu_id id = aggr_cpu_id__empty(); 1641 1642 if (cpu.cpu != -1) { 1643 u32 cache_level = (perf_stat.aggr_level) ?: stat_config.aggr_level; 1644 1645 id.socket = env->cpu[cpu.cpu].socket_id; 1646 id.die = env->cpu[cpu.cpu].die_id; 1647 perf_env__get_cache_id_for_cpu(cpu, env, cache_level, &id); 1648 } 1649 1650 return id; 1651 } 1652 1653 static struct aggr_cpu_id perf_env__get_cluster_aggr_by_cpu(struct perf_cpu cpu, 1654 void *data) 1655 { 1656 struct perf_env *env = data; 1657 struct aggr_cpu_id id = aggr_cpu_id__empty(); 1658 1659 if (cpu.cpu != -1) { 1660 id.socket = env->cpu[cpu.cpu].socket_id; 1661 id.die = env->cpu[cpu.cpu].die_id; 1662 id.cluster = env->cpu[cpu.cpu].cluster_id; 1663 } 1664 1665 return id; 1666 } 1667 1668 static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data) 1669 { 1670 struct perf_env *env = data; 1671 struct aggr_cpu_id id = aggr_cpu_id__empty(); 1672 1673 if (cpu.cpu != -1) { 1674 /* 1675 * core_id is relative to socket, die and cluster, we need a 1676 * global id. So we set socket, die id, cluster id and core id. 1677 */ 1678 id.socket = env->cpu[cpu.cpu].socket_id; 1679 id.die = env->cpu[cpu.cpu].die_id; 1680 id.cluster = env->cpu[cpu.cpu].cluster_id; 1681 id.core = env->cpu[cpu.cpu].core_id; 1682 } 1683 1684 return id; 1685 } 1686 1687 static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data) 1688 { 1689 struct perf_env *env = data; 1690 struct aggr_cpu_id id = aggr_cpu_id__empty(); 1691 1692 if (cpu.cpu != -1) { 1693 /* 1694 * core_id is relative to socket and die, 1695 * we need a global id. 
So we set 1696 * socket, die id and core id 1697 */ 1698 id.socket = env->cpu[cpu.cpu].socket_id; 1699 id.die = env->cpu[cpu.cpu].die_id; 1700 id.core = env->cpu[cpu.cpu].core_id; 1701 id.cpu = cpu; 1702 } 1703 1704 return id; 1705 } 1706 1707 static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data) 1708 { 1709 struct aggr_cpu_id id = aggr_cpu_id__empty(); 1710 1711 id.node = perf_env__numa_node(data, cpu); 1712 return id; 1713 } 1714 1715 static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused, 1716 void *data __maybe_unused) 1717 { 1718 struct aggr_cpu_id id = aggr_cpu_id__empty(); 1719 1720 /* it always aggregates to the cpu 0 */ 1721 id.cpu = (struct perf_cpu){ .cpu = 0 }; 1722 return id; 1723 } 1724 1725 static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, 1726 struct perf_cpu cpu) 1727 { 1728 return perf_env__get_socket_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); 1729 } 1730 static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, 1731 struct perf_cpu cpu) 1732 { 1733 return perf_env__get_die_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); 1734 } 1735 1736 static struct aggr_cpu_id perf_stat__get_cluster_file(struct perf_stat_config *config __maybe_unused, 1737 struct perf_cpu cpu) 1738 { 1739 return perf_env__get_cluster_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); 1740 } 1741 1742 static struct aggr_cpu_id perf_stat__get_cache_file(struct perf_stat_config *config __maybe_unused, 1743 struct perf_cpu cpu) 1744 { 1745 return perf_env__get_cache_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); 1746 } 1747 1748 static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, 1749 struct perf_cpu cpu) 1750 { 1751 return perf_env__get_core_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); 1752 } 1753 1754 static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused, 1755 struct perf_cpu cpu) 1756 { 1757 return perf_env__get_cpu_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); 1758 } 1759 1760 static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, 1761 struct perf_cpu cpu) 1762 { 1763 return perf_env__get_node_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); 1764 } 1765 1766 static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused, 1767 struct perf_cpu cpu) 1768 { 1769 return perf_env__get_global_aggr_by_cpu(cpu, perf_session__env(perf_stat.session)); 1770 } 1771 1772 static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) 1773 { 1774 switch (aggr_mode) { 1775 case AGGR_SOCKET: 1776 return perf_env__get_socket_aggr_by_cpu; 1777 case AGGR_DIE: 1778 return perf_env__get_die_aggr_by_cpu; 1779 case AGGR_CLUSTER: 1780 return perf_env__get_cluster_aggr_by_cpu; 1781 case AGGR_CACHE: 1782 return perf_env__get_cache_aggr_by_cpu; 1783 case AGGR_CORE: 1784 return perf_env__get_core_aggr_by_cpu; 1785 case AGGR_NODE: 1786 return perf_env__get_node_aggr_by_cpu; 1787 case AGGR_GLOBAL: 1788 return perf_env__get_global_aggr_by_cpu; 1789 case AGGR_NONE: 1790 return perf_env__get_cpu_aggr_by_cpu; 1791 case AGGR_THREAD: 1792 case AGGR_UNSET: 1793 case AGGR_MAX: 1794 default: 1795 return NULL; 1796 } 1797 } 1798 1799 static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) 1800 { 1801 switch (aggr_mode) { 
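	/*
	 * AGGR_THREAD, AGGR_UNSET and AGGR_MAX have no per-CPU lookup here and
	 * return NULL, mirroring aggr_mode__get_id() above.
	 */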
1802 case AGGR_SOCKET: 1803 return perf_stat__get_socket_file; 1804 case AGGR_DIE: 1805 return perf_stat__get_die_file; 1806 case AGGR_CLUSTER: 1807 return perf_stat__get_cluster_file; 1808 case AGGR_CACHE: 1809 return perf_stat__get_cache_file; 1810 case AGGR_CORE: 1811 return perf_stat__get_core_file; 1812 case AGGR_NODE: 1813 return perf_stat__get_node_file; 1814 case AGGR_GLOBAL: 1815 return perf_stat__get_global_file; 1816 case AGGR_NONE: 1817 return perf_stat__get_cpu_file; 1818 case AGGR_THREAD: 1819 case AGGR_UNSET: 1820 case AGGR_MAX: 1821 default: 1822 return NULL; 1823 } 1824 } 1825 1826 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1827 { 1828 struct perf_env *env = perf_session__env(st->session); 1829 aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); 1830 bool needs_sort = stat_config.aggr_mode != AGGR_NONE; 1831 1832 if (stat_config.aggr_mode == AGGR_THREAD) { 1833 int nr = perf_thread_map__nr(evsel_list->core.threads); 1834 1835 stat_config.aggr_map = cpu_aggr_map__empty_new(nr); 1836 if (stat_config.aggr_map == NULL) 1837 return -ENOMEM; 1838 1839 for (int s = 0; s < nr; s++) { 1840 struct aggr_cpu_id id = aggr_cpu_id__empty(); 1841 1842 id.thread_idx = s; 1843 stat_config.aggr_map->map[s] = id; 1844 } 1845 return 0; 1846 } 1847 1848 if (!get_id) 1849 return 0; 1850 1851 stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, 1852 get_id, env, needs_sort); 1853 if (!stat_config.aggr_map) { 1854 pr_err("cannot build %s map\n", aggr_mode__string[stat_config.aggr_mode]); 1855 return -1; 1856 } 1857 stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode); 1858 return 0; 1859 } 1860 1861 static int default_evlist_evsel_cmp(void *priv __maybe_unused, 1862 const struct list_head *l, 1863 const struct list_head *r) 1864 { 1865 const struct perf_evsel *lhs_core = container_of(l, struct perf_evsel, node); 1866 const struct evsel *lhs = container_of(lhs_core, struct evsel, core); 1867 const struct perf_evsel *rhs_core = container_of(r, struct perf_evsel, node); 1868 const struct evsel *rhs = container_of(rhs_core, struct evsel, core); 1869 1870 if (evsel__leader(lhs) == evsel__leader(rhs)) { 1871 /* Within the same group, respect the original order. */ 1872 return lhs_core->idx - rhs_core->idx; 1873 } 1874 1875 /* Sort default metrics evsels first, and default show events before those. */ 1876 if (lhs->default_metricgroup != rhs->default_metricgroup) 1877 return lhs->default_metricgroup ? -1 : 1; 1878 1879 if (lhs->default_show_events != rhs->default_show_events) 1880 return lhs->default_show_events ? -1 : 1; 1881 1882 /* Sort by PMU type (prefers legacy types first). */ 1883 if (lhs->pmu != rhs->pmu) 1884 return lhs->pmu->type - rhs->pmu->type; 1885 1886 /* Sort by name. */ 1887 return strcmp(evsel__name((struct evsel *)lhs), evsel__name((struct evsel *)rhs)); 1888 } 1889 1890 /* 1891 * Add default events, if there were no attributes specified or 1892 * if -d/--detailed, -d -d or -d -d -d is used: 1893 */ 1894 static int add_default_events(void) 1895 { 1896 const char *pmu = parse_events_option_args.pmu_filter ?: "all"; 1897 struct parse_events_error err; 1898 struct evlist *evlist = evlist__new(); 1899 struct evsel *evsel; 1900 int ret = 0; 1901 1902 if (!evlist) 1903 return -ENOMEM; 1904 1905 parse_events_error__init(&err); 1906 1907 /* Set attrs if no event is selected and !null_run: */ 1908 if (stat_config.null_run) 1909 goto out; 1910 1911 if (transaction_run) { 1912 /* Handle -T as -M transaction. 
Once platform specific metrics 1913 * support has been added to the json files, all architectures 1914 * will use this approach. To determine transaction support 1915 * on an architecture test for such a metric name. 1916 */ 1917 if (!metricgroup__has_metric_or_groups(pmu, "transaction")) { 1918 pr_err("Missing transaction metrics\n"); 1919 ret = -1; 1920 goto out; 1921 } 1922 ret = metricgroup__parse_groups(evlist, pmu, "transaction", 1923 stat_config.metric_no_group, 1924 stat_config.metric_no_merge, 1925 stat_config.metric_no_threshold, 1926 stat_config.user_requested_cpu_list, 1927 stat_config.system_wide, 1928 stat_config.hardware_aware_grouping); 1929 goto out; 1930 } 1931 1932 if (smi_cost) { 1933 int smi; 1934 1935 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { 1936 pr_err("freeze_on_smi is not supported.\n"); 1937 ret = -1; 1938 goto out; 1939 } 1940 1941 if (!smi) { 1942 if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { 1943 pr_err("Failed to set freeze_on_smi.\n"); 1944 ret = -1; 1945 goto out; 1946 } 1947 smi_reset = true; 1948 } 1949 1950 if (!metricgroup__has_metric_or_groups(pmu, "smi")) { 1951 pr_err("Missing smi metrics\n"); 1952 ret = -1; 1953 goto out; 1954 } 1955 1956 if (!force_metric_only) 1957 stat_config.metric_only = true; 1958 1959 ret = metricgroup__parse_groups(evlist, pmu, "smi", 1960 stat_config.metric_no_group, 1961 stat_config.metric_no_merge, 1962 stat_config.metric_no_threshold, 1963 stat_config.user_requested_cpu_list, 1964 stat_config.system_wide, 1965 stat_config.hardware_aware_grouping); 1966 goto out; 1967 } 1968 1969 if (topdown_run) { 1970 unsigned int max_level = metricgroups__topdown_max_level(); 1971 char str[] = "TopdownL1"; 1972 1973 if (!force_metric_only) 1974 stat_config.metric_only = true; 1975 1976 if (!max_level) { 1977 pr_err("Topdown requested but the topdown metric groups aren't present.\n" 1978 "(See perf list the metric groups have names like TopdownL1)\n"); 1979 ret = -1; 1980 goto out; 1981 } 1982 if (stat_config.topdown_level > max_level) { 1983 pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level); 1984 ret = -1; 1985 goto out; 1986 } else if (!stat_config.topdown_level) { 1987 stat_config.topdown_level = 1; 1988 } 1989 if (!stat_config.interval && !stat_config.metric_only) { 1990 fprintf(stat_config.output, 1991 "Topdown accuracy may decrease when measuring long periods.\n" 1992 "Please print the result regularly, e.g. -I1000\n"); 1993 } 1994 str[8] = stat_config.topdown_level + '0'; 1995 if (metricgroup__parse_groups(evlist, 1996 pmu, str, 1997 /*metric_no_group=*/false, 1998 /*metric_no_merge=*/false, 1999 /*metric_no_threshold=*/true, 2000 stat_config.user_requested_cpu_list, 2001 stat_config.system_wide, 2002 stat_config.hardware_aware_grouping) < 0) { 2003 ret = -1; 2004 goto out; 2005 } 2006 } 2007 2008 if (!stat_config.topdown_level) 2009 stat_config.topdown_level = 1; 2010 2011 if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) { 2012 /* 2013 * Add Default metrics. To minimize multiplexing, don't request 2014 * threshold computation, but it will be computed if the events 2015 * are present. 
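		 * The Default/Default2/... metric groups below are tried in
		 * order, capped by the -d/--detailed level (detailed_run).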
	if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) {
		/*
		 * Add Default metrics. To minimize multiplexing, don't request
		 * threshold computation, but it will be computed if the events
		 * are present.
		 */
		const char *default_metricgroup_names[] = {
			"Default", "Default2", "Default3", "Default4",
		};

		for (size_t i = 0; i < ARRAY_SIZE(default_metricgroup_names); i++) {
			struct evlist *metric_evlist;

			if (!metricgroup__has_metric_or_groups(pmu, default_metricgroup_names[i]))
				continue;

			if ((int)i > detailed_run)
				break;

			metric_evlist = evlist__new();
			if (!metric_evlist) {
				ret = -ENOMEM;
				break;
			}
			if (metricgroup__parse_groups(metric_evlist, pmu, default_metricgroup_names[i],
						      /*metric_no_group=*/false,
						      /*metric_no_merge=*/false,
						      /*metric_no_threshold=*/true,
						      stat_config.user_requested_cpu_list,
						      stat_config.system_wide,
						      stat_config.hardware_aware_grouping) < 0) {
				evlist__delete(metric_evlist);
				ret = -1;
				break;
			}

			evlist__for_each_entry(metric_evlist, evsel)
				evsel->default_metricgroup = true;

			evlist__splice_list_tail(evlist, &metric_evlist->core.entries);
			metricgroup__copy_metric_events(evlist, /*cgrp=*/NULL,
							&evlist->metric_events,
							&metric_evlist->metric_events);
			evlist__delete(metric_evlist);
		}
		list_sort(/*priv=*/NULL, &evlist->core.entries, default_evlist_evsel_cmp);
	}
out:
	if (!ret) {
		evlist__for_each_entry(evlist, evsel) {
			/*
			 * Make at least one event non-skippable so fatal errors are visible.
			 * 'cycles' always used to be default and non-skippable, so use that.
			 */
			if (!evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
				evsel->skippable = true;
		}
	}
	parse_events_error__exit(&err);
	evlist__splice_list_tail(evsel_list, &evlist->core.entries);
	metricgroup__copy_metric_events(evsel_list, /*cgrp=*/NULL,
					&evsel_list->metric_events,
					&evlist->metric_events);
	evlist__delete(evlist);
	return ret;
}

static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};

static void init_features(struct perf_session *session)
{
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
}
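
/*
 * Example workflow (illustrative): 'perf stat record' saves the counter
 * configuration and per-round counts into a perf.data file that
 * 'perf stat report' can re-aggregate later, e.g.:
 *
 *   $ perf stat record -e cycles,instructions -- ./workload
 *   $ perf stat report --per-core
 */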
static int __cmd_record(const struct option stat_options[], struct opt_aggr_mode *opt_mode,
			int argc, const char **argv)
{
	struct perf_session *session;
	struct perf_data *data = &perf_stat.data;

	argc = parse_options(argc, argv, stat_options, stat_record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	stat_config.aggr_mode = opt_aggr_mode_to_aggr_mode(opt_mode);

	if (output_name)
		data->path = output_name;

	if (stat_config.run_count != 1 || forever) {
		pr_err("Cannot use -r option with perf stat record.\n");
		return -1;
	}

	session = perf_session__new(data, NULL);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed\n");
		return PTR_ERR(session);
	}

	init_features(session);

	session->evlist = evsel_list;
	perf_stat.session = session;
	perf_stat.record = true;
	return argc;
}

static int process_stat_round_event(const struct perf_tool *tool __maybe_unused,
				    struct perf_session *session,
				    union perf_event *event)
{
	struct perf_record_stat_round *stat_round = &event->stat_round;
	struct timespec tsh, *ts = NULL;
	struct perf_env *env = perf_session__env(session);
	const char **argv = env->cmdline_argv;
	int argc = env->nr_cmdline;

	process_counters();

	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
		update_stats(&walltime_nsecs_stats, stat_round->time);

	if (stat_config.interval && stat_round->time) {
		tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
		ts = &tsh;
	}

	print_counters(ts, argc, argv);
	return 0;
}

static
int process_stat_config_event(const struct perf_tool *tool,
			      struct perf_session *session,
			      union perf_event *event)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	perf_event__read_stat_config(&stat_config, &event->stat_config);

	if (perf_cpu_map__is_empty(st->cpus)) {
		if (st->aggr_mode != AGGR_UNSET)
			pr_warning("warning: processing task data, aggregation mode not set\n");
	} else if (st->aggr_mode != AGGR_UNSET) {
		stat_config.aggr_mode = st->aggr_mode;
	}

	if (perf_stat.data.is_pipe)
		perf_stat_init_aggr_mode();
	else
		perf_stat_init_aggr_mode_file(st);

	if (stat_config.aggr_map) {
		int nr_aggr = stat_config.aggr_map->nr;

		if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) {
			pr_err("cannot allocate aggr counts\n");
			return -1;
		}
	}
	return 0;
}

static int set_maps(struct perf_stat *st)
{
	if (!st->cpus || !st->threads)
		return 0;

	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
		return -EINVAL;

	perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);

	if (evlist__alloc_stats(&stat_config, evsel_list, /*alloc_raw=*/true))
		return -ENOMEM;

	st->maps_allocated = true;
	return 0;
}

static
int process_thread_map_event(const struct perf_tool *tool,
			     struct perf_session *session __maybe_unused,
			     union perf_event *event)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	if (st->threads) {
		pr_warning("Extra thread map event, ignoring.\n");
		return 0;
	}

	st->threads = thread_map__new_event(&event->thread_map);
	if (!st->threads)
		return -ENOMEM;

	return set_maps(st);
}

static
int process_cpu_map_event(const struct perf_tool *tool,
			  struct perf_session *session __maybe_unused,
			  union perf_event *event)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
	struct perf_cpu_map *cpus;

	if (st->cpus) {
		pr_warning("Extra cpu map event, ignoring.\n");
		return 0;
	}

	cpus = cpu_map__new_data(&event->cpu_map.data);
	if (!cpus)
		return -ENOMEM;

	st->cpus = cpus;
	return set_maps(st);
}

static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};

static struct perf_stat perf_stat = {
	.aggr_mode	= AGGR_UNSET,
	.aggr_level	= 0,
};

static int __cmd_report(int argc, const char **argv)
{
	struct perf_session *session;
	const struct option options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
		     "aggregate counts per processor die", AGGR_DIE),
	OPT_SET_UINT(0, "per-cluster", &perf_stat.aggr_mode,
		     "aggregate counts per processor cluster", AGGR_CLUSTER),
	OPT_CALLBACK_OPTARG(0, "per-cache", &perf_stat.aggr_mode, &perf_stat.aggr_level,
			    "cache level",
			    "aggregate count at this cache level (Default: LLC)",
			    parse_cache_level),
	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
		     "aggregate counts per numa node", AGGR_NODE),
	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_END()
	};
	struct stat st;
	int ret;

	argc = parse_options(argc, argv, options, stat_report_usage, 0);

	if (!input_name || !strlen(input_name)) {
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			input_name = "-";
		else
			input_name = "perf.data";
	}

	perf_stat.data.path = input_name;
	perf_stat.data.mode = PERF_DATA_MODE_READ;

	perf_tool__init(&perf_stat.tool, /*ordered_events=*/false);
	perf_stat.tool.attr = perf_event__process_attr;
	perf_stat.tool.event_update = perf_event__process_event_update;
	perf_stat.tool.thread_map = process_thread_map_event;
	perf_stat.tool.cpu_map = process_cpu_map_event;
	perf_stat.tool.stat_config = process_stat_config_event;
	perf_stat.tool.stat = perf_event__process_stat_event;
	perf_stat.tool.stat_round = process_stat_round_event;

	session = perf_session__new(&perf_stat.data, &perf_stat.tool);
	if (IS_ERR(session))
		return PTR_ERR(session);

	perf_stat.session = session;
	stat_config.output = stderr;
	evlist__delete(evsel_list);
	evsel_list = session->evlist;

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	perf_session__delete(session);
	return 0;
}
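
/*
 * Illustrative example of the fallback below: a workload run with only
 * events that require a CPU context (e.g. uncore PMU events) and no
 * explicit target ends up implying system-wide (-a) collection, while a
 * regular per-task event keeps the default per-task scope.
 */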
static void setup_system_wide(int forks)
{
	/*
	 * Make system wide (-a) the default target if
	 * no target was specified and one of the following
	 * conditions is met:
	 *
	 * - there's no workload specified
	 * - there is a workload specified but all requested
	 *   events are system wide events
	 */
	if (!target__none(&target))
		return;

	if (!forks)
		target.system_wide = true;
	else {
		struct evsel *counter;

		evlist__for_each_entry(evsel_list, counter) {
			if (!counter->core.requires_cpu &&
			    !evsel__name_is(counter, "duration_time")) {
				return;
			}
		}

		if (evsel_list->core.nr_entries)
			target.system_wide = true;
	}
}

#ifdef HAVE_ARCH_X86_64_SUPPORT
static int parse_tpebs_mode(const struct option *opt, const char *str,
			    int unset __maybe_unused)
{
	enum tpebs_mode *mode = opt->value;

	if (!strcasecmp("mean", str)) {
		*mode = TPEBS_MODE__MEAN;
		return 0;
	}
	if (!strcasecmp("min", str)) {
		*mode = TPEBS_MODE__MIN;
		return 0;
	}
	if (!strcasecmp("max", str)) {
		*mode = TPEBS_MODE__MAX;
		return 0;
	}
	if (!strcasecmp("last", str)) {
		*mode = TPEBS_MODE__LAST;
		return 0;
	}

	return -1;
}
#endif // HAVE_ARCH_X86_64_SUPPORT

int cmd_stat(int argc, const char **argv)
{
	struct opt_aggr_mode opt_mode = {};
	struct option stat_options[] = {
		OPT_BOOLEAN('T', "transaction", &transaction_run,
			    "hardware transaction statistics"),
		OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
			     "event selector. use 'perf list' to list available events",
			     parse_events_option),
		OPT_CALLBACK(0, "filter", &evsel_list, "filter",
			     "event filter", parse_filter),
		OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
			    "child tasks do not inherit counters"),
		OPT_STRING('p', "pid", &target.pid, "pid",
			   "stat events on existing process id"),
		OPT_STRING('t', "tid", &target.tid, "tid",
			   "stat events on existing thread id"),
#ifdef HAVE_BPF_SKEL
		OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
			   "stat events on existing bpf program id"),
		OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
			    "use bpf program to count events"),
		OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
			   "path to perf_event_attr map"),
#endif
		OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
			    "system-wide collection from all CPUs"),
		OPT_BOOLEAN(0, "scale", &stat_config.scale,
			    "Use --no-scale to disable counter scaling for multiplexing"),
		OPT_INCR('v', "verbose", &verbose,
			 "be more verbose (show counter open errors, etc)"),
		OPT_INTEGER('r', "repeat", &stat_config.run_count,
			    "repeat command and print average + stddev (max: 100, forever: 0)"),
		OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
			    "display details about each run (only with -r option)"),
		OPT_BOOLEAN('n', "null", &stat_config.null_run,
			    "null run - don't start any counters"),
		OPT_INCR('d', "detailed", &detailed_run,
			 "detailed run - start a lot of events"),
		OPT_BOOLEAN('S', "sync", &sync_run,
			    "call sync() before starting a run"),
		OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
				   "print large numbers with thousands' separators",
				   stat__set_big_num),
		OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
			   "list of cpus to monitor in system-wide"),
		OPT_BOOLEAN('A', "no-aggr", &opt_mode.no_aggr,
			    "disable aggregation across CPUs or PMUs"),
		OPT_BOOLEAN(0, "no-merge", &opt_mode.no_aggr,
			    "disable aggregation the same as -A or --no-aggr"),
		OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
			    "Merge identical named hybrid events"),
		OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
			   "print counts with custom separator"),
		OPT_BOOLEAN('j', "json-output", &stat_config.json_output,
			    "print counts in JSON format"),
		OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
			     "monitor event in cgroup name only", parse_stat_cgroups),
		OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
			   "expand events for each cgroup"),
		OPT_STRING('o', "output", &output_name, "file", "output file name"),
		OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
		OPT_INTEGER(0, "log-fd", &output_fd,
			    "log output to fd, instead of stderr"),
		OPT_STRING(0, "pre", &pre_cmd, "command",
			   "command to run prior to the measured command"),
		OPT_STRING(0, "post", &post_cmd, "command",
			   "command to run after the measured command"),
		OPT_UINTEGER('I', "interval-print", &stat_config.interval,
			     "print counts at regular interval in ms "
			     "(overhead is possible for values <= 100ms)"),
		OPT_INTEGER(0, "interval-count", &stat_config.times,
			    "print counts for fixed number of times"),
		OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
			    "clear screen between intervals"),
		OPT_UINTEGER(0, "timeout", &stat_config.timeout,
			     "stop workload and print counts after a timeout period in ms (>= 10ms)"),
		OPT_BOOLEAN(0, "per-socket", &opt_mode.socket,
			    "aggregate counts per processor socket"),
		OPT_BOOLEAN(0, "per-die", &opt_mode.die, "aggregate counts per processor die"),
		OPT_BOOLEAN(0, "per-cluster", &opt_mode.cluster,
			    "aggregate counts per processor cluster"),
		OPT_CALLBACK_OPTARG(0, "per-cache", &opt_mode, &stat_config.aggr_level,
				    "cache level", "aggregate count at this cache level (Default: LLC)",
				    parse_cache_level),
		OPT_BOOLEAN(0, "per-core", &opt_mode.core,
			    "aggregate counts per physical processor core"),
		OPT_BOOLEAN(0, "per-thread", &opt_mode.thread, "aggregate counts per thread"),
		OPT_BOOLEAN(0, "per-node", &opt_mode.node, "aggregate counts per numa node"),
		OPT_INTEGER('D', "delay", &target.initial_delay,
			    "ms to wait before starting measurement after program start (-1: start with events disabled)"),
		OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
				   "Only print computed metrics. No raw values", enable_metric_only),
		OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
			    "don't group metric events, impacts multiplexing"),
		OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
			    "don't try to share events between metrics in a group"),
		OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold,
			    "disable adding events for the metric threshold calculation"),
		OPT_BOOLEAN(0, "topdown", &topdown_run,
			    "measure top-down statistics"),
#ifdef HAVE_ARCH_X86_64_SUPPORT
		OPT_BOOLEAN(0, "record-tpebs", &tpebs_recording,
			    "enable recording for tpebs when retire_latency is required"),
		OPT_CALLBACK(0, "tpebs-mode", &tpebs_mode, "tpebs-mode",
			     "Mode of TPEBS recording: mean, min, max or last",
			     parse_tpebs_mode),
#endif
		OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
			     "Set the metrics level for the top-down statistics (0: max level)"),
		OPT_BOOLEAN(0, "smi-cost", &smi_cost,
			    "measure SMI cost"),
		OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
			     "monitor specified metrics or metric groups (separated by ,)",
			     append_metric_groups),
		OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
				 "Configure all used events to run in kernel space.",
				 PARSE_OPT_EXCLUSIVE),
		OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
				 "Configure all used events to run in user space.",
				 PARSE_OPT_EXCLUSIVE),
		OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
			    "Use with the 'percore' event qualifier to show the event "
			    "counts of one hardware thread by summing up the counts of all "
			    "hardware threads of the same physical core"),
		OPT_BOOLEAN(0, "summary", &stat_config.summary,
			    "print summary for interval mode"),
		OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
			    "don't print 'summary' for CSV summary output"),
		OPT_BOOLEAN(0, "quiet", &quiet,
			    "don't print any output, messages or warnings (useful with record)"),
		OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
			     "Only enable events on CPUs of this type "
			     "on hybrid platforms (e.g. core or atom)",
			     parse_cputype),
#ifdef HAVE_LIBPFM
		OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
			     "libpfm4 event selector. use 'perf list' to list available events",
			     parse_libpfm_events_option),
#endif
		OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
			     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
			     "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
			     "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
			     parse_control_option),
		OPT_CALLBACK_OPTARG(0, "iostat", &evsel_list, &stat_config, "default",
				    "measure I/O performance metrics provided by arch/platform",
				    iostat_parse),
		OPT_END()
	};
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx, err;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval, timeout;
	const char * const stat_subcommands[] = { "record", "report" };
	char errbuf[BUFSIZ];
	struct evsel *counter;

	setlocale(LC_ALL, "");

	evsel_list = evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	parse_events__shrink_config_terms();

	/* String-parsing callback-based options would segfault when negated */
	set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
	set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
	set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);

	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
					(const char **) stat_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);

	stat_config.aggr_mode = opt_aggr_mode_to_aggr_mode(&opt_mode);

	if (stat_config.csv_sep) {
		stat_config.csv_output = true;
		if (!strcmp(stat_config.csv_sep, "\\t"))
			stat_config.csv_sep = "\t";
	} else
		stat_config.csv_sep = DEFAULT_SEPARATOR;

	if (argc && strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		argc = __cmd_record(stat_options, &opt_mode, argc, argv);
		if (argc < 0)
			return -1;
	} else if (argc && strlen(argv[0]) > 2 && strstarts("report", argv[0]))
		return __cmd_report(argc, argv);

	interval = stat_config.interval;
	timeout = stat_config.timeout;

	/*
	 * For the record command the -o option is already taken care of.
	 */
	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
		goto out;
	}

	if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
		goto out;
	}

	if (stat_config.metric_only && stat_config.run_count > 1) {
		fprintf(stderr, "--metric-only is not supported with -r\n");
		goto out;
	}

	if (stat_config.csv_output || (stat_config.metric_only && stat_config.json_output)) {
		/*
		 * Current CSV and metric-only JSON output doesn't display the
		 * metric threshold so don't compute it.
		 */
		stat_config.metric_no_threshold = true;
	}

	if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
		fprintf(stderr, "--table is only supported with -r\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		parse_options_usage(NULL, stat_options, "table", 0);
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be > 0\n");
		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
		goto out;
	}

	if (!output && !quiet) {
		struct timespec tm;

		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		if (!stat_config.json_output) {
			clock_gettime(CLOCK_REALTIME, &tm);
			fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
		}
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	if (stat_config.interval_clear && !isatty(fileno(output))) {
		fprintf(stderr, "--interval-clear does not work with non-terminal output\n");
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
		parse_options_usage(NULL, stat_options, "interval-clear", 0);
		return -1;
	}

	stat_config.output = output;

	/*
	 * Let the spreadsheet do the pretty-printing.
	 */
	if (stat_config.csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			stat_config.big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		stat_config.big_num = false;

	target.inherit = !stat_config.no_inherit;
	err = target__validate(&target);
	if (err) {
		target__strerror(&target, err, errbuf, BUFSIZ);
		pr_warning("%s\n", errbuf);
	}

	setup_system_wide(argc);

	/*
	 * Display user/system times only for a single
	 * run and when there's a specified tracee.
	 */
	if ((stat_config.run_count == 1) && target__none(&target))
		stat_config.ru_display = true;

	if (stat_config.run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		goto out;
	} else if (stat_config.run_count == 0) {
		forever = true;
		stat_config.run_count = 1;
	}

	if (stat_config.walltime_run_table) {
		stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
		if (!stat_config.walltime_run) {
			pr_err("failed to setup -r option");
			goto out;
		}
	}

	if ((stat_config.aggr_mode == AGGR_THREAD) &&
	    !target__has_task(&target)) {
		if (!target.system_wide || target.cpu_list) {
			fprintf(stderr, "The --per-thread option is only "
				"available when monitoring via the -p, -t or -a "
				"options, or with only --per-thread.\n");
			parse_options_usage(NULL, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
			goto out;
		}
	}

	/*
	 * no_aggr and cgroup are for system-wide only.
	 * --per-thread is aggregated per thread, we don't mix it with cpu mode.
	 */
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) ||
	     (nr_cgroups || stat_config.cgroup_list)) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes are only available in system-wide mode\n");

		parse_options_usage(stat_usage, stat_options, "G", 1);
		parse_options_usage(NULL, stat_options, "A", 1);
		parse_options_usage(NULL, stat_options, "a", 1);
		parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
		goto out;
	}

	if (stat_config.iostat_run) {
		status = iostat_prepare(evsel_list, &stat_config);
		if (status)
			goto out;
		if (iostat_mode == IOSTAT_LIST) {
			iostat_list(evsel_list, &stat_config);
			goto out;
		} else if (verbose > 0)
			iostat_list(evsel_list, &stat_config);
		if (iostat_mode == IOSTAT_RUN && !target__has_cpu(&target))
			target.system_wide = true;
	}

	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
		target.per_thread = true;

	stat_config.system_wide = target.system_wide;
	if (target.cpu_list) {
		stat_config.user_requested_cpu_list = strdup(target.cpu_list);
		if (!stat_config.user_requested_cpu_list) {
			status = -ENOMEM;
			goto out;
		}
	}

	/*
	 * Metric parsing needs to be delayed as metrics may optimize events
	 * knowing the target is system-wide.
	 */
	if (metrics) {
		const char *pmu = parse_events_option_args.pmu_filter ?: "all";
		int ret = metricgroup__parse_groups(evsel_list, pmu, metrics,
						    stat_config.metric_no_group,
						    stat_config.metric_no_merge,
						    stat_config.metric_no_threshold,
						    stat_config.user_requested_cpu_list,
						    stat_config.system_wide,
						    stat_config.hardware_aware_grouping);

		zfree(&metrics);
		if (ret) {
			status = ret;
			goto out;
		}
	}

	if (add_default_events())
		goto out;

	if (stat_config.cgroup_list) {
		if (nr_cgroups > 0) {
			pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
			parse_options_usage(stat_usage, stat_options, "G", 1);
			parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
			goto out;
		}

		if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, true) < 0) {
			parse_options_usage(stat_usage, stat_options,
					    "for-each-cgroup", 0);
			goto out;
		}
	}
#ifdef HAVE_BPF_SKEL
	if (target.use_bpf && nr_cgroups &&
	    (evsel_list->core.nr_entries / nr_cgroups) > BPERF_CGROUP__MAX_EVENTS) {
		pr_warning("Disabling BPF counters due to more events (%d) than the max (%d)\n",
			   evsel_list->core.nr_entries / nr_cgroups, BPERF_CGROUP__MAX_EVENTS);
		target.use_bpf = false;
	}
#endif // HAVE_BPF_SKEL
	evlist__warn_user_requested_cpus(evsel_list, target.cpu_list);

	evlist__for_each_entry(evsel_list, counter) {
		/*
		 * Setup BPF counters to require CPUs as any(-1) isn't
		 * supported. evlist__create_maps below will propagate this
		 * information to the evsels. Note, evsel__is_bperf isn't yet
		 * set up, and this change must happen early, so directly use
		 * the bpf_counter variable and target information.
		 */
		if ((counter->bpf_counter || target.use_bpf) && !target__has_cpu(&target))
			counter->core.requires_cpu = true;
	}

	if (evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads to monitor\n");
			parse_options_usage(stat_usage, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, stat_options, "C", 1);
			parse_options_usage(NULL, stat_options, "a", 1);
		}
		goto out;
	}

	evlist__check_cpu_maps(evsel_list);

	/*
	 * Initialize thread_map with comm names,
	 * so we can print them in the output.
	 */
	if (stat_config.aggr_mode == AGGR_THREAD)
		thread_map__read_comms(evsel_list->core.threads);

	if (stat_config.aggr_mode == AGGR_NODE)
		cpu__setup_cpunode_map();
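
	/*
	 * --interval-count only makes sense together with -I, e.g.
	 * (illustrative) 'perf stat -a -I 1000 --interval-count 5' prints
	 * five one-second interval reports and then stops.
	 */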
" 2856 "Please proceed with caution.\n"); 2857 } 2858 if (timeout && interval) { 2859 pr_err("timeout option is not supported with interval-print.\n"); 2860 parse_options_usage(stat_usage, stat_options, "timeout", 0); 2861 parse_options_usage(stat_usage, stat_options, "I", 1); 2862 goto out; 2863 } 2864 2865 if (perf_stat_init_aggr_mode()) 2866 goto out; 2867 2868 if (evlist__alloc_stats(&stat_config, evsel_list, interval)) 2869 goto out; 2870 2871 /* 2872 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 2873 * while avoiding that older tools show confusing messages. 2874 * 2875 * However for pipe sessions we need to keep it zero, 2876 * because script's perf_evsel__check_attr is triggered 2877 * by attr->sample_type != 0, and we can't run it on 2878 * stat sessions. 2879 */ 2880 stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe); 2881 2882 /* 2883 * We dont want to block the signals - that would cause 2884 * child tasks to inherit that and Ctrl-C would not work. 2885 * What we want is for Ctrl-C to work in the exec()-ed 2886 * task, but being ignored by perf stat itself: 2887 */ 2888 atexit(sig_atexit); 2889 if (!forever) 2890 signal(SIGINT, skip_signal); 2891 signal(SIGCHLD, skip_signal); 2892 signal(SIGALRM, skip_signal); 2893 signal(SIGABRT, skip_signal); 2894 2895 if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack)) 2896 goto out; 2897 2898 /* Enable ignoring missing threads when -p option is defined. */ 2899 evlist__first(evsel_list)->ignore_missing_thread = target.pid; 2900 status = 0; 2901 for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) { 2902 if (stat_config.run_count != 1 && verbose > 0) 2903 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 2904 run_idx + 1); 2905 2906 if (run_idx != 0) 2907 evlist__reset_prev_raw_counts(evsel_list); 2908 2909 status = run_perf_stat(argc, argv, run_idx); 2910 if (status == -1) 2911 break; 2912 2913 if (forever && !interval) { 2914 print_counters(NULL, argc, argv); 2915 perf_stat__reset_stats(); 2916 } 2917 } 2918 2919 if (!forever && status != -1 && (!interval || stat_config.summary)) { 2920 if (stat_config.run_count > 1) 2921 evlist__copy_res_stats(&stat_config, evsel_list); 2922 print_counters(NULL, argc, argv); 2923 } 2924 2925 evlist__finalize_ctlfd(evsel_list); 2926 2927 if (STAT_RECORD) { 2928 /* 2929 * We synthesize the kernel mmap record just so that older tools 2930 * don't emit warnings about not being able to resolve symbols 2931 * due to /proc/sys/kernel/kptr_restrict settings and instead provide 2932 * a saner message about no samples being in the perf.data file. 

	if (STAT_RECORD) {
		/*
		 * We synthesize the kernel mmap record just so that older tools
		 * don't emit warnings about not being able to resolve symbols
		 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
		 * a saner message about no samples being in the perf.data file.
		 *
		 * This also serves to suppress a warning about f_header.data.size == 0
		 * in header.c at the moment 'perf stat record' gets introduced, which
		 * is not really needed once we start adding the stat specific PERF_RECORD_
		 * records, but the need to suppress the kptr_restrict messages in older
		 * tools remains. -acme
		 */
		int fd = perf_data__fd(&perf_stat.data);

		err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
							 process_synthesized_event,
							 &perf_stat.session->machines.host);
		if (err) {
			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
				   "older tools may produce warnings about this file.\n");
		}

		if (!interval) {
			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
				pr_err("failed to write stat round event\n");
		}

		if (!perf_stat.data.is_pipe) {
			perf_stat.session->header.data_size += perf_stat.bytes_written;
			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
		}

		evlist__close(evsel_list);
		perf_session__delete(perf_stat.session);
	}

	perf_stat__exit_aggr_mode();
	evlist__free_stats(evsel_list);
out:
	if (stat_config.iostat_run)
		iostat_release(evsel_list);

	zfree(&stat_config.walltime_run);
	zfree(&stat_config.user_requested_cpu_list);

	if (smi_cost && smi_reset)
		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);

	evlist__delete(evsel_list);

	evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);

	return status;
}