1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * builtin-stat.c 4 * 5 * Builtin stat command: Give a precise performance counters summary 6 * overview about any workload, CPU or specific PID. 7 * 8 * Sample output: 9 10 $ perf stat ./hackbench 10 11 12 Time: 0.118 13 14 Performance counter stats for './hackbench 10': 15 16 1708.761321 task-clock # 11.037 CPUs utilized 17 41,190 context-switches # 0.024 M/sec 18 6,735 CPU-migrations # 0.004 M/sec 19 17,318 page-faults # 0.010 M/sec 20 5,205,202,243 cycles # 3.046 GHz 21 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 22 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 23 2,603,501,247 instructions # 0.50 insns per cycle 24 # 1.48 stalled cycles per insn 25 484,357,498 branches # 283.455 M/sec 26 6,388,934 branch-misses # 1.32% of all branches 27 28 0.154822978 seconds time elapsed 29 30 * 31 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 32 * 33 * Improvements and fixes by: 34 * 35 * Arjan van de Ven <arjan@linux.intel.com> 36 * Yanmin Zhang <yanmin.zhang@intel.com> 37 * Wu Fengguang <fengguang.wu@intel.com> 38 * Mike Galbraith <efault@gmx.de> 39 * Paul Mackerras <paulus@samba.org> 40 * Jaswinder Singh Rajput <jaswinder@kernel.org> 41 */ 42 43 #include "builtin.h" 44 #include "perf.h" 45 #include "util/cgroup.h" 46 #include <subcmd/parse-options.h> 47 #include "util/parse-events.h" 48 #include "util/pmu.h" 49 #include "util/event.h" 50 #include "util/evlist.h" 51 #include "util/evsel.h" 52 #include "util/debug.h" 53 #include "util/color.h" 54 #include "util/stat.h" 55 #include "util/header.h" 56 #include "util/cpumap.h" 57 #include "util/thread_map.h" 58 #include "util/counts.h" 59 #include "util/group.h" 60 #include "util/session.h" 61 #include "util/tool.h" 62 #include "util/string2.h" 63 #include "util/metricgroup.h" 64 #include "util/target.h" 65 #include "util/time-utils.h" 66 #include "util/top.h" 67 #include "asm/bug.h" 68 69 #include <linux/time64.h> 70 #include <linux/zalloc.h> 71 #include <api/fs/fs.h> 72 #include <errno.h> 73 #include <signal.h> 74 #include <stdlib.h> 75 #include <sys/prctl.h> 76 #include <inttypes.h> 77 #include <locale.h> 78 #include <math.h> 79 #include <sys/types.h> 80 #include <sys/stat.h> 81 #include <sys/wait.h> 82 #include <unistd.h> 83 #include <sys/time.h> 84 #include <sys/resource.h> 85 86 #include <linux/ctype.h> 87 #include <perf/evlist.h> 88 89 #define DEFAULT_SEPARATOR " " 90 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" 91 92 static void print_counters(struct timespec *ts, int argc, const char **argv); 93 94 /* Default events used for perf stat -T */ 95 static const char *transaction_attrs = { 96 "task-clock," 97 "{" 98 "instructions," 99 "cycles," 100 "cpu/cycles-t/," 101 "cpu/tx-start/," 102 "cpu/el-start/," 103 "cpu/cycles-ct/" 104 "}" 105 }; 106 107 /* More limited version when the CPU does not have all events. 
*/ 108 static const char * transaction_limited_attrs = { 109 "task-clock," 110 "{" 111 "instructions," 112 "cycles," 113 "cpu/cycles-t/," 114 "cpu/tx-start/" 115 "}" 116 }; 117 118 static const char * topdown_attrs[] = { 119 "topdown-total-slots", 120 "topdown-slots-retired", 121 "topdown-recovery-bubbles", 122 "topdown-fetch-bubbles", 123 "topdown-slots-issued", 124 NULL, 125 }; 126 127 static const char *smi_cost_attrs = { 128 "{" 129 "msr/aperf/," 130 "msr/smi/," 131 "cycles" 132 "}" 133 }; 134 135 static struct evlist *evsel_list; 136 137 static struct target target = { 138 .uid = UINT_MAX, 139 }; 140 141 #define METRIC_ONLY_LEN 20 142 143 static volatile pid_t child_pid = -1; 144 static int detailed_run = 0; 145 static bool transaction_run; 146 static bool topdown_run = false; 147 static bool smi_cost = false; 148 static bool smi_reset = false; 149 static int big_num_opt = -1; 150 static bool group = false; 151 static const char *pre_cmd = NULL; 152 static const char *post_cmd = NULL; 153 static bool sync_run = false; 154 static bool forever = false; 155 static bool force_metric_only = false; 156 static struct timespec ref_time; 157 static bool append_file; 158 static bool interval_count; 159 static const char *output_name; 160 static int output_fd; 161 162 struct perf_stat { 163 bool record; 164 struct perf_data data; 165 struct perf_session *session; 166 u64 bytes_written; 167 struct perf_tool tool; 168 bool maps_allocated; 169 struct perf_cpu_map *cpus; 170 struct perf_thread_map *threads; 171 enum aggr_mode aggr_mode; 172 }; 173 174 static struct perf_stat perf_stat; 175 #define STAT_RECORD perf_stat.record 176 177 static volatile int done = 0; 178 179 static struct perf_stat_config stat_config = { 180 .aggr_mode = AGGR_GLOBAL, 181 .scale = true, 182 .unit_width = 4, /* strlen("unit") */ 183 .run_count = 1, 184 .metric_only_len = METRIC_ONLY_LEN, 185 .walltime_nsecs_stats = &walltime_nsecs_stats, 186 .big_num = true, 187 }; 188 189 static inline void diff_timespec(struct timespec *r, struct timespec *a, 190 struct timespec *b) 191 { 192 r->tv_sec = a->tv_sec - b->tv_sec; 193 if (a->tv_nsec < b->tv_nsec) { 194 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; 195 r->tv_sec--; 196 } else { 197 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 198 } 199 } 200 201 static void perf_stat__reset_stats(void) 202 { 203 int i; 204 205 perf_evlist__reset_stats(evsel_list); 206 perf_stat__reset_shadow_stats(); 207 208 for (i = 0; i < stat_config.stats_num; i++) 209 perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); 210 } 211 212 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 213 union perf_event *event, 214 struct perf_sample *sample __maybe_unused, 215 struct machine *machine __maybe_unused) 216 { 217 if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) { 218 pr_err("failed to write perf data, error: %m\n"); 219 return -1; 220 } 221 222 perf_stat.bytes_written += event->header.size; 223 return 0; 224 } 225 226 static int write_stat_round_event(u64 tm, u64 type) 227 { 228 return perf_event__synthesize_stat_round(NULL, tm, type, 229 process_synthesized_event, 230 NULL); 231 } 232 233 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 234 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 235 236 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 237 238 static int 239 perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, 240 struct perf_counts_values *count) 241 { 242 struct perf_sample_id *sid = SID(counter, cpu, 
thread);

	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
					   process_synthesized_event, NULL);
}

static int read_single_counter(struct evsel *counter, int cpu,
			       int thread, struct timespec *rs)
{
	if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
		u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
		struct perf_counts_values *count =
			perf_counts(counter->counts, cpu, thread);
		count->ena = count->run = val;
		count->val = val;
		return 0;
	}
	return perf_evsel__read_counter(counter, cpu, thread);
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct evsel *counter, struct timespec *rs)
{
	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
	int ncpus, cpu, thread;

	if (target__has_cpu(&target) && !target__has_per_thread(&target))
		ncpus = perf_evsel__nr_cpus(counter);
	else
		ncpus = 1;

	if (!counter->supported)
		return -ENOENT;

	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);

			/*
			 * The leader's group read loads data into its group members
			 * (via perf_evsel__read_counter) and sets their count->loaded.
			 */
			if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
			    read_single_counter(counter, cpu, thread, rs)) {
				counter->counts->scaled = -1;
				perf_counts(counter->counts, cpu, thread)->ena = 0;
				perf_counts(counter->counts, cpu, thread)->run = 0;
				return -1;
			}

			perf_counts__set_loaded(counter->counts, cpu, thread, false);

			if (STAT_RECORD) {
				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
					pr_err("failed to write stat event\n");
					return -1;
				}
			}

			if (verbose > 1) {
				fprintf(stat_config.output,
					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
					perf_evsel__name(counter),
					cpu,
					count->val, count->ena, count->run);
			}
		}
	}

	return 0;
}

static void read_counters(struct timespec *rs)
{
	struct evsel *counter;
	int ret;

	evlist__for_each_entry(evsel_list, counter) {
		ret = read_counter(counter, rs);
		if (ret)
			pr_debug("failed to read counter %s\n", counter->name);

		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);
	}
}

static void process_interval(void)
{
	struct timespec ts, rs;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

	read_counters(&rs);

	if (STAT_RECORD) {
		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
			pr_err("failed to write stat round event\n");
	}

	init_stats(&walltime_nsecs_stats);
	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
	print_counters(&rs, 0, NULL);
}

static void enable_counters(void)
{
	if (stat_config.initial_delay)
		usleep(stat_config.initial_delay * USEC_PER_MSEC);

	/*
	 * We need to enable counters only if:
	 * - we don't have tracee (attaching to task or cpu)
	 * - we have initial delay configured
	 */
	if (!target__none(&target) || stat_config.initial_delay)
		evlist__enable(evsel_list);
}

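/*
 * Interval mode in a nutshell (illustrative note, in the spirit of the
 * "Sample output" comment at the top of this file): with -I <ms>,
 * __run_perf_stat() below sleeps in <ms> steps and calls process_interval(),
 * which timestamps each printout relative to ref_time via diff_timespec().
 * For example, a run such as:
 *
 *   perf stat -a -I 1000 -e cycles -- sleep 3
 *
 * is expected to print one counter snapshot roughly every second, each line
 * prefixed with the elapsed time since the counters were enabled.
 */
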
370 static void disable_counters(void) 371 { 372 /* 373 * If we don't have tracee (attaching to task or cpu), counters may 374 * still be running. To get accurate group ratios, we must stop groups 375 * from counting before reading their constituent counters. 376 */ 377 if (!target__none(&target)) 378 evlist__disable(evsel_list); 379 } 380 381 static volatile int workload_exec_errno; 382 383 /* 384 * perf_evlist__prepare_workload will send a SIGUSR1 385 * if the fork fails, since we asked by setting its 386 * want_signal to true. 387 */ 388 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 389 void *ucontext __maybe_unused) 390 { 391 workload_exec_errno = info->si_value.sival_int; 392 } 393 394 static bool perf_evsel__should_store_id(struct evsel *counter) 395 { 396 return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID; 397 } 398 399 static bool is_target_alive(struct target *_target, 400 struct perf_thread_map *threads) 401 { 402 struct stat st; 403 int i; 404 405 if (!target__has_task(_target)) 406 return true; 407 408 for (i = 0; i < threads->nr; i++) { 409 char path[PATH_MAX]; 410 411 scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(), 412 threads->map[i].pid); 413 414 if (!stat(path, &st)) 415 return true; 416 } 417 418 return false; 419 } 420 421 static int __run_perf_stat(int argc, const char **argv, int run_idx) 422 { 423 int interval = stat_config.interval; 424 int times = stat_config.times; 425 int timeout = stat_config.timeout; 426 char msg[BUFSIZ]; 427 unsigned long long t0, t1; 428 struct evsel *counter; 429 struct timespec ts; 430 size_t l; 431 int status = 0; 432 const bool forks = (argc > 0); 433 bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; 434 435 if (interval) { 436 ts.tv_sec = interval / USEC_PER_MSEC; 437 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; 438 } else if (timeout) { 439 ts.tv_sec = timeout / USEC_PER_MSEC; 440 ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; 441 } else { 442 ts.tv_sec = 1; 443 ts.tv_nsec = 0; 444 } 445 446 if (forks) { 447 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 448 workload_exec_failed_signal) < 0) { 449 perror("failed to prepare workload"); 450 return -1; 451 } 452 child_pid = evsel_list->workload.pid; 453 } 454 455 if (group) 456 perf_evlist__set_leader(evsel_list); 457 458 evlist__for_each_entry(evsel_list, counter) { 459 try_again: 460 if (create_perf_stat_counter(counter, &stat_config, &target) < 0) { 461 462 /* Weak group failed. Reset the group. */ 463 if ((errno == EINVAL || errno == EBADF) && 464 counter->leader != counter && 465 counter->weak_group) { 466 counter = perf_evlist__reset_weak_group(evsel_list, counter); 467 goto try_again; 468 } 469 470 /* 471 * PPC returns ENXIO for HW counters until 2.6.37 472 * (behavior changed with commit b0a873e). 
473 */ 474 if (errno == EINVAL || errno == ENOSYS || 475 errno == ENOENT || errno == EOPNOTSUPP || 476 errno == ENXIO) { 477 if (verbose > 0) 478 ui__warning("%s event is not supported by the kernel.\n", 479 perf_evsel__name(counter)); 480 counter->supported = false; 481 482 if ((counter->leader != counter) || 483 !(counter->leader->core.nr_members > 1)) 484 continue; 485 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 486 if (verbose > 0) 487 ui__warning("%s\n", msg); 488 goto try_again; 489 } else if (target__has_per_thread(&target) && 490 evsel_list->core.threads && 491 evsel_list->core.threads->err_thread != -1) { 492 /* 493 * For global --per-thread case, skip current 494 * error thread. 495 */ 496 if (!thread_map__remove(evsel_list->core.threads, 497 evsel_list->core.threads->err_thread)) { 498 evsel_list->core.threads->err_thread = -1; 499 goto try_again; 500 } 501 } 502 503 perf_evsel__open_strerror(counter, &target, 504 errno, msg, sizeof(msg)); 505 ui__error("%s\n", msg); 506 507 if (child_pid != -1) 508 kill(child_pid, SIGTERM); 509 510 return -1; 511 } 512 counter->supported = true; 513 514 l = strlen(counter->unit); 515 if (l > stat_config.unit_width) 516 stat_config.unit_width = l; 517 518 if (perf_evsel__should_store_id(counter) && 519 perf_evsel__store_ids(counter, evsel_list)) 520 return -1; 521 } 522 523 if (perf_evlist__apply_filters(evsel_list, &counter)) { 524 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 525 counter->filter, perf_evsel__name(counter), errno, 526 str_error_r(errno, msg, sizeof(msg))); 527 return -1; 528 } 529 530 if (STAT_RECORD) { 531 int err, fd = perf_data__fd(&perf_stat.data); 532 533 if (is_pipe) { 534 err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); 535 } else { 536 err = perf_session__write_header(perf_stat.session, evsel_list, 537 fd, false); 538 } 539 540 if (err < 0) 541 return err; 542 543 err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list, 544 process_synthesized_event, is_pipe); 545 if (err < 0) 546 return err; 547 } 548 549 /* 550 * Enable counters and exec the command: 551 */ 552 t0 = rdclock(); 553 clock_gettime(CLOCK_MONOTONIC, &ref_time); 554 555 if (forks) { 556 perf_evlist__start_workload(evsel_list); 557 enable_counters(); 558 559 if (interval || timeout) { 560 while (!waitpid(child_pid, &status, WNOHANG)) { 561 nanosleep(&ts, NULL); 562 if (timeout) 563 break; 564 process_interval(); 565 if (interval_count && !(--times)) 566 break; 567 } 568 } 569 if (child_pid != -1) 570 wait4(child_pid, &status, 0, &stat_config.ru_data); 571 572 if (workload_exec_errno) { 573 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 574 pr_err("Workload failed: %s\n", emsg); 575 return -1; 576 } 577 578 if (WIFSIGNALED(status)) 579 psignal(WTERMSIG(status), argv[0]); 580 } else { 581 enable_counters(); 582 while (!done) { 583 nanosleep(&ts, NULL); 584 if (!is_target_alive(&target, evsel_list->core.threads)) 585 break; 586 if (timeout) 587 break; 588 if (interval) { 589 process_interval(); 590 if (interval_count && !(--times)) 591 break; 592 } 593 } 594 } 595 596 disable_counters(); 597 598 t1 = rdclock(); 599 600 if (stat_config.walltime_run_table) 601 stat_config.walltime_run[run_idx] = t1 - t0; 602 603 update_stats(&walltime_nsecs_stats, t1 - t0); 604 605 /* 606 * Closing a group leader splits the group, and as we only disable 607 * group leaders, results in remaining events becoming enabled. 
To 608 * avoid arbitrary skew, we must read all counters before closing any 609 * group leaders. 610 */ 611 read_counters(&(struct timespec) { .tv_nsec = t1-t0 }); 612 613 /* 614 * We need to keep evsel_list alive, because it's processed 615 * later the evsel_list will be closed after. 616 */ 617 if (!STAT_RECORD) 618 evlist__close(evsel_list); 619 620 return WEXITSTATUS(status); 621 } 622 623 static int run_perf_stat(int argc, const char **argv, int run_idx) 624 { 625 int ret; 626 627 if (pre_cmd) { 628 ret = system(pre_cmd); 629 if (ret) 630 return ret; 631 } 632 633 if (sync_run) 634 sync(); 635 636 ret = __run_perf_stat(argc, argv, run_idx); 637 if (ret) 638 return ret; 639 640 if (post_cmd) { 641 ret = system(post_cmd); 642 if (ret) 643 return ret; 644 } 645 646 return ret; 647 } 648 649 static void print_counters(struct timespec *ts, int argc, const char **argv) 650 { 651 /* Do not print anything if we record to the pipe. */ 652 if (STAT_RECORD && perf_stat.data.is_pipe) 653 return; 654 655 perf_evlist__print_counters(evsel_list, &stat_config, &target, 656 ts, argc, argv); 657 } 658 659 static volatile int signr = -1; 660 661 static void skip_signal(int signo) 662 { 663 if ((child_pid == -1) || stat_config.interval) 664 done = 1; 665 666 signr = signo; 667 /* 668 * render child_pid harmless 669 * won't send SIGTERM to a random 670 * process in case of race condition 671 * and fast PID recycling 672 */ 673 child_pid = -1; 674 } 675 676 static void sig_atexit(void) 677 { 678 sigset_t set, oset; 679 680 /* 681 * avoid race condition with SIGCHLD handler 682 * in skip_signal() which is modifying child_pid 683 * goal is to avoid send SIGTERM to a random 684 * process 685 */ 686 sigemptyset(&set); 687 sigaddset(&set, SIGCHLD); 688 sigprocmask(SIG_BLOCK, &set, &oset); 689 690 if (child_pid != -1) 691 kill(child_pid, SIGTERM); 692 693 sigprocmask(SIG_SETMASK, &oset, NULL); 694 695 if (signr == -1) 696 return; 697 698 signal(signr, SIG_DFL); 699 kill(getpid(), signr); 700 } 701 702 static int stat__set_big_num(const struct option *opt __maybe_unused, 703 const char *s __maybe_unused, int unset) 704 { 705 big_num_opt = unset ? 0 : 1; 706 return 0; 707 } 708 709 static int enable_metric_only(const struct option *opt __maybe_unused, 710 const char *s __maybe_unused, int unset) 711 { 712 force_metric_only = true; 713 stat_config.metric_only = !unset; 714 return 0; 715 } 716 717 static int parse_metric_groups(const struct option *opt, 718 const char *str, 719 int unset __maybe_unused) 720 { 721 return metricgroup__parse_groups(opt, str, &stat_config.metric_events); 722 } 723 724 static struct option stat_options[] = { 725 OPT_BOOLEAN('T', "transaction", &transaction_run, 726 "hardware transaction statistics"), 727 OPT_CALLBACK('e', "event", &evsel_list, "event", 728 "event selector. 
use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN(0, "scale", &stat_config.scale,
		    "Use --no-scale to disable counter scaling for multiplexing"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &stat_config.run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
		    "display details about each run (only with -r option)"),
	OPT_BOOLEAN('n', "null", &stat_config.null_run,
		    "null run - don't start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		 "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
		   "command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
		   "command to run after the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		     "print counts at regular interval in ms "
		     "(overhead is possible for values <= 100ms)"),
	OPT_INTEGER(0, "interval-count", &stat_config.times,
		    "print counts for fixed number of times"),
	OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
		    "clear screen in between new interval"),
	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
		     "stop workload and print counts after a timeout period in ms (>= 10ms)"),
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
		     "aggregate counts per processor die", AGGR_DIE),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_CALLBACK_NOOPT(0,
"metric-only", &stat_config.metric_only, NULL, 796 "Only print computed metrics. No raw values", enable_metric_only), 797 OPT_BOOLEAN(0, "topdown", &topdown_run, 798 "measure topdown level 1 statistics"), 799 OPT_BOOLEAN(0, "smi-cost", &smi_cost, 800 "measure SMI cost"), 801 OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", 802 "monitor specified metrics or metric groups (separated by ,)", 803 parse_metric_groups), 804 OPT_END() 805 }; 806 807 static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, 808 struct perf_cpu_map *map, int cpu) 809 { 810 return cpu_map__get_socket(map, cpu, NULL); 811 } 812 813 static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, 814 struct perf_cpu_map *map, int cpu) 815 { 816 return cpu_map__get_die(map, cpu, NULL); 817 } 818 819 static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, 820 struct perf_cpu_map *map, int cpu) 821 { 822 return cpu_map__get_core(map, cpu, NULL); 823 } 824 825 static int perf_stat__get_aggr(struct perf_stat_config *config, 826 aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) 827 { 828 int cpu; 829 830 if (idx >= map->nr) 831 return -1; 832 833 cpu = map->map[idx]; 834 835 if (config->cpus_aggr_map->map[cpu] == -1) 836 config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); 837 838 return config->cpus_aggr_map->map[cpu]; 839 } 840 841 static int perf_stat__get_socket_cached(struct perf_stat_config *config, 842 struct perf_cpu_map *map, int idx) 843 { 844 return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); 845 } 846 847 static int perf_stat__get_die_cached(struct perf_stat_config *config, 848 struct perf_cpu_map *map, int idx) 849 { 850 return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); 851 } 852 853 static int perf_stat__get_core_cached(struct perf_stat_config *config, 854 struct perf_cpu_map *map, int idx) 855 { 856 return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); 857 } 858 859 static bool term_percore_set(void) 860 { 861 struct evsel *counter; 862 863 evlist__for_each_entry(evsel_list, counter) { 864 if (counter->percore) 865 return true; 866 } 867 868 return false; 869 } 870 871 static int perf_stat_init_aggr_mode(void) 872 { 873 int nr; 874 875 switch (stat_config.aggr_mode) { 876 case AGGR_SOCKET: 877 if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) { 878 perror("cannot build socket map"); 879 return -1; 880 } 881 stat_config.aggr_get_id = perf_stat__get_socket_cached; 882 break; 883 case AGGR_DIE: 884 if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) { 885 perror("cannot build die map"); 886 return -1; 887 } 888 stat_config.aggr_get_id = perf_stat__get_die_cached; 889 break; 890 case AGGR_CORE: 891 if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) { 892 perror("cannot build core map"); 893 return -1; 894 } 895 stat_config.aggr_get_id = perf_stat__get_core_cached; 896 break; 897 case AGGR_NONE: 898 if (term_percore_set()) { 899 if (cpu_map__build_core_map(evsel_list->core.cpus, 900 &stat_config.aggr_map)) { 901 perror("cannot build core map"); 902 return -1; 903 } 904 stat_config.aggr_get_id = perf_stat__get_core_cached; 905 } 906 break; 907 case AGGR_GLOBAL: 908 case AGGR_THREAD: 909 case AGGR_UNSET: 910 default: 911 break; 912 } 913 914 /* 915 * The evsel_list->cpus is the base we operate on, 916 * taking the highest cpu number to be the size of 917 * the aggregation translate cpumap. 
918 */ 919 nr = perf_cpu_map__max(evsel_list->core.cpus); 920 stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1); 921 return stat_config.cpus_aggr_map ? 0 : -ENOMEM; 922 } 923 924 static void perf_stat__exit_aggr_mode(void) 925 { 926 perf_cpu_map__put(stat_config.aggr_map); 927 perf_cpu_map__put(stat_config.cpus_aggr_map); 928 stat_config.aggr_map = NULL; 929 stat_config.cpus_aggr_map = NULL; 930 } 931 932 static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx) 933 { 934 int cpu; 935 936 if (idx > map->nr) 937 return -1; 938 939 cpu = map->map[idx]; 940 941 if (cpu >= env->nr_cpus_avail) 942 return -1; 943 944 return cpu; 945 } 946 947 static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) 948 { 949 struct perf_env *env = data; 950 int cpu = perf_env__get_cpu(env, map, idx); 951 952 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 953 } 954 955 static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) 956 { 957 struct perf_env *env = data; 958 int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); 959 960 if (cpu != -1) { 961 /* 962 * Encode socket in bit range 15:8 963 * die_id is relative to socket, 964 * we need a global id. So we combine 965 * socket + die id 966 */ 967 if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) 968 return -1; 969 970 if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) 971 return -1; 972 973 die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); 974 } 975 976 return die_id; 977 } 978 979 static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) 980 { 981 struct perf_env *env = data; 982 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 983 984 if (cpu != -1) { 985 /* 986 * Encode socket in bit range 31:24 987 * encode die id in bit range 23:16 988 * core_id is relative to socket and die, 989 * we need a global id. 
So we combine 990 * socket + die id + core id 991 */ 992 if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) 993 return -1; 994 995 if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) 996 return -1; 997 998 if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) 999 return -1; 1000 1001 core = (env->cpu[cpu].socket_id << 24) | 1002 (env->cpu[cpu].die_id << 16) | 1003 (env->cpu[cpu].core_id & 0xffff); 1004 } 1005 1006 return core; 1007 } 1008 1009 static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, 1010 struct perf_cpu_map **sockp) 1011 { 1012 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1013 } 1014 1015 static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, 1016 struct perf_cpu_map **diep) 1017 { 1018 return cpu_map__build_map(cpus, diep, perf_env__get_die, env); 1019 } 1020 1021 static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, 1022 struct perf_cpu_map **corep) 1023 { 1024 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 1025 } 1026 1027 static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, 1028 struct perf_cpu_map *map, int idx) 1029 { 1030 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1031 } 1032 static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, 1033 struct perf_cpu_map *map, int idx) 1034 { 1035 return perf_env__get_die(map, idx, &perf_stat.session->header.env); 1036 } 1037 1038 static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, 1039 struct perf_cpu_map *map, int idx) 1040 { 1041 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 1042 } 1043 1044 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1045 { 1046 struct perf_env *env = &st->session->header.env; 1047 1048 switch (stat_config.aggr_mode) { 1049 case AGGR_SOCKET: 1050 if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { 1051 perror("cannot build socket map"); 1052 return -1; 1053 } 1054 stat_config.aggr_get_id = perf_stat__get_socket_file; 1055 break; 1056 case AGGR_DIE: 1057 if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { 1058 perror("cannot build die map"); 1059 return -1; 1060 } 1061 stat_config.aggr_get_id = perf_stat__get_die_file; 1062 break; 1063 case AGGR_CORE: 1064 if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { 1065 perror("cannot build core map"); 1066 return -1; 1067 } 1068 stat_config.aggr_get_id = perf_stat__get_core_file; 1069 break; 1070 case AGGR_NONE: 1071 case AGGR_GLOBAL: 1072 case AGGR_THREAD: 1073 case AGGR_UNSET: 1074 default: 1075 break; 1076 } 1077 1078 return 0; 1079 } 1080 1081 static int topdown_filter_events(const char **attr, char **str, bool use_group) 1082 { 1083 int off = 0; 1084 int i; 1085 int len = 0; 1086 char *s; 1087 1088 for (i = 0; attr[i]; i++) { 1089 if (pmu_have_event("cpu", attr[i])) { 1090 len += strlen(attr[i]) + 1; 1091 attr[i - off] = attr[i]; 1092 } else 1093 off++; 1094 } 1095 attr[i - off] = NULL; 1096 1097 *str = malloc(len + 1 + 2); 1098 if (!*str) 1099 return -1; 1100 s = *str; 1101 if (i - off == 0) { 1102 *s = 0; 1103 return 0; 1104 } 1105 if (use_group) 1106 *s++ = '{'; 1107 for (i = 0; attr[i]; i++) { 1108 strcpy(s, attr[i]); 1109 s += strlen(s); 1110 *s++ = ','; 1111 } 1112 if (use_group) { 1113 s[-1] = 
'}'; 1114 *s = 0; 1115 } else 1116 s[-1] = 0; 1117 return 0; 1118 } 1119 1120 __weak bool arch_topdown_check_group(bool *warn) 1121 { 1122 *warn = false; 1123 return false; 1124 } 1125 1126 __weak void arch_topdown_group_warn(void) 1127 { 1128 } 1129 1130 /* 1131 * Add default attributes, if there were no attributes specified or 1132 * if -d/--detailed, -d -d or -d -d -d is used: 1133 */ 1134 static int add_default_attributes(void) 1135 { 1136 int err; 1137 struct perf_event_attr default_attrs0[] = { 1138 1139 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1140 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1141 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1142 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1143 1144 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1145 }; 1146 struct perf_event_attr frontend_attrs[] = { 1147 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1148 }; 1149 struct perf_event_attr backend_attrs[] = { 1150 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1151 }; 1152 struct perf_event_attr default_attrs1[] = { 1153 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1154 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1155 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1156 1157 }; 1158 1159 /* 1160 * Detailed stats (-d), covering the L1 and last level data caches: 1161 */ 1162 struct perf_event_attr detailed_attrs[] = { 1163 1164 { .type = PERF_TYPE_HW_CACHE, 1165 .config = 1166 PERF_COUNT_HW_CACHE_L1D << 0 | 1167 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1168 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1169 1170 { .type = PERF_TYPE_HW_CACHE, 1171 .config = 1172 PERF_COUNT_HW_CACHE_L1D << 0 | 1173 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1174 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1175 1176 { .type = PERF_TYPE_HW_CACHE, 1177 .config = 1178 PERF_COUNT_HW_CACHE_LL << 0 | 1179 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1180 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1181 1182 { .type = PERF_TYPE_HW_CACHE, 1183 .config = 1184 PERF_COUNT_HW_CACHE_LL << 0 | 1185 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1186 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1187 }; 1188 1189 /* 1190 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1191 */ 1192 struct perf_event_attr very_detailed_attrs[] = { 1193 1194 { .type = PERF_TYPE_HW_CACHE, 1195 .config = 1196 PERF_COUNT_HW_CACHE_L1I << 0 | 1197 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1198 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1199 1200 { .type = PERF_TYPE_HW_CACHE, 1201 .config = 1202 PERF_COUNT_HW_CACHE_L1I << 0 | 1203 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1204 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1205 1206 { .type = PERF_TYPE_HW_CACHE, 1207 .config = 1208 PERF_COUNT_HW_CACHE_DTLB << 0 | 1209 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1210 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1211 1212 { .type = PERF_TYPE_HW_CACHE, 1213 .config = 1214 PERF_COUNT_HW_CACHE_DTLB << 0 | 1215 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1216 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1217 1218 { .type = PERF_TYPE_HW_CACHE, 1219 .config = 1220 PERF_COUNT_HW_CACHE_ITLB << 0 | 1221 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1222 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1223 1224 { .type = PERF_TYPE_HW_CACHE, 1225 .config = 1226 PERF_COUNT_HW_CACHE_ITLB << 0 | 1227 
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
	struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};
	struct parse_events_error errinfo;

	/* Set attrs if no event is selected and !null_run: */
	if (stat_config.null_run)
		return 0;

	if (transaction_run) {
		/* Handle -T as -M transaction. Once platform specific metrics
		 * support has been added to the json files, all architectures
		 * will use this approach. To determine transaction support
		 * on an architecture, test for such a metric name.
		 */
		if (metricgroup__has_metric("transaction")) {
			struct option opt = { .value = &evsel_list };

			return metricgroup__parse_groups(&opt, "transaction",
							 &stat_config.metric_events);
		}

		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = parse_events(evsel_list, transaction_attrs,
					   &errinfo);
		else
			err = parse_events(evsel_list,
					   transaction_limited_attrs,
					   &errinfo);
		if (err) {
			fprintf(stderr, "Cannot set up transaction events\n");
			parse_events_print_error(&errinfo, transaction_attrs);
			return -1;
		}
		return 0;
	}

	if (smi_cost) {
		int smi;

		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
			fprintf(stderr, "freeze_on_smi is not supported.\n");
			return -1;
		}

		if (!smi) {
			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
				fprintf(stderr, "Failed to set freeze_on_smi.\n");
				return -1;
			}
			smi_reset = true;
		}

		if (pmu_have_event("msr", "aperf") &&
		    pmu_have_event("msr", "smi")) {
			if (!force_metric_only)
				stat_config.metric_only = true;
			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
		} else {
			fprintf(stderr, "To measure SMI cost, it needs "
				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
			parse_events_print_error(&errinfo, smi_cost_attrs);
			return -1;
		}
		if (err) {
			fprintf(stderr, "Cannot set up SMI cost events\n");
			return -1;
		}
		return 0;
	}

	if (topdown_run) {
		char *str = NULL;
		bool warn = false;

		if (stat_config.aggr_mode != AGGR_GLOBAL &&
		    stat_config.aggr_mode != AGGR_CORE) {
			pr_err("top down event configuration requires --per-core mode\n");
			return -1;
		}
		stat_config.aggr_mode = AGGR_CORE;
		if (nr_cgroups || !target__has_cpu(&target)) {
			pr_err("top down event configuration requires system-wide mode (-a)\n");
			return -1;
		}

		if (!force_metric_only)
			stat_config.metric_only = true;
		if (topdown_filter_events(topdown_attrs, &str,
				arch_topdown_check_group(&warn)) < 0) {
			pr_err("Out of memory\n");
			return -1;
		}
		if (topdown_attrs[0] && str) {
			if (warn)
				arch_topdown_group_warn();
			err = parse_events(evsel_list, str, &errinfo);
			if (err) {
				fprintf(stderr,
					"Cannot set up top down events %s: %d\n",
					str, err);
parse_events_print_error(&errinfo, str); 1349 free(str); 1350 return -1; 1351 } 1352 } else { 1353 fprintf(stderr, "System does not support topdown\n"); 1354 return -1; 1355 } 1356 free(str); 1357 } 1358 1359 if (!evsel_list->core.nr_entries) { 1360 if (target__has_cpu(&target)) 1361 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 1362 1363 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 1364 return -1; 1365 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 1366 if (perf_evlist__add_default_attrs(evsel_list, 1367 frontend_attrs) < 0) 1368 return -1; 1369 } 1370 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 1371 if (perf_evlist__add_default_attrs(evsel_list, 1372 backend_attrs) < 0) 1373 return -1; 1374 } 1375 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 1376 return -1; 1377 } 1378 1379 /* Detailed events get appended to the event list: */ 1380 1381 if (detailed_run < 1) 1382 return 0; 1383 1384 /* Append detailed run extra attributes: */ 1385 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 1386 return -1; 1387 1388 if (detailed_run < 2) 1389 return 0; 1390 1391 /* Append very detailed run extra attributes: */ 1392 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 1393 return -1; 1394 1395 if (detailed_run < 3) 1396 return 0; 1397 1398 /* Append very, very detailed run extra attributes: */ 1399 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 1400 } 1401 1402 static const char * const stat_record_usage[] = { 1403 "perf stat record [<options>]", 1404 NULL, 1405 }; 1406 1407 static void init_features(struct perf_session *session) 1408 { 1409 int feat; 1410 1411 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1412 perf_header__set_feat(&session->header, feat); 1413 1414 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1415 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1416 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1417 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1418 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1419 } 1420 1421 static int __cmd_record(int argc, const char **argv) 1422 { 1423 struct perf_session *session; 1424 struct perf_data *data = &perf_stat.data; 1425 1426 argc = parse_options(argc, argv, stat_options, stat_record_usage, 1427 PARSE_OPT_STOP_AT_NON_OPTION); 1428 1429 if (output_name) 1430 data->path = output_name; 1431 1432 if (stat_config.run_count != 1 || forever) { 1433 pr_err("Cannot use -r option with perf stat record.\n"); 1434 return -1; 1435 } 1436 1437 session = perf_session__new(data, false, NULL); 1438 if (session == NULL) { 1439 pr_err("Perf session creation failed.\n"); 1440 return -1; 1441 } 1442 1443 init_features(session); 1444 1445 session->evlist = evsel_list; 1446 perf_stat.session = session; 1447 perf_stat.record = true; 1448 return argc; 1449 } 1450 1451 static int process_stat_round_event(struct perf_session *session, 1452 union perf_event *event) 1453 { 1454 struct perf_record_stat_round *stat_round = &event->stat_round; 1455 struct evsel *counter; 1456 struct timespec tsh, *ts = NULL; 1457 const char **argv = session->header.env.cmdline_argv; 1458 int argc = session->header.env.nr_cmdline; 1459 1460 evlist__for_each_entry(evsel_list, counter) 1461 perf_stat_process_counter(&stat_config, counter); 1462 1463 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 1464 update_stats(&walltime_nsecs_stats, 
stat_round->time); 1465 1466 if (stat_config.interval && stat_round->time) { 1467 tsh.tv_sec = stat_round->time / NSEC_PER_SEC; 1468 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC; 1469 ts = &tsh; 1470 } 1471 1472 print_counters(ts, argc, argv); 1473 return 0; 1474 } 1475 1476 static 1477 int process_stat_config_event(struct perf_session *session, 1478 union perf_event *event) 1479 { 1480 struct perf_tool *tool = session->tool; 1481 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 1482 1483 perf_event__read_stat_config(&stat_config, &event->stat_config); 1484 1485 if (perf_cpu_map__empty(st->cpus)) { 1486 if (st->aggr_mode != AGGR_UNSET) 1487 pr_warning("warning: processing task data, aggregation mode not set\n"); 1488 return 0; 1489 } 1490 1491 if (st->aggr_mode != AGGR_UNSET) 1492 stat_config.aggr_mode = st->aggr_mode; 1493 1494 if (perf_stat.data.is_pipe) 1495 perf_stat_init_aggr_mode(); 1496 else 1497 perf_stat_init_aggr_mode_file(st); 1498 1499 return 0; 1500 } 1501 1502 static int set_maps(struct perf_stat *st) 1503 { 1504 if (!st->cpus || !st->threads) 1505 return 0; 1506 1507 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 1508 return -EINVAL; 1509 1510 perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads); 1511 1512 if (perf_evlist__alloc_stats(evsel_list, true)) 1513 return -ENOMEM; 1514 1515 st->maps_allocated = true; 1516 return 0; 1517 } 1518 1519 static 1520 int process_thread_map_event(struct perf_session *session, 1521 union perf_event *event) 1522 { 1523 struct perf_tool *tool = session->tool; 1524 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 1525 1526 if (st->threads) { 1527 pr_warning("Extra thread map event, ignoring.\n"); 1528 return 0; 1529 } 1530 1531 st->threads = thread_map__new_event(&event->thread_map); 1532 if (!st->threads) 1533 return -ENOMEM; 1534 1535 return set_maps(st); 1536 } 1537 1538 static 1539 int process_cpu_map_event(struct perf_session *session, 1540 union perf_event *event) 1541 { 1542 struct perf_tool *tool = session->tool; 1543 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 1544 struct perf_cpu_map *cpus; 1545 1546 if (st->cpus) { 1547 pr_warning("Extra cpu map event, ignoring.\n"); 1548 return 0; 1549 } 1550 1551 cpus = cpu_map__new_data(&event->cpu_map.data); 1552 if (!cpus) 1553 return -ENOMEM; 1554 1555 st->cpus = cpus; 1556 return set_maps(st); 1557 } 1558 1559 static int runtime_stat_new(struct perf_stat_config *config, int nthreads) 1560 { 1561 int i; 1562 1563 config->stats = calloc(nthreads, sizeof(struct runtime_stat)); 1564 if (!config->stats) 1565 return -1; 1566 1567 config->stats_num = nthreads; 1568 1569 for (i = 0; i < nthreads; i++) 1570 runtime_stat__init(&config->stats[i]); 1571 1572 return 0; 1573 } 1574 1575 static void runtime_stat_delete(struct perf_stat_config *config) 1576 { 1577 int i; 1578 1579 if (!config->stats) 1580 return; 1581 1582 for (i = 0; i < config->stats_num; i++) 1583 runtime_stat__exit(&config->stats[i]); 1584 1585 zfree(&config->stats); 1586 } 1587 1588 static const char * const stat_report_usage[] = { 1589 "perf stat report [<options>]", 1590 NULL, 1591 }; 1592 1593 static struct perf_stat perf_stat = { 1594 .tool = { 1595 .attr = perf_event__process_attr, 1596 .event_update = perf_event__process_event_update, 1597 .thread_map = process_thread_map_event, 1598 .cpu_map = process_cpu_map_event, 1599 .stat_config = process_stat_config_event, 1600 .stat = perf_event__process_stat_event, 1601 .stat_round = 
process_stat_round_event, 1602 }, 1603 .aggr_mode = AGGR_UNSET, 1604 }; 1605 1606 static int __cmd_report(int argc, const char **argv) 1607 { 1608 struct perf_session *session; 1609 const struct option options[] = { 1610 OPT_STRING('i', "input", &input_name, "file", "input file name"), 1611 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 1612 "aggregate counts per processor socket", AGGR_SOCKET), 1613 OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode, 1614 "aggregate counts per processor die", AGGR_DIE), 1615 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 1616 "aggregate counts per physical processor core", AGGR_CORE), 1617 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 1618 "disable CPU count aggregation", AGGR_NONE), 1619 OPT_END() 1620 }; 1621 struct stat st; 1622 int ret; 1623 1624 argc = parse_options(argc, argv, options, stat_report_usage, 0); 1625 1626 if (!input_name || !strlen(input_name)) { 1627 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 1628 input_name = "-"; 1629 else 1630 input_name = "perf.data"; 1631 } 1632 1633 perf_stat.data.path = input_name; 1634 perf_stat.data.mode = PERF_DATA_MODE_READ; 1635 1636 session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); 1637 if (session == NULL) 1638 return -1; 1639 1640 perf_stat.session = session; 1641 stat_config.output = stderr; 1642 evsel_list = session->evlist; 1643 1644 ret = perf_session__process_events(session); 1645 if (ret) 1646 return ret; 1647 1648 perf_session__delete(session); 1649 return 0; 1650 } 1651 1652 static void setup_system_wide(int forks) 1653 { 1654 /* 1655 * Make system wide (-a) the default target if 1656 * no target was specified and one of following 1657 * conditions is met: 1658 * 1659 * - there's no workload specified 1660 * - there is workload specified but all requested 1661 * events are system wide events 1662 */ 1663 if (!target__none(&target)) 1664 return; 1665 1666 if (!forks) 1667 target.system_wide = true; 1668 else { 1669 struct evsel *counter; 1670 1671 evlist__for_each_entry(evsel_list, counter) { 1672 if (!counter->system_wide) 1673 return; 1674 } 1675 1676 if (evsel_list->core.nr_entries) 1677 target.system_wide = true; 1678 } 1679 } 1680 1681 int cmd_stat(int argc, const char **argv) 1682 { 1683 const char * const stat_usage[] = { 1684 "perf stat [<options>] [<command>]", 1685 NULL 1686 }; 1687 int status = -EINVAL, run_idx; 1688 const char *mode; 1689 FILE *output = stderr; 1690 unsigned int interval, timeout; 1691 const char * const stat_subcommands[] = { "record", "report" }; 1692 1693 setlocale(LC_ALL, ""); 1694 1695 evsel_list = evlist__new(); 1696 if (evsel_list == NULL) 1697 return -ENOMEM; 1698 1699 parse_events__shrink_config_terms(); 1700 1701 /* String-parsing callback-based options would segfault when negated */ 1702 set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG); 1703 set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG); 1704 set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG); 1705 1706 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 1707 (const char **) stat_usage, 1708 PARSE_OPT_STOP_AT_NON_OPTION); 1709 perf_stat__collect_metric_expr(evsel_list); 1710 perf_stat__init_shadow_stats(); 1711 1712 if (stat_config.csv_sep) { 1713 stat_config.csv_output = true; 1714 if (!strcmp(stat_config.csv_sep, "\\t")) 1715 stat_config.csv_sep = "\t"; 1716 } else 1717 stat_config.csv_sep = DEFAULT_SEPARATOR; 1718 1719 if (argc && !strncmp(argv[0], "rec", 3)) { 1720 argc = __cmd_record(argc, argv); 
1721 if (argc < 0) 1722 return -1; 1723 } else if (argc && !strncmp(argv[0], "rep", 3)) 1724 return __cmd_report(argc, argv); 1725 1726 interval = stat_config.interval; 1727 timeout = stat_config.timeout; 1728 1729 /* 1730 * For record command the -o is already taken care of. 1731 */ 1732 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 1733 output = NULL; 1734 1735 if (output_name && output_fd) { 1736 fprintf(stderr, "cannot use both --output and --log-fd\n"); 1737 parse_options_usage(stat_usage, stat_options, "o", 1); 1738 parse_options_usage(NULL, stat_options, "log-fd", 0); 1739 goto out; 1740 } 1741 1742 if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) { 1743 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 1744 goto out; 1745 } 1746 1747 if (stat_config.metric_only && stat_config.run_count > 1) { 1748 fprintf(stderr, "--metric-only is not supported with -r\n"); 1749 goto out; 1750 } 1751 1752 if (stat_config.walltime_run_table && stat_config.run_count <= 1) { 1753 fprintf(stderr, "--table is only supported with -r\n"); 1754 parse_options_usage(stat_usage, stat_options, "r", 1); 1755 parse_options_usage(NULL, stat_options, "table", 0); 1756 goto out; 1757 } 1758 1759 if (output_fd < 0) { 1760 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 1761 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 1762 goto out; 1763 } 1764 1765 if (!output) { 1766 struct timespec tm; 1767 mode = append_file ? "a" : "w"; 1768 1769 output = fopen(output_name, mode); 1770 if (!output) { 1771 perror("failed to create output file"); 1772 return -1; 1773 } 1774 clock_gettime(CLOCK_REALTIME, &tm); 1775 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 1776 } else if (output_fd > 0) { 1777 mode = append_file ? "a" : "w"; 1778 output = fdopen(output_fd, mode); 1779 if (!output) { 1780 perror("Failed opening logfd"); 1781 return -errno; 1782 } 1783 } 1784 1785 stat_config.output = output; 1786 1787 /* 1788 * let the spreadsheet do the pretty-printing 1789 */ 1790 if (stat_config.csv_output) { 1791 /* User explicitly passed -B? */ 1792 if (big_num_opt == 1) { 1793 fprintf(stderr, "-B option not supported with -x\n"); 1794 parse_options_usage(stat_usage, stat_options, "B", 1); 1795 parse_options_usage(NULL, stat_options, "x", 1); 1796 goto out; 1797 } else /* Nope, so disable big number formatting */ 1798 stat_config.big_num = false; 1799 } else if (big_num_opt == 0) /* User passed --no-big-num */ 1800 stat_config.big_num = false; 1801 1802 setup_system_wide(argc); 1803 1804 /* 1805 * Display user/system times only for single 1806 * run and when there's specified tracee. 
1807 */ 1808 if ((stat_config.run_count == 1) && target__none(&target)) 1809 stat_config.ru_display = true; 1810 1811 if (stat_config.run_count < 0) { 1812 pr_err("Run count must be a positive number\n"); 1813 parse_options_usage(stat_usage, stat_options, "r", 1); 1814 goto out; 1815 } else if (stat_config.run_count == 0) { 1816 forever = true; 1817 stat_config.run_count = 1; 1818 } 1819 1820 if (stat_config.walltime_run_table) { 1821 stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0])); 1822 if (!stat_config.walltime_run) { 1823 pr_err("failed to setup -r option"); 1824 goto out; 1825 } 1826 } 1827 1828 if ((stat_config.aggr_mode == AGGR_THREAD) && 1829 !target__has_task(&target)) { 1830 if (!target.system_wide || target.cpu_list) { 1831 fprintf(stderr, "The --per-thread option is only " 1832 "available when monitoring via -p -t -a " 1833 "options or only --per-thread.\n"); 1834 parse_options_usage(NULL, stat_options, "p", 1); 1835 parse_options_usage(NULL, stat_options, "t", 1); 1836 goto out; 1837 } 1838 } 1839 1840 /* 1841 * no_aggr, cgroup are for system-wide only 1842 * --per-thread is aggregated per thread, we dont mix it with cpu mode 1843 */ 1844 if (((stat_config.aggr_mode != AGGR_GLOBAL && 1845 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 1846 !target__has_cpu(&target)) { 1847 fprintf(stderr, "both cgroup and no-aggregation " 1848 "modes only available in system-wide mode\n"); 1849 1850 parse_options_usage(stat_usage, stat_options, "G", 1); 1851 parse_options_usage(NULL, stat_options, "A", 1); 1852 parse_options_usage(NULL, stat_options, "a", 1); 1853 goto out; 1854 } 1855 1856 if (add_default_attributes()) 1857 goto out; 1858 1859 target__validate(&target); 1860 1861 if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) 1862 target.per_thread = true; 1863 1864 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 1865 if (target__has_task(&target)) { 1866 pr_err("Problems finding threads of monitor\n"); 1867 parse_options_usage(stat_usage, stat_options, "p", 1); 1868 parse_options_usage(NULL, stat_options, "t", 1); 1869 } else if (target__has_cpu(&target)) { 1870 perror("failed to parse CPUs map"); 1871 parse_options_usage(stat_usage, stat_options, "C", 1); 1872 parse_options_usage(NULL, stat_options, "a", 1); 1873 } 1874 goto out; 1875 } 1876 1877 /* 1878 * Initialize thread_map with comm names, 1879 * so we could print it out on output. 1880 */ 1881 if (stat_config.aggr_mode == AGGR_THREAD) { 1882 thread_map__read_comms(evsel_list->core.threads); 1883 if (target.system_wide) { 1884 if (runtime_stat_new(&stat_config, 1885 perf_thread_map__nr(evsel_list->core.threads))) { 1886 goto out; 1887 } 1888 } 1889 } 1890 1891 if (stat_config.times && interval) 1892 interval_count = true; 1893 else if (stat_config.times && !interval) { 1894 pr_err("interval-count option should be used together with " 1895 "interval-print.\n"); 1896 parse_options_usage(stat_usage, stat_options, "interval-count", 0); 1897 parse_options_usage(stat_usage, stat_options, "I", 1); 1898 goto out; 1899 } 1900 1901 if (timeout && timeout < 100) { 1902 if (timeout < 10) { 1903 pr_err("timeout must be >= 10ms.\n"); 1904 parse_options_usage(stat_usage, stat_options, "timeout", 0); 1905 goto out; 1906 } else 1907 pr_warning("timeout < 100ms. " 1908 "The overhead percentage could be high in some cases. 
" 1909 "Please proceed with caution.\n"); 1910 } 1911 if (timeout && interval) { 1912 pr_err("timeout option is not supported with interval-print.\n"); 1913 parse_options_usage(stat_usage, stat_options, "timeout", 0); 1914 parse_options_usage(stat_usage, stat_options, "I", 1); 1915 goto out; 1916 } 1917 1918 if (perf_evlist__alloc_stats(evsel_list, interval)) 1919 goto out; 1920 1921 if (perf_stat_init_aggr_mode()) 1922 goto out; 1923 1924 /* 1925 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 1926 * while avoiding that older tools show confusing messages. 1927 * 1928 * However for pipe sessions we need to keep it zero, 1929 * because script's perf_evsel__check_attr is triggered 1930 * by attr->sample_type != 0, and we can't run it on 1931 * stat sessions. 1932 */ 1933 stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe); 1934 1935 /* 1936 * We dont want to block the signals - that would cause 1937 * child tasks to inherit that and Ctrl-C would not work. 1938 * What we want is for Ctrl-C to work in the exec()-ed 1939 * task, but being ignored by perf stat itself: 1940 */ 1941 atexit(sig_atexit); 1942 if (!forever) 1943 signal(SIGINT, skip_signal); 1944 signal(SIGCHLD, skip_signal); 1945 signal(SIGALRM, skip_signal); 1946 signal(SIGABRT, skip_signal); 1947 1948 status = 0; 1949 for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) { 1950 if (stat_config.run_count != 1 && verbose > 0) 1951 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 1952 run_idx + 1); 1953 1954 status = run_perf_stat(argc, argv, run_idx); 1955 if (forever && status != -1) { 1956 print_counters(NULL, argc, argv); 1957 perf_stat__reset_stats(); 1958 } 1959 } 1960 1961 if (!forever && status != -1 && !interval) 1962 print_counters(NULL, argc, argv); 1963 1964 if (STAT_RECORD) { 1965 /* 1966 * We synthesize the kernel mmap record just so that older tools 1967 * don't emit warnings about not being able to resolve symbols 1968 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 1969 * a saner message about no samples being in the perf.data file. 
1970 * 1971 * This also serves to suppress a warning about f_header.data.size == 0 1972 * in header.c at the moment 'perf stat record' gets introduced, which 1973 * is not really needed once we start adding the stat specific PERF_RECORD_ 1974 * records, but the need to suppress the kptr_restrict messages in older 1975 * tools remain -acme 1976 */ 1977 int fd = perf_data__fd(&perf_stat.data); 1978 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 1979 process_synthesized_event, 1980 &perf_stat.session->machines.host); 1981 if (err) { 1982 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 1983 "older tools may produce warnings about this file\n."); 1984 } 1985 1986 if (!interval) { 1987 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 1988 pr_err("failed to write stat round event\n"); 1989 } 1990 1991 if (!perf_stat.data.is_pipe) { 1992 perf_stat.session->header.data_size += perf_stat.bytes_written; 1993 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 1994 } 1995 1996 evlist__close(evsel_list); 1997 perf_session__delete(perf_stat.session); 1998 } 1999 2000 perf_stat__exit_aggr_mode(); 2001 perf_evlist__free_stats(evsel_list); 2002 out: 2003 zfree(&stat_config.walltime_run); 2004 2005 if (smi_cost && smi_reset) 2006 sysfs__write_int(FREEZE_ON_SMI_PATH, 0); 2007 2008 evlist__delete(evsel_list); 2009 2010 runtime_stat_delete(&stat_config); 2011 2012 return status; 2013 } 2014