1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. 
(and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include <subcmd/parse-options.h> 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/drv_configs.h" 56 #include "util/color.h" 57 #include "util/stat.h" 58 #include "util/header.h" 59 #include "util/cpumap.h" 60 #include "util/thread.h" 61 #include "util/thread_map.h" 62 #include "util/counts.h" 63 #include "util/group.h" 64 #include "util/session.h" 65 #include "util/tool.h" 66 #include "util/group.h" 67 #include "util/string2.h" 68 #include "util/metricgroup.h" 69 #include "asm/bug.h" 70 71 #include <linux/time64.h> 72 #include <api/fs/fs.h> 73 #include <errno.h> 74 #include <signal.h> 75 #include <stdlib.h> 76 #include <sys/prctl.h> 77 #include <inttypes.h> 78 #include <locale.h> 79 #include <math.h> 80 #include <sys/types.h> 81 #include <sys/stat.h> 82 #include <sys/wait.h> 83 #include <unistd.h> 84 85 #include "sane_ctype.h" 86 87 #define DEFAULT_SEPARATOR " " 88 #define CNTR_NOT_SUPPORTED "<not supported>" 89 #define CNTR_NOT_COUNTED "<not counted>" 90 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" 91 92 static void print_counters(struct timespec *ts, int argc, const char **argv); 93 94 /* Default events used for perf stat -T */ 95 static const char *transaction_attrs = { 96 "task-clock," 97 "{" 98 "instructions," 99 "cycles," 100 "cpu/cycles-t/," 101 "cpu/tx-start/," 102 "cpu/el-start/," 103 "cpu/cycles-ct/" 104 "}" 105 }; 106 107 /* More limited version when the CPU does not have all events. 
*/ 108 static const char * transaction_limited_attrs = { 109 "task-clock," 110 "{" 111 "instructions," 112 "cycles," 113 "cpu/cycles-t/," 114 "cpu/tx-start/" 115 "}" 116 }; 117 118 static const char * topdown_attrs[] = { 119 "topdown-total-slots", 120 "topdown-slots-retired", 121 "topdown-recovery-bubbles", 122 "topdown-fetch-bubbles", 123 "topdown-slots-issued", 124 NULL, 125 }; 126 127 static const char *smi_cost_attrs = { 128 "{" 129 "msr/aperf/," 130 "msr/smi/," 131 "cycles" 132 "}" 133 }; 134 135 static struct perf_evlist *evsel_list; 136 137 static struct rblist metric_events; 138 139 static struct target target = { 140 .uid = UINT_MAX, 141 }; 142 143 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 144 145 static int run_count = 1; 146 static bool no_inherit = false; 147 static volatile pid_t child_pid = -1; 148 static bool null_run = false; 149 static int detailed_run = 0; 150 static bool transaction_run; 151 static bool topdown_run = false; 152 static bool smi_cost = false; 153 static bool smi_reset = false; 154 static bool big_num = true; 155 static int big_num_opt = -1; 156 static const char *csv_sep = NULL; 157 static bool csv_output = false; 158 static bool group = false; 159 static const char *pre_cmd = NULL; 160 static const char *post_cmd = NULL; 161 static bool sync_run = false; 162 static unsigned int initial_delay = 0; 163 static unsigned int unit_width = 4; /* strlen("unit") */ 164 static bool forever = false; 165 static bool metric_only = false; 166 static bool force_metric_only = false; 167 static bool no_merge = false; 168 static struct timespec ref_time; 169 static struct cpu_map *aggr_map; 170 static aggr_get_id_t aggr_get_id; 171 static bool append_file; 172 static const char *output_name; 173 static int output_fd; 174 static int print_free_counters_hint; 175 176 struct perf_stat { 177 bool record; 178 struct perf_data_file file; 179 struct perf_session *session; 180 u64 bytes_written; 181 struct perf_tool tool; 182 bool 
maps_allocated; 183 struct cpu_map *cpus; 184 struct thread_map *threads; 185 enum aggr_mode aggr_mode; 186 }; 187 188 static struct perf_stat perf_stat; 189 #define STAT_RECORD perf_stat.record 190 191 static volatile int done = 0; 192 193 static struct perf_stat_config stat_config = { 194 .aggr_mode = AGGR_GLOBAL, 195 .scale = true, 196 }; 197 198 static bool is_duration_time(struct perf_evsel *evsel) 199 { 200 return !strcmp(evsel->name, "duration_time"); 201 } 202 203 static inline void diff_timespec(struct timespec *r, struct timespec *a, 204 struct timespec *b) 205 { 206 r->tv_sec = a->tv_sec - b->tv_sec; 207 if (a->tv_nsec < b->tv_nsec) { 208 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; 209 r->tv_sec--; 210 } else { 211 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 212 } 213 } 214 215 static void perf_stat__reset_stats(void) 216 { 217 perf_evlist__reset_stats(evsel_list); 218 perf_stat__reset_shadow_stats(); 219 } 220 221 static int create_perf_stat_counter(struct perf_evsel *evsel) 222 { 223 struct perf_event_attr *attr = &evsel->attr; 224 struct perf_evsel *leader = evsel->leader; 225 226 if (stat_config.scale) { 227 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 228 PERF_FORMAT_TOTAL_TIME_RUNNING; 229 } 230 231 /* 232 * The event is part of non trivial group, let's enable 233 * the group read (for leader) and ID retrieval for all 234 * members. 235 */ 236 if (leader->nr_members > 1) 237 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 238 239 attr->inherit = !no_inherit; 240 241 /* 242 * Some events get initialized with sample_(period/type) set, 243 * like tracepoints. Clear it up for counting. 244 */ 245 attr->sample_period = 0; 246 247 /* 248 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 249 * while avoiding that older tools show confusing messages. 
250 * 251 * However for pipe sessions we need to keep it zero, 252 * because script's perf_evsel__check_attr is triggered 253 * by attr->sample_type != 0, and we can't run it on 254 * stat sessions. 255 */ 256 if (!(STAT_RECORD && perf_stat.file.is_pipe)) 257 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 258 259 /* 260 * Disabling all counters initially, they will be enabled 261 * either manually by us or by kernel via enable_on_exec 262 * set later. 263 */ 264 if (perf_evsel__is_group_leader(evsel)) { 265 attr->disabled = 1; 266 267 /* 268 * In case of initial_delay we enable tracee 269 * events manually. 270 */ 271 if (target__none(&target) && !initial_delay) 272 attr->enable_on_exec = 1; 273 } 274 275 if (target__has_cpu(&target)) 276 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 277 278 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 279 } 280 281 /* 282 * Does the counter have nsecs as a unit? 283 */ 284 static inline int nsec_counter(struct perf_evsel *evsel) 285 { 286 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 287 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 288 return 1; 289 290 return 0; 291 } 292 293 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 294 union perf_event *event, 295 struct perf_sample *sample __maybe_unused, 296 struct machine *machine __maybe_unused) 297 { 298 if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { 299 pr_err("failed to write perf data, error: %m\n"); 300 return -1; 301 } 302 303 perf_stat.bytes_written += event->header.size; 304 return 0; 305 } 306 307 static int write_stat_round_event(u64 tm, u64 type) 308 { 309 return perf_event__synthesize_stat_round(NULL, tm, type, 310 process_synthesized_event, 311 NULL); 312 } 313 314 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 315 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 316 317 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 318 319 static int 
320 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 321 struct perf_counts_values *count) 322 { 323 struct perf_sample_id *sid = SID(counter, cpu, thread); 324 325 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 326 process_synthesized_event, NULL); 327 } 328 329 /* 330 * Read out the results of a single counter: 331 * do not aggregate counts across CPUs in system-wide mode 332 */ 333 static int read_counter(struct perf_evsel *counter) 334 { 335 int nthreads = thread_map__nr(evsel_list->threads); 336 int ncpus, cpu, thread; 337 338 if (target__has_cpu(&target)) 339 ncpus = perf_evsel__nr_cpus(counter); 340 else 341 ncpus = 1; 342 343 if (!counter->supported) 344 return -ENOENT; 345 346 if (counter->system_wide) 347 nthreads = 1; 348 349 for (thread = 0; thread < nthreads; thread++) { 350 for (cpu = 0; cpu < ncpus; cpu++) { 351 struct perf_counts_values *count; 352 353 count = perf_counts(counter->counts, cpu, thread); 354 355 /* 356 * The leader's group read loads data into its group members 357 * (via perf_evsel__read_counter) and sets threir count->loaded. 
358 */ 359 if (!count->loaded && 360 perf_evsel__read_counter(counter, cpu, thread)) { 361 counter->counts->scaled = -1; 362 perf_counts(counter->counts, cpu, thread)->ena = 0; 363 perf_counts(counter->counts, cpu, thread)->run = 0; 364 return -1; 365 } 366 367 count->loaded = false; 368 369 if (STAT_RECORD) { 370 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 371 pr_err("failed to write stat event\n"); 372 return -1; 373 } 374 } 375 376 if (verbose > 1) { 377 fprintf(stat_config.output, 378 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 379 perf_evsel__name(counter), 380 cpu, 381 count->val, count->ena, count->run); 382 } 383 } 384 } 385 386 return 0; 387 } 388 389 static void read_counters(void) 390 { 391 struct perf_evsel *counter; 392 int ret; 393 394 evlist__for_each_entry(evsel_list, counter) { 395 ret = read_counter(counter); 396 if (ret) 397 pr_debug("failed to read counter %s\n", counter->name); 398 399 if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) 400 pr_warning("failed to process counter %s\n", counter->name); 401 } 402 } 403 404 static void process_interval(void) 405 { 406 struct timespec ts, rs; 407 408 read_counters(); 409 410 clock_gettime(CLOCK_MONOTONIC, &ts); 411 diff_timespec(&rs, &ts, &ref_time); 412 413 if (STAT_RECORD) { 414 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) 415 pr_err("failed to write stat round event\n"); 416 } 417 418 init_stats(&walltime_nsecs_stats); 419 update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000); 420 print_counters(&rs, 0, NULL); 421 } 422 423 static void enable_counters(void) 424 { 425 if (initial_delay) 426 usleep(initial_delay * USEC_PER_MSEC); 427 428 /* 429 * We need to enable counters only if: 430 * - we don't have tracee (attaching to task or cpu) 431 * - we have initial delay configured 432 */ 433 if (!target__none(&target) || initial_delay) 434 perf_evlist__enable(evsel_list); 435 } 436 437 static void 
disable_counters(void) 438 { 439 /* 440 * If we don't have tracee (attaching to task or cpu), counters may 441 * still be running. To get accurate group ratios, we must stop groups 442 * from counting before reading their constituent counters. 443 */ 444 if (!target__none(&target)) 445 perf_evlist__disable(evsel_list); 446 } 447 448 static volatile int workload_exec_errno; 449 450 /* 451 * perf_evlist__prepare_workload will send a SIGUSR1 452 * if the fork fails, since we asked by setting its 453 * want_signal to true. 454 */ 455 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 456 void *ucontext __maybe_unused) 457 { 458 workload_exec_errno = info->si_value.sival_int; 459 } 460 461 static bool has_unit(struct perf_evsel *counter) 462 { 463 return counter->unit && *counter->unit; 464 } 465 466 static bool has_scale(struct perf_evsel *counter) 467 { 468 return counter->scale != 1; 469 } 470 471 static int perf_stat_synthesize_config(bool is_pipe) 472 { 473 struct perf_evsel *counter; 474 int err; 475 476 if (is_pipe) { 477 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 478 process_synthesized_event); 479 if (err < 0) { 480 pr_err("Couldn't synthesize attrs.\n"); 481 return err; 482 } 483 } 484 485 /* 486 * Synthesize other events stuff not carried within 487 * attr event - unit, scale, name 488 */ 489 evlist__for_each_entry(evsel_list, counter) { 490 if (!counter->supported) 491 continue; 492 493 /* 494 * Synthesize unit and scale only if it's defined. 
495 */ 496 if (has_unit(counter)) { 497 err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); 498 if (err < 0) { 499 pr_err("Couldn't synthesize evsel unit.\n"); 500 return err; 501 } 502 } 503 504 if (has_scale(counter)) { 505 err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); 506 if (err < 0) { 507 pr_err("Couldn't synthesize evsel scale.\n"); 508 return err; 509 } 510 } 511 512 if (counter->own_cpus) { 513 err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); 514 if (err < 0) { 515 pr_err("Couldn't synthesize evsel scale.\n"); 516 return err; 517 } 518 } 519 520 /* 521 * Name is needed only for pipe output, 522 * perf.data carries event names. 523 */ 524 if (is_pipe) { 525 err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); 526 if (err < 0) { 527 pr_err("Couldn't synthesize evsel name.\n"); 528 return err; 529 } 530 } 531 } 532 533 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 534 process_synthesized_event, 535 NULL); 536 if (err < 0) { 537 pr_err("Couldn't synthesize thread map.\n"); 538 return err; 539 } 540 541 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 542 process_synthesized_event, NULL); 543 if (err < 0) { 544 pr_err("Couldn't synthesize thread map.\n"); 545 return err; 546 } 547 548 err = perf_event__synthesize_stat_config(NULL, &stat_config, 549 process_synthesized_event, NULL); 550 if (err < 0) { 551 pr_err("Couldn't synthesize config.\n"); 552 return err; 553 } 554 555 return 0; 556 } 557 558 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 559 560 static int __store_counter_ids(struct perf_evsel *counter, 561 struct cpu_map *cpus, 562 struct thread_map *threads) 563 { 564 int cpu, thread; 565 566 for (cpu = 0; cpu < cpus->nr; cpu++) { 567 for (thread = 0; thread < threads->nr; thread++) { 568 int fd = FD(counter, cpu, thread); 569 570 if 
/*
 * Perform one measurement run.
 *
 * Prepares the workload (when a command was given, i.e. argc > 0),
 * opens every counter in evsel_list, applies filters and driver
 * configs, writes the header and config events when recording, then
 * starts the workload / enables the counters and waits for completion.
 * Finally the counters are disabled, read and closed.
 *
 * Returns -1 (or a negative error from the header/config writers) on
 * failure, otherwise WEXITSTATUS() of the status collected from the
 * workload; status stays 0 when nothing was forked.
 */
static int __run_perf_stat(int argc, const char **argv)
{
	int interval = stat_config.interval;
	char msg[BUFSIZ];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);
	bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
	struct perf_evsel_config_term *err_term;

	/*
	 * Sleep period between polls: the configured interval, or one
	 * second when no interval was requested.
	 * NOTE(review): the remainder is scaled by NSEC_PER_MSEC, so
	 * interval looks like milliseconds; USEC_PER_MSEC is presumably
	 * used here only because it has the same value as ms-per-second
	 * - confirm.
	 */
	if (interval) {
		ts.tv_sec  = interval / USEC_PER_MSEC;
		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
						  workload_exec_failed_signal) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	evlist__for_each_entry(evsel_list, counter) {
try_again:
		if (create_perf_stat_counter(counter) < 0) {

			/* Weak group failed. Reset the group. */
			if ((errno == EINVAL || errno == EBADF) &&
			    counter->leader != counter &&
			    counter->weak_group) {
				/* Retry from the former leader, now ungrouped. */
				counter = perf_evsel__reset_weak_group(counter);
				goto try_again;
			}

			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose > 0)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;

				/*
				 * Skip the unsupported event unless it is the
				 * leader of a group with more than one member;
				 * that case falls through to the error path.
				 */
				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			/* Unrecoverable open failure: report, stop the child, give up. */
			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;

		/* Grow the unit column to fit the longest unit string seen. */
		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;

		if (perf_evsel__should_store_id(counter) &&
		    store_counter_ids(counter))
			return -1;
	}

	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
			str_error_r(errno, msg, sizeof(msg)));
		return -1;
	}

	if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(counter), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		return -1;
	}

	/* When recording, the header and the config events go out first. */
	if (STAT_RECORD) {
		int err, fd = perf_data_file__fd(&perf_stat.file);

		if (is_pipe) {
			err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
		} else {
			err = perf_session__write_header(perf_stat.session, evsel_list,
							 fd, false);
		}

		if (err < 0)
			return err;

		err = perf_stat_synthesize_config(is_pipe);
		if (err < 0)
			return err;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		enable_counters();

		if (interval) {
			/*
			 * Poll without blocking: waitpid() returning 0 means
			 * the child has not changed state yet.
			 */
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				process_interval();
			}
		}
		waitpid(child_pid, &status, 0);

		/* Set by workload_exec_failed_signal() when the exec failed. */
		if (workload_exec_errno) {
			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
			return -1;
		}

		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		/* No workload: count until the 'done' flag is raised. */
		enable_counters();
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				process_interval();
		}
	}

	disable_counters();

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	/*
	 * Closing a group leader splits the group, and as we only disable
	 * group leaders, results in remaining events becoming enabled. To
	 * avoid arbitrary skew, we must read all counters before closing any
	 * group leaders.
	 */
	read_counters();
	perf_evlist__close(evsel_list);

	return WEXITSTATUS(status);
}
100.0 * run / ena : 100.0); 826 } else if (run != ena) { 827 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 828 } 829 } 830 831 static void print_noise_pct(double total, double avg) 832 { 833 double pct = rel_stddev_stats(total, avg); 834 835 if (csv_output) 836 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 837 else if (pct) 838 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 839 } 840 841 static void print_noise(struct perf_evsel *evsel, double avg) 842 { 843 struct perf_stat_evsel *ps; 844 845 if (run_count == 1) 846 return; 847 848 ps = evsel->priv; 849 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 850 } 851 852 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 853 { 854 switch (stat_config.aggr_mode) { 855 case AGGR_CORE: 856 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 857 cpu_map__id_to_socket(id), 858 csv_output ? 0 : -8, 859 cpu_map__id_to_cpu(id), 860 csv_sep, 861 csv_output ? 0 : 4, 862 nr, 863 csv_sep); 864 break; 865 case AGGR_SOCKET: 866 fprintf(stat_config.output, "S%*d%s%*d%s", 867 csv_output ? 0 : -5, 868 id, 869 csv_sep, 870 csv_output ? 0 : 4, 871 nr, 872 csv_sep); 873 break; 874 case AGGR_NONE: 875 fprintf(stat_config.output, "CPU%*d%s", 876 csv_output ? 0 : -4, 877 perf_evsel__cpus(evsel)->map[id], csv_sep); 878 break; 879 case AGGR_THREAD: 880 fprintf(stat_config.output, "%*s-%*d%s", 881 csv_output ? 0 : 16, 882 thread_map__comm(evsel->threads, id), 883 csv_output ? 
0 : -8, 884 thread_map__pid(evsel->threads, id), 885 csv_sep); 886 break; 887 case AGGR_GLOBAL: 888 case AGGR_UNSET: 889 default: 890 break; 891 } 892 } 893 894 struct outstate { 895 FILE *fh; 896 bool newline; 897 const char *prefix; 898 int nfields; 899 int id, nr; 900 struct perf_evsel *evsel; 901 }; 902 903 #define METRIC_LEN 35 904 905 static void new_line_std(void *ctx) 906 { 907 struct outstate *os = ctx; 908 909 os->newline = true; 910 } 911 912 static void do_new_line_std(struct outstate *os) 913 { 914 fputc('\n', os->fh); 915 fputs(os->prefix, os->fh); 916 aggr_printout(os->evsel, os->id, os->nr); 917 if (stat_config.aggr_mode == AGGR_NONE) 918 fprintf(os->fh, " "); 919 fprintf(os->fh, " "); 920 } 921 922 static void print_metric_std(void *ctx, const char *color, const char *fmt, 923 const char *unit, double val) 924 { 925 struct outstate *os = ctx; 926 FILE *out = os->fh; 927 int n; 928 bool newline = os->newline; 929 930 os->newline = false; 931 932 if (unit == NULL || fmt == NULL) { 933 fprintf(out, "%-*s", METRIC_LEN, ""); 934 return; 935 } 936 937 if (newline) 938 do_new_line_std(os); 939 940 n = fprintf(out, " # "); 941 if (color) 942 n += color_fprintf(out, color, fmt, val); 943 else 944 n += fprintf(out, fmt, val); 945 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 946 } 947 948 static void new_line_csv(void *ctx) 949 { 950 struct outstate *os = ctx; 951 int i; 952 953 fputc('\n', os->fh); 954 if (os->prefix) 955 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 956 aggr_printout(os->evsel, os->id, os->nr); 957 for (i = 0; i < os->nfields; i++) 958 fputs(csv_sep, os->fh); 959 } 960 961 static void print_metric_csv(void *ctx, 962 const char *color __maybe_unused, 963 const char *fmt, const char *unit, double val) 964 { 965 struct outstate *os = ctx; 966 FILE *out = os->fh; 967 char buf[64], *vals, *ends; 968 969 if (unit == NULL || fmt == NULL) { 970 fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); 971 return; 972 } 973 snprintf(buf, 
/*
 * Decide whether a metric with the given unit string is shown in
 * metric-only mode.
 *
 * Returns false for a NULL unit and for any unit containing "/sec",
 * "hz", "Hz" or "CPUs utilized"; true otherwise.
 */
static bool valid_only_metric(const char *unit)
{
	/* Substrings of units that are filtered out. */
	static const char * const rejected[] = {
		"/sec",
		"hz",
		"Hz",
		"CPUs utilized",
	};
	size_t i;

	if (unit == NULL)
		return false;

	for (i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++) {
		if (strstr(unit, rejected[i]) != NULL)
			return false;
	}

	return true;
}
fprintf(out, "%s%s", vals, csv_sep); 1051 } 1052 1053 static void new_line_metric(void *ctx __maybe_unused) 1054 { 1055 } 1056 1057 static void print_metric_header(void *ctx, const char *color __maybe_unused, 1058 const char *fmt __maybe_unused, 1059 const char *unit, double val __maybe_unused) 1060 { 1061 struct outstate *os = ctx; 1062 char tbuf[1024]; 1063 1064 if (!valid_only_metric(unit)) 1065 return; 1066 unit = fixunit(tbuf, os->evsel, unit); 1067 if (csv_output) 1068 fprintf(os->fh, "%s%s", unit, csv_sep); 1069 else 1070 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 1071 } 1072 1073 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1074 { 1075 FILE *output = stat_config.output; 1076 double msecs = avg / NSEC_PER_MSEC; 1077 const char *fmt_v, *fmt_n; 1078 char name[25]; 1079 1080 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 1081 fmt_n = csv_output ? "%s" : "%-25s"; 1082 1083 aggr_printout(evsel, id, nr); 1084 1085 scnprintf(name, sizeof(name), "%s%s", 1086 perf_evsel__name(evsel), csv_output ? 
"" : " (msec)"); 1087 1088 fprintf(output, fmt_v, msecs, csv_sep); 1089 1090 if (csv_output) 1091 fprintf(output, "%s%s", evsel->unit, csv_sep); 1092 else 1093 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 1094 1095 fprintf(output, fmt_n, name); 1096 1097 if (evsel->cgrp) 1098 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1099 } 1100 1101 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1102 { 1103 int i; 1104 1105 if (!aggr_get_id) 1106 return 0; 1107 1108 if (stat_config.aggr_mode == AGGR_NONE) 1109 return id; 1110 1111 if (stat_config.aggr_mode == AGGR_GLOBAL) 1112 return 0; 1113 1114 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1115 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1116 1117 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1118 return cpu2; 1119 } 1120 return 0; 1121 } 1122 1123 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1124 { 1125 FILE *output = stat_config.output; 1126 double sc = evsel->scale; 1127 const char *fmt; 1128 1129 if (csv_output) { 1130 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1131 } else { 1132 if (big_num) 1133 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1134 else 1135 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1136 } 1137 1138 aggr_printout(evsel, id, nr); 1139 1140 fprintf(output, fmt, avg, csv_sep); 1141 1142 if (evsel->unit) 1143 fprintf(output, "%-*s%s", 1144 csv_output ? 0 : unit_width, 1145 evsel->unit, csv_sep); 1146 1147 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1148 1149 if (evsel->cgrp) 1150 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1151 } 1152 1153 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1154 char *prefix, u64 run, u64 ena, double noise) 1155 { 1156 struct perf_stat_output_ctx out; 1157 struct outstate os = { 1158 .fh = stat_config.output, 1159 .prefix = prefix ? 
prefix : "", 1160 .id = id, 1161 .nr = nr, 1162 .evsel = counter, 1163 }; 1164 print_metric_t pm = print_metric_std; 1165 void (*nl)(void *); 1166 1167 if (metric_only) { 1168 nl = new_line_metric; 1169 if (csv_output) 1170 pm = print_metric_only_csv; 1171 else 1172 pm = print_metric_only; 1173 } else 1174 nl = new_line_std; 1175 1176 if (csv_output && !metric_only) { 1177 static int aggr_fields[] = { 1178 [AGGR_GLOBAL] = 0, 1179 [AGGR_THREAD] = 1, 1180 [AGGR_NONE] = 1, 1181 [AGGR_SOCKET] = 2, 1182 [AGGR_CORE] = 2, 1183 }; 1184 1185 pm = print_metric_csv; 1186 nl = new_line_csv; 1187 os.nfields = 3; 1188 os.nfields += aggr_fields[stat_config.aggr_mode]; 1189 if (counter->cgrp) 1190 os.nfields++; 1191 } 1192 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1193 if (metric_only) { 1194 pm(&os, NULL, "", "", 0); 1195 return; 1196 } 1197 aggr_printout(counter, id, nr); 1198 1199 fprintf(stat_config.output, "%*s%s", 1200 csv_output ? 0 : 18, 1201 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1202 csv_sep); 1203 1204 if (counter->supported) 1205 print_free_counters_hint = 1; 1206 1207 fprintf(stat_config.output, "%-*s%s", 1208 csv_output ? 0 : unit_width, 1209 counter->unit, csv_sep); 1210 1211 fprintf(stat_config.output, "%*s", 1212 csv_output ? 
0 : -25, 1213 perf_evsel__name(counter)); 1214 1215 if (counter->cgrp) 1216 fprintf(stat_config.output, "%s%s", 1217 csv_sep, counter->cgrp->name); 1218 1219 if (!csv_output) 1220 pm(&os, NULL, NULL, "", 0); 1221 print_noise(counter, noise); 1222 print_running(run, ena); 1223 if (csv_output) 1224 pm(&os, NULL, NULL, "", 0); 1225 return; 1226 } 1227 1228 if (metric_only) 1229 /* nothing */; 1230 else if (nsec_counter(counter)) 1231 nsec_printout(id, nr, counter, uval); 1232 else 1233 abs_printout(id, nr, counter, uval); 1234 1235 out.print_metric = pm; 1236 out.new_line = nl; 1237 out.ctx = &os; 1238 out.force_header = false; 1239 1240 if (csv_output && !metric_only) { 1241 print_noise(counter, noise); 1242 print_running(run, ena); 1243 } 1244 1245 perf_stat__print_shadow_stats(counter, uval, 1246 first_shadow_cpu(counter, id), 1247 &out, &metric_events); 1248 if (!csv_output && !metric_only) { 1249 print_noise(counter, noise); 1250 print_running(run, ena); 1251 } 1252 } 1253 1254 static void aggr_update_shadow(void) 1255 { 1256 int cpu, s2, id, s; 1257 u64 val; 1258 struct perf_evsel *counter; 1259 1260 for (s = 0; s < aggr_map->nr; s++) { 1261 id = aggr_map->map[s]; 1262 evlist__for_each_entry(evsel_list, counter) { 1263 val = 0; 1264 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1265 s2 = aggr_get_id(evsel_list->cpus, cpu); 1266 if (s2 != id) 1267 continue; 1268 val += perf_counts(counter->counts, cpu, 0)->val; 1269 } 1270 val = val * counter->scale; 1271 perf_stat__update_shadow_stats(counter, &val, 1272 first_shadow_cpu(counter, id)); 1273 } 1274 } 1275 } 1276 1277 static void collect_all_aliases(struct perf_evsel *counter, 1278 void (*cb)(struct perf_evsel *counter, void *data, 1279 bool first), 1280 void *data) 1281 { 1282 struct perf_evsel *alias; 1283 1284 alias = list_prepare_entry(counter, &(evsel_list->entries), node); 1285 list_for_each_entry_continue (alias, &evsel_list->entries, node) { 1286 if (strcmp(perf_evsel__name(alias), 
perf_evsel__name(counter)) || 1287 alias->scale != counter->scale || 1288 alias->cgrp != counter->cgrp || 1289 strcmp(alias->unit, counter->unit) || 1290 nsec_counter(alias) != nsec_counter(counter)) 1291 break; 1292 alias->merged_stat = true; 1293 cb(alias, data, false); 1294 } 1295 } 1296 1297 static bool collect_data(struct perf_evsel *counter, 1298 void (*cb)(struct perf_evsel *counter, void *data, 1299 bool first), 1300 void *data) 1301 { 1302 if (counter->merged_stat) 1303 return false; 1304 cb(counter, data, true); 1305 if (!no_merge && counter->auto_merge_stats) 1306 collect_all_aliases(counter, cb, data); 1307 return true; 1308 } 1309 1310 struct aggr_data { 1311 u64 ena, run, val; 1312 int id; 1313 int nr; 1314 int cpu; 1315 }; 1316 1317 static void aggr_cb(struct perf_evsel *counter, void *data, bool first) 1318 { 1319 struct aggr_data *ad = data; 1320 int cpu, s2; 1321 1322 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1323 struct perf_counts_values *counts; 1324 1325 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1326 if (s2 != ad->id) 1327 continue; 1328 if (first) 1329 ad->nr++; 1330 counts = perf_counts(counter->counts, cpu, 0); 1331 /* 1332 * When any result is bad, make them all to give 1333 * consistent output in interval mode. 1334 */ 1335 if (counts->ena == 0 || counts->run == 0 || 1336 counter->counts->scaled == -1) { 1337 ad->ena = 0; 1338 ad->run = 0; 1339 break; 1340 } 1341 ad->val += counts->val; 1342 ad->ena += counts->ena; 1343 ad->run += counts->run; 1344 } 1345 } 1346 1347 static void print_aggr(char *prefix) 1348 { 1349 FILE *output = stat_config.output; 1350 struct perf_evsel *counter; 1351 int s, id, nr; 1352 double uval; 1353 u64 ena, run, val; 1354 bool first; 1355 1356 if (!(aggr_map || aggr_get_id)) 1357 return; 1358 1359 aggr_update_shadow(); 1360 1361 /* 1362 * With metric_only everything is on a single line. 1363 * Without each counter has its own line. 
1364 */
1365 for (s = 0; s < aggr_map->nr; s++) {
1366 struct aggr_data ad;
1367 if (prefix && metric_only)
1368 fprintf(output, "%s", prefix);
1369
1370 ad.id = id = aggr_map->map[s];
1371 first = true;
1372 evlist__for_each_entry(evsel_list, counter) {
1373 if (is_duration_time(counter))
1374 continue;
1375
/* reset the accumulator for this counter; ad.id stays set */
1376 ad.val = ad.ena = ad.run = 0;
1377 ad.nr = 0;
1378 if (!collect_data(counter, aggr_cb, &ad))
1379 continue;
1380 nr = ad.nr;
1381 ena = ad.ena;
1382 run = ad.run;
1383 val = ad.val;
/* in metric-only mode the aggregation columns are printed once per line */
1384 if (first && metric_only) {
1385 first = false;
1386 aggr_printout(counter, id, nr);
1387 }
1388 if (prefix && !metric_only)
1389 fprintf(output, "%s", prefix);
1390
1391 uval = val * counter->scale;
1392 printout(id, nr, counter, uval, prefix, run, ena, 1.0);
1393 if (!metric_only)
1394 fputc('\n', output);
1395 }
1396 if (metric_only)
1397 fputc('\n', output);
1398 }
1399 }
1400
/*
 * Print one line per thread of @counter: the val/ena/run counts of that
 * thread are summed over all CPUs of counter->cpus, the value is scaled
 * by counter->scale and passed to printout() with the thread index as id.
 */
1401 static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
1402 {
1403 FILE *output = stat_config.output;
1404 int nthreads = thread_map__nr(counter->threads);
1405 int ncpus = cpu_map__nr(counter->cpus);
1406 int cpu, thread;
1407 double uval;
1408
1409 for (thread = 0; thread < nthreads; thread++) {
1410 u64 ena = 0, run = 0, val = 0;
1411
1412 for (cpu = 0; cpu < ncpus; cpu++) {
1413 val += perf_counts(counter->counts, cpu, thread)->val;
1414 ena += perf_counts(counter->counts, cpu, thread)->ena;
1415 run += perf_counts(counter->counts, cpu, thread)->run;
1416 }
1417
1418 if (prefix)
1419 fprintf(output, "%s", prefix);
1420
1421 uval = val * counter->scale;
1422 printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
1423 fputc('\n', output);
1424 }
1425 }
1426
/* Accumulator for counter_aggr_cb(): sums of the averaged stats. */
1427 struct caggr_data {
1428 double avg, avg_enabled, avg_running;
1429 };
1430
/*
 * collect_data() callback for global aggregation: adds avg_stats() of
 * the counter's res_stats[0], [1] and [2] to avg, avg_enabled and
 * avg_running respectively.
 */
1431 static void counter_aggr_cb(struct perf_evsel *counter, void *data,
1432 bool first __maybe_unused)
1433 {
1434 struct caggr_data *cd = data;
1435 struct perf_stat_evsel *ps = counter->priv;
1436
1437 cd->avg +=
avg_stats(&ps->res_stats[0]);
1438 cd->avg_enabled += avg_stats(&ps->res_stats[1]);
1439 cd->avg_running += avg_stats(&ps->res_stats[2]);
1440 }
1441
1442 /*
1443 * Print out the results of a single counter:
1444 * aggregated counts in system-wide mode
1445 */
/*
 * Nothing is printed when collect_data() reports that @counter was
 * already merged into an earlier counter. The line terminator is left
 * to the caller in metric-only mode.
 */
1446 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
1447 {
1448 FILE *output = stat_config.output;
1449 double uval;
1450 struct caggr_data cd = { .avg = 0.0 };
1451
1452 if (!collect_data(counter, counter_aggr_cb, &cd))
1453 return;
1454
1455 if (prefix && !metric_only)
1456 fprintf(output, "%s", prefix);
1457
1458 uval = cd.avg * counter->scale;
1459 printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg);
1460 if (!metric_only)
1461 fprintf(output, "\n");
1462 }
1463
/*
 * collect_data() callback for unaggregated output: adds the counts of
 * CPU index ad->cpu (thread index 0) of @counter to the accumulator.
 */
1464 static void counter_cb(struct perf_evsel *counter, void *data,
1465 bool first __maybe_unused)
1466 {
1467 struct aggr_data *ad = data;
1468
1469 ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
1470 ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
1471 ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
1472 }
1473
1474 /*
1475 * Print out the results of a single counter:
1476 * does not use aggregated count in system-wide
1477 */
/*
 * Emits one line per CPU index of @counter. Returns as soon as
 * collect_data() reports the counter was merged into an earlier one.
 */
1478 static void print_counter(struct perf_evsel *counter, char *prefix)
1479 {
1480 FILE *output = stat_config.output;
1481 u64 ena, run, val;
1482 double uval;
1483 int cpu;
1484
1485 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1486 struct aggr_data ad = { .cpu = cpu };
1487
1488 if (!collect_data(counter, counter_cb, &ad))
1489 return;
1490 val = ad.val;
1491 ena = ad.ena;
1492 run = ad.run;
1493
1494 if (prefix)
1495 fprintf(output, "%s", prefix);
1496
1497 uval = val * counter->scale;
1498 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
1499
1500 fputc('\n', output);
1501 }
1502 }
1503
/*
 * Metric-only output without aggregation: one output line per CPU index
 * of evsel_list->cpus, holding the metrics of every counter except the
 * duration_time ones.
 */
1504 static void print_no_aggr_metric(char *prefix)
1505 {
1506 int cpu;
1507 int nrcpus = 0;
1508 struct perf_evsel *counter;
1509 u64 ena, run, val; 1510 double uval; 1511 1512 nrcpus = evsel_list->cpus->nr; 1513 for (cpu = 0; cpu < nrcpus; cpu++) { 1514 bool first = true; 1515 1516 if (prefix) 1517 fputs(prefix, stat_config.output); 1518 evlist__for_each_entry(evsel_list, counter) { 1519 if (is_duration_time(counter)) 1520 continue; 1521 if (first) { 1522 aggr_printout(counter, cpu, 0); 1523 first = false; 1524 } 1525 val = perf_counts(counter->counts, cpu, 0)->val; 1526 ena = perf_counts(counter->counts, cpu, 0)->ena; 1527 run = perf_counts(counter->counts, cpu, 0)->run; 1528 1529 uval = val * counter->scale; 1530 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1531 } 1532 fputc('\n', stat_config.output); 1533 } 1534 } 1535 1536 static int aggr_header_lens[] = { 1537 [AGGR_CORE] = 18, 1538 [AGGR_SOCKET] = 12, 1539 [AGGR_NONE] = 6, 1540 [AGGR_THREAD] = 24, 1541 [AGGR_GLOBAL] = 0, 1542 }; 1543 1544 static const char *aggr_header_csv[] = { 1545 [AGGR_CORE] = "core,cpus,", 1546 [AGGR_SOCKET] = "socket,cpus", 1547 [AGGR_NONE] = "cpu,", 1548 [AGGR_THREAD] = "comm-pid,", 1549 [AGGR_GLOBAL] = "" 1550 }; 1551 1552 static void print_metric_headers(const char *prefix, bool no_indent) 1553 { 1554 struct perf_stat_output_ctx out; 1555 struct perf_evsel *counter; 1556 struct outstate os = { 1557 .fh = stat_config.output 1558 }; 1559 1560 if (prefix) 1561 fprintf(stat_config.output, "%s", prefix); 1562 1563 if (!csv_output && !no_indent) 1564 fprintf(stat_config.output, "%*s", 1565 aggr_header_lens[stat_config.aggr_mode], ""); 1566 if (csv_output) { 1567 if (stat_config.interval) 1568 fputs("time,", stat_config.output); 1569 fputs(aggr_header_csv[stat_config.aggr_mode], 1570 stat_config.output); 1571 } 1572 1573 /* Print metrics headers only */ 1574 evlist__for_each_entry(evsel_list, counter) { 1575 if (is_duration_time(counter)) 1576 continue; 1577 os.evsel = counter; 1578 out.ctx = &os; 1579 out.print_metric = print_metric_header; 1580 out.new_line = new_line_metric; 1581 out.force_header = 
true;
1582 os.evsel = counter;
1583 perf_stat__print_shadow_stats(counter, 0,
1584 0,
1585 &out,
1586 &metric_events);
1587 }
1588 fputc('\n', stat_config.output);
1589 }
1590
/*
 * Interval-mode preamble: format the timestamp @ts followed by csv_sep
 * into @prefix and, on every 25th call starting with the first one,
 * print the column header (non-CSV) and/or the metric headers
 * (metric-only mode).
 *
 * NOTE(review): the sprintf() below is unbounded and this function does
 * not know the size of @prefix (print_counters() passes a 64-byte
 * buffer); csv_sep comes from the -x option, so a long separator looks
 * able to overflow it - confirm and consider passing the size.
 */
1591 static void print_interval(char *prefix, struct timespec *ts)
1592 {
1593 FILE *output = stat_config.output;
1594 static int num_print_interval;
1595
1596 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
1597
1598 if (num_print_interval == 0 && !csv_output) {
/*
 * In metric-only mode the header line is left unterminated here;
 * print_metric_headers() below continues it.
 */
1599 switch (stat_config.aggr_mode) {
1600 case AGGR_SOCKET:
1601 fprintf(output, "# time socket cpus");
1602 if (!metric_only)
1603 fprintf(output, " counts %*s events\n", unit_width, "unit");
1604 break;
1605 case AGGR_CORE:
1606 fprintf(output, "# time core cpus");
1607 if (!metric_only)
1608 fprintf(output, " counts %*s events\n", unit_width, "unit");
1609 break;
1610 case AGGR_NONE:
1611 fprintf(output, "# time CPU");
1612 if (!metric_only)
1613 fprintf(output, " counts %*s events\n", unit_width, "unit");
1614 break;
1615 case AGGR_THREAD:
1616 fprintf(output, "# time comm-pid");
1617 if (!metric_only)
1618 fprintf(output, " counts %*s events\n", unit_width, "unit");
1619 break;
1620 case AGGR_GLOBAL:
1621 default:
1622 fprintf(output, "# time");
1623 if (!metric_only)
1624 fprintf(output, " counts %*s events\n", unit_width, "unit");
/* falls through into the break of AGGR_UNSET */
1625 case AGGR_UNSET:
1626 break;
1627 }
1628 }
1629
1630 if (num_print_interval == 0 && metric_only)
1631 print_metric_headers(" ", true);
/* wrap the call counter so the headers are repeated every 25 calls */
1632 if (++num_print_interval == 25)
1633 num_print_interval = 0;
1634 }
1635
/*
 * Print the banner that precedes the counter values in non-interval
 * mode. Flushes stdout first; prints nothing else in CSV mode. The
 * banner names the measured target and, when run_count > 1, the number
 * of runs.
 */
1636 static void print_header(int argc, const char **argv)
1637 {
1638 FILE *output = stat_config.output;
1639 int i;
1640
1641 fflush(stdout);
1642
1643 if (!csv_output) {
1644 fprintf(output, "\n");
1645 fprintf(output, " Performance counter stats for ");
1646 if (target.system_wide)
1647 fprintf(output, "\'system wide");
1648 else if (target.cpu_list)
1649 fprintf(output, "\'CPU(s) %s", target.cpu_list);
1650 else if (!target__has_task(&target)) {
1651
fprintf(output, "\'%s", argv ? argv[0] : "pipe");
1652 for (i = 1; argv && (i < argc); i++)
1653 fprintf(output, " %s", argv[i]);
1654 } else if (target.pid)
1655 fprintf(output, "process id \'%s", target.pid);
1656 else
1657 fprintf(output, "thread id \'%s", target.tid);
1658
1659 fprintf(output, "\'");
1660 if (run_count > 1)
1661 fprintf(output, " (%d runs)", run_count);
1662 fprintf(output, ":\n\n");
1663 }
1664 }
1665
/*
 * Print the trailer of a non-interval, non-CSV report: the average
 * elapsed wall-clock time (plus its noise percentage when run_count is
 * above 1) and, if some supported counter could not be counted and the
 * kernel/nmi_watchdog sysctl reads as a positive value, a hint about
 * disabling the NMI watchdog.
 */
1666 static void print_footer(void)
1667 {
1668 FILE *output = stat_config.output;
1669 int n;
1670
1671 if (!null_run)
1672 fprintf(output, "\n");
1673 fprintf(output, " %17.9f seconds time elapsed",
1674 avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC);
1675 if (run_count > 1) {
1676 fprintf(output, " ");
1677 print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1678 avg_stats(&walltime_nsecs_stats));
1679 }
1680 fprintf(output, "\n\n");
1681
1682 if (print_free_counters_hint &&
1683 sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
1684 n > 0)
1685 fprintf(output,
1686 "Some events weren't counted. Try disabling the NMI watchdog:\n"
1687 " echo 0 > /proc/sys/kernel/nmi_watchdog\n"
1688 " perf stat ...\n"
1689 " echo 1 > /proc/sys/kernel/nmi_watchdog\n");
1690 }
1691
/*
 * Top-level printing entry point: emits the interval prefix or the
 * banner, then the counters according to stat_config.aggr_mode, then
 * the footer (non-interval, non-CSV only), and flushes the output.
 * @ts is only used in interval mode; @argc/@argv are only used for the
 * banner.
 */
1692 static void print_counters(struct timespec *ts, int argc, const char **argv)
1693 {
1694 int interval = stat_config.interval;
1695 struct perf_evsel *counter;
1696 char buf[64], *prefix = NULL;
1697
1698 /* Do not print anything if we record to the pipe.
*/
1699 if (STAT_RECORD && perf_stat.file.is_pipe)
1700 return;
1701
/* in interval mode buf receives the timestamp prefix for every line */
1702 if (interval)
1703 print_interval(prefix = buf, ts);
1704 else
1705 print_header(argc, argv);
1706
1707 if (metric_only) {
1708 static int num_print_iv;
1709
1710 if (num_print_iv == 0 && !interval)
1711 print_metric_headers(prefix, false);
/*
 * NOTE(review): post-increment here versus pre-increment in
 * print_interval(), so this counter wraps after 26 calls rather
 * than 25 - confirm whether the difference is intended.
 */
1712 if (num_print_iv++ == 25)
1713 num_print_iv = 0;
1714 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
1715 fprintf(stat_config.output, "%s", prefix);
1716 }
1717
/* duration_time counters are skipped in every mode below */
1718 switch (stat_config.aggr_mode) {
1719 case AGGR_CORE:
1720 case AGGR_SOCKET:
1721 print_aggr(prefix);
1722 break;
1723 case AGGR_THREAD:
1724 evlist__for_each_entry(evsel_list, counter) {
1725 if (is_duration_time(counter))
1726 continue;
1727 print_aggr_thread(counter, prefix);
1728 }
1729 break;
1730 case AGGR_GLOBAL:
1731 evlist__for_each_entry(evsel_list, counter) {
1732 if (is_duration_time(counter))
1733 continue;
1734 print_counter_aggr(counter, prefix);
1735 }
/* print_counter_aggr() leaves the metric-only line unterminated */
1736 if (metric_only)
1737 fputc('\n', stat_config.output);
1738 break;
1739 case AGGR_NONE:
1740 if (metric_only)
1741 print_no_aggr_metric(prefix);
1742 else {
1743 evlist__for_each_entry(evsel_list, counter) {
1744 if (is_duration_time(counter))
1745 continue;
1746 print_counter(counter, prefix);
1747 }
1748 }
1749 break;
1750 case AGGR_UNSET:
1751 default:
1752 break;
1753 }
1754
1755 if (!interval && !csv_output)
1756 print_footer();
1757
1758 fflush(stat_config.output);
1759 }
1760
/* Number of the last signal seen by skip_signal(), or -1 if none. */
1761 static volatile int signr = -1;
1762
/*
 * Signal handler: records the signal number in signr, requests
 * termination (done = 1) when there is no child or interval mode is
 * active, and forgets the child pid.
 */
1763 static void skip_signal(int signo)
1764 {
1765 if ((child_pid == -1) || stat_config.interval)
1766 done = 1;
1767
1768 signr = signo;
1769 /*
1770 * render child_pid harmless
1771 * won't send SIGTERM to a random
1772 * process in case of race condition
1773 * and fast PID recycling
1774 */
1775 child_pid = -1;
1776 }
1777
/*
 * Exit-time cleanup: sends SIGTERM to the child if one is still
 * recorded, then, if a signal was caught earlier, restores its default
 * disposition and re-raises it against this process.
 */
1778 static void sig_atexit(void)
1779 {
1780 sigset_t set, oset;
1781
1782 /*
1783 * avoid race condition with SIGCHLD handler
1784 * in skip_signal() which is modifying
child_pid
1785 * goal is to avoid send SIGTERM to a random
1786 * process
1787 */
1788 sigemptyset(&set);
1789 sigaddset(&set, SIGCHLD);
1790 sigprocmask(SIG_BLOCK, &set, &oset);
1791
1792 if (child_pid != -1)
1793 kill(child_pid, SIGTERM);
1794
1795 sigprocmask(SIG_SETMASK, &oset, NULL);
1796
1797 if (signr == -1)
1798 return;
1799
/* re-deliver the caught signal with its default action */
1800 signal(signr, SIG_DFL);
1801 kill(getpid(), signr);
1802 }
1803
/* Option callback for -B/--big-num: sets big_num_opt to 1, or 0 when negated. */
1804 static int stat__set_big_num(const struct option *opt __maybe_unused,
1805 const char *s __maybe_unused, int unset)
1806 {
1807 big_num_opt = unset ? 0 : 1;
1808 return 0;
1809 }
1810
/*
 * Option callback for --metric-only: stores the user's choice in
 * metric_only and sets force_metric_only, which the smi-cost and
 * topdown setup in add_default_attributes() check before turning
 * metric_only on by themselves.
 */
1811 static int enable_metric_only(const struct option *opt __maybe_unused,
1812 const char *s __maybe_unused, int unset)
1813 {
1814 force_metric_only = true;
1815 metric_only = !unset;
1816 return 0;
1817 }
1818
/* Option callback for -M/--metrics: forwards to metricgroup__parse_groups(). */
1819 static int parse_metric_groups(const struct option *opt,
1820 const char *str,
1821 int unset __maybe_unused)
1822 {
1823 return metricgroup__parse_groups(opt, str, &metric_events);
1824 }
1825
/*
 * Command line options of perf stat. The aggregation options
 * (--no-aggr, --per-socket, --per-core, --per-thread) all store into
 * stat_config.aggr_mode.
 */
1826 static const struct option stat_options[] = {
1827 OPT_BOOLEAN('T', "transaction", &transaction_run,
1828 "hardware transaction statistics"),
1829 OPT_CALLBACK('e', "event", &evsel_list, "event",
1830 "event selector. use 'perf list' to list available events",
1831 parse_events_option),
1832 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1833 "event filter", parse_filter),
1834 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1835 "child tasks do not inherit counters"),
1836 OPT_STRING('p', "pid", &target.pid, "pid",
1837 "stat events on existing process id"),
1838 OPT_STRING('t', "tid", &target.tid, "tid",
1839 "stat events on existing thread id"),
1840 OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1841 "system-wide collection from all CPUs"),
1842 OPT_BOOLEAN('g', "group", &group,
1843 "put the counters into a counter group"),
1844 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
1845 OPT_INCR('v', "verbose", &verbose,
1846 "be more verbose (show counter open errors, etc)"),
1847 OPT_INTEGER('r', "repeat", &run_count,
1848 "repeat command and print average + stddev (max: 100, forever: 0)"),
1849 OPT_BOOLEAN('n', "null", &null_run,
1850 "null run - dont start any counters"),
1851 OPT_INCR('d', "detailed", &detailed_run,
1852 "detailed run - start a lot of events"),
1853 OPT_BOOLEAN('S', "sync", &sync_run,
1854 "call sync() before starting a run"),
1855 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
1856 "print large numbers with thousands\' separators",
1857 stat__set_big_num),
1858 OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1859 "list of cpus to monitor in system-wide"),
1860 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
1861 "disable CPU count aggregation", AGGR_NONE),
1862 OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
1863 OPT_STRING('x', "field-separator", &csv_sep, "separator",
1864 "print counts with custom separator"),
1865 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1866 "monitor event in cgroup name only", parse_cgroups),
1867 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1868 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1869 OPT_INTEGER(0, "log-fd", &output_fd,
1870 "log output to fd, instead of stderr"),
1871 OPT_STRING(0, "pre", &pre_cmd, "command",
1872 "command to run prior to the measured command"),
1873 OPT_STRING(0, "post", &post_cmd, "command",
1874 "command to run after to the measured command"),
1875 OPT_UINTEGER('I', "interval-print", &stat_config.interval,
1876 "print counts at regular interval in ms (>= 10)"),
1877 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
1878 "aggregate counts per processor socket", AGGR_SOCKET),
1879 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
1880 "aggregate counts per physical processor core", AGGR_CORE),
1881 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
1882 "aggregate counts per thread", AGGR_THREAD),
1883 OPT_UINTEGER('D', "delay", &initial_delay,
1884 "ms to wait before starting measurement after program start"),
1885 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
1886 "Only print computed metrics. No raw values", enable_metric_only),
1887 OPT_BOOLEAN(0, "topdown", &topdown_run,
1888 "measure topdown level 1 statistics"),
1889 OPT_BOOLEAN(0, "smi-cost", &smi_cost,
1890 "measure SMI cost"),
1891 OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
1892 "monitor specified metrics or metric groups (separated by ,)",
1893 parse_metric_groups),
1894 OPT_END()
1895 };
1896
/* Adapter: cpu_map__get_socket() with a NULL data argument. */
1897 static int perf_stat__get_socket(struct cpu_map *map, int cpu)
1898 {
1899 return cpu_map__get_socket(map, cpu, NULL);
1900 }
1901
/* Adapter: cpu_map__get_core() with a NULL data argument. */
1902 static int perf_stat__get_core(struct cpu_map *map, int cpu)
1903 {
1904 return cpu_map__get_core(map, cpu, NULL);
1905 }
1906
/* Return the largest value stored in @map, or -1 when the map has no entries. */
1907 static int cpu_map__get_max(struct cpu_map *map)
1908 {
1909 int i, max = -1;
1910
1911 for (i = 0; i < map->nr; i++) {
1912 if (map->map[i] > max)
1913 max = map->map[i];
1914 }
1915
1916 return max;
1917 }
1918
/*
 * Cache of aggregation ids, indexed by CPU number; allocated in
 * perf_stat_init_aggr_mode() and filled lazily by perf_stat__get_aggr().
 */
1919 static struct cpu_map *cpus_aggr_map;
1920
/*
 * Return the aggregation id of the CPU at index @idx of @map, or -1
 * when @idx is out of range. The id is computed with @get_id the first
 * time (cache slot equal to -1) and served from cpus_aggr_map
 * afterwards.
 *
 * NOTE(review): the cache is indexed by CPU number without a bounds
 * check and presumably starts out filled with -1 by
 * cpu_map__empty_new() - confirm.
 */
1921 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
1922 {
1923 int cpu;
1924
1925
if (idx >= map->nr)
1926 return -1;
1927
1928 cpu = map->map[idx];
1929
/* compute the id only on first use; later calls hit the cache */
1930 if (cpus_aggr_map->map[cpu] == -1)
1931 cpus_aggr_map->map[cpu] = get_id(map, idx);
1932
1933 return cpus_aggr_map->map[cpu];
1934 }
1935
/* aggr_get_id implementation for per-socket mode, with caching. */
1936 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
1937 {
1938 return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
1939 }
1940
/* aggr_get_id implementation for per-core mode, with caching. */
1941 static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
1942 {
1943 return perf_stat__get_aggr(perf_stat__get_core, map, idx);
1944 }
1945
/*
 * Set up aggr_map and aggr_get_id for per-socket or per-core mode
 * (other modes need neither) and allocate the cpus_aggr_map id cache.
 * Returns 0 on success, -1 when the socket/core map cannot be built,
 * -ENOMEM when the cache cannot be allocated.
 */
1946 static int perf_stat_init_aggr_mode(void)
1947 {
1948 int nr;
1949
1950 switch (stat_config.aggr_mode) {
1951 case AGGR_SOCKET:
1952 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1953 perror("cannot build socket map");
1954 return -1;
1955 }
1956 aggr_get_id = perf_stat__get_socket_cached;
1957 break;
1958 case AGGR_CORE:
1959 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1960 perror("cannot build core map");
1961 return -1;
1962 }
1963 aggr_get_id = perf_stat__get_core_cached;
1964 break;
1965 case AGGR_NONE:
1966 case AGGR_GLOBAL:
1967 case AGGR_THREAD:
1968 case AGGR_UNSET:
1969 default:
1970 break;
1971 }
1972
1973 /*
1974 * The evsel_list->cpus is the base we operate on,
1975 * taking the highest cpu number to be the size of
1976 * the aggregation translate cpumap.
1977 */
1978 nr = cpu_map__get_max(evsel_list->cpus);
1979 cpus_aggr_map = cpu_map__empty_new(nr + 1);
1980 return cpus_aggr_map ?
0 : -ENOMEM; 1981 } 1982 1983 static void perf_stat__exit_aggr_mode(void) 1984 { 1985 cpu_map__put(aggr_map); 1986 cpu_map__put(cpus_aggr_map); 1987 aggr_map = NULL; 1988 cpus_aggr_map = NULL; 1989 } 1990 1991 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 1992 { 1993 int cpu; 1994 1995 if (idx > map->nr) 1996 return -1; 1997 1998 cpu = map->map[idx]; 1999 2000 if (cpu >= env->nr_cpus_avail) 2001 return -1; 2002 2003 return cpu; 2004 } 2005 2006 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 2007 { 2008 struct perf_env *env = data; 2009 int cpu = perf_env__get_cpu(env, map, idx); 2010 2011 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 2012 } 2013 2014 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 2015 { 2016 struct perf_env *env = data; 2017 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 2018 2019 if (cpu != -1) { 2020 int socket_id = env->cpu[cpu].socket_id; 2021 2022 /* 2023 * Encode socket in upper 16 bits 2024 * core_id is relative to socket, and 2025 * we need a global id. So we combine 2026 * socket + core id. 
2027 */
2028 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
2029 }
2030
2031 return core;
2032 }
2033
/* Build the socket map of @cpus from the topology stored in @env. */
2034 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
2035 struct cpu_map **sockp)
2036 {
2037 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
2038 }
2039
/* Build the core map of @cpus from the topology stored in @env. */
2040 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
2041 struct cpu_map **corep)
2042 {
2043 return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
2044 }
2045
/* aggr_get_id implementation for per-socket mode, using the session header env. */
2046 static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
2047 {
2048 return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
2049 }
2050
/* aggr_get_id implementation for per-core mode, using the session header env. */
2051 static int perf_stat__get_core_file(struct cpu_map *map, int idx)
2052 {
2053 return perf_env__get_core(map, idx, &perf_stat.session->header.env);
2054 }
2055
/*
 * Counterpart of perf_stat_init_aggr_mode() that takes the topology
 * from the header env of @st's session. Sets aggr_map and aggr_get_id
 * for per-socket or per-core mode; other modes need nothing.
 * Returns 0 on success, -1 when the map cannot be built.
 */
2056 static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
2057 {
2058 struct perf_env *env = &st->session->header.env;
2059
2060 switch (stat_config.aggr_mode) {
2061 case AGGR_SOCKET:
2062 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
2063 perror("cannot build socket map");
2064 return -1;
2065 }
2066 aggr_get_id = perf_stat__get_socket_file;
2067 break;
2068 case AGGR_CORE:
2069 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
2070 perror("cannot build core map");
2071 return -1;
2072 }
2073 aggr_get_id = perf_stat__get_core_file;
2074 break;
2075 case AGGR_NONE:
2076 case AGGR_GLOBAL:
2077 case AGGR_THREAD:
2078 case AGGR_UNSET:
2079 default:
2080 break;
2081 }
2082
2083 return 0;
2084 }
2085
/*
 * Reduce the NULL-terminated list @attr, in place, to the events that
 * pmu_have_event("cpu", ...) reports, and build a comma-separated
 * string of them in a newly malloc()ed buffer stored in *@str (wrapped
 * in braces when @use_group is set). The string is empty when no event
 * is left.
 *
 * Returns 0 on success, -1 when the allocation fails. The callers in
 * this file free *@str themselves.
 */
2086 static int topdown_filter_events(const char **attr, char **str, bool use_group)
2087 {
2088 int off = 0;
2089 int i;
2090 int len = 0;
2091 char *s;
2092
/* compact the supported events to the front; off counts the dropped ones */
2093 for (i = 0; attr[i]; i++) {
2094 if (pmu_have_event("cpu", attr[i])) {
2095 len += strlen(attr[i]) + 1;
2096 attr[i - off] = attr[i];
2097 } else
2098 off++;
2099 }
2100 attr[i - off] = NULL;
2101
/* every name plus its separator, one NUL, two bytes for the braces */
2102 *str = malloc(len + 1
+ 2);
2103 if (!*str)
2104 return -1;
2105 s = *str;
/* nothing supported: hand back an empty string */
2106 if (i - off == 0) {
2107 *s = 0;
2108 return 0;
2109 }
2110 if (use_group)
2111 *s++ = '{';
2112 for (i = 0; attr[i]; i++) {
2113 strcpy(s, attr[i]);
2114 s += strlen(s);
2115 *s++ = ',';
2116 }
/* the trailing ',' becomes the closing brace or the terminator */
2117 if (use_group) {
2118 s[-1] = '}';
2119 *s = 0;
2120 } else
2121 s[-1] = 0;
2122 return 0;
2123 }
2124
/*
 * Weak default: topdown events are not grouped and no warning is
 * requested.
 */
2125 __weak bool arch_topdown_check_group(bool *warn)
2126 {
2127 *warn = false;
2128 return false;
2129 }
2130
/* Weak default: no warning to print. */
2131 __weak void arch_topdown_group_warn(void)
2132 {
2133 }
2134
2135 /*
2136 * Add default attributes, if there were no attributes specified or
2137 * if -d/--detailed, -d -d or -d -d -d is used:
2138 */
2139 static int add_default_attributes(void)
2140 {
2141 int err;
/* first part of the default set: software events plus cycles */
2142 struct perf_event_attr default_attrs0[] = {
2143
2144 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
2145 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
2146 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
2147 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
2148
2149 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
2150 };
/* kept in separate tables because each is only added when the cpu PMU has the event */
2151 struct perf_event_attr frontend_attrs[] = {
2152 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
2153 };
2154 struct perf_event_attr backend_attrs[] = {
2155 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
2156 };
/* second part of the default set, added after the stalled-cycles events */
2157 struct perf_event_attr default_attrs1[] = {
2158 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
2159 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
2160 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
2161
2162 };
2163
2164 /*
2165 * Detailed stats (-d), covering the L1 and last level data caches:
2166 */
2167 struct perf_event_attr detailed_attrs[] = {
2168
/* L1D read accesses */
2169 { .type = PERF_TYPE_HW_CACHE,
2170 .config =
2171 PERF_COUNT_HW_CACHE_L1D << 0 |
2172 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2173 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2174
/* L1D read misses */
2175 { .type = PERF_TYPE_HW_CACHE,
2176 .config =
2177 PERF_COUNT_HW_CACHE_L1D << 0 |
2178 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2179 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2180
/* LL read accesses */
2181 { .type = PERF_TYPE_HW_CACHE,
2182 .config =
2183 PERF_COUNT_HW_CACHE_LL << 0 |
2184 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2185 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2186
/* LL read misses */
2187 { .type = PERF_TYPE_HW_CACHE,
2188 .config =
2189 PERF_COUNT_HW_CACHE_LL << 0 |
2190 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2191 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2192 };
2193
2194 /*
2195 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
2196 */
2197 struct perf_event_attr very_detailed_attrs[] = {
2198
/* L1I read accesses */
2199 { .type = PERF_TYPE_HW_CACHE,
2200 .config =
2201 PERF_COUNT_HW_CACHE_L1I << 0 |
2202 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2203 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2204
/* L1I read misses */
2205 { .type = PERF_TYPE_HW_CACHE,
2206 .config =
2207 PERF_COUNT_HW_CACHE_L1I << 0 |
2208 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2209 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2210
/* DTLB read accesses */
2211 { .type = PERF_TYPE_HW_CACHE,
2212 .config =
2213 PERF_COUNT_HW_CACHE_DTLB << 0 |
2214 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2215 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2216
/* DTLB read misses */
2217 { .type = PERF_TYPE_HW_CACHE,
2218 .config =
2219 PERF_COUNT_HW_CACHE_DTLB << 0 |
2220 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2221 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2222
/* ITLB read accesses */
2223 { .type = PERF_TYPE_HW_CACHE,
2224 .config =
2225 PERF_COUNT_HW_CACHE_ITLB << 0 |
2226 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2227 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2228
/* ITLB read misses */
2229 { .type = PERF_TYPE_HW_CACHE,
2230 .config =
2231 PERF_COUNT_HW_CACHE_ITLB << 0 |
2232 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
2233 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2234
2235 };
2236
2237 /*
2238 * Very, very detailed stats (-d -d -d), adding prefetch events:
2239 */
2240 struct perf_event_attr very_very_detailed_attrs[] = {
2241
/* L1D prefetch accesses */
2242 { .type = PERF_TYPE_HW_CACHE,
2243 .config =
2244 PERF_COUNT_HW_CACHE_L1D << 0 |
2245 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
2246 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
2247
/* L1D prefetch misses */
2248 { .type = PERF_TYPE_HW_CACHE,
2249 .config =
2250 PERF_COUNT_HW_CACHE_L1D << 0 |
2251 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
2252 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2253 };
2254
2255 /* Set attrs if no event is selected and !null_run: */
2256 if (null_run)
2257 return 0;
2258
/* -T: use the full transaction event list only when the cpu PMU has cycles-ct and el-start */
2259 if (transaction_run) {
2260 if (pmu_have_event("cpu", "cycles-ct") &&
2261 pmu_have_event("cpu", "el-start"))
2262 err = parse_events(evsel_list, transaction_attrs, NULL);
2263 else
2264 err = parse_events(evsel_list, transaction_limited_attrs, NULL);
2265 if (err) {
2266 fprintf(stderr, "Cannot set up transaction events\n");
2267 return -1;
2268 }
2269 return 0;
2270 }
2271
2272 if (smi_cost) {
2273 int smi;
2274
2275 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
2276 fprintf(stderr, "freeze_on_smi is not supported.\n");
2277 return -1;
2278 }
2279
/* switch freeze_on_smi on if it was off and record that in smi_reset */
2280 if (!smi) {
2281 if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
2282 fprintf(stderr, "Failed to set freeze_on_smi.\n");
2283 return -1;
2284 }
2285 smi_reset = true;
2286 }
2287
2288 if (pmu_have_event("msr", "aperf") &&
2289 pmu_have_event("msr", "smi")) {
/* default to metric-only output unless --metric-only was given explicitly */
2290 if (!force_metric_only)
2291 metric_only = true;
2292 err = parse_events(evsel_list, smi_cost_attrs, NULL);
2293 } else {
2294 fprintf(stderr, "To measure SMI cost, it needs "
2295 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
2296 return -1;
2297 }
2298 if (err) {
2299 fprintf(stderr, "Cannot set up SMI cost events\n");
2300 return -1;
2301 }
2302 return 0;
2303 }
2304
2305 if (topdown_run) {
2306 char *str = NULL;
2307 bool warn = false;
2308
/* only the default (global) mode or an explicit --per-core is accepted */
2309 if (stat_config.aggr_mode != AGGR_GLOBAL &&
2310 stat_config.aggr_mode != AGGR_CORE) {
2311 pr_err("top down event configuration requires --per-core
mode\n"); 2312 return -1; 2313 } 2314 stat_config.aggr_mode = AGGR_CORE; 2315 if (nr_cgroups || !target__has_cpu(&target)) { 2316 pr_err("top down event configuration requires system-wide mode (-a)\n"); 2317 return -1; 2318 } 2319 2320 if (!force_metric_only) 2321 metric_only = true; 2322 if (topdown_filter_events(topdown_attrs, &str, 2323 arch_topdown_check_group(&warn)) < 0) { 2324 pr_err("Out of memory\n"); 2325 return -1; 2326 } 2327 if (topdown_attrs[0] && str) { 2328 if (warn) 2329 arch_topdown_group_warn(); 2330 err = parse_events(evsel_list, str, NULL); 2331 if (err) { 2332 fprintf(stderr, 2333 "Cannot set up top down events %s: %d\n", 2334 str, err); 2335 free(str); 2336 return -1; 2337 } 2338 } else { 2339 fprintf(stderr, "System does not support topdown\n"); 2340 return -1; 2341 } 2342 free(str); 2343 } 2344 2345 if (!evsel_list->nr_entries) { 2346 if (target__has_cpu(&target)) 2347 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2348 2349 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 2350 return -1; 2351 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 2352 if (perf_evlist__add_default_attrs(evsel_list, 2353 frontend_attrs) < 0) 2354 return -1; 2355 } 2356 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 2357 if (perf_evlist__add_default_attrs(evsel_list, 2358 backend_attrs) < 0) 2359 return -1; 2360 } 2361 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 2362 return -1; 2363 } 2364 2365 /* Detailed events get appended to the event list: */ 2366 2367 if (detailed_run < 1) 2368 return 0; 2369 2370 /* Append detailed run extra attributes: */ 2371 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 2372 return -1; 2373 2374 if (detailed_run < 2) 2375 return 0; 2376 2377 /* Append very detailed run extra attributes: */ 2378 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 2379 return -1; 2380 2381 if (detailed_run < 3) 2382 return 0; 2383 2384 /* Append very, 
very detailed run extra attributes: */
2385 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
2386 }
2387
/* Usage text for "perf stat record". */
2388 static const char * const stat_record_usage[] = {
2389 "perf stat record [<options>]",
2390 NULL,
2391 };
2392
/*
 * Enable every header feature from HEADER_FIRST_FEATURE up to (not
 * including) HEADER_LAST_FEATURE on @session, then clear the build-id,
 * tracing-data, branch-stack and auxtrace features again.
 */
2393 static void init_features(struct perf_session *session)
2394 {
2395 int feat;
2396
2397 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
2398 perf_header__set_feat(&session->header, feat);
2399
2400 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
2401 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
2402 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
2403 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
2404 }
2405
/*
 * Set up "perf stat record": parse the options, reject the use of -r,
 * create the output session on perf_stat.file and attach evsel_list to
 * it. Returns the argc left over by parse_options(), or -1 on error.
 */
2406 static int __cmd_record(int argc, const char **argv)
2407 {
2408 struct perf_session *session;
2409 struct perf_data_file *file = &perf_stat.file;
2410
2411 argc = parse_options(argc, argv, stat_options, stat_record_usage,
2412 PARSE_OPT_STOP_AT_NON_OPTION);
2413
2414 if (output_name)
2415 file->path = output_name;
2416
2417 if (run_count != 1 || forever) {
2418 pr_err("Cannot use -r option with perf stat record.\n");
2419 return -1;
2420 }
2421
2422 session = perf_session__new(file, false, NULL);
2423 if (session == NULL) {
2424 pr_err("Perf session creation failed.\n");
2425 return -1;
2426 }
2427
2428 init_features(session);
2429
2430 session->evlist = evsel_list;
2431 perf_stat.session = session;
2432 perf_stat.record = true;
2433 return argc;
2434 }
2435
/*
 * Handler for a stat round event: processes every counter of
 * evsel_list, accounts the wall-clock time of a final round and prints
 * the counters, using the command line stored in the session header
 * env for the banner.
 */
2436 static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
2437 union perf_event *event,
2438 struct perf_session *session)
2439 {
2440 struct stat_round_event *stat_round = &event->stat_round;
2441 struct perf_evsel *counter;
2442 struct timespec tsh, *ts = NULL;
2443 const char **argv = session->header.env.cmdline_argv;
2444 int argc = session->header.env.nr_cmdline;
2445
2446 evlist__for_each_entry(evsel_list, counter)
2447
perf_stat_process_counter(&stat_config, counter); 2448 2449 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 2450 update_stats(&walltime_nsecs_stats, stat_round->time); 2451 2452 if (stat_config.interval && stat_round->time) { 2453 tsh.tv_sec = stat_round->time / NSEC_PER_SEC; 2454 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC; 2455 ts = &tsh; 2456 } 2457 2458 print_counters(ts, argc, argv); 2459 return 0; 2460 } 2461 2462 static 2463 int process_stat_config_event(struct perf_tool *tool, 2464 union perf_event *event, 2465 struct perf_session *session __maybe_unused) 2466 { 2467 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2468 2469 perf_event__read_stat_config(&stat_config, &event->stat_config); 2470 2471 if (cpu_map__empty(st->cpus)) { 2472 if (st->aggr_mode != AGGR_UNSET) 2473 pr_warning("warning: processing task data, aggregation mode not set\n"); 2474 return 0; 2475 } 2476 2477 if (st->aggr_mode != AGGR_UNSET) 2478 stat_config.aggr_mode = st->aggr_mode; 2479 2480 if (perf_stat.file.is_pipe) 2481 perf_stat_init_aggr_mode(); 2482 else 2483 perf_stat_init_aggr_mode_file(st); 2484 2485 return 0; 2486 } 2487 2488 static int set_maps(struct perf_stat *st) 2489 { 2490 if (!st->cpus || !st->threads) 2491 return 0; 2492 2493 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2494 return -EINVAL; 2495 2496 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2497 2498 if (perf_evlist__alloc_stats(evsel_list, true)) 2499 return -ENOMEM; 2500 2501 st->maps_allocated = true; 2502 return 0; 2503 } 2504 2505 static 2506 int process_thread_map_event(struct perf_tool *tool, 2507 union perf_event *event, 2508 struct perf_session *session __maybe_unused) 2509 { 2510 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2511 2512 if (st->threads) { 2513 pr_warning("Extra thread map event, ignoring.\n"); 2514 return 0; 2515 } 2516 2517 st->threads = thread_map__new_event(&event->thread_map); 2518 if (!st->threads) 2519 return 
-ENOMEM; 2520 2521 return set_maps(st); 2522 } 2523 2524 static 2525 int process_cpu_map_event(struct perf_tool *tool, 2526 union perf_event *event, 2527 struct perf_session *session __maybe_unused) 2528 { 2529 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2530 struct cpu_map *cpus; 2531 2532 if (st->cpus) { 2533 pr_warning("Extra cpu map event, ignoring.\n"); 2534 return 0; 2535 } 2536 2537 cpus = cpu_map__new_data(&event->cpu_map.data); 2538 if (!cpus) 2539 return -ENOMEM; 2540 2541 st->cpus = cpus; 2542 return set_maps(st); 2543 } 2544 2545 static const char * const stat_report_usage[] = { 2546 "perf stat report [<options>]", 2547 NULL, 2548 }; 2549 2550 static struct perf_stat perf_stat = { 2551 .tool = { 2552 .attr = perf_event__process_attr, 2553 .event_update = perf_event__process_event_update, 2554 .thread_map = process_thread_map_event, 2555 .cpu_map = process_cpu_map_event, 2556 .stat_config = process_stat_config_event, 2557 .stat = perf_event__process_stat_event, 2558 .stat_round = process_stat_round_event, 2559 }, 2560 .aggr_mode = AGGR_UNSET, 2561 }; 2562 2563 static int __cmd_report(int argc, const char **argv) 2564 { 2565 struct perf_session *session; 2566 const struct option options[] = { 2567 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2568 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2569 "aggregate counts per processor socket", AGGR_SOCKET), 2570 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2571 "aggregate counts per physical processor core", AGGR_CORE), 2572 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2573 "disable CPU count aggregation", AGGR_NONE), 2574 OPT_END() 2575 }; 2576 struct stat st; 2577 int ret; 2578 2579 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2580 2581 if (!input_name || !strlen(input_name)) { 2582 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2583 input_name = "-"; 2584 else 2585 input_name = "perf.data"; 2586 } 2587 2588 
perf_stat.file.path = input_name; 2589 perf_stat.file.mode = PERF_DATA_MODE_READ; 2590 2591 session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); 2592 if (session == NULL) 2593 return -1; 2594 2595 perf_stat.session = session; 2596 stat_config.output = stderr; 2597 evsel_list = session->evlist; 2598 2599 ret = perf_session__process_events(session); 2600 if (ret) 2601 return ret; 2602 2603 perf_session__delete(session); 2604 return 0; 2605 } 2606 2607 static void setup_system_wide(int forks) 2608 { 2609 /* 2610 * Make system wide (-a) the default target if 2611 * no target was specified and one of following 2612 * conditions is met: 2613 * 2614 * - there's no workload specified 2615 * - there is workload specified but all requested 2616 * events are system wide events 2617 */ 2618 if (!target__none(&target)) 2619 return; 2620 2621 if (!forks) 2622 target.system_wide = true; 2623 else { 2624 struct perf_evsel *counter; 2625 2626 evlist__for_each_entry(evsel_list, counter) { 2627 if (!counter->system_wide) 2628 return; 2629 } 2630 2631 if (evsel_list->nr_entries) 2632 target.system_wide = true; 2633 } 2634 } 2635 2636 int cmd_stat(int argc, const char **argv) 2637 { 2638 const char * const stat_usage[] = { 2639 "perf stat [<options>] [<command>]", 2640 NULL 2641 }; 2642 int status = -EINVAL, run_idx; 2643 const char *mode; 2644 FILE *output = stderr; 2645 unsigned int interval; 2646 const char * const stat_subcommands[] = { "record", "report" }; 2647 2648 setlocale(LC_ALL, ""); 2649 2650 evsel_list = perf_evlist__new(); 2651 if (evsel_list == NULL) 2652 return -ENOMEM; 2653 2654 parse_events__shrink_config_terms(); 2655 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2656 (const char **) stat_usage, 2657 PARSE_OPT_STOP_AT_NON_OPTION); 2658 perf_stat__collect_metric_expr(evsel_list); 2659 perf_stat__init_shadow_stats(); 2660 2661 if (csv_sep) { 2662 csv_output = true; 2663 if (!strcmp(csv_sep, "\\t")) 2664 csv_sep = "\t"; 
2665 } else 2666 csv_sep = DEFAULT_SEPARATOR; 2667 2668 if (argc && !strncmp(argv[0], "rec", 3)) { 2669 argc = __cmd_record(argc, argv); 2670 if (argc < 0) 2671 return -1; 2672 } else if (argc && !strncmp(argv[0], "rep", 3)) 2673 return __cmd_report(argc, argv); 2674 2675 interval = stat_config.interval; 2676 2677 /* 2678 * For record command the -o is already taken care of. 2679 */ 2680 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2681 output = NULL; 2682 2683 if (output_name && output_fd) { 2684 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2685 parse_options_usage(stat_usage, stat_options, "o", 1); 2686 parse_options_usage(NULL, stat_options, "log-fd", 0); 2687 goto out; 2688 } 2689 2690 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2691 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2692 goto out; 2693 } 2694 2695 if (metric_only && run_count > 1) { 2696 fprintf(stderr, "--metric-only is not supported with -r\n"); 2697 goto out; 2698 } 2699 2700 if (output_fd < 0) { 2701 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2702 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2703 goto out; 2704 } 2705 2706 if (!output) { 2707 struct timespec tm; 2708 mode = append_file ? "a" : "w"; 2709 2710 output = fopen(output_name, mode); 2711 if (!output) { 2712 perror("failed to create output file"); 2713 return -1; 2714 } 2715 clock_gettime(CLOCK_REALTIME, &tm); 2716 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2717 } else if (output_fd > 0) { 2718 mode = append_file ? "a" : "w"; 2719 output = fdopen(output_fd, mode); 2720 if (!output) { 2721 perror("Failed opening logfd"); 2722 return -errno; 2723 } 2724 } 2725 2726 stat_config.output = output; 2727 2728 /* 2729 * let the spreadsheet do the pretty-printing 2730 */ 2731 if (csv_output) { 2732 /* User explicitly passed -B? 
*/ 2733 if (big_num_opt == 1) { 2734 fprintf(stderr, "-B option not supported with -x\n"); 2735 parse_options_usage(stat_usage, stat_options, "B", 1); 2736 parse_options_usage(NULL, stat_options, "x", 1); 2737 goto out; 2738 } else /* Nope, so disable big number formatting */ 2739 big_num = false; 2740 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2741 big_num = false; 2742 2743 setup_system_wide(argc); 2744 2745 if (run_count < 0) { 2746 pr_err("Run count must be a positive number\n"); 2747 parse_options_usage(stat_usage, stat_options, "r", 1); 2748 goto out; 2749 } else if (run_count == 0) { 2750 forever = true; 2751 run_count = 1; 2752 } 2753 2754 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 2755 fprintf(stderr, "The --per-thread option is only available " 2756 "when monitoring via -p -t options.\n"); 2757 parse_options_usage(NULL, stat_options, "p", 1); 2758 parse_options_usage(NULL, stat_options, "t", 1); 2759 goto out; 2760 } 2761 2762 /* 2763 * no_aggr, cgroup are for system-wide only 2764 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2765 */ 2766 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2767 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2768 !target__has_cpu(&target)) { 2769 fprintf(stderr, "both cgroup and no-aggregation " 2770 "modes only available in system-wide mode\n"); 2771 2772 parse_options_usage(stat_usage, stat_options, "G", 1); 2773 parse_options_usage(NULL, stat_options, "A", 1); 2774 parse_options_usage(NULL, stat_options, "a", 1); 2775 goto out; 2776 } 2777 2778 if (add_default_attributes()) 2779 goto out; 2780 2781 target__validate(&target); 2782 2783 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 2784 if (target__has_task(&target)) { 2785 pr_err("Problems finding threads of monitor\n"); 2786 parse_options_usage(stat_usage, stat_options, "p", 1); 2787 parse_options_usage(NULL, stat_options, "t", 1); 2788 } else if (target__has_cpu(&target)) { 2789 
perror("failed to parse CPUs map"); 2790 parse_options_usage(stat_usage, stat_options, "C", 1); 2791 parse_options_usage(NULL, stat_options, "a", 1); 2792 } 2793 goto out; 2794 } 2795 2796 /* 2797 * Initialize thread_map with comm names, 2798 * so we could print it out on output. 2799 */ 2800 if (stat_config.aggr_mode == AGGR_THREAD) 2801 thread_map__read_comms(evsel_list->threads); 2802 2803 if (interval && interval < 100) { 2804 if (interval < 10) { 2805 pr_err("print interval must be >= 10ms\n"); 2806 parse_options_usage(stat_usage, stat_options, "I", 1); 2807 goto out; 2808 } else 2809 pr_warning("print interval < 100ms. " 2810 "The overhead percentage could be high in some cases. " 2811 "Please proceed with caution.\n"); 2812 } 2813 2814 if (perf_evlist__alloc_stats(evsel_list, interval)) 2815 goto out; 2816 2817 if (perf_stat_init_aggr_mode()) 2818 goto out; 2819 2820 /* 2821 * We dont want to block the signals - that would cause 2822 * child tasks to inherit that and Ctrl-C would not work. 2823 * What we want is for Ctrl-C to work in the exec()-ed 2824 * task, but being ignored by perf stat itself: 2825 */ 2826 atexit(sig_atexit); 2827 if (!forever) 2828 signal(SIGINT, skip_signal); 2829 signal(SIGCHLD, skip_signal); 2830 signal(SIGALRM, skip_signal); 2831 signal(SIGABRT, skip_signal); 2832 2833 status = 0; 2834 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 2835 if (run_count != 1 && verbose > 0) 2836 fprintf(output, "[ perf stat: executing run #%d ... 
]\n", 2837 run_idx + 1); 2838 2839 status = run_perf_stat(argc, argv); 2840 if (forever && status != -1) { 2841 print_counters(NULL, argc, argv); 2842 perf_stat__reset_stats(); 2843 } 2844 } 2845 2846 if (!forever && status != -1 && !interval) 2847 print_counters(NULL, argc, argv); 2848 2849 if (STAT_RECORD) { 2850 /* 2851 * We synthesize the kernel mmap record just so that older tools 2852 * don't emit warnings about not being able to resolve symbols 2853 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2854 * a saner message about no samples being in the perf.data file. 2855 * 2856 * This also serves to suppress a warning about f_header.data.size == 0 2857 * in header.c at the moment 'perf stat record' gets introduced, which 2858 * is not really needed once we start adding the stat specific PERF_RECORD_ 2859 * records, but the need to suppress the kptr_restrict messages in older 2860 * tools remain -acme 2861 */ 2862 int fd = perf_data_file__fd(&perf_stat.file); 2863 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2864 process_synthesized_event, 2865 &perf_stat.session->machines.host); 2866 if (err) { 2867 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 2868 "older tools may produce warnings about this file\n."); 2869 } 2870 2871 if (!interval) { 2872 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 2873 pr_err("failed to write stat round event\n"); 2874 } 2875 2876 if (!perf_stat.file.is_pipe) { 2877 perf_stat.session->header.data_size += perf_stat.bytes_written; 2878 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2879 } 2880 2881 perf_session__delete(perf_stat.session); 2882 } 2883 2884 perf_stat__exit_aggr_mode(); 2885 perf_evlist__free_stats(evsel_list); 2886 out: 2887 if (smi_cost && smi_reset) 2888 sysfs__write_int(FREEZE_ON_SMI_PATH, 0); 2889 2890 perf_evlist__delete(evsel_list); 2891 return status; 2892 } 2893