1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include <subcmd/parse-options.h> 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/drv_configs.h" 56 #include "util/color.h" 57 #include "util/stat.h" 58 #include "util/header.h" 59 #include "util/cpumap.h" 60 #include "util/thread.h" 61 #include "util/thread_map.h" 62 #include "util/counts.h" 63 #include "util/group.h" 64 #include "util/session.h" 65 #include "util/tool.h" 66 #include "util/string2.h" 67 #include "util/metricgroup.h" 68 #include "asm/bug.h" 69 70 #include <linux/time64.h> 71 #include <api/fs/fs.h> 72 #include <errno.h> 73 #include <signal.h> 74 #include <stdlib.h> 75 #include <sys/prctl.h> 76 #include <inttypes.h> 77 #include <locale.h> 78 #include <math.h> 79 #include <sys/types.h> 80 #include <sys/stat.h> 81 #include <sys/wait.h> 82 #include <unistd.h> 83 84 #include "sane_ctype.h" 85 86 #define DEFAULT_SEPARATOR " " 87 #define CNTR_NOT_SUPPORTED "<not supported>" 88 #define CNTR_NOT_COUNTED "<not counted>" 89 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" 90 91 static void print_counters(struct timespec *ts, int argc, const char **argv); 92 93 /* Default events used for perf stat -T */ 94 static const char *transaction_attrs = { 95 "task-clock," 96 "{" 97 "instructions," 98 "cycles," 99 "cpu/cycles-t/," 100 "cpu/tx-start/," 101 "cpu/el-start/," 102 "cpu/cycles-ct/" 103 "}" 104 }; 105 106 /* More limited version when the CPU does not have all events. */ 107 static const char * transaction_limited_attrs = { 108 "task-clock," 109 "{" 110 "instructions," 111 "cycles," 112 "cpu/cycles-t/," 113 "cpu/tx-start/" 114 "}" 115 }; 116 117 static const char * topdown_attrs[] = { 118 "topdown-total-slots", 119 "topdown-slots-retired", 120 "topdown-recovery-bubbles", 121 "topdown-fetch-bubbles", 122 "topdown-slots-issued", 123 NULL, 124 }; 125 126 static const char *smi_cost_attrs = { 127 "{" 128 "msr/aperf/," 129 "msr/smi/," 130 "cycles" 131 "}" 132 }; 133 134 static struct perf_evlist *evsel_list; 135 136 static struct rblist metric_events; 137 138 static struct target target = { 139 .uid = UINT_MAX, 140 }; 141 142 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 143 144 static int run_count = 1; 145 static bool no_inherit = false; 146 static volatile pid_t child_pid = -1; 147 static bool null_run = false; 148 static int detailed_run = 0; 149 static bool transaction_run; 150 static bool topdown_run = false; 151 static bool smi_cost = false; 152 static bool smi_reset = false; 153 static bool big_num = true; 154 static int big_num_opt = -1; 155 static const char *csv_sep = NULL; 156 static bool csv_output = false; 157 static bool group = false; 158 static const char *pre_cmd = NULL; 159 static const char *post_cmd = NULL; 160 static bool sync_run = false; 161 static unsigned int initial_delay = 0; 162 static unsigned int unit_width = 4; /* strlen("unit") */ 163 static bool forever = false; 164 static bool metric_only = false; 165 static bool force_metric_only = false; 166 static bool no_merge = false; 167 static bool walltime_run_table = false; 168 static struct timespec ref_time; 169 static struct cpu_map *aggr_map; 170 static aggr_get_id_t aggr_get_id; 171 static bool append_file; 172 static bool interval_count; 173 static const char *output_name; 174 static int output_fd; 175 static int print_free_counters_hint; 176 static int print_mixed_hw_group_error; 177 static u64 *walltime_run; 178 179 struct perf_stat { 180 bool record; 181 struct perf_data data; 182 struct perf_session *session; 183 u64 bytes_written; 184 struct perf_tool tool; 185 bool maps_allocated; 186 struct cpu_map *cpus; 187 struct thread_map *threads; 188 enum aggr_mode aggr_mode; 189 }; 190 191 static struct perf_stat perf_stat; 192 #define STAT_RECORD perf_stat.record 193 194 static volatile int done = 0; 195 196 static struct perf_stat_config stat_config = { 197 .aggr_mode = AGGR_GLOBAL, 198 .scale = true, 199 }; 200 201 static bool is_duration_time(struct perf_evsel *evsel) 202 { 203 return !strcmp(evsel->name, "duration_time"); 204 } 205 206 static inline void diff_timespec(struct timespec *r, struct timespec *a, 207 struct timespec *b) 208 { 209 r->tv_sec = a->tv_sec - b->tv_sec; 210 if (a->tv_nsec < b->tv_nsec) { 211 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; 212 r->tv_sec--; 213 } else { 214 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 215 } 216 } 217 218 static void perf_stat__reset_stats(void) 219 { 220 int i; 221 222 perf_evlist__reset_stats(evsel_list); 223 perf_stat__reset_shadow_stats(); 224 225 for (i = 0; i < stat_config.stats_num; i++) 226 perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); 227 } 228 229 static int create_perf_stat_counter(struct perf_evsel *evsel) 230 { 231 struct perf_event_attr *attr = &evsel->attr; 232 struct perf_evsel *leader = evsel->leader; 233 234 if (stat_config.scale) { 235 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 236 PERF_FORMAT_TOTAL_TIME_RUNNING; 237 } 238 239 /* 240 * The event is part of non trivial group, let's enable 241 * the group read (for leader) and ID retrieval for all 242 * members. 243 */ 244 if (leader->nr_members > 1) 245 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 246 247 attr->inherit = !no_inherit; 248 249 /* 250 * Some events get initialized with sample_(period/type) set, 251 * like tracepoints. Clear it up for counting. 252 */ 253 attr->sample_period = 0; 254 255 /* 256 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 257 * while avoiding that older tools show confusing messages. 258 * 259 * However for pipe sessions we need to keep it zero, 260 * because script's perf_evsel__check_attr is triggered 261 * by attr->sample_type != 0, and we can't run it on 262 * stat sessions. 263 */ 264 if (!(STAT_RECORD && perf_stat.data.is_pipe)) 265 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 266 267 /* 268 * Disabling all counters initially, they will be enabled 269 * either manually by us or by kernel via enable_on_exec 270 * set later. 271 */ 272 if (perf_evsel__is_group_leader(evsel)) { 273 attr->disabled = 1; 274 275 /* 276 * In case of initial_delay we enable tracee 277 * events manually. 278 */ 279 if (target__none(&target) && !initial_delay) 280 attr->enable_on_exec = 1; 281 } 282 283 if (target__has_cpu(&target) && !target__has_per_thread(&target)) 284 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 285 286 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 287 } 288 289 /* 290 * Does the counter have nsecs as a unit? 291 */ 292 static inline int nsec_counter(struct perf_evsel *evsel) 293 { 294 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 295 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 296 return 1; 297 298 return 0; 299 } 300 301 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 302 union perf_event *event, 303 struct perf_sample *sample __maybe_unused, 304 struct machine *machine __maybe_unused) 305 { 306 if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) { 307 pr_err("failed to write perf data, error: %m\n"); 308 return -1; 309 } 310 311 perf_stat.bytes_written += event->header.size; 312 return 0; 313 } 314 315 static int write_stat_round_event(u64 tm, u64 type) 316 { 317 return perf_event__synthesize_stat_round(NULL, tm, type, 318 process_synthesized_event, 319 NULL); 320 } 321 322 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 323 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 324 325 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 326 327 static int 328 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 329 struct perf_counts_values *count) 330 { 331 struct perf_sample_id *sid = SID(counter, cpu, thread); 332 333 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 334 process_synthesized_event, NULL); 335 } 336 337 /* 338 * Read out the results of a single counter: 339 * do not aggregate counts across CPUs in system-wide mode 340 */ 341 static int read_counter(struct perf_evsel *counter) 342 { 343 int nthreads = thread_map__nr(evsel_list->threads); 344 int ncpus, cpu, thread; 345 346 if (target__has_cpu(&target) && !target__has_per_thread(&target)) 347 ncpus = perf_evsel__nr_cpus(counter); 348 else 349 ncpus = 1; 350 351 if (!counter->supported) 352 return -ENOENT; 353 354 if (counter->system_wide) 355 nthreads = 1; 356 357 for (thread = 0; thread < nthreads; thread++) { 358 for (cpu = 0; cpu < ncpus; cpu++) { 359 struct perf_counts_values *count; 360 361 count = perf_counts(counter->counts, cpu, thread); 362 363 /* 364 * The leader's group read loads data into its group members 365 * (via perf_evsel__read_counter) and sets threir count->loaded. 366 */ 367 if (!count->loaded && 368 perf_evsel__read_counter(counter, cpu, thread)) { 369 counter->counts->scaled = -1; 370 perf_counts(counter->counts, cpu, thread)->ena = 0; 371 perf_counts(counter->counts, cpu, thread)->run = 0; 372 return -1; 373 } 374 375 count->loaded = false; 376 377 if (STAT_RECORD) { 378 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 379 pr_err("failed to write stat event\n"); 380 return -1; 381 } 382 } 383 384 if (verbose > 1) { 385 fprintf(stat_config.output, 386 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 387 perf_evsel__name(counter), 388 cpu, 389 count->val, count->ena, count->run); 390 } 391 } 392 } 393 394 return 0; 395 } 396 397 static void read_counters(void) 398 { 399 struct perf_evsel *counter; 400 int ret; 401 402 evlist__for_each_entry(evsel_list, counter) { 403 ret = read_counter(counter); 404 if (ret) 405 pr_debug("failed to read counter %s\n", counter->name); 406 407 if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) 408 pr_warning("failed to process counter %s\n", counter->name); 409 } 410 } 411 412 static void process_interval(void) 413 { 414 struct timespec ts, rs; 415 416 read_counters(); 417 418 clock_gettime(CLOCK_MONOTONIC, &ts); 419 diff_timespec(&rs, &ts, &ref_time); 420 421 if (STAT_RECORD) { 422 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) 423 pr_err("failed to write stat round event\n"); 424 } 425 426 init_stats(&walltime_nsecs_stats); 427 update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000); 428 print_counters(&rs, 0, NULL); 429 } 430 431 static void enable_counters(void) 432 { 433 if (initial_delay) 434 usleep(initial_delay * USEC_PER_MSEC); 435 436 /* 437 * We need to enable counters only if: 438 * - we don't have tracee (attaching to task or cpu) 439 * - we have initial delay configured 440 */ 441 if (!target__none(&target) || initial_delay) 442 perf_evlist__enable(evsel_list); 443 } 444 445 static void disable_counters(void) 446 { 447 /* 448 * If we don't have tracee (attaching to task or cpu), counters may 449 * still be running. To get accurate group ratios, we must stop groups 450 * from counting before reading their constituent counters. 451 */ 452 if (!target__none(&target)) 453 perf_evlist__disable(evsel_list); 454 } 455 456 static volatile int workload_exec_errno; 457 458 /* 459 * perf_evlist__prepare_workload will send a SIGUSR1 460 * if the fork fails, since we asked by setting its 461 * want_signal to true. 462 */ 463 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 464 void *ucontext __maybe_unused) 465 { 466 workload_exec_errno = info->si_value.sival_int; 467 } 468 469 static int perf_stat_synthesize_config(bool is_pipe) 470 { 471 int err; 472 473 if (is_pipe) { 474 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 475 process_synthesized_event); 476 if (err < 0) { 477 pr_err("Couldn't synthesize attrs.\n"); 478 return err; 479 } 480 } 481 482 err = perf_event__synthesize_extra_attr(NULL, 483 evsel_list, 484 process_synthesized_event, 485 is_pipe); 486 487 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 488 process_synthesized_event, 489 NULL); 490 if (err < 0) { 491 pr_err("Couldn't synthesize thread map.\n"); 492 return err; 493 } 494 495 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 496 process_synthesized_event, NULL); 497 if (err < 0) { 498 pr_err("Couldn't synthesize thread map.\n"); 499 return err; 500 } 501 502 err = perf_event__synthesize_stat_config(NULL, &stat_config, 503 process_synthesized_event, NULL); 504 if (err < 0) { 505 pr_err("Couldn't synthesize config.\n"); 506 return err; 507 } 508 509 return 0; 510 } 511 512 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 513 514 static int __store_counter_ids(struct perf_evsel *counter) 515 { 516 int cpu, thread; 517 518 for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) { 519 for (thread = 0; thread < xyarray__max_y(counter->fd); 520 thread++) { 521 int fd = FD(counter, cpu, thread); 522 523 if (perf_evlist__id_add_fd(evsel_list, counter, 524 cpu, thread, fd) < 0) 525 return -1; 526 } 527 } 528 529 return 0; 530 } 531 532 static int store_counter_ids(struct perf_evsel *counter) 533 { 534 struct cpu_map *cpus = counter->cpus; 535 struct thread_map *threads = counter->threads; 536 537 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) 538 return -ENOMEM; 539 540 return __store_counter_ids(counter); 541 } 542 543 static bool perf_evsel__should_store_id(struct perf_evsel *counter) 544 { 545 return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; 546 } 547 548 static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) 549 { 550 struct perf_evsel *c2, *leader; 551 bool is_open = true; 552 553 leader = evsel->leader; 554 pr_debug("Weak group for %s/%d failed\n", 555 leader->name, leader->nr_members); 556 557 /* 558 * for_each_group_member doesn't work here because it doesn't 559 * include the first entry. 560 */ 561 evlist__for_each_entry(evsel_list, c2) { 562 if (c2 == evsel) 563 is_open = false; 564 if (c2->leader == leader) { 565 if (is_open) 566 perf_evsel__close(c2); 567 c2->leader = c2; 568 c2->nr_members = 0; 569 } 570 } 571 return leader; 572 } 573 574 static int __run_perf_stat(int argc, const char **argv, int run_idx) 575 { 576 int interval = stat_config.interval; 577 int times = stat_config.times; 578 int timeout = stat_config.timeout; 579 char msg[BUFSIZ]; 580 unsigned long long t0, t1; 581 struct perf_evsel *counter; 582 struct timespec ts; 583 size_t l; 584 int status = 0; 585 const bool forks = (argc > 0); 586 bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; 587 struct perf_evsel_config_term *err_term; 588 589 if (interval) { 590 ts.tv_sec = interval / USEC_PER_MSEC; 591 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; 592 } else if (timeout) { 593 ts.tv_sec = timeout / USEC_PER_MSEC; 594 ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; 595 } else { 596 ts.tv_sec = 1; 597 ts.tv_nsec = 0; 598 } 599 600 if (forks) { 601 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 602 workload_exec_failed_signal) < 0) { 603 perror("failed to prepare workload"); 604 return -1; 605 } 606 child_pid = evsel_list->workload.pid; 607 } 608 609 if (group) 610 perf_evlist__set_leader(evsel_list); 611 612 evlist__for_each_entry(evsel_list, counter) { 613 try_again: 614 if (create_perf_stat_counter(counter) < 0) { 615 616 /* Weak group failed. Reset the group. */ 617 if ((errno == EINVAL || errno == EBADF) && 618 counter->leader != counter && 619 counter->weak_group) { 620 counter = perf_evsel__reset_weak_group(counter); 621 goto try_again; 622 } 623 624 /* 625 * PPC returns ENXIO for HW counters until 2.6.37 626 * (behavior changed with commit b0a873e). 627 */ 628 if (errno == EINVAL || errno == ENOSYS || 629 errno == ENOENT || errno == EOPNOTSUPP || 630 errno == ENXIO) { 631 if (verbose > 0) 632 ui__warning("%s event is not supported by the kernel.\n", 633 perf_evsel__name(counter)); 634 counter->supported = false; 635 636 if ((counter->leader != counter) || 637 !(counter->leader->nr_members > 1)) 638 continue; 639 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 640 if (verbose > 0) 641 ui__warning("%s\n", msg); 642 goto try_again; 643 } else if (target__has_per_thread(&target) && 644 evsel_list->threads && 645 evsel_list->threads->err_thread != -1) { 646 /* 647 * For global --per-thread case, skip current 648 * error thread. 649 */ 650 if (!thread_map__remove(evsel_list->threads, 651 evsel_list->threads->err_thread)) { 652 evsel_list->threads->err_thread = -1; 653 goto try_again; 654 } 655 } 656 657 perf_evsel__open_strerror(counter, &target, 658 errno, msg, sizeof(msg)); 659 ui__error("%s\n", msg); 660 661 if (child_pid != -1) 662 kill(child_pid, SIGTERM); 663 664 return -1; 665 } 666 counter->supported = true; 667 668 l = strlen(counter->unit); 669 if (l > unit_width) 670 unit_width = l; 671 672 if (perf_evsel__should_store_id(counter) && 673 store_counter_ids(counter)) 674 return -1; 675 } 676 677 if (perf_evlist__apply_filters(evsel_list, &counter)) { 678 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 679 counter->filter, perf_evsel__name(counter), errno, 680 str_error_r(errno, msg, sizeof(msg))); 681 return -1; 682 } 683 684 if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { 685 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 686 err_term->val.drv_cfg, perf_evsel__name(counter), errno, 687 str_error_r(errno, msg, sizeof(msg))); 688 return -1; 689 } 690 691 if (STAT_RECORD) { 692 int err, fd = perf_data__fd(&perf_stat.data); 693 694 if (is_pipe) { 695 err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); 696 } else { 697 err = perf_session__write_header(perf_stat.session, evsel_list, 698 fd, false); 699 } 700 701 if (err < 0) 702 return err; 703 704 err = perf_stat_synthesize_config(is_pipe); 705 if (err < 0) 706 return err; 707 } 708 709 /* 710 * Enable counters and exec the command: 711 */ 712 t0 = rdclock(); 713 clock_gettime(CLOCK_MONOTONIC, &ref_time); 714 715 if (forks) { 716 perf_evlist__start_workload(evsel_list); 717 enable_counters(); 718 719 if (interval || timeout) { 720 while (!waitpid(child_pid, &status, WNOHANG)) { 721 nanosleep(&ts, NULL); 722 if (timeout) 723 break; 724 process_interval(); 725 if (interval_count && !(--times)) 726 break; 727 } 728 } 729 waitpid(child_pid, &status, 0); 730 731 if (workload_exec_errno) { 732 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 733 pr_err("Workload failed: %s\n", emsg); 734 return -1; 735 } 736 737 if (WIFSIGNALED(status)) 738 psignal(WTERMSIG(status), argv[0]); 739 } else { 740 enable_counters(); 741 while (!done) { 742 nanosleep(&ts, NULL); 743 if (timeout) 744 break; 745 if (interval) { 746 process_interval(); 747 if (interval_count && !(--times)) 748 break; 749 } 750 } 751 } 752 753 disable_counters(); 754 755 t1 = rdclock(); 756 757 if (walltime_run_table) 758 walltime_run[run_idx] = t1 - t0; 759 760 update_stats(&walltime_nsecs_stats, t1 - t0); 761 762 /* 763 * Closing a group leader splits the group, and as we only disable 764 * group leaders, results in remaining events becoming enabled. To 765 * avoid arbitrary skew, we must read all counters before closing any 766 * group leaders. 767 */ 768 read_counters(); 769 perf_evlist__close(evsel_list); 770 771 return WEXITSTATUS(status); 772 } 773 774 static int run_perf_stat(int argc, const char **argv, int run_idx) 775 { 776 int ret; 777 778 if (pre_cmd) { 779 ret = system(pre_cmd); 780 if (ret) 781 return ret; 782 } 783 784 if (sync_run) 785 sync(); 786 787 ret = __run_perf_stat(argc, argv, run_idx); 788 if (ret) 789 return ret; 790 791 if (post_cmd) { 792 ret = system(post_cmd); 793 if (ret) 794 return ret; 795 } 796 797 return ret; 798 } 799 800 static void print_running(u64 run, u64 ena) 801 { 802 if (csv_output) { 803 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 804 csv_sep, 805 run, 806 csv_sep, 807 ena ? 100.0 * run / ena : 100.0); 808 } else if (run != ena) { 809 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 810 } 811 } 812 813 static void print_noise_pct(double total, double avg) 814 { 815 double pct = rel_stddev_stats(total, avg); 816 817 if (csv_output) 818 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 819 else if (pct) 820 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 821 } 822 823 static void print_noise(struct perf_evsel *evsel, double avg) 824 { 825 struct perf_stat_evsel *ps; 826 827 if (run_count == 1) 828 return; 829 830 ps = evsel->stats; 831 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 832 } 833 834 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 835 { 836 switch (stat_config.aggr_mode) { 837 case AGGR_CORE: 838 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 839 cpu_map__id_to_socket(id), 840 csv_output ? 0 : -8, 841 cpu_map__id_to_cpu(id), 842 csv_sep, 843 csv_output ? 0 : 4, 844 nr, 845 csv_sep); 846 break; 847 case AGGR_SOCKET: 848 fprintf(stat_config.output, "S%*d%s%*d%s", 849 csv_output ? 0 : -5, 850 id, 851 csv_sep, 852 csv_output ? 0 : 4, 853 nr, 854 csv_sep); 855 break; 856 case AGGR_NONE: 857 fprintf(stat_config.output, "CPU%*d%s", 858 csv_output ? 0 : -4, 859 perf_evsel__cpus(evsel)->map[id], csv_sep); 860 break; 861 case AGGR_THREAD: 862 fprintf(stat_config.output, "%*s-%*d%s", 863 csv_output ? 0 : 16, 864 thread_map__comm(evsel->threads, id), 865 csv_output ? 0 : -8, 866 thread_map__pid(evsel->threads, id), 867 csv_sep); 868 break; 869 case AGGR_GLOBAL: 870 case AGGR_UNSET: 871 default: 872 break; 873 } 874 } 875 876 struct outstate { 877 FILE *fh; 878 bool newline; 879 const char *prefix; 880 int nfields; 881 int id, nr; 882 struct perf_evsel *evsel; 883 }; 884 885 #define METRIC_LEN 35 886 887 static void new_line_std(void *ctx) 888 { 889 struct outstate *os = ctx; 890 891 os->newline = true; 892 } 893 894 static void do_new_line_std(struct outstate *os) 895 { 896 fputc('\n', os->fh); 897 fputs(os->prefix, os->fh); 898 aggr_printout(os->evsel, os->id, os->nr); 899 if (stat_config.aggr_mode == AGGR_NONE) 900 fprintf(os->fh, " "); 901 fprintf(os->fh, " "); 902 } 903 904 static void print_metric_std(void *ctx, const char *color, const char *fmt, 905 const char *unit, double val) 906 { 907 struct outstate *os = ctx; 908 FILE *out = os->fh; 909 int n; 910 bool newline = os->newline; 911 912 os->newline = false; 913 914 if (unit == NULL || fmt == NULL) { 915 fprintf(out, "%-*s", METRIC_LEN, ""); 916 return; 917 } 918 919 if (newline) 920 do_new_line_std(os); 921 922 n = fprintf(out, " # "); 923 if (color) 924 n += color_fprintf(out, color, fmt, val); 925 else 926 n += fprintf(out, fmt, val); 927 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 928 } 929 930 static void new_line_csv(void *ctx) 931 { 932 struct outstate *os = ctx; 933 int i; 934 935 fputc('\n', os->fh); 936 if (os->prefix) 937 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 938 aggr_printout(os->evsel, os->id, os->nr); 939 for (i = 0; i < os->nfields; i++) 940 fputs(csv_sep, os->fh); 941 } 942 943 static void print_metric_csv(void *ctx, 944 const char *color __maybe_unused, 945 const char *fmt, const char *unit, double val) 946 { 947 struct outstate *os = ctx; 948 FILE *out = os->fh; 949 char buf[64], *vals, *ends; 950 951 if (unit == NULL || fmt == NULL) { 952 fprintf(out, "%s%s", csv_sep, csv_sep); 953 return; 954 } 955 snprintf(buf, sizeof(buf), fmt, val); 956 ends = vals = ltrim(buf); 957 while (isdigit(*ends) || *ends == '.') 958 ends++; 959 *ends = 0; 960 while (isspace(*unit)) 961 unit++; 962 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 963 } 964 965 #define METRIC_ONLY_LEN 20 966 967 /* Filter out some columns that don't work well in metrics only mode */ 968 969 static bool valid_only_metric(const char *unit) 970 { 971 if (!unit) 972 return false; 973 if (strstr(unit, "/sec") || 974 strstr(unit, "hz") || 975 strstr(unit, "Hz") || 976 strstr(unit, "CPUs utilized")) 977 return false; 978 return true; 979 } 980 981 static const char *fixunit(char *buf, struct perf_evsel *evsel, 982 const char *unit) 983 { 984 if (!strncmp(unit, "of all", 6)) { 985 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 986 unit); 987 return buf; 988 } 989 return unit; 990 } 991 992 static void print_metric_only(void *ctx, const char *color, const char *fmt, 993 const char *unit, double val) 994 { 995 struct outstate *os = ctx; 996 FILE *out = os->fh; 997 int n; 998 char buf[1024]; 999 unsigned mlen = METRIC_ONLY_LEN; 1000 1001 if (!valid_only_metric(unit)) 1002 return; 1003 unit = fixunit(buf, os->evsel, unit); 1004 if (color) 1005 n = color_fprintf(out, color, fmt, val); 1006 else 1007 n = fprintf(out, fmt, val); 1008 if (n > METRIC_ONLY_LEN) 1009 n = METRIC_ONLY_LEN; 1010 if (mlen < strlen(unit)) 1011 mlen = strlen(unit) + 1; 1012 fprintf(out, "%*s", mlen - n, ""); 1013 } 1014 1015 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 1016 const char *fmt, 1017 const char *unit, double val) 1018 { 1019 struct outstate *os = ctx; 1020 FILE *out = os->fh; 1021 char buf[64], *vals, *ends; 1022 char tbuf[1024]; 1023 1024 if (!valid_only_metric(unit)) 1025 return; 1026 unit = fixunit(tbuf, os->evsel, unit); 1027 snprintf(buf, sizeof buf, fmt, val); 1028 ends = vals = ltrim(buf); 1029 while (isdigit(*ends) || *ends == '.') 1030 ends++; 1031 *ends = 0; 1032 fprintf(out, "%s%s", vals, csv_sep); 1033 } 1034 1035 static void new_line_metric(void *ctx __maybe_unused) 1036 { 1037 } 1038 1039 static void print_metric_header(void *ctx, const char *color __maybe_unused, 1040 const char *fmt __maybe_unused, 1041 const char *unit, double val __maybe_unused) 1042 { 1043 struct outstate *os = ctx; 1044 char tbuf[1024]; 1045 1046 if (!valid_only_metric(unit)) 1047 return; 1048 unit = fixunit(tbuf, os->evsel, unit); 1049 if (csv_output) 1050 fprintf(os->fh, "%s%s", unit, csv_sep); 1051 else 1052 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 1053 } 1054 1055 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1056 { 1057 FILE *output = stat_config.output; 1058 double msecs = avg / NSEC_PER_MSEC; 1059 const char *fmt_v, *fmt_n; 1060 char name[25]; 1061 1062 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 1063 fmt_n = csv_output ? "%s" : "%-25s"; 1064 1065 aggr_printout(evsel, id, nr); 1066 1067 scnprintf(name, sizeof(name), "%s%s", 1068 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 1069 1070 fprintf(output, fmt_v, msecs, csv_sep); 1071 1072 if (csv_output) 1073 fprintf(output, "%s%s", evsel->unit, csv_sep); 1074 else 1075 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 1076 1077 fprintf(output, fmt_n, name); 1078 1079 if (evsel->cgrp) 1080 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1081 } 1082 1083 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1084 { 1085 int i; 1086 1087 if (!aggr_get_id) 1088 return 0; 1089 1090 if (stat_config.aggr_mode == AGGR_NONE) 1091 return id; 1092 1093 if (stat_config.aggr_mode == AGGR_GLOBAL) 1094 return 0; 1095 1096 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1097 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1098 1099 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1100 return cpu2; 1101 } 1102 return 0; 1103 } 1104 1105 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1106 { 1107 FILE *output = stat_config.output; 1108 double sc = evsel->scale; 1109 const char *fmt; 1110 1111 if (csv_output) { 1112 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1113 } else { 1114 if (big_num) 1115 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1116 else 1117 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1118 } 1119 1120 aggr_printout(evsel, id, nr); 1121 1122 fprintf(output, fmt, avg, csv_sep); 1123 1124 if (evsel->unit) 1125 fprintf(output, "%-*s%s", 1126 csv_output ? 0 : unit_width, 1127 evsel->unit, csv_sep); 1128 1129 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1130 1131 if (evsel->cgrp) 1132 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1133 } 1134 1135 static bool is_mixed_hw_group(struct perf_evsel *counter) 1136 { 1137 struct perf_evlist *evlist = counter->evlist; 1138 u32 pmu_type = counter->attr.type; 1139 struct perf_evsel *pos; 1140 1141 if (counter->nr_members < 2) 1142 return false; 1143 1144 evlist__for_each_entry(evlist, pos) { 1145 /* software events can be part of any hardware group */ 1146 if (pos->attr.type == PERF_TYPE_SOFTWARE) 1147 continue; 1148 if (pmu_type == PERF_TYPE_SOFTWARE) { 1149 pmu_type = pos->attr.type; 1150 continue; 1151 } 1152 if (pmu_type != pos->attr.type) 1153 return true; 1154 } 1155 1156 return false; 1157 } 1158 1159 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1160 char *prefix, u64 run, u64 ena, double noise, 1161 struct runtime_stat *st) 1162 { 1163 struct perf_stat_output_ctx out; 1164 struct outstate os = { 1165 .fh = stat_config.output, 1166 .prefix = prefix ? prefix : "", 1167 .id = id, 1168 .nr = nr, 1169 .evsel = counter, 1170 }; 1171 print_metric_t pm = print_metric_std; 1172 void (*nl)(void *); 1173 1174 if (metric_only) { 1175 nl = new_line_metric; 1176 if (csv_output) 1177 pm = print_metric_only_csv; 1178 else 1179 pm = print_metric_only; 1180 } else 1181 nl = new_line_std; 1182 1183 if (csv_output && !metric_only) { 1184 static int aggr_fields[] = { 1185 [AGGR_GLOBAL] = 0, 1186 [AGGR_THREAD] = 1, 1187 [AGGR_NONE] = 1, 1188 [AGGR_SOCKET] = 2, 1189 [AGGR_CORE] = 2, 1190 }; 1191 1192 pm = print_metric_csv; 1193 nl = new_line_csv; 1194 os.nfields = 3; 1195 os.nfields += aggr_fields[stat_config.aggr_mode]; 1196 if (counter->cgrp) 1197 os.nfields++; 1198 } 1199 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1200 if (metric_only) { 1201 pm(&os, NULL, "", "", 0); 1202 return; 1203 } 1204 aggr_printout(counter, id, nr); 1205 1206 fprintf(stat_config.output, "%*s%s", 1207 csv_output ? 0 : 18, 1208 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1209 csv_sep); 1210 1211 if (counter->supported) { 1212 print_free_counters_hint = 1; 1213 if (is_mixed_hw_group(counter)) 1214 print_mixed_hw_group_error = 1; 1215 } 1216 1217 fprintf(stat_config.output, "%-*s%s", 1218 csv_output ? 0 : unit_width, 1219 counter->unit, csv_sep); 1220 1221 fprintf(stat_config.output, "%*s", 1222 csv_output ? 0 : -25, 1223 perf_evsel__name(counter)); 1224 1225 if (counter->cgrp) 1226 fprintf(stat_config.output, "%s%s", 1227 csv_sep, counter->cgrp->name); 1228 1229 if (!csv_output) 1230 pm(&os, NULL, NULL, "", 0); 1231 print_noise(counter, noise); 1232 print_running(run, ena); 1233 if (csv_output) 1234 pm(&os, NULL, NULL, "", 0); 1235 return; 1236 } 1237 1238 if (metric_only) 1239 /* nothing */; 1240 else if (nsec_counter(counter)) 1241 nsec_printout(id, nr, counter, uval); 1242 else 1243 abs_printout(id, nr, counter, uval); 1244 1245 out.print_metric = pm; 1246 out.new_line = nl; 1247 out.ctx = &os; 1248 out.force_header = false; 1249 1250 if (csv_output && !metric_only) { 1251 print_noise(counter, noise); 1252 print_running(run, ena); 1253 } 1254 1255 perf_stat__print_shadow_stats(counter, uval, 1256 first_shadow_cpu(counter, id), 1257 &out, &metric_events, st); 1258 if (!csv_output && !metric_only) { 1259 print_noise(counter, noise); 1260 print_running(run, ena); 1261 } 1262 } 1263 1264 static void aggr_update_shadow(void) 1265 { 1266 int cpu, s2, id, s; 1267 u64 val; 1268 struct perf_evsel *counter; 1269 1270 for (s = 0; s < aggr_map->nr; s++) { 1271 id = aggr_map->map[s]; 1272 evlist__for_each_entry(evsel_list, counter) { 1273 val = 0; 1274 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1275 s2 = aggr_get_id(evsel_list->cpus, cpu); 1276 if (s2 != id) 1277 continue; 1278 val += perf_counts(counter->counts, cpu, 0)->val; 1279 } 1280 perf_stat__update_shadow_stats(counter, val, 1281 first_shadow_cpu(counter, id), 1282 &rt_stat); 1283 } 1284 } 1285 } 1286 1287 static void uniquify_event_name(struct perf_evsel *counter) 1288 { 1289 char *new_name; 1290 char *config; 1291 1292 if (counter->uniquified_name || 1293 !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, 1294 strlen(counter->pmu_name))) 1295 return; 1296 1297 config = strchr(counter->name, '/'); 1298 if (config) { 1299 if (asprintf(&new_name, 1300 "%s%s", counter->pmu_name, config) > 0) { 1301 free(counter->name); 1302 counter->name = new_name; 1303 } 1304 } else { 1305 if (asprintf(&new_name, 1306 "%s [%s]", counter->name, counter->pmu_name) > 0) { 1307 free(counter->name); 1308 counter->name = new_name; 1309 } 1310 } 1311 1312 counter->uniquified_name = true; 1313 } 1314 1315 static void collect_all_aliases(struct perf_evsel *counter, 1316 void (*cb)(struct perf_evsel *counter, void *data, 1317 bool first), 1318 void *data) 1319 { 1320 struct perf_evsel *alias; 1321 1322 alias = list_prepare_entry(counter, &(evsel_list->entries), node); 1323 list_for_each_entry_continue (alias, &evsel_list->entries, node) { 1324 if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || 1325 alias->scale != counter->scale || 1326 alias->cgrp != counter->cgrp || 1327 strcmp(alias->unit, counter->unit) || 1328 nsec_counter(alias) != nsec_counter(counter)) 1329 break; 1330 alias->merged_stat = true; 1331 cb(alias, data, false); 1332 } 1333 } 1334 1335 static bool collect_data(struct perf_evsel *counter, 1336 void (*cb)(struct perf_evsel *counter, void *data, 1337 bool first), 1338 void *data) 1339 { 1340 if (counter->merged_stat) 1341 return false; 1342 cb(counter, data, true); 1343 if (no_merge) 1344 uniquify_event_name(counter); 1345 else if (counter->auto_merge_stats) 1346 collect_all_aliases(counter, cb, data); 1347 return true; 1348 } 1349 1350 struct aggr_data { 1351 u64 ena, run, val; 1352 int id; 1353 int nr; 1354 int cpu; 1355 }; 1356 1357 static void aggr_cb(struct perf_evsel *counter, void *data, bool first) 1358 { 1359 struct aggr_data *ad = data; 1360 int cpu, s2; 1361 1362 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1363 struct perf_counts_values *counts; 1364 1365 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1366 if (s2 != ad->id) 1367 continue; 1368 if (first) 1369 ad->nr++; 1370 counts = perf_counts(counter->counts, cpu, 0); 1371 /* 1372 * When any result is bad, make them all to give 1373 * consistent output in interval mode. 1374 */ 1375 if (counts->ena == 0 || counts->run == 0 || 1376 counter->counts->scaled == -1) { 1377 ad->ena = 0; 1378 ad->run = 0; 1379 break; 1380 } 1381 ad->val += counts->val; 1382 ad->ena += counts->ena; 1383 ad->run += counts->run; 1384 } 1385 } 1386 1387 static void print_aggr(char *prefix) 1388 { 1389 FILE *output = stat_config.output; 1390 struct perf_evsel *counter; 1391 int s, id, nr; 1392 double uval; 1393 u64 ena, run, val; 1394 bool first; 1395 1396 if (!(aggr_map || aggr_get_id)) 1397 return; 1398 1399 aggr_update_shadow(); 1400 1401 /* 1402 * With metric_only everything is on a single line. 1403 * Without each counter has its own line. 1404 */ 1405 for (s = 0; s < aggr_map->nr; s++) { 1406 struct aggr_data ad; 1407 if (prefix && metric_only) 1408 fprintf(output, "%s", prefix); 1409 1410 ad.id = id = aggr_map->map[s]; 1411 first = true; 1412 evlist__for_each_entry(evsel_list, counter) { 1413 if (is_duration_time(counter)) 1414 continue; 1415 1416 ad.val = ad.ena = ad.run = 0; 1417 ad.nr = 0; 1418 if (!collect_data(counter, aggr_cb, &ad)) 1419 continue; 1420 nr = ad.nr; 1421 ena = ad.ena; 1422 run = ad.run; 1423 val = ad.val; 1424 if (first && metric_only) { 1425 first = false; 1426 aggr_printout(counter, id, nr); 1427 } 1428 if (prefix && !metric_only) 1429 fprintf(output, "%s", prefix); 1430 1431 uval = val * counter->scale; 1432 printout(id, nr, counter, uval, prefix, run, ena, 1.0, 1433 &rt_stat); 1434 if (!metric_only) 1435 fputc('\n', output); 1436 } 1437 if (metric_only) 1438 fputc('\n', output); 1439 } 1440 } 1441 1442 static int cmp_val(const void *a, const void *b) 1443 { 1444 return ((struct perf_aggr_thread_value *)b)->val - 1445 ((struct perf_aggr_thread_value *)a)->val; 1446 } 1447 1448 static struct perf_aggr_thread_value *sort_aggr_thread( 1449 struct perf_evsel *counter, 1450 int nthreads, int ncpus, 1451 int *ret) 1452 { 1453 int cpu, thread, i = 0; 1454 double uval; 1455 struct perf_aggr_thread_value *buf; 1456 1457 buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); 1458 if (!buf) 1459 return NULL; 1460 1461 for (thread = 0; thread < nthreads; thread++) { 1462 u64 ena = 0, run = 0, val = 0; 1463 1464 for (cpu = 0; cpu < ncpus; cpu++) { 1465 val += perf_counts(counter->counts, cpu, thread)->val; 1466 ena += perf_counts(counter->counts, cpu, thread)->ena; 1467 run += perf_counts(counter->counts, cpu, thread)->run; 1468 } 1469 1470 uval = val * counter->scale; 1471 1472 /* 1473 * Skip value 0 when enabling --per-thread globally, 1474 * otherwise too many 0 output. 1475 */ 1476 if (uval == 0.0 && target__has_per_thread(&target)) 1477 continue; 1478 1479 buf[i].counter = counter; 1480 buf[i].id = thread; 1481 buf[i].uval = uval; 1482 buf[i].val = val; 1483 buf[i].run = run; 1484 buf[i].ena = ena; 1485 i++; 1486 } 1487 1488 qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); 1489 1490 if (ret) 1491 *ret = i; 1492 1493 return buf; 1494 } 1495 1496 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1497 { 1498 FILE *output = stat_config.output; 1499 int nthreads = thread_map__nr(counter->threads); 1500 int ncpus = cpu_map__nr(counter->cpus); 1501 int thread, sorted_threads, id; 1502 struct perf_aggr_thread_value *buf; 1503 1504 buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); 1505 if (!buf) { 1506 perror("cannot sort aggr thread"); 1507 return; 1508 } 1509 1510 for (thread = 0; thread < sorted_threads; thread++) { 1511 if (prefix) 1512 fprintf(output, "%s", prefix); 1513 1514 id = buf[thread].id; 1515 if (stat_config.stats) 1516 printout(id, 0, buf[thread].counter, buf[thread].uval, 1517 prefix, buf[thread].run, buf[thread].ena, 1.0, 1518 &stat_config.stats[id]); 1519 else 1520 printout(id, 0, buf[thread].counter, buf[thread].uval, 1521 prefix, buf[thread].run, buf[thread].ena, 1.0, 1522 &rt_stat); 1523 fputc('\n', output); 1524 } 1525 1526 free(buf); 1527 } 1528 1529 struct caggr_data { 1530 double avg, avg_enabled, avg_running; 1531 }; 1532 1533 static void counter_aggr_cb(struct perf_evsel *counter, void *data, 1534 bool first __maybe_unused) 1535 { 1536 struct caggr_data *cd = data; 1537 struct perf_stat_evsel *ps = counter->stats; 1538 1539 cd->avg += avg_stats(&ps->res_stats[0]); 1540 cd->avg_enabled += avg_stats(&ps->res_stats[1]); 1541 cd->avg_running += avg_stats(&ps->res_stats[2]); 1542 } 1543 1544 /* 1545 * Print out the results of a single counter: 1546 * aggregated counts in system-wide mode 1547 */ 1548 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1549 { 1550 FILE *output = stat_config.output; 1551 double uval; 1552 struct caggr_data cd = { .avg = 0.0 }; 1553 1554 if (!collect_data(counter, counter_aggr_cb, &cd)) 1555 return; 1556 1557 if (prefix && !metric_only) 1558 fprintf(output, "%s", prefix); 1559 1560 uval = cd.avg * counter->scale; 1561 printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, 1562 cd.avg, &rt_stat); 1563 if (!metric_only) 1564 fprintf(output, "\n"); 1565 } 1566 1567 static void counter_cb(struct perf_evsel *counter, void *data, 1568 bool first __maybe_unused) 1569 { 1570 struct aggr_data *ad = data; 1571 1572 ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; 1573 ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; 1574 ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; 1575 } 1576 1577 /* 1578 * Print out the results of a single counter: 1579 * does not use aggregated count in system-wide 1580 */ 1581 static void print_counter(struct perf_evsel *counter, char *prefix) 1582 { 1583 FILE *output = stat_config.output; 1584 u64 ena, run, val; 1585 double uval; 1586 int cpu; 1587 1588 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1589 struct aggr_data ad = { .cpu = cpu }; 1590 1591 if (!collect_data(counter, counter_cb, &ad)) 1592 return; 1593 val = ad.val; 1594 ena = ad.ena; 1595 run = ad.run; 1596 1597 if (prefix) 1598 fprintf(output, "%s", prefix); 1599 1600 uval = val * counter->scale; 1601 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, 1602 &rt_stat); 1603 1604 fputc('\n', output); 1605 } 1606 } 1607 1608 static void print_no_aggr_metric(char *prefix) 1609 { 1610 int cpu; 1611 int nrcpus = 0; 1612 struct perf_evsel *counter; 1613 u64 ena, run, val; 1614 double uval; 1615 1616 nrcpus = evsel_list->cpus->nr; 1617 for (cpu = 0; cpu < nrcpus; cpu++) { 1618 bool first = true; 1619 1620 if (prefix) 1621 fputs(prefix, stat_config.output); 1622 evlist__for_each_entry(evsel_list, counter) { 1623 if (is_duration_time(counter)) 1624 continue; 1625 if (first) { 1626 aggr_printout(counter, cpu, 0); 1627 first = false; 1628 } 1629 val = perf_counts(counter->counts, cpu, 0)->val; 1630 ena = perf_counts(counter->counts, cpu, 0)->ena; 1631 run = perf_counts(counter->counts, cpu, 0)->run; 1632 1633 uval = val * counter->scale; 1634 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, 1635 &rt_stat); 1636 } 1637 fputc('\n', stat_config.output); 1638 } 1639 } 1640 1641 static int aggr_header_lens[] = { 1642 [AGGR_CORE] = 18, 1643 [AGGR_SOCKET] = 12, 1644 [AGGR_NONE] = 6, 1645 [AGGR_THREAD] = 24, 1646 [AGGR_GLOBAL] = 0, 1647 }; 1648 1649 static const char *aggr_header_csv[] = { 1650 [AGGR_CORE] = "core,cpus,", 1651 [AGGR_SOCKET] = "socket,cpus", 1652 [AGGR_NONE] = "cpu,", 1653 [AGGR_THREAD] = "comm-pid,", 1654 [AGGR_GLOBAL] = "" 1655 }; 1656 1657 static void print_metric_headers(const char *prefix, bool no_indent) 1658 { 1659 struct perf_stat_output_ctx out; 1660 struct perf_evsel *counter; 1661 struct outstate os = { 1662 .fh = stat_config.output 1663 }; 1664 1665 if (prefix) 1666 fprintf(stat_config.output, "%s", prefix); 1667 1668 if (!csv_output && !no_indent) 1669 fprintf(stat_config.output, "%*s", 1670 aggr_header_lens[stat_config.aggr_mode], ""); 1671 if (csv_output) { 1672 if (stat_config.interval) 1673 fputs("time,", stat_config.output); 1674 fputs(aggr_header_csv[stat_config.aggr_mode], 1675 stat_config.output); 1676 } 1677 1678 /* Print metrics headers only */ 1679 evlist__for_each_entry(evsel_list, counter) { 1680 if (is_duration_time(counter)) 1681 continue; 1682 os.evsel = counter; 1683 out.ctx = &os; 1684 out.print_metric = print_metric_header; 1685 out.new_line = new_line_metric; 1686 out.force_header = true; 1687 os.evsel = counter; 1688 perf_stat__print_shadow_stats(counter, 0, 1689 0, 1690 &out, 1691 &metric_events, 1692 &rt_stat); 1693 } 1694 fputc('\n', stat_config.output); 1695 } 1696 1697 static void print_interval(char *prefix, struct timespec *ts) 1698 { 1699 FILE *output = stat_config.output; 1700 static int num_print_interval; 1701 1702 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1703 1704 if (num_print_interval == 0 && !csv_output) { 1705 switch (stat_config.aggr_mode) { 1706 case AGGR_SOCKET: 1707 fprintf(output, "# time socket cpus"); 1708 if (!metric_only) 1709 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1710 break; 1711 case AGGR_CORE: 1712 fprintf(output, "# time core cpus"); 1713 if (!metric_only) 1714 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1715 break; 1716 case AGGR_NONE: 1717 fprintf(output, "# time CPU"); 1718 if (!metric_only) 1719 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1720 break; 1721 case AGGR_THREAD: 1722 fprintf(output, "# time comm-pid"); 1723 if (!metric_only) 1724 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1725 break; 1726 case AGGR_GLOBAL: 1727 default: 1728 fprintf(output, "# time"); 1729 if (!metric_only) 1730 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1731 case AGGR_UNSET: 1732 break; 1733 } 1734 } 1735 1736 if (num_print_interval == 0 && metric_only) 1737 print_metric_headers(" ", true); 1738 if (++num_print_interval == 25) 1739 num_print_interval = 0; 1740 } 1741 1742 static void print_header(int argc, const char **argv) 1743 { 1744 FILE *output = stat_config.output; 1745 int i; 1746 1747 fflush(stdout); 1748 1749 if (!csv_output) { 1750 fprintf(output, "\n"); 1751 fprintf(output, " Performance counter stats for "); 1752 if (target.system_wide) 1753 fprintf(output, "\'system wide"); 1754 else if (target.cpu_list) 1755 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1756 else if (!target__has_task(&target)) { 1757 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1758 for (i = 1; argv && (i < argc); i++) 1759 fprintf(output, " %s", argv[i]); 1760 } else if (target.pid) 1761 fprintf(output, "process id \'%s", target.pid); 1762 else 1763 fprintf(output, "thread id \'%s", target.tid); 1764 1765 fprintf(output, "\'"); 1766 if (run_count > 1) 1767 fprintf(output, " (%d runs)", run_count); 1768 fprintf(output, ":\n\n"); 1769 } 1770 } 1771 1772 static int get_precision(double num) 1773 { 1774 if (num > 1) 1775 return 0; 1776 1777 return lround(ceil(-log10(num))); 1778 } 1779 1780 static void print_table(FILE *output, int precision, double avg) 1781 { 1782 char tmp[64]; 1783 int idx, indent = 0; 1784 1785 scnprintf(tmp, 64, " %17.*f", precision, avg); 1786 while (tmp[indent] == ' ') 1787 indent++; 1788 1789 fprintf(output, "%*s# Table of individual measurements:\n", indent, ""); 1790 1791 for (idx = 0; idx < run_count; idx++) { 1792 double run = (double) walltime_run[idx] / NSEC_PER_SEC; 1793 int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5); 1794 1795 fprintf(output, " %17.*f (%+.*f) ", 1796 precision, run, precision, run - avg); 1797 1798 for (h = 0; h < n; h++) 1799 fprintf(output, "#"); 1800 1801 fprintf(output, "\n"); 1802 } 1803 1804 fprintf(output, "\n%*s# Final result:\n", indent, ""); 1805 } 1806 1807 static void print_footer(void) 1808 { 1809 double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; 1810 FILE *output = stat_config.output; 1811 int n; 1812 1813 if (!null_run) 1814 fprintf(output, "\n"); 1815 1816 if (run_count == 1) { 1817 fprintf(output, " %17.9f seconds time elapsed", avg); 1818 } else { 1819 double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; 1820 /* 1821 * Display at most 2 more significant 1822 * digits than the stddev inaccuracy. 1823 */ 1824 int precision = get_precision(sd) + 2; 1825 1826 if (walltime_run_table) 1827 print_table(output, precision, avg); 1828 1829 fprintf(output, " %17.*f +- %.*f seconds time elapsed", 1830 precision, avg, precision, sd); 1831 1832 print_noise_pct(sd, avg); 1833 } 1834 fprintf(output, "\n\n"); 1835 1836 if (print_free_counters_hint && 1837 sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && 1838 n > 0) 1839 fprintf(output, 1840 "Some events weren't counted. Try disabling the NMI watchdog:\n" 1841 " echo 0 > /proc/sys/kernel/nmi_watchdog\n" 1842 " perf stat ...\n" 1843 " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); 1844 1845 if (print_mixed_hw_group_error) 1846 fprintf(output, 1847 "The events in group usually have to be from " 1848 "the same PMU. Try reorganizing the group.\n"); 1849 } 1850 1851 static void print_counters(struct timespec *ts, int argc, const char **argv) 1852 { 1853 int interval = stat_config.interval; 1854 struct perf_evsel *counter; 1855 char buf[64], *prefix = NULL; 1856 1857 /* Do not print anything if we record to the pipe. */ 1858 if (STAT_RECORD && perf_stat.data.is_pipe) 1859 return; 1860 1861 if (interval) 1862 print_interval(prefix = buf, ts); 1863 else 1864 print_header(argc, argv); 1865 1866 if (metric_only) { 1867 static int num_print_iv; 1868 1869 if (num_print_iv == 0 && !interval) 1870 print_metric_headers(prefix, false); 1871 if (num_print_iv++ == 25) 1872 num_print_iv = 0; 1873 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1874 fprintf(stat_config.output, "%s", prefix); 1875 } 1876 1877 switch (stat_config.aggr_mode) { 1878 case AGGR_CORE: 1879 case AGGR_SOCKET: 1880 print_aggr(prefix); 1881 break; 1882 case AGGR_THREAD: 1883 evlist__for_each_entry(evsel_list, counter) { 1884 if (is_duration_time(counter)) 1885 continue; 1886 print_aggr_thread(counter, prefix); 1887 } 1888 break; 1889 case AGGR_GLOBAL: 1890 evlist__for_each_entry(evsel_list, counter) { 1891 if (is_duration_time(counter)) 1892 continue; 1893 print_counter_aggr(counter, prefix); 1894 } 1895 if (metric_only) 1896 fputc('\n', stat_config.output); 1897 break; 1898 case AGGR_NONE: 1899 if (metric_only) 1900 print_no_aggr_metric(prefix); 1901 else { 1902 evlist__for_each_entry(evsel_list, counter) { 1903 if (is_duration_time(counter)) 1904 continue; 1905 print_counter(counter, prefix); 1906 } 1907 } 1908 break; 1909 case AGGR_UNSET: 1910 default: 1911 break; 1912 } 1913 1914 if (!interval && !csv_output) 1915 print_footer(); 1916 1917 fflush(stat_config.output); 1918 } 1919 1920 static volatile int signr = -1; 1921 1922 static void skip_signal(int signo) 1923 { 1924 if ((child_pid == -1) || stat_config.interval) 1925 done = 1; 1926 1927 signr = signo; 1928 /* 1929 * render child_pid harmless 1930 * won't send SIGTERM to a random 1931 * process in case of race condition 1932 * and fast PID recycling 1933 */ 1934 child_pid = -1; 1935 } 1936 1937 static void sig_atexit(void) 1938 { 1939 sigset_t set, oset; 1940 1941 /* 1942 * avoid race condition with SIGCHLD handler 1943 * in skip_signal() which is modifying child_pid 1944 * goal is to avoid send SIGTERM to a random 1945 * process 1946 */ 1947 sigemptyset(&set); 1948 sigaddset(&set, SIGCHLD); 1949 sigprocmask(SIG_BLOCK, &set, &oset); 1950 1951 if (child_pid != -1) 1952 kill(child_pid, SIGTERM); 1953 1954 sigprocmask(SIG_SETMASK, &oset, NULL); 1955 1956 if (signr == -1) 1957 return; 1958 1959 signal(signr, SIG_DFL); 1960 kill(getpid(), signr); 1961 } 1962 1963 static int stat__set_big_num(const struct option *opt __maybe_unused, 1964 const char *s __maybe_unused, int unset) 1965 { 1966 big_num_opt = unset ? 0 : 1; 1967 return 0; 1968 } 1969 1970 static int enable_metric_only(const struct option *opt __maybe_unused, 1971 const char *s __maybe_unused, int unset) 1972 { 1973 force_metric_only = true; 1974 metric_only = !unset; 1975 return 0; 1976 } 1977 1978 static int parse_metric_groups(const struct option *opt, 1979 const char *str, 1980 int unset __maybe_unused) 1981 { 1982 return metricgroup__parse_groups(opt, str, &metric_events); 1983 } 1984 1985 static const struct option stat_options[] = { 1986 OPT_BOOLEAN('T', "transaction", &transaction_run, 1987 "hardware transaction statistics"), 1988 OPT_CALLBACK('e', "event", &evsel_list, "event", 1989 "event selector. use 'perf list' to list available events", 1990 parse_events_option), 1991 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1992 "event filter", parse_filter), 1993 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1994 "child tasks do not inherit counters"), 1995 OPT_STRING('p', "pid", &target.pid, "pid", 1996 "stat events on existing process id"), 1997 OPT_STRING('t', "tid", &target.tid, "tid", 1998 "stat events on existing thread id"), 1999 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 2000 "system-wide collection from all CPUs"), 2001 OPT_BOOLEAN('g', "group", &group, 2002 "put the counters into a counter group"), 2003 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 2004 OPT_INCR('v', "verbose", &verbose, 2005 "be more verbose (show counter open errors, etc)"), 2006 OPT_INTEGER('r', "repeat", &run_count, 2007 "repeat command and print average + stddev (max: 100, forever: 0)"), 2008 OPT_BOOLEAN(0, "table", &walltime_run_table, 2009 "display details about each run (only with -r option)"), 2010 OPT_BOOLEAN('n', "null", &null_run, 2011 "null run - dont start any counters"), 2012 OPT_INCR('d', "detailed", &detailed_run, 2013 "detailed run - start a lot of events"), 2014 OPT_BOOLEAN('S', "sync", &sync_run, 2015 "call sync() before starting a run"), 2016 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 2017 "print large numbers with thousands\' separators", 2018 stat__set_big_num), 2019 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 2020 "list of cpus to monitor in system-wide"), 2021 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 2022 "disable CPU count aggregation", AGGR_NONE), 2023 OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), 2024 OPT_STRING('x', "field-separator", &csv_sep, "separator", 2025 "print counts with custom separator"), 2026 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 2027 "monitor event in cgroup name only", parse_cgroups), 2028 OPT_STRING('o', "output", &output_name, "file", "output file name"), 2029 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 2030 OPT_INTEGER(0, "log-fd", &output_fd, 2031 "log output to fd, instead of stderr"), 2032 OPT_STRING(0, "pre", &pre_cmd, "command", 2033 "command to run prior to the measured command"), 2034 OPT_STRING(0, "post", &post_cmd, "command", 2035 "command to run after to the measured command"), 2036 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 2037 "print counts at regular interval in ms " 2038 "(overhead is possible for values <= 100ms)"), 2039 OPT_INTEGER(0, "interval-count", &stat_config.times, 2040 "print counts for fixed number of times"), 2041 OPT_UINTEGER(0, "timeout", &stat_config.timeout, 2042 "stop workload and print counts after a timeout period in ms (>= 10ms)"), 2043 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 2044 "aggregate counts per processor socket", AGGR_SOCKET), 2045 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 2046 "aggregate counts per physical processor core", AGGR_CORE), 2047 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 2048 "aggregate counts per thread", AGGR_THREAD), 2049 OPT_UINTEGER('D', "delay", &initial_delay, 2050 "ms to wait before starting measurement after program start"), 2051 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, 2052 "Only print computed metrics. No raw values", enable_metric_only), 2053 OPT_BOOLEAN(0, "topdown", &topdown_run, 2054 "measure topdown level 1 statistics"), 2055 OPT_BOOLEAN(0, "smi-cost", &smi_cost, 2056 "measure SMI cost"), 2057 OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", 2058 "monitor specified metrics or metric groups (separated by ,)", 2059 parse_metric_groups), 2060 OPT_END() 2061 }; 2062 2063 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 2064 { 2065 return cpu_map__get_socket(map, cpu, NULL); 2066 } 2067 2068 static int perf_stat__get_core(struct cpu_map *map, int cpu) 2069 { 2070 return cpu_map__get_core(map, cpu, NULL); 2071 } 2072 2073 static int cpu_map__get_max(struct cpu_map *map) 2074 { 2075 int i, max = -1; 2076 2077 for (i = 0; i < map->nr; i++) { 2078 if (map->map[i] > max) 2079 max = map->map[i]; 2080 } 2081 2082 return max; 2083 } 2084 2085 static struct cpu_map *cpus_aggr_map; 2086 2087 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 2088 { 2089 int cpu; 2090 2091 if (idx >= map->nr) 2092 return -1; 2093 2094 cpu = map->map[idx]; 2095 2096 if (cpus_aggr_map->map[cpu] == -1) 2097 cpus_aggr_map->map[cpu] = get_id(map, idx); 2098 2099 return cpus_aggr_map->map[cpu]; 2100 } 2101 2102 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 2103 { 2104 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 2105 } 2106 2107 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 2108 { 2109 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 2110 } 2111 2112 static int perf_stat_init_aggr_mode(void) 2113 { 2114 int nr; 2115 2116 switch (stat_config.aggr_mode) { 2117 case AGGR_SOCKET: 2118 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 2119 perror("cannot build socket map"); 2120 return -1; 2121 } 2122 aggr_get_id = perf_stat__get_socket_cached; 2123 break; 2124 case AGGR_CORE: 2125 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 2126 perror("cannot build core map"); 2127 return -1; 2128 } 2129 aggr_get_id = perf_stat__get_core_cached; 2130 break; 2131 case AGGR_NONE: 2132 case AGGR_GLOBAL: 2133 case AGGR_THREAD: 2134 case AGGR_UNSET: 2135 default: 2136 break; 2137 } 2138 2139 /* 2140 * The evsel_list->cpus is the base we operate on, 2141 * taking the highest cpu number to be the size of 2142 * the aggregation translate cpumap. 2143 */ 2144 nr = cpu_map__get_max(evsel_list->cpus); 2145 cpus_aggr_map = cpu_map__empty_new(nr + 1); 2146 return cpus_aggr_map ? 0 : -ENOMEM; 2147 } 2148 2149 static void perf_stat__exit_aggr_mode(void) 2150 { 2151 cpu_map__put(aggr_map); 2152 cpu_map__put(cpus_aggr_map); 2153 aggr_map = NULL; 2154 cpus_aggr_map = NULL; 2155 } 2156 2157 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 2158 { 2159 int cpu; 2160 2161 if (idx > map->nr) 2162 return -1; 2163 2164 cpu = map->map[idx]; 2165 2166 if (cpu >= env->nr_cpus_avail) 2167 return -1; 2168 2169 return cpu; 2170 } 2171 2172 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 2173 { 2174 struct perf_env *env = data; 2175 int cpu = perf_env__get_cpu(env, map, idx); 2176 2177 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 2178 } 2179 2180 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 2181 { 2182 struct perf_env *env = data; 2183 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 2184 2185 if (cpu != -1) { 2186 int socket_id = env->cpu[cpu].socket_id; 2187 2188 /* 2189 * Encode socket in upper 16 bits 2190 * core_id is relative to socket, and 2191 * we need a global id. So we combine 2192 * socket + core id. 2193 */ 2194 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 2195 } 2196 2197 return core; 2198 } 2199 2200 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 2201 struct cpu_map **sockp) 2202 { 2203 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 2204 } 2205 2206 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 2207 struct cpu_map **corep) 2208 { 2209 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 2210 } 2211 2212 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 2213 { 2214 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 2215 } 2216 2217 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 2218 { 2219 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 2220 } 2221 2222 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 2223 { 2224 struct perf_env *env = &st->session->header.env; 2225 2226 switch (stat_config.aggr_mode) { 2227 case AGGR_SOCKET: 2228 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 2229 perror("cannot build socket map"); 2230 return -1; 2231 } 2232 aggr_get_id = perf_stat__get_socket_file; 2233 break; 2234 case AGGR_CORE: 2235 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 2236 perror("cannot build core map"); 2237 return -1; 2238 } 2239 aggr_get_id = perf_stat__get_core_file; 2240 break; 2241 case AGGR_NONE: 2242 case AGGR_GLOBAL: 2243 case AGGR_THREAD: 2244 case AGGR_UNSET: 2245 default: 2246 break; 2247 } 2248 2249 return 0; 2250 } 2251 2252 static int topdown_filter_events(const char **attr, char **str, bool use_group) 2253 { 2254 int off = 0; 2255 int i; 2256 int len = 0; 2257 char *s; 2258 2259 for (i = 0; attr[i]; i++) { 2260 if (pmu_have_event("cpu", attr[i])) { 2261 len += strlen(attr[i]) + 1; 2262 attr[i - off] = attr[i]; 2263 } else 2264 off++; 2265 } 2266 attr[i - off] = NULL; 2267 2268 *str = malloc(len + 1 + 2); 2269 if (!*str) 2270 return -1; 2271 s = *str; 2272 if (i - off == 0) { 2273 *s = 0; 2274 return 0; 2275 } 2276 if (use_group) 2277 *s++ = '{'; 2278 for (i = 0; attr[i]; i++) { 2279 strcpy(s, attr[i]); 2280 s += strlen(s); 2281 *s++ = ','; 2282 } 2283 if (use_group) { 2284 s[-1] = '}'; 2285 *s = 0; 2286 } else 2287 s[-1] = 0; 2288 return 0; 2289 } 2290 2291 __weak bool arch_topdown_check_group(bool *warn) 2292 { 2293 *warn = false; 2294 return false; 2295 } 2296 2297 __weak void arch_topdown_group_warn(void) 2298 { 2299 } 2300 2301 /* 2302 * Add default attributes, if there were no attributes specified or 2303 * if -d/--detailed, -d -d or -d -d -d is used: 2304 */ 2305 static int add_default_attributes(void) 2306 { 2307 int err; 2308 struct perf_event_attr default_attrs0[] = { 2309 2310 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 2311 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 2312 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 2313 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 2314 2315 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 2316 }; 2317 struct perf_event_attr frontend_attrs[] = { 2318 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 2319 }; 2320 struct perf_event_attr backend_attrs[] = { 2321 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 2322 }; 2323 struct perf_event_attr default_attrs1[] = { 2324 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 2325 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 2326 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 2327 2328 }; 2329 2330 /* 2331 * Detailed stats (-d), covering the L1 and last level data caches: 2332 */ 2333 struct perf_event_attr detailed_attrs[] = { 2334 2335 { .type = PERF_TYPE_HW_CACHE, 2336 .config = 2337 PERF_COUNT_HW_CACHE_L1D << 0 | 2338 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2339 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2340 2341 { .type = PERF_TYPE_HW_CACHE, 2342 .config = 2343 PERF_COUNT_HW_CACHE_L1D << 0 | 2344 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2345 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2346 2347 { .type = PERF_TYPE_HW_CACHE, 2348 .config = 2349 PERF_COUNT_HW_CACHE_LL << 0 | 2350 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2351 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2352 2353 { .type = PERF_TYPE_HW_CACHE, 2354 .config = 2355 PERF_COUNT_HW_CACHE_LL << 0 | 2356 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2357 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2358 }; 2359 2360 /* 2361 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 2362 */ 2363 struct perf_event_attr very_detailed_attrs[] = { 2364 2365 { .type = PERF_TYPE_HW_CACHE, 2366 .config = 2367 PERF_COUNT_HW_CACHE_L1I << 0 | 2368 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2369 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2370 2371 { .type = PERF_TYPE_HW_CACHE, 2372 .config = 2373 PERF_COUNT_HW_CACHE_L1I << 0 | 2374 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2375 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2376 2377 { .type = PERF_TYPE_HW_CACHE, 2378 .config = 2379 PERF_COUNT_HW_CACHE_DTLB << 0 | 2380 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2381 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2382 2383 { .type = PERF_TYPE_HW_CACHE, 2384 .config = 2385 PERF_COUNT_HW_CACHE_DTLB << 0 | 2386 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2387 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2388 2389 { .type = PERF_TYPE_HW_CACHE, 2390 .config = 2391 PERF_COUNT_HW_CACHE_ITLB << 0 | 2392 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2393 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2394 2395 { .type = PERF_TYPE_HW_CACHE, 2396 .config = 2397 PERF_COUNT_HW_CACHE_ITLB << 0 | 2398 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2399 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2400 2401 }; 2402 2403 /* 2404 * Very, very detailed stats (-d -d -d), adding prefetch events: 2405 */ 2406 struct perf_event_attr very_very_detailed_attrs[] = { 2407 2408 { .type = PERF_TYPE_HW_CACHE, 2409 .config = 2410 PERF_COUNT_HW_CACHE_L1D << 0 | 2411 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2412 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2413 2414 { .type = PERF_TYPE_HW_CACHE, 2415 .config = 2416 PERF_COUNT_HW_CACHE_L1D << 0 | 2417 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2418 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2419 }; 2420 2421 /* Set attrs if no event is selected and !null_run: */ 2422 if (null_run) 2423 return 0; 2424 2425 if (transaction_run) { 2426 struct parse_events_error errinfo; 2427 2428 if (pmu_have_event("cpu", "cycles-ct") && 2429 pmu_have_event("cpu", "el-start")) 2430 err = parse_events(evsel_list, transaction_attrs, 2431 &errinfo); 2432 else 2433 err = parse_events(evsel_list, 2434 transaction_limited_attrs, 2435 &errinfo); 2436 if (err) { 2437 fprintf(stderr, "Cannot set up transaction events\n"); 2438 return -1; 2439 } 2440 return 0; 2441 } 2442 2443 if (smi_cost) { 2444 int smi; 2445 2446 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { 2447 fprintf(stderr, "freeze_on_smi is not supported.\n"); 2448 return -1; 2449 } 2450 2451 if (!smi) { 2452 if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { 2453 fprintf(stderr, "Failed to set freeze_on_smi.\n"); 2454 return -1; 2455 } 2456 smi_reset = true; 2457 } 2458 2459 if (pmu_have_event("msr", "aperf") && 2460 pmu_have_event("msr", "smi")) { 2461 if (!force_metric_only) 2462 metric_only = true; 2463 err = parse_events(evsel_list, smi_cost_attrs, NULL); 2464 } else { 2465 fprintf(stderr, "To measure SMI cost, it needs " 2466 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); 2467 return -1; 2468 } 2469 if (err) { 2470 fprintf(stderr, "Cannot set up SMI cost events\n"); 2471 return -1; 2472 } 2473 return 0; 2474 } 2475 2476 if (topdown_run) { 2477 char *str = NULL; 2478 bool warn = false; 2479 2480 if (stat_config.aggr_mode != AGGR_GLOBAL && 2481 stat_config.aggr_mode != AGGR_CORE) { 2482 pr_err("top down event configuration requires --per-core mode\n"); 2483 return -1; 2484 } 2485 stat_config.aggr_mode = AGGR_CORE; 2486 if (nr_cgroups || !target__has_cpu(&target)) { 2487 pr_err("top down event configuration requires system-wide mode (-a)\n"); 2488 return -1; 2489 } 2490 2491 if (!force_metric_only) 2492 metric_only = true; 2493 if (topdown_filter_events(topdown_attrs, &str, 2494 arch_topdown_check_group(&warn)) < 0) { 2495 pr_err("Out of memory\n"); 2496 return -1; 2497 } 2498 if (topdown_attrs[0] && str) { 2499 if (warn) 2500 arch_topdown_group_warn(); 2501 err = parse_events(evsel_list, str, NULL); 2502 if (err) { 2503 fprintf(stderr, 2504 "Cannot set up top down events %s: %d\n", 2505 str, err); 2506 free(str); 2507 return -1; 2508 } 2509 } else { 2510 fprintf(stderr, "System does not support topdown\n"); 2511 return -1; 2512 } 2513 free(str); 2514 } 2515 2516 if (!evsel_list->nr_entries) { 2517 if (target__has_cpu(&target)) 2518 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2519 2520 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 2521 return -1; 2522 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 2523 if (perf_evlist__add_default_attrs(evsel_list, 2524 frontend_attrs) < 0) 2525 return -1; 2526 } 2527 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 2528 if (perf_evlist__add_default_attrs(evsel_list, 2529 backend_attrs) < 0) 2530 return -1; 2531 } 2532 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 2533 return -1; 2534 } 2535 2536 /* Detailed events get appended to the event list: */ 2537 2538 if (detailed_run < 1) 2539 return 0; 2540 2541 /* Append detailed run extra attributes: */ 2542 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 2543 return -1; 2544 2545 if (detailed_run < 2) 2546 return 0; 2547 2548 /* Append very detailed run extra attributes: */ 2549 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 2550 return -1; 2551 2552 if (detailed_run < 3) 2553 return 0; 2554 2555 /* Append very, very detailed run extra attributes: */ 2556 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 2557 } 2558 2559 static const char * const stat_record_usage[] = { 2560 "perf stat record [<options>]", 2561 NULL, 2562 }; 2563 2564 static void init_features(struct perf_session *session) 2565 { 2566 int feat; 2567 2568 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 2569 perf_header__set_feat(&session->header, feat); 2570 2571 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 2572 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 2573 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 2574 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 2575 } 2576 2577 static int __cmd_record(int argc, const char **argv) 2578 { 2579 struct perf_session *session; 2580 struct perf_data *data = &perf_stat.data; 2581 2582 argc = parse_options(argc, argv, stat_options, stat_record_usage, 2583 PARSE_OPT_STOP_AT_NON_OPTION); 2584 2585 if (output_name) 2586 data->file.path = output_name; 2587 2588 if (run_count != 1 || forever) { 2589 pr_err("Cannot use -r option with perf stat record.\n"); 2590 return -1; 2591 } 2592 2593 session = perf_session__new(data, false, NULL); 2594 if (session == NULL) { 2595 pr_err("Perf session creation failed.\n"); 2596 return -1; 2597 } 2598 2599 init_features(session); 2600 2601 session->evlist = evsel_list; 2602 perf_stat.session = session; 2603 perf_stat.record = true; 2604 return argc; 2605 } 2606 2607 static int process_stat_round_event(struct perf_tool *tool __maybe_unused, 2608 union perf_event *event, 2609 struct perf_session *session) 2610 { 2611 struct stat_round_event *stat_round = &event->stat_round; 2612 struct perf_evsel *counter; 2613 struct timespec tsh, *ts = NULL; 2614 const char **argv = session->header.env.cmdline_argv; 2615 int argc = session->header.env.nr_cmdline; 2616 2617 evlist__for_each_entry(evsel_list, counter) 2618 perf_stat_process_counter(&stat_config, counter); 2619 2620 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 2621 update_stats(&walltime_nsecs_stats, stat_round->time); 2622 2623 if (stat_config.interval && stat_round->time) { 2624 tsh.tv_sec = stat_round->time / NSEC_PER_SEC; 2625 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC; 2626 ts = &tsh; 2627 } 2628 2629 print_counters(ts, argc, argv); 2630 return 0; 2631 } 2632 2633 static 2634 int process_stat_config_event(struct perf_tool *tool, 2635 union perf_event *event, 2636 struct perf_session *session __maybe_unused) 2637 { 2638 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2639 2640 perf_event__read_stat_config(&stat_config, &event->stat_config); 2641 2642 if (cpu_map__empty(st->cpus)) { 2643 if (st->aggr_mode != AGGR_UNSET) 2644 pr_warning("warning: processing task data, aggregation mode not set\n"); 2645 return 0; 2646 } 2647 2648 if (st->aggr_mode != AGGR_UNSET) 2649 stat_config.aggr_mode = st->aggr_mode; 2650 2651 if (perf_stat.data.is_pipe) 2652 perf_stat_init_aggr_mode(); 2653 else 2654 perf_stat_init_aggr_mode_file(st); 2655 2656 return 0; 2657 } 2658 2659 static int set_maps(struct perf_stat *st) 2660 { 2661 if (!st->cpus || !st->threads) 2662 return 0; 2663 2664 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2665 return -EINVAL; 2666 2667 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2668 2669 if (perf_evlist__alloc_stats(evsel_list, true)) 2670 return -ENOMEM; 2671 2672 st->maps_allocated = true; 2673 return 0; 2674 } 2675 2676 static 2677 int process_thread_map_event(struct perf_tool *tool, 2678 union perf_event *event, 2679 struct perf_session *session __maybe_unused) 2680 { 2681 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2682 2683 if (st->threads) { 2684 pr_warning("Extra thread map event, ignoring.\n"); 2685 return 0; 2686 } 2687 2688 st->threads = thread_map__new_event(&event->thread_map); 2689 if (!st->threads) 2690 return -ENOMEM; 2691 2692 return set_maps(st); 2693 } 2694 2695 static 2696 int process_cpu_map_event(struct perf_tool *tool, 2697 union perf_event *event, 2698 struct perf_session *session __maybe_unused) 2699 { 2700 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2701 struct cpu_map *cpus; 2702 2703 if (st->cpus) { 2704 pr_warning("Extra cpu map event, ignoring.\n"); 2705 return 0; 2706 } 2707 2708 cpus = cpu_map__new_data(&event->cpu_map.data); 2709 if (!cpus) 2710 return -ENOMEM; 2711 2712 st->cpus = cpus; 2713 return set_maps(st); 2714 } 2715 2716 static int runtime_stat_new(struct perf_stat_config *config, int nthreads) 2717 { 2718 int i; 2719 2720 config->stats = calloc(nthreads, sizeof(struct runtime_stat)); 2721 if (!config->stats) 2722 return -1; 2723 2724 config->stats_num = nthreads; 2725 2726 for (i = 0; i < nthreads; i++) 2727 runtime_stat__init(&config->stats[i]); 2728 2729 return 0; 2730 } 2731 2732 static void runtime_stat_delete(struct perf_stat_config *config) 2733 { 2734 int i; 2735 2736 if (!config->stats) 2737 return; 2738 2739 for (i = 0; i < config->stats_num; i++) 2740 runtime_stat__exit(&config->stats[i]); 2741 2742 free(config->stats); 2743 } 2744 2745 static const char * const stat_report_usage[] = { 2746 "perf stat report [<options>]", 2747 NULL, 2748 }; 2749 2750 static struct perf_stat perf_stat = { 2751 .tool = { 2752 .attr = perf_event__process_attr, 2753 .event_update = perf_event__process_event_update, 2754 .thread_map = process_thread_map_event, 2755 .cpu_map = process_cpu_map_event, 2756 .stat_config = process_stat_config_event, 2757 .stat = perf_event__process_stat_event, 2758 .stat_round = process_stat_round_event, 2759 }, 2760 .aggr_mode = AGGR_UNSET, 2761 }; 2762 2763 static int __cmd_report(int argc, const char **argv) 2764 { 2765 struct perf_session *session; 2766 const struct option options[] = { 2767 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2768 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2769 "aggregate counts per processor socket", AGGR_SOCKET), 2770 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2771 "aggregate counts per physical processor core", AGGR_CORE), 2772 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2773 "disable CPU count aggregation", AGGR_NONE), 2774 OPT_END() 2775 }; 2776 struct stat st; 2777 int ret; 2778 2779 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2780 2781 if (!input_name || !strlen(input_name)) { 2782 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2783 input_name = "-"; 2784 else 2785 input_name = "perf.data"; 2786 } 2787 2788 perf_stat.data.file.path = input_name; 2789 perf_stat.data.mode = PERF_DATA_MODE_READ; 2790 2791 session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); 2792 if (session == NULL) 2793 return -1; 2794 2795 perf_stat.session = session; 2796 stat_config.output = stderr; 2797 evsel_list = session->evlist; 2798 2799 ret = perf_session__process_events(session); 2800 if (ret) 2801 return ret; 2802 2803 perf_session__delete(session); 2804 return 0; 2805 } 2806 2807 static void setup_system_wide(int forks) 2808 { 2809 /* 2810 * Make system wide (-a) the default target if 2811 * no target was specified and one of following 2812 * conditions is met: 2813 * 2814 * - there's no workload specified 2815 * - there is workload specified but all requested 2816 * events are system wide events 2817 */ 2818 if (!target__none(&target)) 2819 return; 2820 2821 if (!forks) 2822 target.system_wide = true; 2823 else { 2824 struct perf_evsel *counter; 2825 2826 evlist__for_each_entry(evsel_list, counter) { 2827 if (!counter->system_wide) 2828 return; 2829 } 2830 2831 if (evsel_list->nr_entries) 2832 target.system_wide = true; 2833 } 2834 } 2835 2836 int cmd_stat(int argc, const char **argv) 2837 { 2838 const char * const stat_usage[] = { 2839 "perf stat [<options>] [<command>]", 2840 NULL 2841 }; 2842 int status = -EINVAL, run_idx; 2843 const char *mode; 2844 FILE *output = stderr; 2845 unsigned int interval, timeout; 2846 const char * const stat_subcommands[] = { "record", "report" }; 2847 2848 setlocale(LC_ALL, ""); 2849 2850 evsel_list = perf_evlist__new(); 2851 if (evsel_list == NULL) 2852 return -ENOMEM; 2853 2854 parse_events__shrink_config_terms(); 2855 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2856 (const char **) stat_usage, 2857 PARSE_OPT_STOP_AT_NON_OPTION); 2858 perf_stat__collect_metric_expr(evsel_list); 2859 perf_stat__init_shadow_stats(); 2860 2861 if (csv_sep) { 2862 csv_output = true; 2863 if (!strcmp(csv_sep, "\\t")) 2864 csv_sep = "\t"; 2865 } else 2866 csv_sep = DEFAULT_SEPARATOR; 2867 2868 if (argc && !strncmp(argv[0], "rec", 3)) { 2869 argc = __cmd_record(argc, argv); 2870 if (argc < 0) 2871 return -1; 2872 } else if (argc && !strncmp(argv[0], "rep", 3)) 2873 return __cmd_report(argc, argv); 2874 2875 interval = stat_config.interval; 2876 timeout = stat_config.timeout; 2877 2878 /* 2879 * For record command the -o is already taken care of. 2880 */ 2881 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2882 output = NULL; 2883 2884 if (output_name && output_fd) { 2885 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2886 parse_options_usage(stat_usage, stat_options, "o", 1); 2887 parse_options_usage(NULL, stat_options, "log-fd", 0); 2888 goto out; 2889 } 2890 2891 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2892 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2893 goto out; 2894 } 2895 2896 if (metric_only && run_count > 1) { 2897 fprintf(stderr, "--metric-only is not supported with -r\n"); 2898 goto out; 2899 } 2900 2901 if (walltime_run_table && run_count <= 1) { 2902 fprintf(stderr, "--table is only supported with -r\n"); 2903 parse_options_usage(stat_usage, stat_options, "r", 1); 2904 parse_options_usage(NULL, stat_options, "table", 0); 2905 goto out; 2906 } 2907 2908 if (output_fd < 0) { 2909 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2910 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2911 goto out; 2912 } 2913 2914 if (!output) { 2915 struct timespec tm; 2916 mode = append_file ? "a" : "w"; 2917 2918 output = fopen(output_name, mode); 2919 if (!output) { 2920 perror("failed to create output file"); 2921 return -1; 2922 } 2923 clock_gettime(CLOCK_REALTIME, &tm); 2924 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2925 } else if (output_fd > 0) { 2926 mode = append_file ? "a" : "w"; 2927 output = fdopen(output_fd, mode); 2928 if (!output) { 2929 perror("Failed opening logfd"); 2930 return -errno; 2931 } 2932 } 2933 2934 stat_config.output = output; 2935 2936 /* 2937 * let the spreadsheet do the pretty-printing 2938 */ 2939 if (csv_output) { 2940 /* User explicitly passed -B? */ 2941 if (big_num_opt == 1) { 2942 fprintf(stderr, "-B option not supported with -x\n"); 2943 parse_options_usage(stat_usage, stat_options, "B", 1); 2944 parse_options_usage(NULL, stat_options, "x", 1); 2945 goto out; 2946 } else /* Nope, so disable big number formatting */ 2947 big_num = false; 2948 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2949 big_num = false; 2950 2951 setup_system_wide(argc); 2952 2953 if (run_count < 0) { 2954 pr_err("Run count must be a positive number\n"); 2955 parse_options_usage(stat_usage, stat_options, "r", 1); 2956 goto out; 2957 } else if (run_count == 0) { 2958 forever = true; 2959 run_count = 1; 2960 } 2961 2962 if (walltime_run_table) { 2963 walltime_run = zalloc(run_count * sizeof(walltime_run[0])); 2964 if (!walltime_run) { 2965 pr_err("failed to setup -r option"); 2966 goto out; 2967 } 2968 } 2969 2970 if ((stat_config.aggr_mode == AGGR_THREAD) && 2971 !target__has_task(&target)) { 2972 if (!target.system_wide || target.cpu_list) { 2973 fprintf(stderr, "The --per-thread option is only " 2974 "available when monitoring via -p -t -a " 2975 "options or only --per-thread.\n"); 2976 parse_options_usage(NULL, stat_options, "p", 1); 2977 parse_options_usage(NULL, stat_options, "t", 1); 2978 goto out; 2979 } 2980 } 2981 2982 /* 2983 * no_aggr, cgroup are for system-wide only 2984 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2985 */ 2986 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2987 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2988 !target__has_cpu(&target)) { 2989 fprintf(stderr, "both cgroup and no-aggregation " 2990 "modes only available in system-wide mode\n"); 2991 2992 parse_options_usage(stat_usage, stat_options, "G", 1); 2993 parse_options_usage(NULL, stat_options, "A", 1); 2994 parse_options_usage(NULL, stat_options, "a", 1); 2995 goto out; 2996 } 2997 2998 if (add_default_attributes()) 2999 goto out; 3000 3001 target__validate(&target); 3002 3003 if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) 3004 target.per_thread = true; 3005 3006 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 3007 if (target__has_task(&target)) { 3008 pr_err("Problems finding threads of monitor\n"); 3009 parse_options_usage(stat_usage, stat_options, "p", 1); 3010 parse_options_usage(NULL, stat_options, "t", 1); 3011 } else if (target__has_cpu(&target)) { 3012 perror("failed to parse CPUs map"); 3013 parse_options_usage(stat_usage, stat_options, "C", 1); 3014 parse_options_usage(NULL, stat_options, "a", 1); 3015 } 3016 goto out; 3017 } 3018 3019 /* 3020 * Initialize thread_map with comm names, 3021 * so we could print it out on output. 3022 */ 3023 if (stat_config.aggr_mode == AGGR_THREAD) { 3024 thread_map__read_comms(evsel_list->threads); 3025 if (target.system_wide) { 3026 if (runtime_stat_new(&stat_config, 3027 thread_map__nr(evsel_list->threads))) { 3028 goto out; 3029 } 3030 } 3031 } 3032 3033 if (stat_config.times && interval) 3034 interval_count = true; 3035 else if (stat_config.times && !interval) { 3036 pr_err("interval-count option should be used together with " 3037 "interval-print.\n"); 3038 parse_options_usage(stat_usage, stat_options, "interval-count", 0); 3039 parse_options_usage(stat_usage, stat_options, "I", 1); 3040 goto out; 3041 } 3042 3043 if (timeout && timeout < 100) { 3044 if (timeout < 10) { 3045 pr_err("timeout must be >= 10ms.\n"); 3046 parse_options_usage(stat_usage, stat_options, "timeout", 0); 3047 goto out; 3048 } else 3049 pr_warning("timeout < 100ms. " 3050 "The overhead percentage could be high in some cases. " 3051 "Please proceed with caution.\n"); 3052 } 3053 if (timeout && interval) { 3054 pr_err("timeout option is not supported with interval-print.\n"); 3055 parse_options_usage(stat_usage, stat_options, "timeout", 0); 3056 parse_options_usage(stat_usage, stat_options, "I", 1); 3057 goto out; 3058 } 3059 3060 if (perf_evlist__alloc_stats(evsel_list, interval)) 3061 goto out; 3062 3063 if (perf_stat_init_aggr_mode()) 3064 goto out; 3065 3066 /* 3067 * We dont want to block the signals - that would cause 3068 * child tasks to inherit that and Ctrl-C would not work. 3069 * What we want is for Ctrl-C to work in the exec()-ed 3070 * task, but being ignored by perf stat itself: 3071 */ 3072 atexit(sig_atexit); 3073 if (!forever) 3074 signal(SIGINT, skip_signal); 3075 signal(SIGCHLD, skip_signal); 3076 signal(SIGALRM, skip_signal); 3077 signal(SIGABRT, skip_signal); 3078 3079 status = 0; 3080 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 3081 if (run_count != 1 && verbose > 0) 3082 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 3083 run_idx + 1); 3084 3085 status = run_perf_stat(argc, argv, run_idx); 3086 if (forever && status != -1) { 3087 print_counters(NULL, argc, argv); 3088 perf_stat__reset_stats(); 3089 } 3090 } 3091 3092 if (!forever && status != -1 && !interval) 3093 print_counters(NULL, argc, argv); 3094 3095 if (STAT_RECORD) { 3096 /* 3097 * We synthesize the kernel mmap record just so that older tools 3098 * don't emit warnings about not being able to resolve symbols 3099 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 3100 * a saner message about no samples being in the perf.data file. 3101 * 3102 * This also serves to suppress a warning about f_header.data.size == 0 3103 * in header.c at the moment 'perf stat record' gets introduced, which 3104 * is not really needed once we start adding the stat specific PERF_RECORD_ 3105 * records, but the need to suppress the kptr_restrict messages in older 3106 * tools remain -acme 3107 */ 3108 int fd = perf_data__fd(&perf_stat.data); 3109 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 3110 process_synthesized_event, 3111 &perf_stat.session->machines.host); 3112 if (err) { 3113 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 3114 "older tools may produce warnings about this file\n."); 3115 } 3116 3117 if (!interval) { 3118 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 3119 pr_err("failed to write stat round event\n"); 3120 } 3121 3122 if (!perf_stat.data.is_pipe) { 3123 perf_stat.session->header.data_size += perf_stat.bytes_written; 3124 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 3125 } 3126 3127 perf_session__delete(perf_stat.session); 3128 } 3129 3130 perf_stat__exit_aggr_mode(); 3131 perf_evlist__free_stats(evsel_list); 3132 out: 3133 free(walltime_run); 3134 3135 if (smi_cost && smi_reset) 3136 sysfs__write_int(FREEZE_ON_SMI_PATH, 0); 3137 3138 perf_evlist__delete(evsel_list); 3139 3140 runtime_stat_delete(&stat_config); 3141 3142 return status; 3143 } 3144