/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
#include "util/session.h"
#include "util/tool.h"
#include "asm/bug.h"

#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>
#include <math.h>

#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

static void print_counters(struct timespec *ts, int argc, const char **argv);

/* Default events used for perf stat -T */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};

/* More limited version when the CPU does not have all events. */
static const char *transaction_limited_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};
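/*
 * Adjacent string literals above concatenate into a single spec for
 * parse_events(); the braces define an event group that is scheduled
 * together. transaction_limited_attrs, for instance, amounts to:
 *
 *   "task-clock,{instructions,cycles,cpu/cycles-t/,cpu/tx-start/}"
 */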
static struct perf_evlist	*evsel_list;

static struct target target = {
	.uid	= UINT_MAX,
};

typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);

static int			run_count			= 1;
static bool			no_inherit			= false;
static volatile pid_t		child_pid			= -1;
static bool			null_run			= false;
static int			detailed_run			= 0;
static bool			transaction_run;
static bool			big_num				= true;
static int			big_num_opt			= -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
static bool			group				= false;
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
static unsigned int		initial_delay			= 0;
static unsigned int		unit_width			= 4; /* strlen("unit") */
static bool			forever				= false;
static bool			metric_only			= false;
static struct timespec		ref_time;
static struct cpu_map		*aggr_map;
static aggr_get_id_t		aggr_get_id;
static bool			append_file;
static const char		*output_name;
static int			output_fd;

struct perf_stat {
	bool			 record;
	struct perf_data_file	 file;
	struct perf_session	*session;
	u64			 bytes_written;
	struct perf_tool	 tool;
	bool			 maps_allocated;
	struct cpu_map		*cpus;
	struct thread_map	*threads;
	enum aggr_mode		 aggr_mode;
};

static struct perf_stat		perf_stat;
#define STAT_RECORD		perf_stat.record

static volatile int done = 0;

static struct perf_stat_config stat_config = {
	.aggr_mode	= AGGR_GLOBAL,
	.scale		= true,
};

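/*
 * r = a - b, with a manual borrow on the nanosecond field: e.g.
 * a = { 2, 100000000 } (2.1s) minus b = { 1, 900000000 } (1.9s)
 * yields r = { 0, 200000000 } (0.2s). Assumes a >= b, which holds
 * for the monotonic timestamps used below.
 */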
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	if (a->tv_nsec < b->tv_nsec) {
		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
		r->tv_sec--;
	} else {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	}
}

static void perf_stat__reset_stats(void)
{
	perf_evlist__reset_stats(evsel_list);
	perf_stat__reset_shadow_stats();
}

static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;

	if (stat_config.scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;

	/*
	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should
	 * be harmless and avoids confusing messages from older tools.
	 *
	 * However for pipe sessions we need to keep it zero,
	 * because script's perf_evsel__check_attr is triggered
	 * by attr->sample_type != 0, and we can't run it on
	 * stat sessions.
	 */
	if (!(STAT_RECORD && perf_stat.file.is_pipe))
		attr->sample_type = PERF_SAMPLE_IDENTIFIER;

	/*
	 * Disable all counters initially; they will be enabled
	 * either manually by us or by the kernel via enable_on_exec
	 * set later.
	 */
	if (perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;

		/*
		 * In case of initial_delay we enable tracee
		 * events manually.
		 */
		if (target__none(&target) && !initial_delay)
			attr->enable_on_exec = 1;
	}

	if (target__has_cpu(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	perf_stat.bytes_written += event->header.size;
	return 0;
}

static int write_stat_round_event(u64 tm, u64 type)
{
	return perf_event__synthesize_stat_round(NULL, tm, type,
						 process_synthesized_event,
						 NULL);
}

#define WRITE_STAT_ROUND_EVENT(time, interval) \
	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
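/*
 * The ## paste turns the second argument into one of the stat round
 * types, e.g. WRITE_STAT_ROUND_EVENT(t, INTERVAL) expands to
 * write_stat_round_event(t, PERF_STAT_ROUND_TYPE__INTERVAL).
 */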
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

static int
perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
			     struct perf_counts_values *count)
{
	struct perf_sample_id *sid = SID(counter, cpu, thread);

	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
					   process_synthesized_event, NULL);
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;

	if (!counter->supported)
		return -ENOENT;

	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);
			if (perf_evsel__read(counter, cpu, thread, count))
				return -1;

			if (STAT_RECORD) {
				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
					pr_err("failed to write stat event\n");
					return -1;
				}
			}

			if (verbose > 1) {
				fprintf(stat_config.output,
					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
					perf_evsel__name(counter),
					cpu,
					count->val, count->ena, count->run);
			}
		}
	}

	return 0;
}

static void read_counters(bool close_counters)
{
	struct perf_evsel *counter;

	evlist__for_each(evsel_list, counter) {
		if (read_counter(counter))
			pr_debug("failed to read counter %s\n", counter->name);

		if (perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);

		if (close_counters) {
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     thread_map__nr(evsel_list->threads));
		}
	}
}

static void process_interval(void)
{
	struct timespec ts, rs;

	read_counters(false);

	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

	if (STAT_RECORD) {
		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
			pr_err("failed to write stat round event\n");
	}

	print_counters(&rs, 0, NULL);
}

static void enable_counters(void)
{
	if (initial_delay)
		usleep(initial_delay * 1000);

	/*
	 * We need to enable counters manually only if:
	 * - we are attaching to an existing task or cpu (no forked
	 *   workload), or
	 * - an initial delay is configured, in which case
	 *   enable_on_exec was not set.
	 */
	if (!target__none(&target) || initial_delay)
		perf_evlist__enable(evsel_list);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
}

static bool has_unit(struct perf_evsel *counter)
{
	return counter->unit && *counter->unit;
}

static bool has_scale(struct perf_evsel *counter)
{
	return counter->scale != 1;
}

static int perf_stat_synthesize_config(bool is_pipe)
{
	struct perf_evsel *counter;
	int err;

	if (is_pipe) {
		err = perf_event__synthesize_attrs(NULL, perf_stat.session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}
	}

	/*
	 * Synthesize the other event details not carried within the
	 * attr event - unit, scale, name.
	 */
	evlist__for_each(evsel_list, counter) {
		if (!counter->supported)
			continue;

		/*
		 * Synthesize unit and scale only if it's defined.
		 */
		if (has_unit(counter)) {
			err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel unit.\n");
				return err;
			}
		}

		if (has_scale(counter)) {
			err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel scale.\n");
				return err;
			}
		}

		if (counter->own_cpus) {
			err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel cpus.\n");
				return err;
			}
		}
		/*
		 * Name is needed only for pipe output,
		 * perf.data carries event names.
		 */
		if (is_pipe) {
			err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel name.\n");
				return err;
			}
		}
	}

	err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_stat_config(NULL, &stat_config,
						 process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize config.\n");
		return err;
	}

	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

static int __store_counter_ids(struct perf_evsel *counter,
			       struct cpu_map *cpus,
			       struct thread_map *threads)
{
	int cpu, thread;

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		for (thread = 0; thread < threads->nr; thread++) {
			int fd = FD(counter, cpu, thread);

			if (perf_evlist__id_add_fd(evsel_list, counter,
						   cpu, thread, fd) < 0)
				return -1;
		}
	}

	return 0;
}

static int store_counter_ids(struct perf_evsel *counter)
{
	struct cpu_map *cpus = counter->cpus;
	struct thread_map *threads = counter->threads;

	if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
		return -ENOMEM;

	return __store_counter_ids(counter, cpus, threads);
}

static int __run_perf_stat(int argc, const char **argv)
{
	int interval = stat_config.interval;
	char msg[512];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);
	bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;

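	/*
	 * The interval is given in milliseconds; split it into whole
	 * seconds and nanoseconds for nanosleep(): e.g. 2500ms becomes
	 * { .tv_sec = 2, .tv_nsec = 500000000 }.
	 */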
	if (interval) {
		ts.tv_sec = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
						  workload_exec_failed_signal) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	evlist__for_each(evsel_list, counter) {
try_again:
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;

				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;

		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;

		if (STAT_RECORD && store_counter_ids(counter))
			return -1;
	}

	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
			strerror_r(errno, msg, sizeof(msg)));
		return -1;
	}

	if (STAT_RECORD) {
		int err, fd = perf_data_file__fd(&perf_stat.file);

		if (is_pipe) {
			err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
		} else {
			err = perf_session__write_header(perf_stat.session, evsel_list,
							 fd, false);
		}

		if (err < 0)
			return err;

		err = perf_stat_synthesize_config(is_pipe);
		if (err < 0)
			return err;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		enable_counters();

		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				process_interval();
			}
		}
		wait(&status);

		if (workload_exec_errno) {
			const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
			return -1;
		}

		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		enable_counters();
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				process_interval();
		}
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	read_counters(true);

	return WEXITSTATUS(status);
}

static int run_perf_stat(int argc, const char **argv)
{
	int ret;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv);
	if (ret)
		return ret;

	if (post_cmd) {
		ret = system(post_cmd);
		if (ret)
			return ret;
	}

	return ret;
}
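/*
 * Show how long the counter actually ran on the PMU relative to the
 * time it was enabled; with counter multiplexing run < ena, so e.g.
 * run = 50ms out of ena = 200ms prints "(25.00%)". CSV mode always
 * emits the raw run time and the percentage.
 */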
static void print_running(u64 run, u64 ena)
{
	if (csv_output) {
		fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
			csv_sep,
			run,
			csv_sep,
			ena ? 100.0 * run / ena : 100.0);
	} else if (run != ena) {
		fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
	}
}

static void print_noise_pct(double total, double avg)
{
	double pct = rel_stddev_stats(total, avg);

	if (csv_output)
		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
	else if (pct)
		fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
}

static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat_evsel *ps;

	if (run_count == 1)
		return;

	ps = evsel->priv;
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}

static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_SOCKET:
		fprintf(stat_config.output, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			id,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_NONE:
		fprintf(stat_config.output, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[id], csv_sep);
		break;
	case AGGR_THREAD:
		fprintf(stat_config.output, "%*s-%*d%s",
			csv_output ? 0 : 16,
			thread_map__comm(evsel->threads, id),
			csv_output ? 0 : -8,
			thread_map__pid(evsel->threads, id),
			csv_sep);
		break;
	case AGGR_GLOBAL:
	case AGGR_UNSET:
	default:
		break;
	}
}

struct outstate {
	FILE *fh;
	bool newline;
	const char *prefix;
	int  nfields;
	int  id, nr;
	struct perf_evsel *evsel;
};

#define METRIC_LEN 35

static void new_line_std(void *ctx)
{
	struct outstate *os = ctx;

	os->newline = true;
}

static void do_new_line_std(struct outstate *os)
{
	fputc('\n', os->fh);
	fputs(os->prefix, os->fh);
	aggr_printout(os->evsel, os->id, os->nr);
	if (stat_config.aggr_mode == AGGR_NONE)
		fprintf(os->fh, " ");
	fprintf(os->fh, " ");
}

static void print_metric_std(void *ctx, const char *color, const char *fmt,
			     const char *unit, double val)
{
	struct outstate *os = ctx;
	FILE *out = os->fh;
	int n;
	bool newline = os->newline;

	os->newline = false;

	if (unit == NULL || fmt == NULL) {
		fprintf(out, "%-*s", METRIC_LEN, "");
		return;
	}

	if (newline)
		do_new_line_std(os);

	n = fprintf(out, " # ");
	if (color)
		n += color_fprintf(out, color, fmt, val);
	else
		n += fprintf(out, fmt, val);
	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
}

static void new_line_csv(void *ctx)
{
	struct outstate *os = ctx;
	int i;

	fputc('\n', os->fh);
	if (os->prefix)
		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
	aggr_printout(os->evsel, os->id, os->nr);
	for (i = 0; i < os->nfields; i++)
		fputs(csv_sep, os->fh);
}
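/*
 * Format the value, then keep only the leading numeric part: e.g.
 * fmt = "%8.1f" with val = 12.34 gives "    12.3", which is trimmed
 * to "12.3" before being emitted as its own CSV column.
 */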
static void print_metric_csv(void *ctx,
			     const char *color __maybe_unused,
			     const char *fmt, const char *unit, double val)
{
	struct outstate *os = ctx;
	FILE *out = os->fh;
	char buf[64], *vals, *ends;

	if (unit == NULL || fmt == NULL) {
		fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
		return;
	}
	snprintf(buf, sizeof(buf), fmt, val);
	vals = buf;
	while (isspace(*vals))
		vals++;
	ends = vals;
	while (isdigit(*ends) || *ends == '.')
		ends++;
	*ends = 0;
	while (isspace(*unit))
		unit++;
	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
}

#define METRIC_ONLY_LEN 20

/* Filter out some columns that don't work well in metrics only mode */

static bool valid_only_metric(const char *unit)
{
	if (!unit)
		return false;
	if (strstr(unit, "/sec") ||
	    strstr(unit, "hz") ||
	    strstr(unit, "Hz") ||
	    strstr(unit, "CPUs utilized"))
		return false;
	return true;
}

static const char *fixunit(char *buf, struct perf_evsel *evsel,
			   const char *unit)
{
	if (!strncmp(unit, "of all", 6)) {
		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
			 unit);
		return buf;
	}
	return unit;
}

static void print_metric_only(void *ctx, const char *color, const char *fmt,
			      const char *unit, double val)
{
	struct outstate *os = ctx;
	FILE *out = os->fh;
	int n;
	char buf[1024];
	unsigned mlen = METRIC_ONLY_LEN;

	if (!valid_only_metric(unit))
		return;
	unit = fixunit(buf, os->evsel, unit);
	if (color)
		n = color_fprintf(out, color, fmt, val);
	else
		n = fprintf(out, fmt, val);
	if (n > METRIC_ONLY_LEN)
		n = METRIC_ONLY_LEN;
	if (mlen < strlen(unit))
		mlen = strlen(unit) + 1;
	fprintf(out, "%*s", mlen - n, "");
}

static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
				  const char *fmt,
				  const char *unit, double val)
{
	struct outstate *os = ctx;
	FILE *out = os->fh;
	char buf[64], *vals, *ends;
	char tbuf[1024];

	if (!valid_only_metric(unit))
		return;
	unit = fixunit(tbuf, os->evsel, unit);
	snprintf(buf, sizeof buf, fmt, val);
	vals = buf;
	while (isspace(*vals))
		vals++;
	ends = vals;
	while (isdigit(*ends) || *ends == '.')
		ends++;
	*ends = 0;
	fprintf(out, "%s%s", vals, csv_sep);
}

static void new_line_metric(void *ctx __maybe_unused)
{
}

static void print_metric_header(void *ctx, const char *color __maybe_unused,
				const char *fmt __maybe_unused,
				const char *unit, double val __maybe_unused)
{
	struct outstate *os = ctx;
	char tbuf[1024];

	if (!valid_only_metric(unit))
		return;
	unit = fixunit(tbuf, os->evsel, unit);
	if (csv_output)
		fprintf(os->fh, "%s%s", unit, csv_sep);
	else
		fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
}

static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
	FILE *output = stat_config.output;
	double msecs = avg / 1e6;
	const char *fmt_v, *fmt_n;
	char name[25];

	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
	fmt_n = csv_output ? "%s" : "%-25s";

	aggr_printout(evsel, id, nr);

	scnprintf(name, sizeof(name), "%s%s",
		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");

	fprintf(output, fmt_v, msecs, csv_sep);

	if (csv_output)
		fprintf(output, "%s%s", evsel->unit, csv_sep);
	else
		fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);

	fprintf(output, fmt_n, name);

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
}
"" : " (msec)"); 952 953 fprintf(output, fmt_v, msecs, csv_sep); 954 955 if (csv_output) 956 fprintf(output, "%s%s", evsel->unit, csv_sep); 957 else 958 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 959 960 fprintf(output, fmt_n, name); 961 962 if (evsel->cgrp) 963 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 964 } 965 966 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 967 { 968 int i; 969 970 if (!aggr_get_id) 971 return 0; 972 973 if (stat_config.aggr_mode == AGGR_NONE) 974 return id; 975 976 if (stat_config.aggr_mode == AGGR_GLOBAL) 977 return 0; 978 979 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 980 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 981 982 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 983 return cpu2; 984 } 985 return 0; 986 } 987 988 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 989 { 990 FILE *output = stat_config.output; 991 double sc = evsel->scale; 992 const char *fmt; 993 994 if (csv_output) { 995 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 996 } else { 997 if (big_num) 998 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 999 else 1000 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1001 } 1002 1003 aggr_printout(evsel, id, nr); 1004 1005 fprintf(output, fmt, avg, csv_sep); 1006 1007 if (evsel->unit) 1008 fprintf(output, "%-*s%s", 1009 csv_output ? 0 : unit_width, 1010 evsel->unit, csv_sep); 1011 1012 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1013 1014 if (evsel->cgrp) 1015 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1016 } 1017 1018 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1019 char *prefix, u64 run, u64 ena, double noise) 1020 { 1021 struct perf_stat_output_ctx out; 1022 struct outstate os = { 1023 .fh = stat_config.output, 1024 .prefix = prefix ? prefix : "", 1025 .id = id, 1026 .nr = nr, 1027 .evsel = counter, 1028 }; 1029 print_metric_t pm = print_metric_std; 1030 void (*nl)(void *); 1031 1032 if (metric_only) { 1033 nl = new_line_metric; 1034 if (csv_output) 1035 pm = print_metric_only_csv; 1036 else 1037 pm = print_metric_only; 1038 } else 1039 nl = new_line_std; 1040 1041 if (csv_output && !metric_only) { 1042 static int aggr_fields[] = { 1043 [AGGR_GLOBAL] = 0, 1044 [AGGR_THREAD] = 1, 1045 [AGGR_NONE] = 1, 1046 [AGGR_SOCKET] = 2, 1047 [AGGR_CORE] = 2, 1048 }; 1049 1050 pm = print_metric_csv; 1051 nl = new_line_csv; 1052 os.nfields = 3; 1053 os.nfields += aggr_fields[stat_config.aggr_mode]; 1054 if (counter->cgrp) 1055 os.nfields++; 1056 } 1057 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1058 if (metric_only) { 1059 pm(&os, NULL, "", "", 0); 1060 return; 1061 } 1062 aggr_printout(counter, id, nr); 1063 1064 fprintf(stat_config.output, "%*s%s", 1065 csv_output ? 0 : 18, 1066 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1067 csv_sep); 1068 1069 fprintf(stat_config.output, "%-*s%s", 1070 csv_output ? 0 : unit_width, 1071 counter->unit, csv_sep); 1072 1073 fprintf(stat_config.output, "%*s", 1074 csv_output ? 
	aggr_printout(evsel, id, nr);

	fprintf(output, fmt, avg, csv_sep);

	if (evsel->unit)
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			evsel->unit, csv_sep);

	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
}

static void printout(int id, int nr, struct perf_evsel *counter, double uval,
		     char *prefix, u64 run, u64 ena, double noise)
{
	struct perf_stat_output_ctx out;
	struct outstate os = {
		.fh = stat_config.output,
		.prefix = prefix ? prefix : "",
		.id = id,
		.nr = nr,
		.evsel = counter,
	};
	print_metric_t pm = print_metric_std;
	void (*nl)(void *);

	if (metric_only) {
		nl = new_line_metric;
		if (csv_output)
			pm = print_metric_only_csv;
		else
			pm = print_metric_only;
	} else
		nl = new_line_std;

	if (csv_output && !metric_only) {
		static int aggr_fields[] = {
			[AGGR_GLOBAL] = 0,
			[AGGR_THREAD] = 1,
			[AGGR_NONE] = 1,
			[AGGR_SOCKET] = 2,
			[AGGR_CORE] = 2,
		};

		pm = print_metric_csv;
		nl = new_line_csv;
		os.nfields = 3;
		os.nfields += aggr_fields[stat_config.aggr_mode];
		if (counter->cgrp)
			os.nfields++;
	}
	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
		if (metric_only) {
			pm(&os, NULL, "", "", 0);
			return;
		}
		aggr_printout(counter, id, nr);

		fprintf(stat_config.output, "%*s%s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep);

		fprintf(stat_config.output, "%-*s%s",
			csv_output ? 0 : unit_width,
			counter->unit, csv_sep);

		fprintf(stat_config.output, "%*s",
			csv_output ? 0 : -25,
			perf_evsel__name(counter));

		if (counter->cgrp)
			fprintf(stat_config.output, "%s%s",
				csv_sep, counter->cgrp->name);

		if (!csv_output)
			pm(&os, NULL, NULL, "", 0);
		print_noise(counter, noise);
		print_running(run, ena);
		if (csv_output)
			pm(&os, NULL, NULL, "", 0);
		return;
	}

	if (metric_only)
		/* nothing */;
	else if (nsec_counter(counter))
		nsec_printout(id, nr, counter, uval);
	else
		abs_printout(id, nr, counter, uval);

	out.print_metric = pm;
	out.new_line = nl;
	out.ctx = &os;

	if (csv_output && !metric_only) {
		print_noise(counter, noise);
		print_running(run, ena);
	}

	perf_stat__print_shadow_stats(counter, uval,
				      first_shadow_cpu(counter, id),
				      &out);
	if (!csv_output && !metric_only) {
		print_noise(counter, noise);
		print_running(run, ena);
	}
}

static void aggr_update_shadow(void)
{
	int cpu, s2, id, s;
	u64 val;
	struct perf_evsel *counter;

	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
		evlist__for_each(evsel_list, counter) {
			val = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				s2 = aggr_get_id(evsel_list->cpus, cpu);
				if (s2 != id)
					continue;
				val += perf_counts(counter->counts, cpu, 0)->val;
			}
			val = val * counter->scale;
			perf_stat__update_shadow_stats(counter, &val,
						       first_shadow_cpu(counter, id));
		}
	}
}
static void print_aggr(char *prefix)
{
	FILE *output = stat_config.output;
	struct perf_evsel *counter;
	int cpu, s, s2, id, nr;
	double uval;
	u64 ena, run, val;
	bool first;

	if (!(aggr_map || aggr_get_id))
		return;

	aggr_update_shadow();

	/*
	 * With metric_only everything is on a single line.
	 * Without it, each counter gets its own line.
	 */
	for (s = 0; s < aggr_map->nr; s++) {
		if (prefix && metric_only)
			fprintf(output, "%s", prefix);

		id = aggr_map->map[s];
		first = true;
		evlist__for_each(evsel_list, counter) {
			val = ena = run = 0;
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
				if (s2 != id)
					continue;
				val += perf_counts(counter->counts, cpu, 0)->val;
				ena += perf_counts(counter->counts, cpu, 0)->ena;
				run += perf_counts(counter->counts, cpu, 0)->run;
				nr++;
			}
			if (first && metric_only) {
				first = false;
				aggr_printout(counter, id, nr);
			}
			if (prefix && !metric_only)
				fprintf(output, "%s", prefix);

			uval = val * counter->scale;
			printout(id, nr, counter, uval, prefix, run, ena, 1.0);
			if (!metric_only)
				fputc('\n', output);
		}
		if (metric_only)
			fputc('\n', output);
	}
}

static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
{
	FILE *output = stat_config.output;
	int nthreads = thread_map__nr(counter->threads);
	int ncpus = cpu_map__nr(counter->cpus);
	int cpu, thread;
	double uval;

	for (thread = 0; thread < nthreads; thread++) {
		u64 ena = 0, run = 0, val = 0;

		for (cpu = 0; cpu < ncpus; cpu++) {
			val += perf_counts(counter->counts, cpu, thread)->val;
			ena += perf_counts(counter->counts, cpu, thread)->ena;
			run += perf_counts(counter->counts, cpu, thread)->run;
		}

		if (prefix)
			fprintf(output, "%s", prefix);

		uval = val * counter->scale;
		printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
		fputc('\n', output);
	}
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
	FILE *output = stat_config.output;
	struct perf_stat_evsel *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
	double uval;
	double avg_enabled, avg_running;

	avg_enabled = avg_stats(&ps->res_stats[1]);
	avg_running = avg_stats(&ps->res_stats[2]);

	if (prefix && !metric_only)
		fprintf(output, "%s", prefix);

	uval = avg * counter->scale;
	printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
	if (!metric_only)
		fprintf(output, "\n");
}

/*
 * Print out the results of a single counter:
 * does not use aggregated counts in system-wide mode
 */
static void print_counter(struct perf_evsel *counter, char *prefix)
{
	FILE *output = stat_config.output;
	u64 ena, run, val;
	double uval;
	int cpu;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		val = perf_counts(counter->counts, cpu, 0)->val;
		ena = perf_counts(counter->counts, cpu, 0)->ena;
		run = perf_counts(counter->counts, cpu, 0)->run;

		if (prefix)
			fprintf(output, "%s", prefix);

		uval = val * counter->scale;
		printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);

		fputc('\n', output);
	}
}
static void print_no_aggr_metric(char *prefix)
{
	int cpu;
	int nrcpus = 0;
	struct perf_evsel *counter;
	u64 ena, run, val;
	double uval;

	nrcpus = evsel_list->cpus->nr;
	for (cpu = 0; cpu < nrcpus; cpu++) {
		bool first = true;

		if (prefix)
			fputs(prefix, stat_config.output);
		evlist__for_each(evsel_list, counter) {
			if (first) {
				aggr_printout(counter, cpu, 0);
				first = false;
			}
			val = perf_counts(counter->counts, cpu, 0)->val;
			ena = perf_counts(counter->counts, cpu, 0)->ena;
			run = perf_counts(counter->counts, cpu, 0)->run;

			uval = val * counter->scale;
			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
		}
		fputc('\n', stat_config.output);
	}
}

static int aggr_header_lens[] = {
	[AGGR_CORE] = 18,
	[AGGR_SOCKET] = 12,
	[AGGR_NONE] = 6,
	[AGGR_THREAD] = 24,
	[AGGR_GLOBAL] = 0,
};

static void print_metric_headers(char *prefix)
{
	struct perf_stat_output_ctx out;
	struct perf_evsel *counter;
	struct outstate os = {
		.fh = stat_config.output
	};

	if (prefix)
		fprintf(stat_config.output, "%s", prefix);

	if (!csv_output)
		fprintf(stat_config.output, "%*s",
			aggr_header_lens[stat_config.aggr_mode], "");

	/* Print metrics headers only */
	evlist__for_each(evsel_list, counter) {
		os.evsel = counter;
		out.ctx = &os;
		out.print_metric = print_metric_header;
		out.new_line = new_line_metric;
		perf_stat__print_shadow_stats(counter, 0,
					      0,
					      &out);
	}
	fputc('\n', stat_config.output);
}

static void print_interval(char *prefix, struct timespec *ts)
{
	FILE *output = stat_config.output;
	static int num_print_interval;

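	/*
	 * The timestamp prefix is <seconds>.<nanoseconds>, seconds
	 * right-aligned to six columns and nanoseconds zero-padded to
	 * nine: e.g. ts = { 5, 17000000 } gives "     5.017000000"
	 * followed by the separator.
	 */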
	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output && !metric_only) {
		switch (stat_config.aggr_mode) {
		case AGGR_SOCKET:
			fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_CORE:
			fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_NONE:
			fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_THREAD:
			fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_GLOBAL:
		default:
			fprintf(output, "# time counts %*s events\n", unit_width, "unit");
		case AGGR_UNSET:
			break;
		}
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;
}

static void print_header(int argc, const char **argv)
{
	FILE *output = stat_config.output;
	int i;

	fflush(stdout);

	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		if (target.system_wide)
			fprintf(output, "\'system wide");
		else if (target.cpu_list)
			fprintf(output, "\'CPU(s) %s", target.cpu_list);
		else if (!target__has_task(&target)) {
			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
			for (i = 1; argv && (i < argc); i++)
				fprintf(output, " %s", argv[i]);
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
		else
			fprintf(output, "thread id \'%s", target.tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}
}

static void print_footer(void)
{
	FILE *output = stat_config.output;

	if (!null_run)
		fprintf(output, "\n");
	fprintf(output, " %17.9f seconds time elapsed",
		avg_stats(&walltime_nsecs_stats) / 1e9);
	if (run_count > 1) {
		fprintf(output, " ");
		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
				avg_stats(&walltime_nsecs_stats));
	}
	fprintf(output, "\n\n");
}

static void print_counters(struct timespec *ts, int argc, const char **argv)
{
	int interval = stat_config.interval;
	struct perf_evsel *counter;
	char buf[64], *prefix = NULL;

	/* Do not print anything if we record to the pipe. */
	if (STAT_RECORD && perf_stat.file.is_pipe)
		return;

	if (interval)
		print_interval(prefix = buf, ts);
	else
		print_header(argc, argv);

	if (metric_only) {
		static int num_print_iv;

		if (num_print_iv == 0)
			print_metric_headers(prefix);
		if (num_print_iv++ == 25)
			num_print_iv = 0;
		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
			fprintf(stat_config.output, "%s", prefix);
	}

	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_THREAD:
		evlist__for_each(evsel_list, counter)
			print_aggr_thread(counter, prefix);
		break;
	case AGGR_GLOBAL:
		evlist__for_each(evsel_list, counter)
			print_counter_aggr(counter, prefix);
		if (metric_only)
			fputc('\n', stat_config.output);
		break;
	case AGGR_NONE:
		if (metric_only)
			print_no_aggr_metric(prefix);
		else {
			evlist__for_each(evsel_list, counter)
				print_counter(counter, prefix);
		}
		break;
	case AGGR_UNSET:
	default:
		break;
	}

	if (!interval && !csv_output)
		print_footer();

	fflush(stat_config.output);
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
	if ((child_pid == -1) || stat_config.interval)
		done = 1;

	signr = signo;
	/*
	 * Render child_pid harmless so we won't send SIGTERM to a
	 * random process in case of a race condition and fast PID
	 * recycling.
	 */
	child_pid = -1;
}

static void sig_atexit(void)
{
	sigset_t set, oset;

	/*
	 * Avoid a race condition with the SIGCHLD handler in
	 * skip_signal(), which modifies child_pid; the goal is to
	 * avoid sending SIGTERM to a random process.
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	sigprocmask(SIG_SETMASK, &oset, NULL);

	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}
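/*
 * big_num_opt is tri-state: -1 when neither option was given, 0 for
 * --no-big-num (unset) and 1 for an explicit -B. It is reconciled
 * with CSV mode in cmd_stat() below.
 */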
static const struct option stat_options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - don't start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		 "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
		   "command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
		   "command to run after the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		     "print counts at regular interval in ms (>= 10)"),
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_BOOLEAN(0, "metric-only", &metric_only,
		    "Only print computed metrics. No raw values"),
	OPT_END()
};
static int perf_stat__get_socket(struct cpu_map *map, int cpu)
{
	return cpu_map__get_socket(map, cpu, NULL);
}

static int perf_stat__get_core(struct cpu_map *map, int cpu)
{
	return cpu_map__get_core(map, cpu, NULL);
}

static int cpu_map__get_max(struct cpu_map *map)
{
	int i, max = -1;

	for (i = 0; i < map->nr; i++) {
		if (map->map[i] > max)
			max = map->map[i];
	}

	return max;
}

static struct cpu_map *cpus_aggr_map;

static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpus_aggr_map->map[cpu] == -1)
		cpus_aggr_map->map[cpu] = get_id(map, idx);

	return cpus_aggr_map->map[cpu];
}

static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
}

static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_core, map, idx);
}

static int perf_stat_init_aggr_mode(void)
{
	int nr;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_cached;
		break;
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_cached;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	nr = cpu_map__get_max(evsel_list->cpus);
	cpus_aggr_map = cpu_map__empty_new(nr + 1);
	return cpus_aggr_map ? 0 : -ENOMEM;
}

static void perf_stat__exit_aggr_mode(void)
{
	cpu_map__put(aggr_map);
	cpu_map__put(cpus_aggr_map);
	aggr_map = NULL;
	cpus_aggr_map = NULL;
}

static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx > map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpu >= env->nr_cpus_online)
		return -1;

	return cpu;
}

static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int cpu = perf_env__get_cpu(env, map, idx);

	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
}

static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int core = -1, cpu = perf_env__get_cpu(env, map, idx);

	if (cpu != -1) {
		int socket_id = env->cpu[cpu].socket_id;

		/*
		 * Encode the socket in the upper 16 bits: core_id is
		 * relative to its socket, and we need a global id, so
		 * we combine socket and core id.
		 */
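		/*
		 * E.g. socket_id = 1 and core_id = 2 encode to
		 * (1 << 16) | 2 = 0x10002.
		 */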
1716 */ 1717 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 1718 } 1719 1720 return core; 1721 } 1722 1723 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 1724 struct cpu_map **sockp) 1725 { 1726 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1727 } 1728 1729 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1730 struct cpu_map **corep) 1731 { 1732 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 1733 } 1734 1735 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 1736 { 1737 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1738 } 1739 1740 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 1741 { 1742 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 1743 } 1744 1745 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1746 { 1747 struct perf_env *env = &st->session->header.env; 1748 1749 switch (stat_config.aggr_mode) { 1750 case AGGR_SOCKET: 1751 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 1752 perror("cannot build socket map"); 1753 return -1; 1754 } 1755 aggr_get_id = perf_stat__get_socket_file; 1756 break; 1757 case AGGR_CORE: 1758 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 1759 perror("cannot build core map"); 1760 return -1; 1761 } 1762 aggr_get_id = perf_stat__get_core_file; 1763 break; 1764 case AGGR_NONE: 1765 case AGGR_GLOBAL: 1766 case AGGR_THREAD: 1767 case AGGR_UNSET: 1768 default: 1769 break; 1770 } 1771 1772 return 0; 1773 } 1774 1775 /* 1776 * Add default attributes, if there were no attributes specified or 1777 * if -d/--detailed, -d -d or -d -d -d is used: 1778 */ 1779 static int add_default_attributes(void) 1780 { 1781 struct perf_event_attr default_attrs0[] = { 1782 1783 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1784 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1785 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1786 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1787 1788 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1789 }; 1790 struct perf_event_attr frontend_attrs[] = { 1791 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1792 }; 1793 struct perf_event_attr backend_attrs[] = { 1794 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1795 }; 1796 struct perf_event_attr default_attrs1[] = { 1797 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1798 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1799 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1800 1801 }; 1802 1803 /* 1804 * Detailed stats (-d), covering the L1 and last level data caches: 1805 */ 1806 struct perf_event_attr detailed_attrs[] = { 1807 1808 { .type = PERF_TYPE_HW_CACHE, 1809 .config = 1810 PERF_COUNT_HW_CACHE_L1D << 0 | 1811 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1812 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1813 1814 { .type = PERF_TYPE_HW_CACHE, 1815 .config = 1816 PERF_COUNT_HW_CACHE_L1D << 0 | 1817 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1818 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1819 1820 { .type = PERF_TYPE_HW_CACHE, 1821 .config = 1822 PERF_COUNT_HW_CACHE_LL << 0 | 1823 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1824 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1825 1826 { .type = PERF_TYPE_HW_CACHE, 
	struct perf_event_attr detailed_attrs[] = {

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_LL << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_LL << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/*
	 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
	 */
	struct perf_event_attr very_detailed_attrs[] = {

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1I << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1I << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_DTLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_DTLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_ITLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_ITLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	};

	/*
	 * Very, very detailed stats (-d -d -d), adding prefetch events:
	 */
	struct perf_event_attr very_very_detailed_attrs[] = {

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (transaction_run) {
		int err;
		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = parse_events(evsel_list, transaction_attrs, NULL);
		else
			err = parse_events(evsel_list, transaction_limited_attrs, NULL);
		if (err) {
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

	if (!evsel_list->nr_entries) {
		if (target__has_cpu(&target))
			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;

		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
			return -1;
		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   frontend_attrs) < 0)
				return -1;
		}
		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   backend_attrs) < 0)
				return -1;
		}
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run < 1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}

static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};

static void init_features(struct perf_session *session)
{
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
}

static int __cmd_record(int argc, const char **argv)
{
	struct perf_session *session;
	struct perf_data_file *file = &perf_stat.file;

	argc = parse_options(argc, argv, stat_options, stat_record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	if (output_name)
		file->path = output_name;

	if (run_count != 1 || forever) {
		pr_err("Cannot use -r option with perf stat record.\n");
		return -1;
	}

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	init_features(session);

	session->evlist   = evsel_list;
	perf_stat.session = session;
	perf_stat.record  = true;
	return argc;
}

static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
				    union perf_event *event,
				    struct perf_session *session)
{
	struct stat_round_event *stat_round = &event->stat_round;
	struct perf_evsel *counter;
	struct timespec tsh, *ts = NULL;
	const char **argv = session->header.env.cmdline_argv;
	int argc = session->header.env.nr_cmdline;

	evlist__for_each(evsel_list, counter)
		perf_stat_process_counter(&stat_config, counter);

	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
		update_stats(&walltime_nsecs_stats, stat_round->time);

	if (stat_config.interval && stat_round->time) {
		tsh.tv_sec  = stat_round->time / NSECS_PER_SEC;
		tsh.tv_nsec = stat_round->time % NSECS_PER_SEC;
		ts = &tsh;
	}

	print_counters(ts, argc, argv);
	return 0;
}

static
int process_stat_config_event(struct perf_tool *tool __maybe_unused,
			      union perf_event *event,
			      struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	perf_event__read_stat_config(&stat_config, &event->stat_config);

	if (cpu_map__empty(st->cpus)) {
		if (st->aggr_mode != AGGR_UNSET)
			pr_warning("warning: processing task data, aggregation mode not set\n");
		return 0;
	}

	if (st->aggr_mode != AGGR_UNSET)
		stat_config.aggr_mode = st->aggr_mode;

	if (perf_stat.file.is_pipe)
		perf_stat_init_aggr_mode();
	else
		perf_stat_init_aggr_mode_file(st);

	return 0;
}

static int set_maps(struct perf_stat *st)
{
	if (!st->cpus || !st->threads)
		return 0;

	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
		return -EINVAL;

	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);

	if (perf_evlist__alloc_stats(evsel_list, true))
		return -ENOMEM;

	st->maps_allocated = true;
	return 0;
}

static
int process_thread_map_event(struct perf_tool *tool __maybe_unused,
			     union perf_event *event,
			     struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	if (st->threads) {
		pr_warning("Extra thread map event, ignoring.\n");
		return 0;
	}

	st->threads = thread_map__new_event(&event->thread_map);
	if (!st->threads)
		return -ENOMEM;

	return set_maps(st);
}

static
int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
			  union perf_event *event,
			  struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
	struct cpu_map *cpus;

	if (st->cpus) {
		pr_warning("Extra cpu map event, ignoring.\n");
		return 0;
	}

	cpus = cpu_map__new_data(&event->cpu_map.data);
	if (!cpus)
		return -ENOMEM;

	st->cpus = cpus;
	return set_maps(st);
}

static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};

static struct perf_stat perf_stat = {
	.tool = {
		.attr		= perf_event__process_attr,
		.event_update	= perf_event__process_event_update,
		.thread_map	= process_thread_map_event,
		.cpu_map	= process_cpu_map_event,
		.stat_config	= process_stat_config_event,
		.stat		= perf_event__process_stat_event,
		.stat_round	= process_stat_round_event,
	},
	.aggr_mode = AGGR_UNSET,
};

static int __cmd_report(int argc, const char **argv)
{
	struct perf_session *session;
	const struct option options[] = {
		OPT_STRING('i', "input", &input_name, "file", "input file name"),
		OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
			     "aggregate counts per processor socket", AGGR_SOCKET),
		OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
			     "aggregate counts per physical processor core", AGGR_CORE),
		OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
			     "disable CPU count aggregation", AGGR_NONE),
		OPT_END()
	};
	struct stat st;
	int ret;

	argc = parse_options(argc, argv, options, stat_report_usage, 0);

	if (!input_name || !strlen(input_name)) {
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			input_name = "-";
		else
			input_name = "perf.data";
	}

	perf_stat.file.path = input_name;
	perf_stat.file.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&perf_stat.file, false, &perf_stat.tool);
	if (session == NULL)
		return -1;

	perf_stat.session  = session;
	stat_config.output = stderr;
	evsel_list         = session->evlist;

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	perf_session__delete(session);
	return 0;
}

int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval;
	const char * const stat_subcommands[] = { "record", "report" };

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	parse_events__shrink_config_terms();
	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
					(const char **) stat_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);
	perf_stat__init_shadow_stats();

	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;


int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval;
	const char * const stat_subcommands[] = { "record", "report" };

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	parse_events__shrink_config_terms();
	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
					(const char **) stat_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);
	perf_stat__init_shadow_stats();

	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	if (argc && !strncmp(argv[0], "rec", 3)) {
		argc = __cmd_record(argc, argv);
		if (argc < 0)
			return -1;
	} else if (argc && !strncmp(argv[0], "rep", 3))
		return __cmd_report(argc, argv);

	interval = stat_config.interval;

	/*
	 * For the record command the -o option is already taken care of.
	 */
	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
		goto out;
	}

	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
		goto out;
	}

	if (metric_only && run_count > 1) {
		fprintf(stderr, "--metric-only is not supported with -r\n");
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be > 0\n");
		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
		goto out;
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	stat_config.output = output;
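
	/*
	 * 'output' is now fully resolved: stderr by default, a file when
	 * --output was given, or a stream wrapping the --log-fd descriptor.
	 * All printing below goes through stat_config.output.
	 */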

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	if (!argc && target__none(&target))
		usage_with_options(stat_usage, stat_options);

	if (run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		forever = true;
		run_count = 1;
	}

	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
		fprintf(stderr, "The --per-thread option is only available "
			"when monitoring via the -p or -t options.\n");
		parse_options_usage(NULL, stat_options, "p", 1);
		parse_options_usage(NULL, stat_options, "t", 1);
		goto out;
	}

	/*
	 * no_aggr and cgroup are for system-wide only,
	 * --per-thread is aggregated per thread, we don't mix it with cpu mode
	 */
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes are only available in system-wide mode\n");

		parse_options_usage(stat_usage, stat_options, "G", 1);
		parse_options_usage(NULL, stat_options, "A", 1);
		parse_options_usage(NULL, stat_options, "a", 1);
		goto out;
	}

	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads to monitor\n");
			parse_options_usage(stat_usage, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, stat_options, "C", 1);
			parse_options_usage(NULL, stat_options, "a", 1);
		}
		goto out;
	}

	/*
	 * Initialize thread_map with comm names,
	 * so we can print them out on output.
	 */
	if (stat_config.aggr_mode == AGGR_THREAD)
		thread_map__read_comms(evsel_list->threads);

	if (interval && interval < 100) {
		if (interval < 10) {
			pr_err("print interval must be >= 10ms\n");
			parse_options_usage(stat_usage, stat_options, "I", 1);
			goto out;
		} else
			pr_warning("print interval < 100ms. "
				   "The overhead percentage could be high in some cases. "
				   "Please proceed with caution.\n");
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	if (perf_stat_init_aggr_mode())
		goto out;
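
	/*
	 * Everything is validated and set up; what remains is signal
	 * handling and the measurement loop itself, which runs the
	 * workload run_count times (indefinitely with -r 0, see the
	 * 'forever' handling above).
	 */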

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT, skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		status = run_perf_stat(argc, argv);
		if (forever && status != -1) {
			print_counters(NULL, argc, argv);
			perf_stat__reset_stats();
		}
	}

	if (!forever && status != -1 && !interval)
		print_counters(NULL, argc, argv);

	if (STAT_RECORD) {
		/*
		 * We synthesize the kernel mmap record just so that older tools
		 * don't emit warnings about not being able to resolve symbols
		 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
		 * a saner message about no samples being in the perf.data file.
		 *
		 * This also serves to suppress a warning about f_header.data.size == 0
		 * in header.c at the moment 'perf stat record' gets introduced, which
		 * is not really needed once we start adding the stat specific PERF_RECORD_
		 * records, but the need to suppress the kptr_restrict messages in older
		 * tools remains -acme
		 */
		int fd = perf_data_file__fd(&perf_stat.file);
		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
							     process_synthesized_event,
							     &perf_stat.session->machines.host);
		if (err) {
			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
				   "older tools may produce warnings about this file.\n");
		}

		if (!interval) {
			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
				pr_err("failed to write stat round event\n");
		}

		if (!perf_stat.file.is_pipe) {
			perf_stat.session->header.data_size += perf_stat.bytes_written;
			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
		}

		perf_session__delete(perf_stat.session);
	}

	perf_stat__exit_aggr_mode();
	perf_evlist__free_stats(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}