1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include <subcmd/parse-options.h> 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/color.h" 56 #include "util/stat.h" 57 #include "util/header.h" 58 #include "util/cpumap.h" 59 #include "util/thread.h" 60 #include "util/thread_map.h" 61 #include "util/counts.h" 62 #include "util/session.h" 63 #include "util/tool.h" 64 #include "asm/bug.h" 65 66 #include <stdlib.h> 67 #include <sys/prctl.h> 68 #include <locale.h> 69 70 #define DEFAULT_SEPARATOR " " 71 #define CNTR_NOT_SUPPORTED "<not supported>" 72 #define CNTR_NOT_COUNTED "<not counted>" 73 74 static void print_counters(struct timespec *ts, int argc, const char **argv); 75 76 /* Default events used for perf stat -T */ 77 static const char *transaction_attrs = { 78 "task-clock," 79 "{" 80 "instructions," 81 "cycles," 82 "cpu/cycles-t/," 83 "cpu/tx-start/," 84 "cpu/el-start/," 85 "cpu/cycles-ct/" 86 "}" 87 }; 88 89 /* More limited version when the CPU does not have all events. */ 90 static const char * transaction_limited_attrs = { 91 "task-clock," 92 "{" 93 "instructions," 94 "cycles," 95 "cpu/cycles-t/," 96 "cpu/tx-start/" 97 "}" 98 }; 99 100 static struct perf_evlist *evsel_list; 101 102 static struct target target = { 103 .uid = UINT_MAX, 104 }; 105 106 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 107 108 static int run_count = 1; 109 static bool no_inherit = false; 110 static volatile pid_t child_pid = -1; 111 static bool null_run = false; 112 static int detailed_run = 0; 113 static bool transaction_run; 114 static bool big_num = true; 115 static int big_num_opt = -1; 116 static const char *csv_sep = NULL; 117 static bool csv_output = false; 118 static bool group = false; 119 static const char *pre_cmd = NULL; 120 static const char *post_cmd = NULL; 121 static bool sync_run = false; 122 static unsigned int initial_delay = 0; 123 static unsigned int unit_width = 4; /* strlen("unit") */ 124 static bool forever = false; 125 static bool metric_only = false; 126 static struct timespec ref_time; 127 static struct cpu_map *aggr_map; 128 static aggr_get_id_t aggr_get_id; 129 static bool append_file; 130 static const char *output_name; 131 static int output_fd; 132 133 struct perf_stat { 134 bool record; 135 struct perf_data_file file; 136 struct perf_session *session; 137 u64 bytes_written; 138 struct perf_tool tool; 139 bool maps_allocated; 140 struct cpu_map *cpus; 141 struct thread_map *threads; 142 enum aggr_mode aggr_mode; 143 }; 144 145 static struct perf_stat perf_stat; 146 #define STAT_RECORD perf_stat.record 147 148 static volatile int done = 0; 149 150 static struct perf_stat_config stat_config = { 151 .aggr_mode = AGGR_GLOBAL, 152 .scale = true, 153 }; 154 155 static inline void diff_timespec(struct timespec *r, struct timespec *a, 156 struct timespec *b) 157 { 158 r->tv_sec = a->tv_sec - b->tv_sec; 159 if (a->tv_nsec < b->tv_nsec) { 160 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; 161 r->tv_sec--; 162 } else { 163 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 164 } 165 } 166 167 static void perf_stat__reset_stats(void) 168 { 169 perf_evlist__reset_stats(evsel_list); 170 perf_stat__reset_shadow_stats(); 171 } 172 173 static int create_perf_stat_counter(struct perf_evsel *evsel) 174 { 175 struct perf_event_attr *attr = &evsel->attr; 176 177 if (stat_config.scale) 178 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 179 PERF_FORMAT_TOTAL_TIME_RUNNING; 180 181 attr->inherit = !no_inherit; 182 183 /* 184 * Some events get initialized with sample_(period/type) set, 185 * like tracepoints. Clear it up for counting. 186 */ 187 attr->sample_period = 0; 188 189 /* 190 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 191 * while avoiding that older tools show confusing messages. 192 * 193 * However for pipe sessions we need to keep it zero, 194 * because script's perf_evsel__check_attr is triggered 195 * by attr->sample_type != 0, and we can't run it on 196 * stat sessions. 197 */ 198 if (!(STAT_RECORD && perf_stat.file.is_pipe)) 199 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 200 201 /* 202 * Disabling all counters initially, they will be enabled 203 * either manually by us or by kernel via enable_on_exec 204 * set later. 205 */ 206 if (perf_evsel__is_group_leader(evsel)) { 207 attr->disabled = 1; 208 209 /* 210 * In case of initial_delay we enable tracee 211 * events manually. 212 */ 213 if (target__none(&target) && !initial_delay) 214 attr->enable_on_exec = 1; 215 } 216 217 if (target__has_cpu(&target)) 218 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 219 220 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 221 } 222 223 /* 224 * Does the counter have nsecs as a unit? 225 */ 226 static inline int nsec_counter(struct perf_evsel *evsel) 227 { 228 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 229 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 230 return 1; 231 232 return 0; 233 } 234 235 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 236 union perf_event *event, 237 struct perf_sample *sample __maybe_unused, 238 struct machine *machine __maybe_unused) 239 { 240 if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { 241 pr_err("failed to write perf data, error: %m\n"); 242 return -1; 243 } 244 245 perf_stat.bytes_written += event->header.size; 246 return 0; 247 } 248 249 static int write_stat_round_event(u64 tm, u64 type) 250 { 251 return perf_event__synthesize_stat_round(NULL, tm, type, 252 process_synthesized_event, 253 NULL); 254 } 255 256 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 257 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 258 259 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 260 261 static int 262 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 263 struct perf_counts_values *count) 264 { 265 struct perf_sample_id *sid = SID(counter, cpu, thread); 266 267 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 268 process_synthesized_event, NULL); 269 } 270 271 /* 272 * Read out the results of a single counter: 273 * do not aggregate counts across CPUs in system-wide mode 274 */ 275 static int read_counter(struct perf_evsel *counter) 276 { 277 int nthreads = thread_map__nr(evsel_list->threads); 278 int ncpus = perf_evsel__nr_cpus(counter); 279 int cpu, thread; 280 281 if (!counter->supported) 282 return -ENOENT; 283 284 if (counter->system_wide) 285 nthreads = 1; 286 287 for (thread = 0; thread < nthreads; thread++) { 288 for (cpu = 0; cpu < ncpus; cpu++) { 289 struct perf_counts_values *count; 290 291 count = perf_counts(counter->counts, cpu, thread); 292 if (perf_evsel__read(counter, cpu, thread, count)) 293 return -1; 294 295 if (STAT_RECORD) { 296 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 297 pr_err("failed to write stat event\n"); 298 return -1; 299 } 300 } 301 302 if (verbose > 1) { 303 fprintf(stat_config.output, 304 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 305 perf_evsel__name(counter), 306 cpu, 307 count->val, count->ena, count->run); 308 } 309 } 310 } 311 312 return 0; 313 } 314 315 static void read_counters(bool close_counters) 316 { 317 struct perf_evsel *counter; 318 319 evlist__for_each(evsel_list, counter) { 320 if (read_counter(counter)) 321 pr_debug("failed to read counter %s\n", counter->name); 322 323 if (perf_stat_process_counter(&stat_config, counter)) 324 pr_warning("failed to process counter %s\n", counter->name); 325 326 if (close_counters) { 327 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 328 thread_map__nr(evsel_list->threads)); 329 } 330 } 331 } 332 333 static void process_interval(void) 334 { 335 struct timespec ts, rs; 336 337 read_counters(false); 338 339 clock_gettime(CLOCK_MONOTONIC, &ts); 340 diff_timespec(&rs, &ts, &ref_time); 341 342 if (STAT_RECORD) { 343 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL)) 344 pr_err("failed to write stat round event\n"); 345 } 346 347 print_counters(&rs, 0, NULL); 348 } 349 350 static void enable_counters(void) 351 { 352 if (initial_delay) 353 usleep(initial_delay * 1000); 354 355 /* 356 * We need to enable counters only if: 357 * - we don't have tracee (attaching to task or cpu) 358 * - we have initial delay configured 359 */ 360 if (!target__none(&target) || initial_delay) 361 perf_evlist__enable(evsel_list); 362 } 363 364 static volatile int workload_exec_errno; 365 366 /* 367 * perf_evlist__prepare_workload will send a SIGUSR1 368 * if the fork fails, since we asked by setting its 369 * want_signal to true. 370 */ 371 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 372 void *ucontext __maybe_unused) 373 { 374 workload_exec_errno = info->si_value.sival_int; 375 } 376 377 static bool has_unit(struct perf_evsel *counter) 378 { 379 return counter->unit && *counter->unit; 380 } 381 382 static bool has_scale(struct perf_evsel *counter) 383 { 384 return counter->scale != 1; 385 } 386 387 static int perf_stat_synthesize_config(bool is_pipe) 388 { 389 struct perf_evsel *counter; 390 int err; 391 392 if (is_pipe) { 393 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 394 process_synthesized_event); 395 if (err < 0) { 396 pr_err("Couldn't synthesize attrs.\n"); 397 return err; 398 } 399 } 400 401 /* 402 * Synthesize other events stuff not carried within 403 * attr event - unit, scale, name 404 */ 405 evlist__for_each(evsel_list, counter) { 406 if (!counter->supported) 407 continue; 408 409 /* 410 * Synthesize unit and scale only if it's defined. 411 */ 412 if (has_unit(counter)) { 413 err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); 414 if (err < 0) { 415 pr_err("Couldn't synthesize evsel unit.\n"); 416 return err; 417 } 418 } 419 420 if (has_scale(counter)) { 421 err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); 422 if (err < 0) { 423 pr_err("Couldn't synthesize evsel scale.\n"); 424 return err; 425 } 426 } 427 428 if (counter->own_cpus) { 429 err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); 430 if (err < 0) { 431 pr_err("Couldn't synthesize evsel scale.\n"); 432 return err; 433 } 434 } 435 436 /* 437 * Name is needed only for pipe output, 438 * perf.data carries event names. 439 */ 440 if (is_pipe) { 441 err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); 442 if (err < 0) { 443 pr_err("Couldn't synthesize evsel name.\n"); 444 return err; 445 } 446 } 447 } 448 449 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 450 process_synthesized_event, 451 NULL); 452 if (err < 0) { 453 pr_err("Couldn't synthesize thread map.\n"); 454 return err; 455 } 456 457 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 458 process_synthesized_event, NULL); 459 if (err < 0) { 460 pr_err("Couldn't synthesize thread map.\n"); 461 return err; 462 } 463 464 err = perf_event__synthesize_stat_config(NULL, &stat_config, 465 process_synthesized_event, NULL); 466 if (err < 0) { 467 pr_err("Couldn't synthesize config.\n"); 468 return err; 469 } 470 471 return 0; 472 } 473 474 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 475 476 static int __store_counter_ids(struct perf_evsel *counter, 477 struct cpu_map *cpus, 478 struct thread_map *threads) 479 { 480 int cpu, thread; 481 482 for (cpu = 0; cpu < cpus->nr; cpu++) { 483 for (thread = 0; thread < threads->nr; thread++) { 484 int fd = FD(counter, cpu, thread); 485 486 if (perf_evlist__id_add_fd(evsel_list, counter, 487 cpu, thread, fd) < 0) 488 return -1; 489 } 490 } 491 492 return 0; 493 } 494 495 static int store_counter_ids(struct perf_evsel *counter) 496 { 497 struct cpu_map *cpus = counter->cpus; 498 struct thread_map *threads = counter->threads; 499 500 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) 501 return -ENOMEM; 502 503 return __store_counter_ids(counter, cpus, threads); 504 } 505 506 static int __run_perf_stat(int argc, const char **argv) 507 { 508 int interval = stat_config.interval; 509 char msg[512]; 510 unsigned long long t0, t1; 511 struct perf_evsel *counter; 512 struct timespec ts; 513 size_t l; 514 int status = 0; 515 const bool forks = (argc > 0); 516 bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; 517 518 if (interval) { 519 ts.tv_sec = interval / 1000; 520 ts.tv_nsec = (interval % 1000) * 1000000; 521 } else { 522 ts.tv_sec = 1; 523 ts.tv_nsec = 0; 524 } 525 526 if (forks) { 527 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 528 workload_exec_failed_signal) < 0) { 529 perror("failed to prepare workload"); 530 return -1; 531 } 532 child_pid = evsel_list->workload.pid; 533 } 534 535 if (group) 536 perf_evlist__set_leader(evsel_list); 537 538 evlist__for_each(evsel_list, counter) { 539 try_again: 540 if (create_perf_stat_counter(counter) < 0) { 541 /* 542 * PPC returns ENXIO for HW counters until 2.6.37 543 * (behavior changed with commit b0a873e). 544 */ 545 if (errno == EINVAL || errno == ENOSYS || 546 errno == ENOENT || errno == EOPNOTSUPP || 547 errno == ENXIO) { 548 if (verbose) 549 ui__warning("%s event is not supported by the kernel.\n", 550 perf_evsel__name(counter)); 551 counter->supported = false; 552 553 if ((counter->leader != counter) || 554 !(counter->leader->nr_members > 1)) 555 continue; 556 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 557 if (verbose) 558 ui__warning("%s\n", msg); 559 goto try_again; 560 } 561 562 perf_evsel__open_strerror(counter, &target, 563 errno, msg, sizeof(msg)); 564 ui__error("%s\n", msg); 565 566 if (child_pid != -1) 567 kill(child_pid, SIGTERM); 568 569 return -1; 570 } 571 counter->supported = true; 572 573 l = strlen(counter->unit); 574 if (l > unit_width) 575 unit_width = l; 576 577 if (STAT_RECORD && store_counter_ids(counter)) 578 return -1; 579 } 580 581 if (perf_evlist__apply_filters(evsel_list, &counter)) { 582 error("failed to set filter \"%s\" on event %s with %d (%s)\n", 583 counter->filter, perf_evsel__name(counter), errno, 584 strerror_r(errno, msg, sizeof(msg))); 585 return -1; 586 } 587 588 if (STAT_RECORD) { 589 int err, fd = perf_data_file__fd(&perf_stat.file); 590 591 if (is_pipe) { 592 err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file)); 593 } else { 594 err = perf_session__write_header(perf_stat.session, evsel_list, 595 fd, false); 596 } 597 598 if (err < 0) 599 return err; 600 601 err = perf_stat_synthesize_config(is_pipe); 602 if (err < 0) 603 return err; 604 } 605 606 /* 607 * Enable counters and exec the command: 608 */ 609 t0 = rdclock(); 610 clock_gettime(CLOCK_MONOTONIC, &ref_time); 611 612 if (forks) { 613 perf_evlist__start_workload(evsel_list); 614 enable_counters(); 615 616 if (interval) { 617 while (!waitpid(child_pid, &status, WNOHANG)) { 618 nanosleep(&ts, NULL); 619 process_interval(); 620 } 621 } 622 wait(&status); 623 624 if (workload_exec_errno) { 625 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg)); 626 pr_err("Workload failed: %s\n", emsg); 627 return -1; 628 } 629 630 if (WIFSIGNALED(status)) 631 psignal(WTERMSIG(status), argv[0]); 632 } else { 633 enable_counters(); 634 while (!done) { 635 nanosleep(&ts, NULL); 636 if (interval) 637 process_interval(); 638 } 639 } 640 641 t1 = rdclock(); 642 643 update_stats(&walltime_nsecs_stats, t1 - t0); 644 645 read_counters(true); 646 647 return WEXITSTATUS(status); 648 } 649 650 static int run_perf_stat(int argc, const char **argv) 651 { 652 int ret; 653 654 if (pre_cmd) { 655 ret = system(pre_cmd); 656 if (ret) 657 return ret; 658 } 659 660 if (sync_run) 661 sync(); 662 663 ret = __run_perf_stat(argc, argv); 664 if (ret) 665 return ret; 666 667 if (post_cmd) { 668 ret = system(post_cmd); 669 if (ret) 670 return ret; 671 } 672 673 return ret; 674 } 675 676 static void print_running(u64 run, u64 ena) 677 { 678 if (csv_output) { 679 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 680 csv_sep, 681 run, 682 csv_sep, 683 ena ? 100.0 * run / ena : 100.0); 684 } else if (run != ena) { 685 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 686 } 687 } 688 689 static void print_noise_pct(double total, double avg) 690 { 691 double pct = rel_stddev_stats(total, avg); 692 693 if (csv_output) 694 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 695 else if (pct) 696 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 697 } 698 699 static void print_noise(struct perf_evsel *evsel, double avg) 700 { 701 struct perf_stat_evsel *ps; 702 703 if (run_count == 1) 704 return; 705 706 ps = evsel->priv; 707 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 708 } 709 710 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 711 { 712 switch (stat_config.aggr_mode) { 713 case AGGR_CORE: 714 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 715 cpu_map__id_to_socket(id), 716 csv_output ? 0 : -8, 717 cpu_map__id_to_cpu(id), 718 csv_sep, 719 csv_output ? 0 : 4, 720 nr, 721 csv_sep); 722 break; 723 case AGGR_SOCKET: 724 fprintf(stat_config.output, "S%*d%s%*d%s", 725 csv_output ? 0 : -5, 726 id, 727 csv_sep, 728 csv_output ? 0 : 4, 729 nr, 730 csv_sep); 731 break; 732 case AGGR_NONE: 733 fprintf(stat_config.output, "CPU%*d%s", 734 csv_output ? 0 : -4, 735 perf_evsel__cpus(evsel)->map[id], csv_sep); 736 break; 737 case AGGR_THREAD: 738 fprintf(stat_config.output, "%*s-%*d%s", 739 csv_output ? 0 : 16, 740 thread_map__comm(evsel->threads, id), 741 csv_output ? 0 : -8, 742 thread_map__pid(evsel->threads, id), 743 csv_sep); 744 break; 745 case AGGR_GLOBAL: 746 case AGGR_UNSET: 747 default: 748 break; 749 } 750 } 751 752 struct outstate { 753 FILE *fh; 754 bool newline; 755 const char *prefix; 756 int nfields; 757 int id, nr; 758 struct perf_evsel *evsel; 759 }; 760 761 #define METRIC_LEN 35 762 763 static void new_line_std(void *ctx) 764 { 765 struct outstate *os = ctx; 766 767 os->newline = true; 768 } 769 770 static void do_new_line_std(struct outstate *os) 771 { 772 fputc('\n', os->fh); 773 fputs(os->prefix, os->fh); 774 aggr_printout(os->evsel, os->id, os->nr); 775 if (stat_config.aggr_mode == AGGR_NONE) 776 fprintf(os->fh, " "); 777 fprintf(os->fh, " "); 778 } 779 780 static void print_metric_std(void *ctx, const char *color, const char *fmt, 781 const char *unit, double val) 782 { 783 struct outstate *os = ctx; 784 FILE *out = os->fh; 785 int n; 786 bool newline = os->newline; 787 788 os->newline = false; 789 790 if (unit == NULL || fmt == NULL) { 791 fprintf(out, "%-*s", METRIC_LEN, ""); 792 return; 793 } 794 795 if (newline) 796 do_new_line_std(os); 797 798 n = fprintf(out, " # "); 799 if (color) 800 n += color_fprintf(out, color, fmt, val); 801 else 802 n += fprintf(out, fmt, val); 803 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 804 } 805 806 static void new_line_csv(void *ctx) 807 { 808 struct outstate *os = ctx; 809 int i; 810 811 fputc('\n', os->fh); 812 if (os->prefix) 813 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 814 aggr_printout(os->evsel, os->id, os->nr); 815 for (i = 0; i < os->nfields; i++) 816 fputs(csv_sep, os->fh); 817 } 818 819 static void print_metric_csv(void *ctx, 820 const char *color __maybe_unused, 821 const char *fmt, const char *unit, double val) 822 { 823 struct outstate *os = ctx; 824 FILE *out = os->fh; 825 char buf[64], *vals, *ends; 826 827 if (unit == NULL || fmt == NULL) { 828 fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); 829 return; 830 } 831 snprintf(buf, sizeof(buf), fmt, val); 832 vals = buf; 833 while (isspace(*vals)) 834 vals++; 835 ends = vals; 836 while (isdigit(*ends) || *ends == '.') 837 ends++; 838 *ends = 0; 839 while (isspace(*unit)) 840 unit++; 841 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 842 } 843 844 #define METRIC_ONLY_LEN 20 845 846 /* Filter out some columns that don't work well in metrics only mode */ 847 848 static bool valid_only_metric(const char *unit) 849 { 850 if (!unit) 851 return false; 852 if (strstr(unit, "/sec") || 853 strstr(unit, "hz") || 854 strstr(unit, "Hz") || 855 strstr(unit, "CPUs utilized")) 856 return false; 857 return true; 858 } 859 860 static const char *fixunit(char *buf, struct perf_evsel *evsel, 861 const char *unit) 862 { 863 if (!strncmp(unit, "of all", 6)) { 864 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 865 unit); 866 return buf; 867 } 868 return unit; 869 } 870 871 static void print_metric_only(void *ctx, const char *color, const char *fmt, 872 const char *unit, double val) 873 { 874 struct outstate *os = ctx; 875 FILE *out = os->fh; 876 int n; 877 char buf[1024]; 878 unsigned mlen = METRIC_ONLY_LEN; 879 880 if (!valid_only_metric(unit)) 881 return; 882 unit = fixunit(buf, os->evsel, unit); 883 if (color) 884 n = color_fprintf(out, color, fmt, val); 885 else 886 n = fprintf(out, fmt, val); 887 if (n > METRIC_ONLY_LEN) 888 n = METRIC_ONLY_LEN; 889 if (mlen < strlen(unit)) 890 mlen = strlen(unit) + 1; 891 fprintf(out, "%*s", mlen - n, ""); 892 } 893 894 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 895 const char *fmt, 896 const char *unit, double val) 897 { 898 struct outstate *os = ctx; 899 FILE *out = os->fh; 900 char buf[64], *vals, *ends; 901 char tbuf[1024]; 902 903 if (!valid_only_metric(unit)) 904 return; 905 unit = fixunit(tbuf, os->evsel, unit); 906 snprintf(buf, sizeof buf, fmt, val); 907 vals = buf; 908 while (isspace(*vals)) 909 vals++; 910 ends = vals; 911 while (isdigit(*ends) || *ends == '.') 912 ends++; 913 *ends = 0; 914 fprintf(out, "%s%s", vals, csv_sep); 915 } 916 917 static void new_line_metric(void *ctx __maybe_unused) 918 { 919 } 920 921 static void print_metric_header(void *ctx, const char *color __maybe_unused, 922 const char *fmt __maybe_unused, 923 const char *unit, double val __maybe_unused) 924 { 925 struct outstate *os = ctx; 926 char tbuf[1024]; 927 928 if (!valid_only_metric(unit)) 929 return; 930 unit = fixunit(tbuf, os->evsel, unit); 931 if (csv_output) 932 fprintf(os->fh, "%s%s", unit, csv_sep); 933 else 934 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 935 } 936 937 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 938 { 939 FILE *output = stat_config.output; 940 double msecs = avg / 1e6; 941 const char *fmt_v, *fmt_n; 942 char name[25]; 943 944 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 945 fmt_n = csv_output ? "%s" : "%-25s"; 946 947 aggr_printout(evsel, id, nr); 948 949 scnprintf(name, sizeof(name), "%s%s", 950 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 951 952 fprintf(output, fmt_v, msecs, csv_sep); 953 954 if (csv_output) 955 fprintf(output, "%s%s", evsel->unit, csv_sep); 956 else 957 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 958 959 fprintf(output, fmt_n, name); 960 961 if (evsel->cgrp) 962 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 963 } 964 965 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 966 { 967 int i; 968 969 if (!aggr_get_id) 970 return 0; 971 972 if (stat_config.aggr_mode == AGGR_NONE) 973 return id; 974 975 if (stat_config.aggr_mode == AGGR_GLOBAL) 976 return 0; 977 978 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 979 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 980 981 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 982 return cpu2; 983 } 984 return 0; 985 } 986 987 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 988 { 989 FILE *output = stat_config.output; 990 double sc = evsel->scale; 991 const char *fmt; 992 993 if (csv_output) { 994 fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; 995 } else { 996 if (big_num) 997 fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; 998 else 999 fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; 1000 } 1001 1002 aggr_printout(evsel, id, nr); 1003 1004 fprintf(output, fmt, avg, csv_sep); 1005 1006 if (evsel->unit) 1007 fprintf(output, "%-*s%s", 1008 csv_output ? 0 : unit_width, 1009 evsel->unit, csv_sep); 1010 1011 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1012 1013 if (evsel->cgrp) 1014 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1015 } 1016 1017 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1018 char *prefix, u64 run, u64 ena, double noise) 1019 { 1020 struct perf_stat_output_ctx out; 1021 struct outstate os = { 1022 .fh = stat_config.output, 1023 .prefix = prefix ? prefix : "", 1024 .id = id, 1025 .nr = nr, 1026 .evsel = counter, 1027 }; 1028 print_metric_t pm = print_metric_std; 1029 void (*nl)(void *); 1030 1031 if (metric_only) { 1032 nl = new_line_metric; 1033 if (csv_output) 1034 pm = print_metric_only_csv; 1035 else 1036 pm = print_metric_only; 1037 } else 1038 nl = new_line_std; 1039 1040 if (csv_output && !metric_only) { 1041 static int aggr_fields[] = { 1042 [AGGR_GLOBAL] = 0, 1043 [AGGR_THREAD] = 1, 1044 [AGGR_NONE] = 1, 1045 [AGGR_SOCKET] = 2, 1046 [AGGR_CORE] = 2, 1047 }; 1048 1049 pm = print_metric_csv; 1050 nl = new_line_csv; 1051 os.nfields = 3; 1052 os.nfields += aggr_fields[stat_config.aggr_mode]; 1053 if (counter->cgrp) 1054 os.nfields++; 1055 } 1056 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1057 if (metric_only) { 1058 pm(&os, NULL, "", "", 0); 1059 return; 1060 } 1061 aggr_printout(counter, id, nr); 1062 1063 fprintf(stat_config.output, "%*s%s", 1064 csv_output ? 0 : 18, 1065 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1066 csv_sep); 1067 1068 fprintf(stat_config.output, "%-*s%s", 1069 csv_output ? 0 : unit_width, 1070 counter->unit, csv_sep); 1071 1072 fprintf(stat_config.output, "%*s", 1073 csv_output ? 0 : -25, 1074 perf_evsel__name(counter)); 1075 1076 if (counter->cgrp) 1077 fprintf(stat_config.output, "%s%s", 1078 csv_sep, counter->cgrp->name); 1079 1080 if (!csv_output) 1081 pm(&os, NULL, NULL, "", 0); 1082 print_noise(counter, noise); 1083 print_running(run, ena); 1084 if (csv_output) 1085 pm(&os, NULL, NULL, "", 0); 1086 return; 1087 } 1088 1089 if (metric_only) 1090 /* nothing */; 1091 else if (nsec_counter(counter)) 1092 nsec_printout(id, nr, counter, uval); 1093 else 1094 abs_printout(id, nr, counter, uval); 1095 1096 out.print_metric = pm; 1097 out.new_line = nl; 1098 out.ctx = &os; 1099 1100 if (csv_output && !metric_only) { 1101 print_noise(counter, noise); 1102 print_running(run, ena); 1103 } 1104 1105 perf_stat__print_shadow_stats(counter, uval, 1106 first_shadow_cpu(counter, id), 1107 &out); 1108 if (!csv_output && !metric_only) { 1109 print_noise(counter, noise); 1110 print_running(run, ena); 1111 } 1112 } 1113 1114 static void aggr_update_shadow(void) 1115 { 1116 int cpu, s2, id, s; 1117 u64 val; 1118 struct perf_evsel *counter; 1119 1120 for (s = 0; s < aggr_map->nr; s++) { 1121 id = aggr_map->map[s]; 1122 evlist__for_each(evsel_list, counter) { 1123 val = 0; 1124 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1125 s2 = aggr_get_id(evsel_list->cpus, cpu); 1126 if (s2 != id) 1127 continue; 1128 val += perf_counts(counter->counts, cpu, 0)->val; 1129 } 1130 val = val * counter->scale; 1131 perf_stat__update_shadow_stats(counter, &val, 1132 first_shadow_cpu(counter, id)); 1133 } 1134 } 1135 } 1136 1137 static void print_aggr(char *prefix) 1138 { 1139 FILE *output = stat_config.output; 1140 struct perf_evsel *counter; 1141 int cpu, s, s2, id, nr; 1142 double uval; 1143 u64 ena, run, val; 1144 bool first; 1145 1146 if (!(aggr_map || aggr_get_id)) 1147 return; 1148 1149 aggr_update_shadow(); 1150 1151 /* 1152 * With metric_only everything is on a single line. 1153 * Without each counter has its own line. 1154 */ 1155 for (s = 0; s < aggr_map->nr; s++) { 1156 if (prefix && metric_only) 1157 fprintf(output, "%s", prefix); 1158 1159 id = aggr_map->map[s]; 1160 first = true; 1161 evlist__for_each(evsel_list, counter) { 1162 val = ena = run = 0; 1163 nr = 0; 1164 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1165 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1166 if (s2 != id) 1167 continue; 1168 val += perf_counts(counter->counts, cpu, 0)->val; 1169 ena += perf_counts(counter->counts, cpu, 0)->ena; 1170 run += perf_counts(counter->counts, cpu, 0)->run; 1171 nr++; 1172 } 1173 if (first && metric_only) { 1174 first = false; 1175 aggr_printout(counter, id, nr); 1176 } 1177 if (prefix && !metric_only) 1178 fprintf(output, "%s", prefix); 1179 1180 uval = val * counter->scale; 1181 printout(id, nr, counter, uval, prefix, run, ena, 1.0); 1182 if (!metric_only) 1183 fputc('\n', output); 1184 } 1185 if (metric_only) 1186 fputc('\n', output); 1187 } 1188 } 1189 1190 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1191 { 1192 FILE *output = stat_config.output; 1193 int nthreads = thread_map__nr(counter->threads); 1194 int ncpus = cpu_map__nr(counter->cpus); 1195 int cpu, thread; 1196 double uval; 1197 1198 for (thread = 0; thread < nthreads; thread++) { 1199 u64 ena = 0, run = 0, val = 0; 1200 1201 for (cpu = 0; cpu < ncpus; cpu++) { 1202 val += perf_counts(counter->counts, cpu, thread)->val; 1203 ena += perf_counts(counter->counts, cpu, thread)->ena; 1204 run += perf_counts(counter->counts, cpu, thread)->run; 1205 } 1206 1207 if (prefix) 1208 fprintf(output, "%s", prefix); 1209 1210 uval = val * counter->scale; 1211 printout(thread, 0, counter, uval, prefix, run, ena, 1.0); 1212 fputc('\n', output); 1213 } 1214 } 1215 1216 /* 1217 * Print out the results of a single counter: 1218 * aggregated counts in system-wide mode 1219 */ 1220 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1221 { 1222 FILE *output = stat_config.output; 1223 struct perf_stat_evsel *ps = counter->priv; 1224 double avg = avg_stats(&ps->res_stats[0]); 1225 double uval; 1226 double avg_enabled, avg_running; 1227 1228 avg_enabled = avg_stats(&ps->res_stats[1]); 1229 avg_running = avg_stats(&ps->res_stats[2]); 1230 1231 if (prefix && !metric_only) 1232 fprintf(output, "%s", prefix); 1233 1234 uval = avg * counter->scale; 1235 printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); 1236 if (!metric_only) 1237 fprintf(output, "\n"); 1238 } 1239 1240 /* 1241 * Print out the results of a single counter: 1242 * does not use aggregated count in system-wide 1243 */ 1244 static void print_counter(struct perf_evsel *counter, char *prefix) 1245 { 1246 FILE *output = stat_config.output; 1247 u64 ena, run, val; 1248 double uval; 1249 int cpu; 1250 1251 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1252 val = perf_counts(counter->counts, cpu, 0)->val; 1253 ena = perf_counts(counter->counts, cpu, 0)->ena; 1254 run = perf_counts(counter->counts, cpu, 0)->run; 1255 1256 if (prefix) 1257 fprintf(output, "%s", prefix); 1258 1259 uval = val * counter->scale; 1260 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1261 1262 fputc('\n', output); 1263 } 1264 } 1265 1266 static void print_no_aggr_metric(char *prefix) 1267 { 1268 int cpu; 1269 int nrcpus = 0; 1270 struct perf_evsel *counter; 1271 u64 ena, run, val; 1272 double uval; 1273 1274 nrcpus = evsel_list->cpus->nr; 1275 for (cpu = 0; cpu < nrcpus; cpu++) { 1276 bool first = true; 1277 1278 if (prefix) 1279 fputs(prefix, stat_config.output); 1280 evlist__for_each(evsel_list, counter) { 1281 if (first) { 1282 aggr_printout(counter, cpu, 0); 1283 first = false; 1284 } 1285 val = perf_counts(counter->counts, cpu, 0)->val; 1286 ena = perf_counts(counter->counts, cpu, 0)->ena; 1287 run = perf_counts(counter->counts, cpu, 0)->run; 1288 1289 uval = val * counter->scale; 1290 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1291 } 1292 fputc('\n', stat_config.output); 1293 } 1294 } 1295 1296 static int aggr_header_lens[] = { 1297 [AGGR_CORE] = 18, 1298 [AGGR_SOCKET] = 12, 1299 [AGGR_NONE] = 6, 1300 [AGGR_THREAD] = 24, 1301 [AGGR_GLOBAL] = 0, 1302 }; 1303 1304 static void print_metric_headers(char *prefix) 1305 { 1306 struct perf_stat_output_ctx out; 1307 struct perf_evsel *counter; 1308 struct outstate os = { 1309 .fh = stat_config.output 1310 }; 1311 1312 if (prefix) 1313 fprintf(stat_config.output, "%s", prefix); 1314 1315 if (!csv_output) 1316 fprintf(stat_config.output, "%*s", 1317 aggr_header_lens[stat_config.aggr_mode], ""); 1318 1319 /* Print metrics headers only */ 1320 evlist__for_each(evsel_list, counter) { 1321 os.evsel = counter; 1322 out.ctx = &os; 1323 out.print_metric = print_metric_header; 1324 out.new_line = new_line_metric; 1325 os.evsel = counter; 1326 perf_stat__print_shadow_stats(counter, 0, 1327 0, 1328 &out); 1329 } 1330 fputc('\n', stat_config.output); 1331 } 1332 1333 static void print_interval(char *prefix, struct timespec *ts) 1334 { 1335 FILE *output = stat_config.output; 1336 static int num_print_interval; 1337 1338 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1339 1340 if (num_print_interval == 0 && !csv_output && !metric_only) { 1341 switch (stat_config.aggr_mode) { 1342 case AGGR_SOCKET: 1343 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); 1344 break; 1345 case AGGR_CORE: 1346 fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); 1347 break; 1348 case AGGR_NONE: 1349 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); 1350 break; 1351 case AGGR_THREAD: 1352 fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); 1353 break; 1354 case AGGR_GLOBAL: 1355 default: 1356 fprintf(output, "# time counts %*s events\n", unit_width, "unit"); 1357 case AGGR_UNSET: 1358 break; 1359 } 1360 } 1361 1362 if (++num_print_interval == 25) 1363 num_print_interval = 0; 1364 } 1365 1366 static void print_header(int argc, const char **argv) 1367 { 1368 FILE *output = stat_config.output; 1369 int i; 1370 1371 fflush(stdout); 1372 1373 if (!csv_output) { 1374 fprintf(output, "\n"); 1375 fprintf(output, " Performance counter stats for "); 1376 if (target.system_wide) 1377 fprintf(output, "\'system wide"); 1378 else if (target.cpu_list) 1379 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1380 else if (!target__has_task(&target)) { 1381 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1382 for (i = 1; argv && (i < argc); i++) 1383 fprintf(output, " %s", argv[i]); 1384 } else if (target.pid) 1385 fprintf(output, "process id \'%s", target.pid); 1386 else 1387 fprintf(output, "thread id \'%s", target.tid); 1388 1389 fprintf(output, "\'"); 1390 if (run_count > 1) 1391 fprintf(output, " (%d runs)", run_count); 1392 fprintf(output, ":\n\n"); 1393 } 1394 } 1395 1396 static void print_footer(void) 1397 { 1398 FILE *output = stat_config.output; 1399 1400 if (!null_run) 1401 fprintf(output, "\n"); 1402 fprintf(output, " %17.9f seconds time elapsed", 1403 avg_stats(&walltime_nsecs_stats)/1e9); 1404 if (run_count > 1) { 1405 fprintf(output, " "); 1406 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1407 avg_stats(&walltime_nsecs_stats)); 1408 } 1409 fprintf(output, "\n\n"); 1410 } 1411 1412 static void print_counters(struct timespec *ts, int argc, const char **argv) 1413 { 1414 int interval = stat_config.interval; 1415 struct perf_evsel *counter; 1416 char buf[64], *prefix = NULL; 1417 1418 /* Do not print anything if we record to the pipe. */ 1419 if (STAT_RECORD && perf_stat.file.is_pipe) 1420 return; 1421 1422 if (interval) 1423 print_interval(prefix = buf, ts); 1424 else 1425 print_header(argc, argv); 1426 1427 if (metric_only) { 1428 static int num_print_iv; 1429 1430 if (num_print_iv == 0) 1431 print_metric_headers(prefix); 1432 if (num_print_iv++ == 25) 1433 num_print_iv = 0; 1434 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1435 fprintf(stat_config.output, "%s", prefix); 1436 } 1437 1438 switch (stat_config.aggr_mode) { 1439 case AGGR_CORE: 1440 case AGGR_SOCKET: 1441 print_aggr(prefix); 1442 break; 1443 case AGGR_THREAD: 1444 evlist__for_each(evsel_list, counter) 1445 print_aggr_thread(counter, prefix); 1446 break; 1447 case AGGR_GLOBAL: 1448 evlist__for_each(evsel_list, counter) 1449 print_counter_aggr(counter, prefix); 1450 if (metric_only) 1451 fputc('\n', stat_config.output); 1452 break; 1453 case AGGR_NONE: 1454 if (metric_only) 1455 print_no_aggr_metric(prefix); 1456 else { 1457 evlist__for_each(evsel_list, counter) 1458 print_counter(counter, prefix); 1459 } 1460 break; 1461 case AGGR_UNSET: 1462 default: 1463 break; 1464 } 1465 1466 if (!interval && !csv_output) 1467 print_footer(); 1468 1469 fflush(stat_config.output); 1470 } 1471 1472 static volatile int signr = -1; 1473 1474 static void skip_signal(int signo) 1475 { 1476 if ((child_pid == -1) || stat_config.interval) 1477 done = 1; 1478 1479 signr = signo; 1480 /* 1481 * render child_pid harmless 1482 * won't send SIGTERM to a random 1483 * process in case of race condition 1484 * and fast PID recycling 1485 */ 1486 child_pid = -1; 1487 } 1488 1489 static void sig_atexit(void) 1490 { 1491 sigset_t set, oset; 1492 1493 /* 1494 * avoid race condition with SIGCHLD handler 1495 * in skip_signal() which is modifying child_pid 1496 * goal is to avoid send SIGTERM to a random 1497 * process 1498 */ 1499 sigemptyset(&set); 1500 sigaddset(&set, SIGCHLD); 1501 sigprocmask(SIG_BLOCK, &set, &oset); 1502 1503 if (child_pid != -1) 1504 kill(child_pid, SIGTERM); 1505 1506 sigprocmask(SIG_SETMASK, &oset, NULL); 1507 1508 if (signr == -1) 1509 return; 1510 1511 signal(signr, SIG_DFL); 1512 kill(getpid(), signr); 1513 } 1514 1515 static int stat__set_big_num(const struct option *opt __maybe_unused, 1516 const char *s __maybe_unused, int unset) 1517 { 1518 big_num_opt = unset ? 0 : 1; 1519 return 0; 1520 } 1521 1522 static const struct option stat_options[] = { 1523 OPT_BOOLEAN('T', "transaction", &transaction_run, 1524 "hardware transaction statistics"), 1525 OPT_CALLBACK('e', "event", &evsel_list, "event", 1526 "event selector. use 'perf list' to list available events", 1527 parse_events_option), 1528 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1529 "event filter", parse_filter), 1530 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1531 "child tasks do not inherit counters"), 1532 OPT_STRING('p', "pid", &target.pid, "pid", 1533 "stat events on existing process id"), 1534 OPT_STRING('t', "tid", &target.tid, "tid", 1535 "stat events on existing thread id"), 1536 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1537 "system-wide collection from all CPUs"), 1538 OPT_BOOLEAN('g', "group", &group, 1539 "put the counters into a counter group"), 1540 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1541 OPT_INCR('v', "verbose", &verbose, 1542 "be more verbose (show counter open errors, etc)"), 1543 OPT_INTEGER('r', "repeat", &run_count, 1544 "repeat command and print average + stddev (max: 100, forever: 0)"), 1545 OPT_BOOLEAN('n', "null", &null_run, 1546 "null run - dont start any counters"), 1547 OPT_INCR('d', "detailed", &detailed_run, 1548 "detailed run - start a lot of events"), 1549 OPT_BOOLEAN('S', "sync", &sync_run, 1550 "call sync() before starting a run"), 1551 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1552 "print large numbers with thousands\' separators", 1553 stat__set_big_num), 1554 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1555 "list of cpus to monitor in system-wide"), 1556 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1557 "disable CPU count aggregation", AGGR_NONE), 1558 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1559 "print counts with custom separator"), 1560 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1561 "monitor event in cgroup name only", parse_cgroups), 1562 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1563 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1564 OPT_INTEGER(0, "log-fd", &output_fd, 1565 "log output to fd, instead of stderr"), 1566 OPT_STRING(0, "pre", &pre_cmd, "command", 1567 "command to run prior to the measured command"), 1568 OPT_STRING(0, "post", &post_cmd, "command", 1569 "command to run after to the measured command"), 1570 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1571 "print counts at regular interval in ms (>= 10)"), 1572 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1573 "aggregate counts per processor socket", AGGR_SOCKET), 1574 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1575 "aggregate counts per physical processor core", AGGR_CORE), 1576 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1577 "aggregate counts per thread", AGGR_THREAD), 1578 OPT_UINTEGER('D', "delay", &initial_delay, 1579 "ms to wait before starting measurement after program start"), 1580 OPT_BOOLEAN(0, "metric-only", &metric_only, 1581 "Only print computed metrics. No raw values"), 1582 OPT_END() 1583 }; 1584 1585 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 1586 { 1587 return cpu_map__get_socket(map, cpu, NULL); 1588 } 1589 1590 static int perf_stat__get_core(struct cpu_map *map, int cpu) 1591 { 1592 return cpu_map__get_core(map, cpu, NULL); 1593 } 1594 1595 static int cpu_map__get_max(struct cpu_map *map) 1596 { 1597 int i, max = -1; 1598 1599 for (i = 0; i < map->nr; i++) { 1600 if (map->map[i] > max) 1601 max = map->map[i]; 1602 } 1603 1604 return max; 1605 } 1606 1607 static struct cpu_map *cpus_aggr_map; 1608 1609 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 1610 { 1611 int cpu; 1612 1613 if (idx >= map->nr) 1614 return -1; 1615 1616 cpu = map->map[idx]; 1617 1618 if (cpus_aggr_map->map[cpu] == -1) 1619 cpus_aggr_map->map[cpu] = get_id(map, idx); 1620 1621 return cpus_aggr_map->map[cpu]; 1622 } 1623 1624 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 1625 { 1626 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 1627 } 1628 1629 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 1630 { 1631 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 1632 } 1633 1634 static int perf_stat_init_aggr_mode(void) 1635 { 1636 int nr; 1637 1638 switch (stat_config.aggr_mode) { 1639 case AGGR_SOCKET: 1640 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1641 perror("cannot build socket map"); 1642 return -1; 1643 } 1644 aggr_get_id = perf_stat__get_socket_cached; 1645 break; 1646 case AGGR_CORE: 1647 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1648 perror("cannot build core map"); 1649 return -1; 1650 } 1651 aggr_get_id = perf_stat__get_core_cached; 1652 break; 1653 case AGGR_NONE: 1654 case AGGR_GLOBAL: 1655 case AGGR_THREAD: 1656 case AGGR_UNSET: 1657 default: 1658 break; 1659 } 1660 1661 /* 1662 * The evsel_list->cpus is the base we operate on, 1663 * taking the highest cpu number to be the size of 1664 * the aggregation translate cpumap. 1665 */ 1666 nr = cpu_map__get_max(evsel_list->cpus); 1667 cpus_aggr_map = cpu_map__empty_new(nr + 1); 1668 return cpus_aggr_map ? 0 : -ENOMEM; 1669 } 1670 1671 static void perf_stat__exit_aggr_mode(void) 1672 { 1673 cpu_map__put(aggr_map); 1674 cpu_map__put(cpus_aggr_map); 1675 aggr_map = NULL; 1676 cpus_aggr_map = NULL; 1677 } 1678 1679 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 1680 { 1681 int cpu; 1682 1683 if (idx > map->nr) 1684 return -1; 1685 1686 cpu = map->map[idx]; 1687 1688 if (cpu >= env->nr_cpus_online) 1689 return -1; 1690 1691 return cpu; 1692 } 1693 1694 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 1695 { 1696 struct perf_env *env = data; 1697 int cpu = perf_env__get_cpu(env, map, idx); 1698 1699 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 1700 } 1701 1702 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 1703 { 1704 struct perf_env *env = data; 1705 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 1706 1707 if (cpu != -1) { 1708 int socket_id = env->cpu[cpu].socket_id; 1709 1710 /* 1711 * Encode socket in upper 16 bits 1712 * core_id is relative to socket, and 1713 * we need a global id. So we combine 1714 * socket + core id. 1715 */ 1716 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 1717 } 1718 1719 return core; 1720 } 1721 1722 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 1723 struct cpu_map **sockp) 1724 { 1725 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1726 } 1727 1728 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1729 struct cpu_map **corep) 1730 { 1731 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 1732 } 1733 1734 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 1735 { 1736 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1737 } 1738 1739 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 1740 { 1741 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 1742 } 1743 1744 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1745 { 1746 struct perf_env *env = &st->session->header.env; 1747 1748 switch (stat_config.aggr_mode) { 1749 case AGGR_SOCKET: 1750 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 1751 perror("cannot build socket map"); 1752 return -1; 1753 } 1754 aggr_get_id = perf_stat__get_socket_file; 1755 break; 1756 case AGGR_CORE: 1757 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 1758 perror("cannot build core map"); 1759 return -1; 1760 } 1761 aggr_get_id = perf_stat__get_core_file; 1762 break; 1763 case AGGR_NONE: 1764 case AGGR_GLOBAL: 1765 case AGGR_THREAD: 1766 case AGGR_UNSET: 1767 default: 1768 break; 1769 } 1770 1771 return 0; 1772 } 1773 1774 /* 1775 * Add default attributes, if there were no attributes specified or 1776 * if -d/--detailed, -d -d or -d -d -d is used: 1777 */ 1778 static int add_default_attributes(void) 1779 { 1780 struct perf_event_attr default_attrs0[] = { 1781 1782 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1783 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1784 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1785 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1786 1787 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1788 }; 1789 struct perf_event_attr frontend_attrs[] = { 1790 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1791 }; 1792 struct perf_event_attr backend_attrs[] = { 1793 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1794 }; 1795 struct perf_event_attr default_attrs1[] = { 1796 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1797 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1798 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1799 1800 }; 1801 1802 /* 1803 * Detailed stats (-d), covering the L1 and last level data caches: 1804 */ 1805 struct perf_event_attr detailed_attrs[] = { 1806 1807 { .type = PERF_TYPE_HW_CACHE, 1808 .config = 1809 PERF_COUNT_HW_CACHE_L1D << 0 | 1810 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1811 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1812 1813 { .type = PERF_TYPE_HW_CACHE, 1814 .config = 1815 PERF_COUNT_HW_CACHE_L1D << 0 | 1816 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1817 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1818 1819 { .type = PERF_TYPE_HW_CACHE, 1820 .config = 1821 PERF_COUNT_HW_CACHE_LL << 0 | 1822 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1823 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1824 1825 { .type = PERF_TYPE_HW_CACHE, 1826 .config = 1827 PERF_COUNT_HW_CACHE_LL << 0 | 1828 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1829 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1830 }; 1831 1832 /* 1833 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1834 */ 1835 struct perf_event_attr very_detailed_attrs[] = { 1836 1837 { .type = PERF_TYPE_HW_CACHE, 1838 .config = 1839 PERF_COUNT_HW_CACHE_L1I << 0 | 1840 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1841 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1842 1843 { .type = PERF_TYPE_HW_CACHE, 1844 .config = 1845 PERF_COUNT_HW_CACHE_L1I << 0 | 1846 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1847 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1848 1849 { .type = PERF_TYPE_HW_CACHE, 1850 .config = 1851 PERF_COUNT_HW_CACHE_DTLB << 0 | 1852 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1853 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1854 1855 { .type = PERF_TYPE_HW_CACHE, 1856 .config = 1857 PERF_COUNT_HW_CACHE_DTLB << 0 | 1858 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1859 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1860 1861 { .type = PERF_TYPE_HW_CACHE, 1862 .config = 1863 PERF_COUNT_HW_CACHE_ITLB << 0 | 1864 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1865 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1866 1867 { .type = PERF_TYPE_HW_CACHE, 1868 .config = 1869 PERF_COUNT_HW_CACHE_ITLB << 0 | 1870 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1871 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1872 1873 }; 1874 1875 /* 1876 * Very, very detailed stats (-d -d -d), adding prefetch events: 1877 */ 1878 struct perf_event_attr very_very_detailed_attrs[] = { 1879 1880 { .type = PERF_TYPE_HW_CACHE, 1881 .config = 1882 PERF_COUNT_HW_CACHE_L1D << 0 | 1883 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1884 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1885 1886 { .type = PERF_TYPE_HW_CACHE, 1887 .config = 1888 PERF_COUNT_HW_CACHE_L1D << 0 | 1889 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1890 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1891 }; 1892 1893 /* Set attrs if no event is selected and !null_run: */ 1894 if (null_run) 1895 return 0; 1896 1897 if (transaction_run) { 1898 int err; 1899 if (pmu_have_event("cpu", "cycles-ct") && 1900 pmu_have_event("cpu", "el-start")) 1901 err = parse_events(evsel_list, transaction_attrs, NULL); 1902 else 1903 err = parse_events(evsel_list, transaction_limited_attrs, NULL); 1904 if (err) { 1905 fprintf(stderr, "Cannot set up transaction events\n"); 1906 return -1; 1907 } 1908 return 0; 1909 } 1910 1911 if (!evsel_list->nr_entries) { 1912 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 1913 return -1; 1914 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 1915 if (perf_evlist__add_default_attrs(evsel_list, 1916 frontend_attrs) < 0) 1917 return -1; 1918 } 1919 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 1920 if (perf_evlist__add_default_attrs(evsel_list, 1921 backend_attrs) < 0) 1922 return -1; 1923 } 1924 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 1925 return -1; 1926 } 1927 1928 /* Detailed events get appended to the event list: */ 1929 1930 if (detailed_run < 1) 1931 return 0; 1932 1933 /* Append detailed run extra attributes: */ 1934 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 1935 return -1; 1936 1937 if (detailed_run < 2) 1938 return 0; 1939 1940 /* Append very detailed run extra attributes: */ 1941 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 1942 return -1; 1943 1944 if (detailed_run < 3) 1945 return 0; 1946 1947 /* Append very, very detailed run extra attributes: */ 1948 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 1949 } 1950 1951 static const char * const stat_record_usage[] = { 1952 "perf stat record [<options>]", 1953 NULL, 1954 }; 1955 1956 static void init_features(struct perf_session *session) 1957 { 1958 int feat; 1959 1960 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1961 perf_header__set_feat(&session->header, feat); 1962 1963 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1964 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1965 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1966 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1967 } 1968 1969 static int __cmd_record(int argc, const char **argv) 1970 { 1971 struct perf_session *session; 1972 struct perf_data_file *file = &perf_stat.file; 1973 1974 argc = parse_options(argc, argv, stat_options, stat_record_usage, 1975 PARSE_OPT_STOP_AT_NON_OPTION); 1976 1977 if (output_name) 1978 file->path = output_name; 1979 1980 if (run_count != 1 || forever) { 1981 pr_err("Cannot use -r option with perf stat record.\n"); 1982 return -1; 1983 } 1984 1985 session = perf_session__new(file, false, NULL); 1986 if (session == NULL) { 1987 pr_err("Perf session creation failed.\n"); 1988 return -1; 1989 } 1990 1991 init_features(session); 1992 1993 session->evlist = evsel_list; 1994 perf_stat.session = session; 1995 perf_stat.record = true; 1996 return argc; 1997 } 1998 1999 static int process_stat_round_event(struct perf_tool *tool __maybe_unused, 2000 union perf_event *event, 2001 struct perf_session *session) 2002 { 2003 struct stat_round_event *round = &event->stat_round; 2004 struct perf_evsel *counter; 2005 struct timespec tsh, *ts = NULL; 2006 const char **argv = session->header.env.cmdline_argv; 2007 int argc = session->header.env.nr_cmdline; 2008 2009 evlist__for_each(evsel_list, counter) 2010 perf_stat_process_counter(&stat_config, counter); 2011 2012 if (round->type == PERF_STAT_ROUND_TYPE__FINAL) 2013 update_stats(&walltime_nsecs_stats, round->time); 2014 2015 if (stat_config.interval && round->time) { 2016 tsh.tv_sec = round->time / NSECS_PER_SEC; 2017 tsh.tv_nsec = round->time % NSECS_PER_SEC; 2018 ts = &tsh; 2019 } 2020 2021 print_counters(ts, argc, argv); 2022 return 0; 2023 } 2024 2025 static 2026 int process_stat_config_event(struct perf_tool *tool __maybe_unused, 2027 union perf_event *event, 2028 struct perf_session *session __maybe_unused) 2029 { 2030 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2031 2032 perf_event__read_stat_config(&stat_config, &event->stat_config); 2033 2034 if (cpu_map__empty(st->cpus)) { 2035 if (st->aggr_mode != AGGR_UNSET) 2036 pr_warning("warning: processing task data, aggregation mode not set\n"); 2037 return 0; 2038 } 2039 2040 if (st->aggr_mode != AGGR_UNSET) 2041 stat_config.aggr_mode = st->aggr_mode; 2042 2043 if (perf_stat.file.is_pipe) 2044 perf_stat_init_aggr_mode(); 2045 else 2046 perf_stat_init_aggr_mode_file(st); 2047 2048 return 0; 2049 } 2050 2051 static int set_maps(struct perf_stat *st) 2052 { 2053 if (!st->cpus || !st->threads) 2054 return 0; 2055 2056 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2057 return -EINVAL; 2058 2059 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2060 2061 if (perf_evlist__alloc_stats(evsel_list, true)) 2062 return -ENOMEM; 2063 2064 st->maps_allocated = true; 2065 return 0; 2066 } 2067 2068 static 2069 int process_thread_map_event(struct perf_tool *tool __maybe_unused, 2070 union perf_event *event, 2071 struct perf_session *session __maybe_unused) 2072 { 2073 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2074 2075 if (st->threads) { 2076 pr_warning("Extra thread map event, ignoring.\n"); 2077 return 0; 2078 } 2079 2080 st->threads = thread_map__new_event(&event->thread_map); 2081 if (!st->threads) 2082 return -ENOMEM; 2083 2084 return set_maps(st); 2085 } 2086 2087 static 2088 int process_cpu_map_event(struct perf_tool *tool __maybe_unused, 2089 union perf_event *event, 2090 struct perf_session *session __maybe_unused) 2091 { 2092 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2093 struct cpu_map *cpus; 2094 2095 if (st->cpus) { 2096 pr_warning("Extra cpu map event, ignoring.\n"); 2097 return 0; 2098 } 2099 2100 cpus = cpu_map__new_data(&event->cpu_map.data); 2101 if (!cpus) 2102 return -ENOMEM; 2103 2104 st->cpus = cpus; 2105 return set_maps(st); 2106 } 2107 2108 static const char * const stat_report_usage[] = { 2109 "perf stat report [<options>]", 2110 NULL, 2111 }; 2112 2113 static struct perf_stat perf_stat = { 2114 .tool = { 2115 .attr = perf_event__process_attr, 2116 .event_update = perf_event__process_event_update, 2117 .thread_map = process_thread_map_event, 2118 .cpu_map = process_cpu_map_event, 2119 .stat_config = process_stat_config_event, 2120 .stat = perf_event__process_stat_event, 2121 .stat_round = process_stat_round_event, 2122 }, 2123 .aggr_mode = AGGR_UNSET, 2124 }; 2125 2126 static int __cmd_report(int argc, const char **argv) 2127 { 2128 struct perf_session *session; 2129 const struct option options[] = { 2130 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2131 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2132 "aggregate counts per processor socket", AGGR_SOCKET), 2133 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2134 "aggregate counts per physical processor core", AGGR_CORE), 2135 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2136 "disable CPU count aggregation", AGGR_NONE), 2137 OPT_END() 2138 }; 2139 struct stat st; 2140 int ret; 2141 2142 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2143 2144 if (!input_name || !strlen(input_name)) { 2145 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2146 input_name = "-"; 2147 else 2148 input_name = "perf.data"; 2149 } 2150 2151 perf_stat.file.path = input_name; 2152 perf_stat.file.mode = PERF_DATA_MODE_READ; 2153 2154 session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); 2155 if (session == NULL) 2156 return -1; 2157 2158 perf_stat.session = session; 2159 stat_config.output = stderr; 2160 evsel_list = session->evlist; 2161 2162 ret = perf_session__process_events(session); 2163 if (ret) 2164 return ret; 2165 2166 perf_session__delete(session); 2167 return 0; 2168 } 2169 2170 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) 2171 { 2172 const char * const stat_usage[] = { 2173 "perf stat [<options>] [<command>]", 2174 NULL 2175 }; 2176 int status = -EINVAL, run_idx; 2177 const char *mode; 2178 FILE *output = stderr; 2179 unsigned int interval; 2180 const char * const stat_subcommands[] = { "record", "report" }; 2181 2182 setlocale(LC_ALL, ""); 2183 2184 evsel_list = perf_evlist__new(); 2185 if (evsel_list == NULL) 2186 return -ENOMEM; 2187 2188 parse_events__shrink_config_terms(); 2189 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2190 (const char **) stat_usage, 2191 PARSE_OPT_STOP_AT_NON_OPTION); 2192 perf_stat__init_shadow_stats(); 2193 2194 if (csv_sep) { 2195 csv_output = true; 2196 if (!strcmp(csv_sep, "\\t")) 2197 csv_sep = "\t"; 2198 } else 2199 csv_sep = DEFAULT_SEPARATOR; 2200 2201 if (argc && !strncmp(argv[0], "rec", 3)) { 2202 argc = __cmd_record(argc, argv); 2203 if (argc < 0) 2204 return -1; 2205 } else if (argc && !strncmp(argv[0], "rep", 3)) 2206 return __cmd_report(argc, argv); 2207 2208 interval = stat_config.interval; 2209 2210 /* 2211 * For record command the -o is already taken care of. 2212 */ 2213 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2214 output = NULL; 2215 2216 if (output_name && output_fd) { 2217 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2218 parse_options_usage(stat_usage, stat_options, "o", 1); 2219 parse_options_usage(NULL, stat_options, "log-fd", 0); 2220 goto out; 2221 } 2222 2223 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2224 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2225 goto out; 2226 } 2227 2228 if (metric_only && run_count > 1) { 2229 fprintf(stderr, "--metric-only is not supported with -r\n"); 2230 goto out; 2231 } 2232 2233 if (output_fd < 0) { 2234 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2235 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2236 goto out; 2237 } 2238 2239 if (!output) { 2240 struct timespec tm; 2241 mode = append_file ? "a" : "w"; 2242 2243 output = fopen(output_name, mode); 2244 if (!output) { 2245 perror("failed to create output file"); 2246 return -1; 2247 } 2248 clock_gettime(CLOCK_REALTIME, &tm); 2249 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2250 } else if (output_fd > 0) { 2251 mode = append_file ? "a" : "w"; 2252 output = fdopen(output_fd, mode); 2253 if (!output) { 2254 perror("Failed opening logfd"); 2255 return -errno; 2256 } 2257 } 2258 2259 stat_config.output = output; 2260 2261 /* 2262 * let the spreadsheet do the pretty-printing 2263 */ 2264 if (csv_output) { 2265 /* User explicitly passed -B? */ 2266 if (big_num_opt == 1) { 2267 fprintf(stderr, "-B option not supported with -x\n"); 2268 parse_options_usage(stat_usage, stat_options, "B", 1); 2269 parse_options_usage(NULL, stat_options, "x", 1); 2270 goto out; 2271 } else /* Nope, so disable big number formatting */ 2272 big_num = false; 2273 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2274 big_num = false; 2275 2276 if (!argc && target__none(&target)) 2277 usage_with_options(stat_usage, stat_options); 2278 2279 if (run_count < 0) { 2280 pr_err("Run count must be a positive number\n"); 2281 parse_options_usage(stat_usage, stat_options, "r", 1); 2282 goto out; 2283 } else if (run_count == 0) { 2284 forever = true; 2285 run_count = 1; 2286 } 2287 2288 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 2289 fprintf(stderr, "The --per-thread option is only available " 2290 "when monitoring via -p -t options.\n"); 2291 parse_options_usage(NULL, stat_options, "p", 1); 2292 parse_options_usage(NULL, stat_options, "t", 1); 2293 goto out; 2294 } 2295 2296 /* 2297 * no_aggr, cgroup are for system-wide only 2298 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2299 */ 2300 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2301 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2302 !target__has_cpu(&target)) { 2303 fprintf(stderr, "both cgroup and no-aggregation " 2304 "modes only available in system-wide mode\n"); 2305 2306 parse_options_usage(stat_usage, stat_options, "G", 1); 2307 parse_options_usage(NULL, stat_options, "A", 1); 2308 parse_options_usage(NULL, stat_options, "a", 1); 2309 goto out; 2310 } 2311 2312 if (add_default_attributes()) 2313 goto out; 2314 2315 target__validate(&target); 2316 2317 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 2318 if (target__has_task(&target)) { 2319 pr_err("Problems finding threads of monitor\n"); 2320 parse_options_usage(stat_usage, stat_options, "p", 1); 2321 parse_options_usage(NULL, stat_options, "t", 1); 2322 } else if (target__has_cpu(&target)) { 2323 perror("failed to parse CPUs map"); 2324 parse_options_usage(stat_usage, stat_options, "C", 1); 2325 parse_options_usage(NULL, stat_options, "a", 1); 2326 } 2327 goto out; 2328 } 2329 2330 /* 2331 * Initialize thread_map with comm names, 2332 * so we could print it out on output. 2333 */ 2334 if (stat_config.aggr_mode == AGGR_THREAD) 2335 thread_map__read_comms(evsel_list->threads); 2336 2337 if (interval && interval < 100) { 2338 if (interval < 10) { 2339 pr_err("print interval must be >= 10ms\n"); 2340 parse_options_usage(stat_usage, stat_options, "I", 1); 2341 goto out; 2342 } else 2343 pr_warning("print interval < 100ms. " 2344 "The overhead percentage could be high in some cases. " 2345 "Please proceed with caution.\n"); 2346 } 2347 2348 if (perf_evlist__alloc_stats(evsel_list, interval)) 2349 goto out; 2350 2351 if (perf_stat_init_aggr_mode()) 2352 goto out; 2353 2354 /* 2355 * We dont want to block the signals - that would cause 2356 * child tasks to inherit that and Ctrl-C would not work. 2357 * What we want is for Ctrl-C to work in the exec()-ed 2358 * task, but being ignored by perf stat itself: 2359 */ 2360 atexit(sig_atexit); 2361 if (!forever) 2362 signal(SIGINT, skip_signal); 2363 signal(SIGCHLD, skip_signal); 2364 signal(SIGALRM, skip_signal); 2365 signal(SIGABRT, skip_signal); 2366 2367 status = 0; 2368 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 2369 if (run_count != 1 && verbose) 2370 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 2371 run_idx + 1); 2372 2373 status = run_perf_stat(argc, argv); 2374 if (forever && status != -1) { 2375 print_counters(NULL, argc, argv); 2376 perf_stat__reset_stats(); 2377 } 2378 } 2379 2380 if (!forever && status != -1 && !interval) 2381 print_counters(NULL, argc, argv); 2382 2383 if (STAT_RECORD) { 2384 /* 2385 * We synthesize the kernel mmap record just so that older tools 2386 * don't emit warnings about not being able to resolve symbols 2387 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2388 * a saner message about no samples being in the perf.data file. 2389 * 2390 * This also serves to suppress a warning about f_header.data.size == 0 2391 * in header.c at the moment 'perf stat record' gets introduced, which 2392 * is not really needed once we start adding the stat specific PERF_RECORD_ 2393 * records, but the need to suppress the kptr_restrict messages in older 2394 * tools remain -acme 2395 */ 2396 int fd = perf_data_file__fd(&perf_stat.file); 2397 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2398 process_synthesized_event, 2399 &perf_stat.session->machines.host); 2400 if (err) { 2401 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 2402 "older tools may produce warnings about this file\n."); 2403 } 2404 2405 if (!interval) { 2406 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 2407 pr_err("failed to write stat round event\n"); 2408 } 2409 2410 if (!perf_stat.file.is_pipe) { 2411 perf_stat.session->header.data_size += perf_stat.bytes_written; 2412 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2413 } 2414 2415 perf_session__delete(perf_stat.session); 2416 } 2417 2418 perf_stat__exit_aggr_mode(); 2419 perf_evlist__free_stats(evsel_list); 2420 out: 2421 perf_evlist__delete(evsel_list); 2422 return status; 2423 } 2424