1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include <subcmd/parse-options.h> 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/color.h" 56 #include "util/stat.h" 57 #include "util/header.h" 58 #include "util/cpumap.h" 59 #include "util/thread.h" 60 #include "util/thread_map.h" 61 #include "util/counts.h" 62 #include "util/group.h" 63 #include "util/session.h" 64 #include "util/tool.h" 65 #include "util/group.h" 66 #include "asm/bug.h" 67 68 #include <api/fs/fs.h> 69 #include <stdlib.h> 70 #include <sys/prctl.h> 71 #include <locale.h> 72 #include <math.h> 73 74 #define DEFAULT_SEPARATOR " " 75 #define CNTR_NOT_SUPPORTED "<not supported>" 76 #define CNTR_NOT_COUNTED "<not counted>" 77 78 static void print_counters(struct timespec *ts, int argc, const char **argv); 79 80 /* Default events used for perf stat -T */ 81 static const char *transaction_attrs = { 82 "task-clock," 83 "{" 84 "instructions," 85 "cycles," 86 "cpu/cycles-t/," 87 "cpu/tx-start/," 88 "cpu/el-start/," 89 "cpu/cycles-ct/" 90 "}" 91 }; 92 93 /* More limited version when the CPU does not have all events. */ 94 static const char * transaction_limited_attrs = { 95 "task-clock," 96 "{" 97 "instructions," 98 "cycles," 99 "cpu/cycles-t/," 100 "cpu/tx-start/" 101 "}" 102 }; 103 104 static const char * topdown_attrs[] = { 105 "topdown-total-slots", 106 "topdown-slots-retired", 107 "topdown-recovery-bubbles", 108 "topdown-fetch-bubbles", 109 "topdown-slots-issued", 110 NULL, 111 }; 112 113 static struct perf_evlist *evsel_list; 114 115 static struct target target = { 116 .uid = UINT_MAX, 117 }; 118 119 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 120 121 static int run_count = 1; 122 static bool no_inherit = false; 123 static volatile pid_t child_pid = -1; 124 static bool null_run = false; 125 static int detailed_run = 0; 126 static bool transaction_run; 127 static bool topdown_run = false; 128 static bool big_num = true; 129 static int big_num_opt = -1; 130 static const char *csv_sep = NULL; 131 static bool csv_output = false; 132 static bool group = false; 133 static const char *pre_cmd = NULL; 134 static const char *post_cmd = NULL; 135 static bool sync_run = false; 136 static unsigned int initial_delay = 0; 137 static unsigned int unit_width = 4; /* strlen("unit") */ 138 static bool forever = false; 139 static bool metric_only = false; 140 static bool force_metric_only = false; 141 static struct timespec ref_time; 142 static struct cpu_map *aggr_map; 143 static aggr_get_id_t aggr_get_id; 144 static bool append_file; 145 static const char *output_name; 146 static int output_fd; 147 148 struct perf_stat { 149 bool record; 150 struct perf_data_file file; 151 struct perf_session *session; 152 u64 bytes_written; 153 struct perf_tool tool; 154 bool maps_allocated; 155 struct cpu_map *cpus; 156 struct thread_map *threads; 157 enum aggr_mode aggr_mode; 158 }; 159 160 static struct perf_stat perf_stat; 161 #define STAT_RECORD perf_stat.record 162 163 static volatile int done = 0; 164 165 static struct perf_stat_config stat_config = { 166 .aggr_mode = AGGR_GLOBAL, 167 .scale = true, 168 }; 169 170 static inline void diff_timespec(struct timespec *r, struct timespec *a, 171 struct timespec *b) 172 { 173 r->tv_sec = a->tv_sec - b->tv_sec; 174 if (a->tv_nsec < b->tv_nsec) { 175 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; 176 r->tv_sec--; 177 } else { 178 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 179 } 180 } 181 182 static void perf_stat__reset_stats(void) 183 { 184 perf_evlist__reset_stats(evsel_list); 185 perf_stat__reset_shadow_stats(); 186 } 187 188 static int create_perf_stat_counter(struct perf_evsel *evsel) 189 { 190 struct perf_event_attr *attr = &evsel->attr; 191 192 if (stat_config.scale) 193 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 194 PERF_FORMAT_TOTAL_TIME_RUNNING; 195 196 attr->inherit = !no_inherit; 197 198 /* 199 * Some events get initialized with sample_(period/type) set, 200 * like tracepoints. Clear it up for counting. 201 */ 202 attr->sample_period = 0; 203 204 /* 205 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 206 * while avoiding that older tools show confusing messages. 207 * 208 * However for pipe sessions we need to keep it zero, 209 * because script's perf_evsel__check_attr is triggered 210 * by attr->sample_type != 0, and we can't run it on 211 * stat sessions. 212 */ 213 if (!(STAT_RECORD && perf_stat.file.is_pipe)) 214 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 215 216 /* 217 * Disabling all counters initially, they will be enabled 218 * either manually by us or by kernel via enable_on_exec 219 * set later. 220 */ 221 if (perf_evsel__is_group_leader(evsel)) { 222 attr->disabled = 1; 223 224 /* 225 * In case of initial_delay we enable tracee 226 * events manually. 227 */ 228 if (target__none(&target) && !initial_delay) 229 attr->enable_on_exec = 1; 230 } 231 232 if (target__has_cpu(&target)) 233 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 234 235 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 236 } 237 238 /* 239 * Does the counter have nsecs as a unit? 240 */ 241 static inline int nsec_counter(struct perf_evsel *evsel) 242 { 243 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 244 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 245 return 1; 246 247 return 0; 248 } 249 250 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 251 union perf_event *event, 252 struct perf_sample *sample __maybe_unused, 253 struct machine *machine __maybe_unused) 254 { 255 if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { 256 pr_err("failed to write perf data, error: %m\n"); 257 return -1; 258 } 259 260 perf_stat.bytes_written += event->header.size; 261 return 0; 262 } 263 264 static int write_stat_round_event(u64 tm, u64 type) 265 { 266 return perf_event__synthesize_stat_round(NULL, tm, type, 267 process_synthesized_event, 268 NULL); 269 } 270 271 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 272 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 273 274 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 275 276 static int 277 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 278 struct perf_counts_values *count) 279 { 280 struct perf_sample_id *sid = SID(counter, cpu, thread); 281 282 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 283 process_synthesized_event, NULL); 284 } 285 286 /* 287 * Read out the results of a single counter: 288 * do not aggregate counts across CPUs in system-wide mode 289 */ 290 static int read_counter(struct perf_evsel *counter) 291 { 292 int nthreads = thread_map__nr(evsel_list->threads); 293 int ncpus = perf_evsel__nr_cpus(counter); 294 int cpu, thread; 295 296 if (!counter->supported) 297 return -ENOENT; 298 299 if (counter->system_wide) 300 nthreads = 1; 301 302 for (thread = 0; thread < nthreads; thread++) { 303 for (cpu = 0; cpu < ncpus; cpu++) { 304 struct perf_counts_values *count; 305 306 count = perf_counts(counter->counts, cpu, thread); 307 if (perf_evsel__read(counter, cpu, thread, count)) 308 return -1; 309 310 if (STAT_RECORD) { 311 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 312 pr_err("failed to write stat event\n"); 313 return -1; 314 } 315 } 316 317 if (verbose > 1) { 318 fprintf(stat_config.output, 319 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 320 perf_evsel__name(counter), 321 cpu, 322 count->val, count->ena, count->run); 323 } 324 } 325 } 326 327 return 0; 328 } 329 330 static void read_counters(bool close_counters) 331 { 332 struct perf_evsel *counter; 333 334 evlist__for_each_entry(evsel_list, counter) { 335 if (read_counter(counter)) 336 pr_debug("failed to read counter %s\n", counter->name); 337 338 if (perf_stat_process_counter(&stat_config, counter)) 339 pr_warning("failed to process counter %s\n", counter->name); 340 341 if (close_counters) { 342 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 343 thread_map__nr(evsel_list->threads)); 344 } 345 } 346 } 347 348 static void process_interval(void) 349 { 350 struct timespec ts, rs; 351 352 read_counters(false); 353 354 clock_gettime(CLOCK_MONOTONIC, &ts); 355 diff_timespec(&rs, &ts, &ref_time); 356 357 if (STAT_RECORD) { 358 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL)) 359 pr_err("failed to write stat round event\n"); 360 } 361 362 print_counters(&rs, 0, NULL); 363 } 364 365 static void enable_counters(void) 366 { 367 if (initial_delay) 368 usleep(initial_delay * 1000); 369 370 /* 371 * We need to enable counters only if: 372 * - we don't have tracee (attaching to task or cpu) 373 * - we have initial delay configured 374 */ 375 if (!target__none(&target) || initial_delay) 376 perf_evlist__enable(evsel_list); 377 } 378 379 static volatile int workload_exec_errno; 380 381 /* 382 * perf_evlist__prepare_workload will send a SIGUSR1 383 * if the fork fails, since we asked by setting its 384 * want_signal to true. 385 */ 386 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 387 void *ucontext __maybe_unused) 388 { 389 workload_exec_errno = info->si_value.sival_int; 390 } 391 392 static bool has_unit(struct perf_evsel *counter) 393 { 394 return counter->unit && *counter->unit; 395 } 396 397 static bool has_scale(struct perf_evsel *counter) 398 { 399 return counter->scale != 1; 400 } 401 402 static int perf_stat_synthesize_config(bool is_pipe) 403 { 404 struct perf_evsel *counter; 405 int err; 406 407 if (is_pipe) { 408 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 409 process_synthesized_event); 410 if (err < 0) { 411 pr_err("Couldn't synthesize attrs.\n"); 412 return err; 413 } 414 } 415 416 /* 417 * Synthesize other events stuff not carried within 418 * attr event - unit, scale, name 419 */ 420 evlist__for_each_entry(evsel_list, counter) { 421 if (!counter->supported) 422 continue; 423 424 /* 425 * Synthesize unit and scale only if it's defined. 426 */ 427 if (has_unit(counter)) { 428 err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); 429 if (err < 0) { 430 pr_err("Couldn't synthesize evsel unit.\n"); 431 return err; 432 } 433 } 434 435 if (has_scale(counter)) { 436 err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); 437 if (err < 0) { 438 pr_err("Couldn't synthesize evsel scale.\n"); 439 return err; 440 } 441 } 442 443 if (counter->own_cpus) { 444 err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); 445 if (err < 0) { 446 pr_err("Couldn't synthesize evsel scale.\n"); 447 return err; 448 } 449 } 450 451 /* 452 * Name is needed only for pipe output, 453 * perf.data carries event names. 454 */ 455 if (is_pipe) { 456 err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); 457 if (err < 0) { 458 pr_err("Couldn't synthesize evsel name.\n"); 459 return err; 460 } 461 } 462 } 463 464 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 465 process_synthesized_event, 466 NULL); 467 if (err < 0) { 468 pr_err("Couldn't synthesize thread map.\n"); 469 return err; 470 } 471 472 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 473 process_synthesized_event, NULL); 474 if (err < 0) { 475 pr_err("Couldn't synthesize thread map.\n"); 476 return err; 477 } 478 479 err = perf_event__synthesize_stat_config(NULL, &stat_config, 480 process_synthesized_event, NULL); 481 if (err < 0) { 482 pr_err("Couldn't synthesize config.\n"); 483 return err; 484 } 485 486 return 0; 487 } 488 489 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 490 491 static int __store_counter_ids(struct perf_evsel *counter, 492 struct cpu_map *cpus, 493 struct thread_map *threads) 494 { 495 int cpu, thread; 496 497 for (cpu = 0; cpu < cpus->nr; cpu++) { 498 for (thread = 0; thread < threads->nr; thread++) { 499 int fd = FD(counter, cpu, thread); 500 501 if (perf_evlist__id_add_fd(evsel_list, counter, 502 cpu, thread, fd) < 0) 503 return -1; 504 } 505 } 506 507 return 0; 508 } 509 510 static int store_counter_ids(struct perf_evsel *counter) 511 { 512 struct cpu_map *cpus = counter->cpus; 513 struct thread_map *threads = counter->threads; 514 515 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) 516 return -ENOMEM; 517 518 return __store_counter_ids(counter, cpus, threads); 519 } 520 521 static int __run_perf_stat(int argc, const char **argv) 522 { 523 int interval = stat_config.interval; 524 char msg[512]; 525 unsigned long long t0, t1; 526 struct perf_evsel *counter; 527 struct timespec ts; 528 size_t l; 529 int status = 0; 530 const bool forks = (argc > 0); 531 bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; 532 533 if (interval) { 534 ts.tv_sec = interval / 1000; 535 ts.tv_nsec = (interval % 1000) * 1000000; 536 } else { 537 ts.tv_sec = 1; 538 ts.tv_nsec = 0; 539 } 540 541 if (forks) { 542 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 543 workload_exec_failed_signal) < 0) { 544 perror("failed to prepare workload"); 545 return -1; 546 } 547 child_pid = evsel_list->workload.pid; 548 } 549 550 if (group) 551 perf_evlist__set_leader(evsel_list); 552 553 evlist__for_each_entry(evsel_list, counter) { 554 try_again: 555 if (create_perf_stat_counter(counter) < 0) { 556 /* 557 * PPC returns ENXIO for HW counters until 2.6.37 558 * (behavior changed with commit b0a873e). 559 */ 560 if (errno == EINVAL || errno == ENOSYS || 561 errno == ENOENT || errno == EOPNOTSUPP || 562 errno == ENXIO) { 563 if (verbose) 564 ui__warning("%s event is not supported by the kernel.\n", 565 perf_evsel__name(counter)); 566 counter->supported = false; 567 568 if ((counter->leader != counter) || 569 !(counter->leader->nr_members > 1)) 570 continue; 571 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 572 if (verbose) 573 ui__warning("%s\n", msg); 574 goto try_again; 575 } 576 577 perf_evsel__open_strerror(counter, &target, 578 errno, msg, sizeof(msg)); 579 ui__error("%s\n", msg); 580 581 if (child_pid != -1) 582 kill(child_pid, SIGTERM); 583 584 return -1; 585 } 586 counter->supported = true; 587 588 l = strlen(counter->unit); 589 if (l > unit_width) 590 unit_width = l; 591 592 if (STAT_RECORD && store_counter_ids(counter)) 593 return -1; 594 } 595 596 if (perf_evlist__apply_filters(evsel_list, &counter)) { 597 error("failed to set filter \"%s\" on event %s with %d (%s)\n", 598 counter->filter, perf_evsel__name(counter), errno, 599 str_error_r(errno, msg, sizeof(msg))); 600 return -1; 601 } 602 603 if (STAT_RECORD) { 604 int err, fd = perf_data_file__fd(&perf_stat.file); 605 606 if (is_pipe) { 607 err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file)); 608 } else { 609 err = perf_session__write_header(perf_stat.session, evsel_list, 610 fd, false); 611 } 612 613 if (err < 0) 614 return err; 615 616 err = perf_stat_synthesize_config(is_pipe); 617 if (err < 0) 618 return err; 619 } 620 621 /* 622 * Enable counters and exec the command: 623 */ 624 t0 = rdclock(); 625 clock_gettime(CLOCK_MONOTONIC, &ref_time); 626 627 if (forks) { 628 perf_evlist__start_workload(evsel_list); 629 enable_counters(); 630 631 if (interval) { 632 while (!waitpid(child_pid, &status, WNOHANG)) { 633 nanosleep(&ts, NULL); 634 process_interval(); 635 } 636 } 637 wait(&status); 638 639 if (workload_exec_errno) { 640 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 641 pr_err("Workload failed: %s\n", emsg); 642 return -1; 643 } 644 645 if (WIFSIGNALED(status)) 646 psignal(WTERMSIG(status), argv[0]); 647 } else { 648 enable_counters(); 649 while (!done) { 650 nanosleep(&ts, NULL); 651 if (interval) 652 process_interval(); 653 } 654 } 655 656 t1 = rdclock(); 657 658 update_stats(&walltime_nsecs_stats, t1 - t0); 659 660 read_counters(true); 661 662 return WEXITSTATUS(status); 663 } 664 665 static int run_perf_stat(int argc, const char **argv) 666 { 667 int ret; 668 669 if (pre_cmd) { 670 ret = system(pre_cmd); 671 if (ret) 672 return ret; 673 } 674 675 if (sync_run) 676 sync(); 677 678 ret = __run_perf_stat(argc, argv); 679 if (ret) 680 return ret; 681 682 if (post_cmd) { 683 ret = system(post_cmd); 684 if (ret) 685 return ret; 686 } 687 688 return ret; 689 } 690 691 static void print_running(u64 run, u64 ena) 692 { 693 if (csv_output) { 694 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 695 csv_sep, 696 run, 697 csv_sep, 698 ena ? 100.0 * run / ena : 100.0); 699 } else if (run != ena) { 700 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 701 } 702 } 703 704 static void print_noise_pct(double total, double avg) 705 { 706 double pct = rel_stddev_stats(total, avg); 707 708 if (csv_output) 709 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 710 else if (pct) 711 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 712 } 713 714 static void print_noise(struct perf_evsel *evsel, double avg) 715 { 716 struct perf_stat_evsel *ps; 717 718 if (run_count == 1) 719 return; 720 721 ps = evsel->priv; 722 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 723 } 724 725 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 726 { 727 switch (stat_config.aggr_mode) { 728 case AGGR_CORE: 729 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 730 cpu_map__id_to_socket(id), 731 csv_output ? 0 : -8, 732 cpu_map__id_to_cpu(id), 733 csv_sep, 734 csv_output ? 0 : 4, 735 nr, 736 csv_sep); 737 break; 738 case AGGR_SOCKET: 739 fprintf(stat_config.output, "S%*d%s%*d%s", 740 csv_output ? 0 : -5, 741 id, 742 csv_sep, 743 csv_output ? 0 : 4, 744 nr, 745 csv_sep); 746 break; 747 case AGGR_NONE: 748 fprintf(stat_config.output, "CPU%*d%s", 749 csv_output ? 0 : -4, 750 perf_evsel__cpus(evsel)->map[id], csv_sep); 751 break; 752 case AGGR_THREAD: 753 fprintf(stat_config.output, "%*s-%*d%s", 754 csv_output ? 0 : 16, 755 thread_map__comm(evsel->threads, id), 756 csv_output ? 0 : -8, 757 thread_map__pid(evsel->threads, id), 758 csv_sep); 759 break; 760 case AGGR_GLOBAL: 761 case AGGR_UNSET: 762 default: 763 break; 764 } 765 } 766 767 struct outstate { 768 FILE *fh; 769 bool newline; 770 const char *prefix; 771 int nfields; 772 int id, nr; 773 struct perf_evsel *evsel; 774 }; 775 776 #define METRIC_LEN 35 777 778 static void new_line_std(void *ctx) 779 { 780 struct outstate *os = ctx; 781 782 os->newline = true; 783 } 784 785 static void do_new_line_std(struct outstate *os) 786 { 787 fputc('\n', os->fh); 788 fputs(os->prefix, os->fh); 789 aggr_printout(os->evsel, os->id, os->nr); 790 if (stat_config.aggr_mode == AGGR_NONE) 791 fprintf(os->fh, " "); 792 fprintf(os->fh, " "); 793 } 794 795 static void print_metric_std(void *ctx, const char *color, const char *fmt, 796 const char *unit, double val) 797 { 798 struct outstate *os = ctx; 799 FILE *out = os->fh; 800 int n; 801 bool newline = os->newline; 802 803 os->newline = false; 804 805 if (unit == NULL || fmt == NULL) { 806 fprintf(out, "%-*s", METRIC_LEN, ""); 807 return; 808 } 809 810 if (newline) 811 do_new_line_std(os); 812 813 n = fprintf(out, " # "); 814 if (color) 815 n += color_fprintf(out, color, fmt, val); 816 else 817 n += fprintf(out, fmt, val); 818 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 819 } 820 821 static void new_line_csv(void *ctx) 822 { 823 struct outstate *os = ctx; 824 int i; 825 826 fputc('\n', os->fh); 827 if (os->prefix) 828 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 829 aggr_printout(os->evsel, os->id, os->nr); 830 for (i = 0; i < os->nfields; i++) 831 fputs(csv_sep, os->fh); 832 } 833 834 static void print_metric_csv(void *ctx, 835 const char *color __maybe_unused, 836 const char *fmt, const char *unit, double val) 837 { 838 struct outstate *os = ctx; 839 FILE *out = os->fh; 840 char buf[64], *vals, *ends; 841 842 if (unit == NULL || fmt == NULL) { 843 fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); 844 return; 845 } 846 snprintf(buf, sizeof(buf), fmt, val); 847 vals = buf; 848 while (isspace(*vals)) 849 vals++; 850 ends = vals; 851 while (isdigit(*ends) || *ends == '.') 852 ends++; 853 *ends = 0; 854 while (isspace(*unit)) 855 unit++; 856 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 857 } 858 859 #define METRIC_ONLY_LEN 20 860 861 /* Filter out some columns that don't work well in metrics only mode */ 862 863 static bool valid_only_metric(const char *unit) 864 { 865 if (!unit) 866 return false; 867 if (strstr(unit, "/sec") || 868 strstr(unit, "hz") || 869 strstr(unit, "Hz") || 870 strstr(unit, "CPUs utilized")) 871 return false; 872 return true; 873 } 874 875 static const char *fixunit(char *buf, struct perf_evsel *evsel, 876 const char *unit) 877 { 878 if (!strncmp(unit, "of all", 6)) { 879 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 880 unit); 881 return buf; 882 } 883 return unit; 884 } 885 886 static void print_metric_only(void *ctx, const char *color, const char *fmt, 887 const char *unit, double val) 888 { 889 struct outstate *os = ctx; 890 FILE *out = os->fh; 891 int n; 892 char buf[1024]; 893 unsigned mlen = METRIC_ONLY_LEN; 894 895 if (!valid_only_metric(unit)) 896 return; 897 unit = fixunit(buf, os->evsel, unit); 898 if (color) 899 n = color_fprintf(out, color, fmt, val); 900 else 901 n = fprintf(out, fmt, val); 902 if (n > METRIC_ONLY_LEN) 903 n = METRIC_ONLY_LEN; 904 if (mlen < strlen(unit)) 905 mlen = strlen(unit) + 1; 906 fprintf(out, "%*s", mlen - n, ""); 907 } 908 909 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 910 const char *fmt, 911 const char *unit, double val) 912 { 913 struct outstate *os = ctx; 914 FILE *out = os->fh; 915 char buf[64], *vals, *ends; 916 char tbuf[1024]; 917 918 if (!valid_only_metric(unit)) 919 return; 920 unit = fixunit(tbuf, os->evsel, unit); 921 snprintf(buf, sizeof buf, fmt, val); 922 vals = buf; 923 while (isspace(*vals)) 924 vals++; 925 ends = vals; 926 while (isdigit(*ends) || *ends == '.') 927 ends++; 928 *ends = 0; 929 fprintf(out, "%s%s", vals, csv_sep); 930 } 931 932 static void new_line_metric(void *ctx __maybe_unused) 933 { 934 } 935 936 static void print_metric_header(void *ctx, const char *color __maybe_unused, 937 const char *fmt __maybe_unused, 938 const char *unit, double val __maybe_unused) 939 { 940 struct outstate *os = ctx; 941 char tbuf[1024]; 942 943 if (!valid_only_metric(unit)) 944 return; 945 unit = fixunit(tbuf, os->evsel, unit); 946 if (csv_output) 947 fprintf(os->fh, "%s%s", unit, csv_sep); 948 else 949 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 950 } 951 952 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 953 { 954 FILE *output = stat_config.output; 955 double msecs = avg / 1e6; 956 const char *fmt_v, *fmt_n; 957 char name[25]; 958 959 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 960 fmt_n = csv_output ? "%s" : "%-25s"; 961 962 aggr_printout(evsel, id, nr); 963 964 scnprintf(name, sizeof(name), "%s%s", 965 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 966 967 fprintf(output, fmt_v, msecs, csv_sep); 968 969 if (csv_output) 970 fprintf(output, "%s%s", evsel->unit, csv_sep); 971 else 972 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 973 974 fprintf(output, fmt_n, name); 975 976 if (evsel->cgrp) 977 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 978 } 979 980 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 981 { 982 int i; 983 984 if (!aggr_get_id) 985 return 0; 986 987 if (stat_config.aggr_mode == AGGR_NONE) 988 return id; 989 990 if (stat_config.aggr_mode == AGGR_GLOBAL) 991 return 0; 992 993 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 994 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 995 996 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 997 return cpu2; 998 } 999 return 0; 1000 } 1001 1002 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1003 { 1004 FILE *output = stat_config.output; 1005 double sc = evsel->scale; 1006 const char *fmt; 1007 1008 if (csv_output) { 1009 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1010 } else { 1011 if (big_num) 1012 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1013 else 1014 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1015 } 1016 1017 aggr_printout(evsel, id, nr); 1018 1019 fprintf(output, fmt, avg, csv_sep); 1020 1021 if (evsel->unit) 1022 fprintf(output, "%-*s%s", 1023 csv_output ? 0 : unit_width, 1024 evsel->unit, csv_sep); 1025 1026 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1027 1028 if (evsel->cgrp) 1029 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1030 } 1031 1032 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1033 char *prefix, u64 run, u64 ena, double noise) 1034 { 1035 struct perf_stat_output_ctx out; 1036 struct outstate os = { 1037 .fh = stat_config.output, 1038 .prefix = prefix ? prefix : "", 1039 .id = id, 1040 .nr = nr, 1041 .evsel = counter, 1042 }; 1043 print_metric_t pm = print_metric_std; 1044 void (*nl)(void *); 1045 1046 if (metric_only) { 1047 nl = new_line_metric; 1048 if (csv_output) 1049 pm = print_metric_only_csv; 1050 else 1051 pm = print_metric_only; 1052 } else 1053 nl = new_line_std; 1054 1055 if (csv_output && !metric_only) { 1056 static int aggr_fields[] = { 1057 [AGGR_GLOBAL] = 0, 1058 [AGGR_THREAD] = 1, 1059 [AGGR_NONE] = 1, 1060 [AGGR_SOCKET] = 2, 1061 [AGGR_CORE] = 2, 1062 }; 1063 1064 pm = print_metric_csv; 1065 nl = new_line_csv; 1066 os.nfields = 3; 1067 os.nfields += aggr_fields[stat_config.aggr_mode]; 1068 if (counter->cgrp) 1069 os.nfields++; 1070 } 1071 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1072 if (metric_only) { 1073 pm(&os, NULL, "", "", 0); 1074 return; 1075 } 1076 aggr_printout(counter, id, nr); 1077 1078 fprintf(stat_config.output, "%*s%s", 1079 csv_output ? 0 : 18, 1080 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1081 csv_sep); 1082 1083 fprintf(stat_config.output, "%-*s%s", 1084 csv_output ? 0 : unit_width, 1085 counter->unit, csv_sep); 1086 1087 fprintf(stat_config.output, "%*s", 1088 csv_output ? 0 : -25, 1089 perf_evsel__name(counter)); 1090 1091 if (counter->cgrp) 1092 fprintf(stat_config.output, "%s%s", 1093 csv_sep, counter->cgrp->name); 1094 1095 if (!csv_output) 1096 pm(&os, NULL, NULL, "", 0); 1097 print_noise(counter, noise); 1098 print_running(run, ena); 1099 if (csv_output) 1100 pm(&os, NULL, NULL, "", 0); 1101 return; 1102 } 1103 1104 if (metric_only) 1105 /* nothing */; 1106 else if (nsec_counter(counter)) 1107 nsec_printout(id, nr, counter, uval); 1108 else 1109 abs_printout(id, nr, counter, uval); 1110 1111 out.print_metric = pm; 1112 out.new_line = nl; 1113 out.ctx = &os; 1114 1115 if (csv_output && !metric_only) { 1116 print_noise(counter, noise); 1117 print_running(run, ena); 1118 } 1119 1120 perf_stat__print_shadow_stats(counter, uval, 1121 first_shadow_cpu(counter, id), 1122 &out); 1123 if (!csv_output && !metric_only) { 1124 print_noise(counter, noise); 1125 print_running(run, ena); 1126 } 1127 } 1128 1129 static void aggr_update_shadow(void) 1130 { 1131 int cpu, s2, id, s; 1132 u64 val; 1133 struct perf_evsel *counter; 1134 1135 for (s = 0; s < aggr_map->nr; s++) { 1136 id = aggr_map->map[s]; 1137 evlist__for_each_entry(evsel_list, counter) { 1138 val = 0; 1139 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1140 s2 = aggr_get_id(evsel_list->cpus, cpu); 1141 if (s2 != id) 1142 continue; 1143 val += perf_counts(counter->counts, cpu, 0)->val; 1144 } 1145 val = val * counter->scale; 1146 perf_stat__update_shadow_stats(counter, &val, 1147 first_shadow_cpu(counter, id)); 1148 } 1149 } 1150 } 1151 1152 static void print_aggr(char *prefix) 1153 { 1154 FILE *output = stat_config.output; 1155 struct perf_evsel *counter; 1156 int cpu, s, s2, id, nr; 1157 double uval; 1158 u64 ena, run, val; 1159 bool first; 1160 1161 if (!(aggr_map || aggr_get_id)) 1162 return; 1163 1164 aggr_update_shadow(); 1165 1166 /* 1167 * With metric_only everything is on a single line. 1168 * Without each counter has its own line. 1169 */ 1170 for (s = 0; s < aggr_map->nr; s++) { 1171 if (prefix && metric_only) 1172 fprintf(output, "%s", prefix); 1173 1174 id = aggr_map->map[s]; 1175 first = true; 1176 evlist__for_each_entry(evsel_list, counter) { 1177 val = ena = run = 0; 1178 nr = 0; 1179 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1180 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1181 if (s2 != id) 1182 continue; 1183 val += perf_counts(counter->counts, cpu, 0)->val; 1184 ena += perf_counts(counter->counts, cpu, 0)->ena; 1185 run += perf_counts(counter->counts, cpu, 0)->run; 1186 nr++; 1187 } 1188 if (first && metric_only) { 1189 first = false; 1190 aggr_printout(counter, id, nr); 1191 } 1192 if (prefix && !metric_only) 1193 fprintf(output, "%s", prefix); 1194 1195 uval = val * counter->scale; 1196 printout(id, nr, counter, uval, prefix, run, ena, 1.0); 1197 if (!metric_only) 1198 fputc('\n', output); 1199 } 1200 if (metric_only) 1201 fputc('\n', output); 1202 } 1203 } 1204 1205 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1206 { 1207 FILE *output = stat_config.output; 1208 int nthreads = thread_map__nr(counter->threads); 1209 int ncpus = cpu_map__nr(counter->cpus); 1210 int cpu, thread; 1211 double uval; 1212 1213 for (thread = 0; thread < nthreads; thread++) { 1214 u64 ena = 0, run = 0, val = 0; 1215 1216 for (cpu = 0; cpu < ncpus; cpu++) { 1217 val += perf_counts(counter->counts, cpu, thread)->val; 1218 ena += perf_counts(counter->counts, cpu, thread)->ena; 1219 run += perf_counts(counter->counts, cpu, thread)->run; 1220 } 1221 1222 if (prefix) 1223 fprintf(output, "%s", prefix); 1224 1225 uval = val * counter->scale; 1226 printout(thread, 0, counter, uval, prefix, run, ena, 1.0); 1227 fputc('\n', output); 1228 } 1229 } 1230 1231 /* 1232 * Print out the results of a single counter: 1233 * aggregated counts in system-wide mode 1234 */ 1235 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1236 { 1237 FILE *output = stat_config.output; 1238 struct perf_stat_evsel *ps = counter->priv; 1239 double avg = avg_stats(&ps->res_stats[0]); 1240 double uval; 1241 double avg_enabled, avg_running; 1242 1243 avg_enabled = avg_stats(&ps->res_stats[1]); 1244 avg_running = avg_stats(&ps->res_stats[2]); 1245 1246 if (prefix && !metric_only) 1247 fprintf(output, "%s", prefix); 1248 1249 uval = avg * counter->scale; 1250 printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); 1251 if (!metric_only) 1252 fprintf(output, "\n"); 1253 } 1254 1255 /* 1256 * Print out the results of a single counter: 1257 * does not use aggregated count in system-wide 1258 */ 1259 static void print_counter(struct perf_evsel *counter, char *prefix) 1260 { 1261 FILE *output = stat_config.output; 1262 u64 ena, run, val; 1263 double uval; 1264 int cpu; 1265 1266 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1267 val = perf_counts(counter->counts, cpu, 0)->val; 1268 ena = perf_counts(counter->counts, cpu, 0)->ena; 1269 run = perf_counts(counter->counts, cpu, 0)->run; 1270 1271 if (prefix) 1272 fprintf(output, "%s", prefix); 1273 1274 uval = val * counter->scale; 1275 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1276 1277 fputc('\n', output); 1278 } 1279 } 1280 1281 static void print_no_aggr_metric(char *prefix) 1282 { 1283 int cpu; 1284 int nrcpus = 0; 1285 struct perf_evsel *counter; 1286 u64 ena, run, val; 1287 double uval; 1288 1289 nrcpus = evsel_list->cpus->nr; 1290 for (cpu = 0; cpu < nrcpus; cpu++) { 1291 bool first = true; 1292 1293 if (prefix) 1294 fputs(prefix, stat_config.output); 1295 evlist__for_each_entry(evsel_list, counter) { 1296 if (first) { 1297 aggr_printout(counter, cpu, 0); 1298 first = false; 1299 } 1300 val = perf_counts(counter->counts, cpu, 0)->val; 1301 ena = perf_counts(counter->counts, cpu, 0)->ena; 1302 run = perf_counts(counter->counts, cpu, 0)->run; 1303 1304 uval = val * counter->scale; 1305 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1306 } 1307 fputc('\n', stat_config.output); 1308 } 1309 } 1310 1311 static int aggr_header_lens[] = { 1312 [AGGR_CORE] = 18, 1313 [AGGR_SOCKET] = 12, 1314 [AGGR_NONE] = 6, 1315 [AGGR_THREAD] = 24, 1316 [AGGR_GLOBAL] = 0, 1317 }; 1318 1319 static const char *aggr_header_csv[] = { 1320 [AGGR_CORE] = "core,cpus,", 1321 [AGGR_SOCKET] = "socket,cpus", 1322 [AGGR_NONE] = "cpu,", 1323 [AGGR_THREAD] = "comm-pid,", 1324 [AGGR_GLOBAL] = "" 1325 }; 1326 1327 static void print_metric_headers(const char *prefix, bool no_indent) 1328 { 1329 struct perf_stat_output_ctx out; 1330 struct perf_evsel *counter; 1331 struct outstate os = { 1332 .fh = stat_config.output 1333 }; 1334 1335 if (prefix) 1336 fprintf(stat_config.output, "%s", prefix); 1337 1338 if (!csv_output && !no_indent) 1339 fprintf(stat_config.output, "%*s", 1340 aggr_header_lens[stat_config.aggr_mode], ""); 1341 if (csv_output) { 1342 if (stat_config.interval) 1343 fputs("time,", stat_config.output); 1344 fputs(aggr_header_csv[stat_config.aggr_mode], 1345 stat_config.output); 1346 } 1347 1348 /* Print metrics headers only */ 1349 evlist__for_each_entry(evsel_list, counter) { 1350 os.evsel = counter; 1351 out.ctx = &os; 1352 out.print_metric = print_metric_header; 1353 out.new_line = new_line_metric; 1354 os.evsel = counter; 1355 perf_stat__print_shadow_stats(counter, 0, 1356 0, 1357 &out); 1358 } 1359 fputc('\n', stat_config.output); 1360 } 1361 1362 static void print_interval(char *prefix, struct timespec *ts) 1363 { 1364 FILE *output = stat_config.output; 1365 static int num_print_interval; 1366 1367 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1368 1369 if (num_print_interval == 0 && !csv_output) { 1370 switch (stat_config.aggr_mode) { 1371 case AGGR_SOCKET: 1372 fprintf(output, "# time socket cpus"); 1373 if (!metric_only) 1374 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1375 break; 1376 case AGGR_CORE: 1377 fprintf(output, "# time core cpus"); 1378 if (!metric_only) 1379 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1380 break; 1381 case AGGR_NONE: 1382 fprintf(output, "# time CPU"); 1383 if (!metric_only) 1384 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1385 break; 1386 case AGGR_THREAD: 1387 fprintf(output, "# time comm-pid"); 1388 if (!metric_only) 1389 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1390 break; 1391 case AGGR_GLOBAL: 1392 default: 1393 fprintf(output, "# time"); 1394 if (!metric_only) 1395 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1396 case AGGR_UNSET: 1397 break; 1398 } 1399 } 1400 1401 if (num_print_interval == 0 && metric_only) 1402 print_metric_headers(" ", true); 1403 if (++num_print_interval == 25) 1404 num_print_interval = 0; 1405 } 1406 1407 static void print_header(int argc, const char **argv) 1408 { 1409 FILE *output = stat_config.output; 1410 int i; 1411 1412 fflush(stdout); 1413 1414 if (!csv_output) { 1415 fprintf(output, "\n"); 1416 fprintf(output, " Performance counter stats for "); 1417 if (target.system_wide) 1418 fprintf(output, "\'system wide"); 1419 else if (target.cpu_list) 1420 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1421 else if (!target__has_task(&target)) { 1422 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1423 for (i = 1; argv && (i < argc); i++) 1424 fprintf(output, " %s", argv[i]); 1425 } else if (target.pid) 1426 fprintf(output, "process id \'%s", target.pid); 1427 else 1428 fprintf(output, "thread id \'%s", target.tid); 1429 1430 fprintf(output, "\'"); 1431 if (run_count > 1) 1432 fprintf(output, " (%d runs)", run_count); 1433 fprintf(output, ":\n\n"); 1434 } 1435 } 1436 1437 static void print_footer(void) 1438 { 1439 FILE *output = stat_config.output; 1440 1441 if (!null_run) 1442 fprintf(output, "\n"); 1443 fprintf(output, " %17.9f seconds time elapsed", 1444 avg_stats(&walltime_nsecs_stats)/1e9); 1445 if (run_count > 1) { 1446 fprintf(output, " "); 1447 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1448 avg_stats(&walltime_nsecs_stats)); 1449 } 1450 fprintf(output, "\n\n"); 1451 } 1452 1453 static void print_counters(struct timespec *ts, int argc, const char **argv) 1454 { 1455 int interval = stat_config.interval; 1456 struct perf_evsel *counter; 1457 char buf[64], *prefix = NULL; 1458 1459 /* Do not print anything if we record to the pipe. */ 1460 if (STAT_RECORD && perf_stat.file.is_pipe) 1461 return; 1462 1463 if (interval) 1464 print_interval(prefix = buf, ts); 1465 else 1466 print_header(argc, argv); 1467 1468 if (metric_only) { 1469 static int num_print_iv; 1470 1471 if (num_print_iv == 0 && !interval) 1472 print_metric_headers(prefix, false); 1473 if (num_print_iv++ == 25) 1474 num_print_iv = 0; 1475 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1476 fprintf(stat_config.output, "%s", prefix); 1477 } 1478 1479 switch (stat_config.aggr_mode) { 1480 case AGGR_CORE: 1481 case AGGR_SOCKET: 1482 print_aggr(prefix); 1483 break; 1484 case AGGR_THREAD: 1485 evlist__for_each_entry(evsel_list, counter) 1486 print_aggr_thread(counter, prefix); 1487 break; 1488 case AGGR_GLOBAL: 1489 evlist__for_each_entry(evsel_list, counter) 1490 print_counter_aggr(counter, prefix); 1491 if (metric_only) 1492 fputc('\n', stat_config.output); 1493 break; 1494 case AGGR_NONE: 1495 if (metric_only) 1496 print_no_aggr_metric(prefix); 1497 else { 1498 evlist__for_each_entry(evsel_list, counter) 1499 print_counter(counter, prefix); 1500 } 1501 break; 1502 case AGGR_UNSET: 1503 default: 1504 break; 1505 } 1506 1507 if (!interval && !csv_output) 1508 print_footer(); 1509 1510 fflush(stat_config.output); 1511 } 1512 1513 static volatile int signr = -1; 1514 1515 static void skip_signal(int signo) 1516 { 1517 if ((child_pid == -1) || stat_config.interval) 1518 done = 1; 1519 1520 signr = signo; 1521 /* 1522 * render child_pid harmless 1523 * won't send SIGTERM to a random 1524 * process in case of race condition 1525 * and fast PID recycling 1526 */ 1527 child_pid = -1; 1528 } 1529 1530 static void sig_atexit(void) 1531 { 1532 sigset_t set, oset; 1533 1534 /* 1535 * avoid race condition with SIGCHLD handler 1536 * in skip_signal() which is modifying child_pid 1537 * goal is to avoid send SIGTERM to a random 1538 * process 1539 */ 1540 sigemptyset(&set); 1541 sigaddset(&set, SIGCHLD); 1542 sigprocmask(SIG_BLOCK, &set, &oset); 1543 1544 if (child_pid != -1) 1545 kill(child_pid, SIGTERM); 1546 1547 sigprocmask(SIG_SETMASK, &oset, NULL); 1548 1549 if (signr == -1) 1550 return; 1551 1552 signal(signr, SIG_DFL); 1553 kill(getpid(), signr); 1554 } 1555 1556 static int stat__set_big_num(const struct option *opt __maybe_unused, 1557 const char *s __maybe_unused, int unset) 1558 { 1559 big_num_opt = unset ? 0 : 1; 1560 return 0; 1561 } 1562 1563 static int enable_metric_only(const struct option *opt __maybe_unused, 1564 const char *s __maybe_unused, int unset) 1565 { 1566 force_metric_only = true; 1567 metric_only = !unset; 1568 return 0; 1569 } 1570 1571 static const struct option stat_options[] = { 1572 OPT_BOOLEAN('T', "transaction", &transaction_run, 1573 "hardware transaction statistics"), 1574 OPT_CALLBACK('e', "event", &evsel_list, "event", 1575 "event selector. use 'perf list' to list available events", 1576 parse_events_option), 1577 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1578 "event filter", parse_filter), 1579 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1580 "child tasks do not inherit counters"), 1581 OPT_STRING('p', "pid", &target.pid, "pid", 1582 "stat events on existing process id"), 1583 OPT_STRING('t', "tid", &target.tid, "tid", 1584 "stat events on existing thread id"), 1585 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1586 "system-wide collection from all CPUs"), 1587 OPT_BOOLEAN('g', "group", &group, 1588 "put the counters into a counter group"), 1589 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1590 OPT_INCR('v', "verbose", &verbose, 1591 "be more verbose (show counter open errors, etc)"), 1592 OPT_INTEGER('r', "repeat", &run_count, 1593 "repeat command and print average + stddev (max: 100, forever: 0)"), 1594 OPT_BOOLEAN('n', "null", &null_run, 1595 "null run - dont start any counters"), 1596 OPT_INCR('d', "detailed", &detailed_run, 1597 "detailed run - start a lot of events"), 1598 OPT_BOOLEAN('S', "sync", &sync_run, 1599 "call sync() before starting a run"), 1600 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1601 "print large numbers with thousands\' separators", 1602 stat__set_big_num), 1603 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1604 "list of cpus to monitor in system-wide"), 1605 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1606 "disable CPU count aggregation", AGGR_NONE), 1607 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1608 "print counts with custom separator"), 1609 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1610 "monitor event in cgroup name only", parse_cgroups), 1611 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1612 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1613 OPT_INTEGER(0, "log-fd", &output_fd, 1614 "log output to fd, instead of stderr"), 1615 OPT_STRING(0, "pre", &pre_cmd, "command", 1616 "command to run prior to the measured command"), 1617 OPT_STRING(0, "post", &post_cmd, "command", 1618 "command to run after to the measured command"), 1619 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1620 "print counts at regular interval in ms (>= 10)"), 1621 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1622 "aggregate counts per processor socket", AGGR_SOCKET), 1623 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1624 "aggregate counts per physical processor core", AGGR_CORE), 1625 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1626 "aggregate counts per thread", AGGR_THREAD), 1627 OPT_UINTEGER('D', "delay", &initial_delay, 1628 "ms to wait before starting measurement after program start"), 1629 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, 1630 "Only print computed metrics. No raw values", enable_metric_only), 1631 OPT_BOOLEAN(0, "topdown", &topdown_run, 1632 "measure topdown level 1 statistics"), 1633 OPT_END() 1634 }; 1635 1636 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 1637 { 1638 return cpu_map__get_socket(map, cpu, NULL); 1639 } 1640 1641 static int perf_stat__get_core(struct cpu_map *map, int cpu) 1642 { 1643 return cpu_map__get_core(map, cpu, NULL); 1644 } 1645 1646 static int cpu_map__get_max(struct cpu_map *map) 1647 { 1648 int i, max = -1; 1649 1650 for (i = 0; i < map->nr; i++) { 1651 if (map->map[i] > max) 1652 max = map->map[i]; 1653 } 1654 1655 return max; 1656 } 1657 1658 static struct cpu_map *cpus_aggr_map; 1659 1660 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 1661 { 1662 int cpu; 1663 1664 if (idx >= map->nr) 1665 return -1; 1666 1667 cpu = map->map[idx]; 1668 1669 if (cpus_aggr_map->map[cpu] == -1) 1670 cpus_aggr_map->map[cpu] = get_id(map, idx); 1671 1672 return cpus_aggr_map->map[cpu]; 1673 } 1674 1675 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 1676 { 1677 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 1678 } 1679 1680 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 1681 { 1682 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 1683 } 1684 1685 static int perf_stat_init_aggr_mode(void) 1686 { 1687 int nr; 1688 1689 switch (stat_config.aggr_mode) { 1690 case AGGR_SOCKET: 1691 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1692 perror("cannot build socket map"); 1693 return -1; 1694 } 1695 aggr_get_id = perf_stat__get_socket_cached; 1696 break; 1697 case AGGR_CORE: 1698 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1699 perror("cannot build core map"); 1700 return -1; 1701 } 1702 aggr_get_id = perf_stat__get_core_cached; 1703 break; 1704 case AGGR_NONE: 1705 case AGGR_GLOBAL: 1706 case AGGR_THREAD: 1707 case AGGR_UNSET: 1708 default: 1709 break; 1710 } 1711 1712 /* 1713 * The evsel_list->cpus is the base we operate on, 1714 * taking the highest cpu number to be the size of 1715 * the aggregation translate cpumap. 1716 */ 1717 nr = cpu_map__get_max(evsel_list->cpus); 1718 cpus_aggr_map = cpu_map__empty_new(nr + 1); 1719 return cpus_aggr_map ? 0 : -ENOMEM; 1720 } 1721 1722 static void perf_stat__exit_aggr_mode(void) 1723 { 1724 cpu_map__put(aggr_map); 1725 cpu_map__put(cpus_aggr_map); 1726 aggr_map = NULL; 1727 cpus_aggr_map = NULL; 1728 } 1729 1730 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 1731 { 1732 int cpu; 1733 1734 if (idx > map->nr) 1735 return -1; 1736 1737 cpu = map->map[idx]; 1738 1739 if (cpu >= env->nr_cpus_online) 1740 return -1; 1741 1742 return cpu; 1743 } 1744 1745 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 1746 { 1747 struct perf_env *env = data; 1748 int cpu = perf_env__get_cpu(env, map, idx); 1749 1750 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 1751 } 1752 1753 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 1754 { 1755 struct perf_env *env = data; 1756 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 1757 1758 if (cpu != -1) { 1759 int socket_id = env->cpu[cpu].socket_id; 1760 1761 /* 1762 * Encode socket in upper 16 bits 1763 * core_id is relative to socket, and 1764 * we need a global id. So we combine 1765 * socket + core id. 1766 */ 1767 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 1768 } 1769 1770 return core; 1771 } 1772 1773 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 1774 struct cpu_map **sockp) 1775 { 1776 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1777 } 1778 1779 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1780 struct cpu_map **corep) 1781 { 1782 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 1783 } 1784 1785 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 1786 { 1787 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1788 } 1789 1790 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 1791 { 1792 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 1793 } 1794 1795 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1796 { 1797 struct perf_env *env = &st->session->header.env; 1798 1799 switch (stat_config.aggr_mode) { 1800 case AGGR_SOCKET: 1801 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 1802 perror("cannot build socket map"); 1803 return -1; 1804 } 1805 aggr_get_id = perf_stat__get_socket_file; 1806 break; 1807 case AGGR_CORE: 1808 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 1809 perror("cannot build core map"); 1810 return -1; 1811 } 1812 aggr_get_id = perf_stat__get_core_file; 1813 break; 1814 case AGGR_NONE: 1815 case AGGR_GLOBAL: 1816 case AGGR_THREAD: 1817 case AGGR_UNSET: 1818 default: 1819 break; 1820 } 1821 1822 return 0; 1823 } 1824 1825 static int topdown_filter_events(const char **attr, char **str, bool use_group) 1826 { 1827 int off = 0; 1828 int i; 1829 int len = 0; 1830 char *s; 1831 1832 for (i = 0; attr[i]; i++) { 1833 if (pmu_have_event("cpu", attr[i])) { 1834 len += strlen(attr[i]) + 1; 1835 attr[i - off] = attr[i]; 1836 } else 1837 off++; 1838 } 1839 attr[i - off] = NULL; 1840 1841 *str = malloc(len + 1 + 2); 1842 if (!*str) 1843 return -1; 1844 s = *str; 1845 if (i - off == 0) { 1846 *s = 0; 1847 return 0; 1848 } 1849 if (use_group) 1850 *s++ = '{'; 1851 for (i = 0; attr[i]; i++) { 1852 strcpy(s, attr[i]); 1853 s += strlen(s); 1854 *s++ = ','; 1855 } 1856 if (use_group) { 1857 s[-1] = '}'; 1858 *s = 0; 1859 } else 1860 s[-1] = 0; 1861 return 0; 1862 } 1863 1864 __weak bool arch_topdown_check_group(bool *warn) 1865 { 1866 *warn = false; 1867 return false; 1868 } 1869 1870 __weak void arch_topdown_group_warn(void) 1871 { 1872 } 1873 1874 /* 1875 * Add default attributes, if there were no attributes specified or 1876 * if -d/--detailed, -d -d or -d -d -d is used: 1877 */ 1878 static int add_default_attributes(void) 1879 { 1880 int err; 1881 struct perf_event_attr default_attrs0[] = { 1882 1883 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1884 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1885 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1886 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1887 1888 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1889 }; 1890 struct perf_event_attr frontend_attrs[] = { 1891 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1892 }; 1893 struct perf_event_attr backend_attrs[] = { 1894 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1895 }; 1896 struct perf_event_attr default_attrs1[] = { 1897 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1898 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1899 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1900 1901 }; 1902 1903 /* 1904 * Detailed stats (-d), covering the L1 and last level data caches: 1905 */ 1906 struct perf_event_attr detailed_attrs[] = { 1907 1908 { .type = PERF_TYPE_HW_CACHE, 1909 .config = 1910 PERF_COUNT_HW_CACHE_L1D << 0 | 1911 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1912 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1913 1914 { .type = PERF_TYPE_HW_CACHE, 1915 .config = 1916 PERF_COUNT_HW_CACHE_L1D << 0 | 1917 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1918 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1919 1920 { .type = PERF_TYPE_HW_CACHE, 1921 .config = 1922 PERF_COUNT_HW_CACHE_LL << 0 | 1923 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1924 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1925 1926 { .type = PERF_TYPE_HW_CACHE, 1927 .config = 1928 PERF_COUNT_HW_CACHE_LL << 0 | 1929 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1930 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1931 }; 1932 1933 /* 1934 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1935 */ 1936 struct perf_event_attr very_detailed_attrs[] = { 1937 1938 { .type = PERF_TYPE_HW_CACHE, 1939 .config = 1940 PERF_COUNT_HW_CACHE_L1I << 0 | 1941 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1942 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1943 1944 { .type = PERF_TYPE_HW_CACHE, 1945 .config = 1946 PERF_COUNT_HW_CACHE_L1I << 0 | 1947 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1948 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1949 1950 { .type = PERF_TYPE_HW_CACHE, 1951 .config = 1952 PERF_COUNT_HW_CACHE_DTLB << 0 | 1953 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1954 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1955 1956 { .type = PERF_TYPE_HW_CACHE, 1957 .config = 1958 PERF_COUNT_HW_CACHE_DTLB << 0 | 1959 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1960 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1961 1962 { .type = PERF_TYPE_HW_CACHE, 1963 .config = 1964 PERF_COUNT_HW_CACHE_ITLB << 0 | 1965 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1966 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1967 1968 { .type = PERF_TYPE_HW_CACHE, 1969 .config = 1970 PERF_COUNT_HW_CACHE_ITLB << 0 | 1971 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1972 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1973 1974 }; 1975 1976 /* 1977 * Very, very detailed stats (-d -d -d), adding prefetch events: 1978 */ 1979 struct perf_event_attr very_very_detailed_attrs[] = { 1980 1981 { .type = PERF_TYPE_HW_CACHE, 1982 .config = 1983 PERF_COUNT_HW_CACHE_L1D << 0 | 1984 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1985 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1986 1987 { .type = PERF_TYPE_HW_CACHE, 1988 .config = 1989 PERF_COUNT_HW_CACHE_L1D << 0 | 1990 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1991 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1992 }; 1993 1994 /* Set attrs if no event is selected and !null_run: */ 1995 if (null_run) 1996 return 0; 1997 1998 if (transaction_run) { 1999 if (pmu_have_event("cpu", "cycles-ct") && 2000 pmu_have_event("cpu", "el-start")) 2001 err = parse_events(evsel_list, transaction_attrs, NULL); 2002 else 2003 err = parse_events(evsel_list, transaction_limited_attrs, NULL); 2004 if (err) { 2005 fprintf(stderr, "Cannot set up transaction events\n"); 2006 return -1; 2007 } 2008 return 0; 2009 } 2010 2011 if (topdown_run) { 2012 char *str = NULL; 2013 bool warn = false; 2014 2015 if (stat_config.aggr_mode != AGGR_GLOBAL && 2016 stat_config.aggr_mode != AGGR_CORE) { 2017 pr_err("top down event configuration requires --per-core mode\n"); 2018 return -1; 2019 } 2020 stat_config.aggr_mode = AGGR_CORE; 2021 if (nr_cgroups || !target__has_cpu(&target)) { 2022 pr_err("top down event configuration requires system-wide mode (-a)\n"); 2023 return -1; 2024 } 2025 2026 if (!force_metric_only) 2027 metric_only = true; 2028 if (topdown_filter_events(topdown_attrs, &str, 2029 arch_topdown_check_group(&warn)) < 0) { 2030 pr_err("Out of memory\n"); 2031 return -1; 2032 } 2033 if (topdown_attrs[0] && str) { 2034 if (warn) 2035 arch_topdown_group_warn(); 2036 err = parse_events(evsel_list, str, NULL); 2037 if (err) { 2038 fprintf(stderr, 2039 "Cannot set up top down events %s: %d\n", 2040 str, err); 2041 free(str); 2042 return -1; 2043 } 2044 } else { 2045 fprintf(stderr, "System does not support topdown\n"); 2046 return -1; 2047 } 2048 free(str); 2049 } 2050 2051 if (!evsel_list->nr_entries) { 2052 if (target__has_cpu(&target)) 2053 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2054 2055 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 2056 return -1; 2057 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 2058 if (perf_evlist__add_default_attrs(evsel_list, 2059 frontend_attrs) < 0) 2060 return -1; 2061 } 2062 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 2063 if (perf_evlist__add_default_attrs(evsel_list, 2064 backend_attrs) < 0) 2065 return -1; 2066 } 2067 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 2068 return -1; 2069 } 2070 2071 /* Detailed events get appended to the event list: */ 2072 2073 if (detailed_run < 1) 2074 return 0; 2075 2076 /* Append detailed run extra attributes: */ 2077 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 2078 return -1; 2079 2080 if (detailed_run < 2) 2081 return 0; 2082 2083 /* Append very detailed run extra attributes: */ 2084 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 2085 return -1; 2086 2087 if (detailed_run < 3) 2088 return 0; 2089 2090 /* Append very, very detailed run extra attributes: */ 2091 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 2092 } 2093 2094 static const char * const stat_record_usage[] = { 2095 "perf stat record [<options>]", 2096 NULL, 2097 }; 2098 2099 static void init_features(struct perf_session *session) 2100 { 2101 int feat; 2102 2103 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 2104 perf_header__set_feat(&session->header, feat); 2105 2106 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 2107 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 2108 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 2109 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 2110 } 2111 2112 static int __cmd_record(int argc, const char **argv) 2113 { 2114 struct perf_session *session; 2115 struct perf_data_file *file = &perf_stat.file; 2116 2117 argc = parse_options(argc, argv, stat_options, stat_record_usage, 2118 PARSE_OPT_STOP_AT_NON_OPTION); 2119 2120 if (output_name) 2121 file->path = output_name; 2122 2123 if (run_count != 1 || forever) { 2124 pr_err("Cannot use -r option with perf stat record.\n"); 2125 return -1; 2126 } 2127 2128 session = perf_session__new(file, false, NULL); 2129 if (session == NULL) { 2130 pr_err("Perf session creation failed.\n"); 2131 return -1; 2132 } 2133 2134 init_features(session); 2135 2136 session->evlist = evsel_list; 2137 perf_stat.session = session; 2138 perf_stat.record = true; 2139 return argc; 2140 } 2141 2142 static int process_stat_round_event(struct perf_tool *tool __maybe_unused, 2143 union perf_event *event, 2144 struct perf_session *session) 2145 { 2146 struct stat_round_event *stat_round = &event->stat_round; 2147 struct perf_evsel *counter; 2148 struct timespec tsh, *ts = NULL; 2149 const char **argv = session->header.env.cmdline_argv; 2150 int argc = session->header.env.nr_cmdline; 2151 2152 evlist__for_each_entry(evsel_list, counter) 2153 perf_stat_process_counter(&stat_config, counter); 2154 2155 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 2156 update_stats(&walltime_nsecs_stats, stat_round->time); 2157 2158 if (stat_config.interval && stat_round->time) { 2159 tsh.tv_sec = stat_round->time / NSECS_PER_SEC; 2160 tsh.tv_nsec = stat_round->time % NSECS_PER_SEC; 2161 ts = &tsh; 2162 } 2163 2164 print_counters(ts, argc, argv); 2165 return 0; 2166 } 2167 2168 static 2169 int process_stat_config_event(struct perf_tool *tool __maybe_unused, 2170 union perf_event *event, 2171 struct perf_session *session __maybe_unused) 2172 { 2173 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2174 2175 perf_event__read_stat_config(&stat_config, &event->stat_config); 2176 2177 if (cpu_map__empty(st->cpus)) { 2178 if (st->aggr_mode != AGGR_UNSET) 2179 pr_warning("warning: processing task data, aggregation mode not set\n"); 2180 return 0; 2181 } 2182 2183 if (st->aggr_mode != AGGR_UNSET) 2184 stat_config.aggr_mode = st->aggr_mode; 2185 2186 if (perf_stat.file.is_pipe) 2187 perf_stat_init_aggr_mode(); 2188 else 2189 perf_stat_init_aggr_mode_file(st); 2190 2191 return 0; 2192 } 2193 2194 static int set_maps(struct perf_stat *st) 2195 { 2196 if (!st->cpus || !st->threads) 2197 return 0; 2198 2199 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2200 return -EINVAL; 2201 2202 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2203 2204 if (perf_evlist__alloc_stats(evsel_list, true)) 2205 return -ENOMEM; 2206 2207 st->maps_allocated = true; 2208 return 0; 2209 } 2210 2211 static 2212 int process_thread_map_event(struct perf_tool *tool __maybe_unused, 2213 union perf_event *event, 2214 struct perf_session *session __maybe_unused) 2215 { 2216 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2217 2218 if (st->threads) { 2219 pr_warning("Extra thread map event, ignoring.\n"); 2220 return 0; 2221 } 2222 2223 st->threads = thread_map__new_event(&event->thread_map); 2224 if (!st->threads) 2225 return -ENOMEM; 2226 2227 return set_maps(st); 2228 } 2229 2230 static 2231 int process_cpu_map_event(struct perf_tool *tool __maybe_unused, 2232 union perf_event *event, 2233 struct perf_session *session __maybe_unused) 2234 { 2235 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2236 struct cpu_map *cpus; 2237 2238 if (st->cpus) { 2239 pr_warning("Extra cpu map event, ignoring.\n"); 2240 return 0; 2241 } 2242 2243 cpus = cpu_map__new_data(&event->cpu_map.data); 2244 if (!cpus) 2245 return -ENOMEM; 2246 2247 st->cpus = cpus; 2248 return set_maps(st); 2249 } 2250 2251 static const char * const stat_report_usage[] = { 2252 "perf stat report [<options>]", 2253 NULL, 2254 }; 2255 2256 static struct perf_stat perf_stat = { 2257 .tool = { 2258 .attr = perf_event__process_attr, 2259 .event_update = perf_event__process_event_update, 2260 .thread_map = process_thread_map_event, 2261 .cpu_map = process_cpu_map_event, 2262 .stat_config = process_stat_config_event, 2263 .stat = perf_event__process_stat_event, 2264 .stat_round = process_stat_round_event, 2265 }, 2266 .aggr_mode = AGGR_UNSET, 2267 }; 2268 2269 static int __cmd_report(int argc, const char **argv) 2270 { 2271 struct perf_session *session; 2272 const struct option options[] = { 2273 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2274 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2275 "aggregate counts per processor socket", AGGR_SOCKET), 2276 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2277 "aggregate counts per physical processor core", AGGR_CORE), 2278 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2279 "disable CPU count aggregation", AGGR_NONE), 2280 OPT_END() 2281 }; 2282 struct stat st; 2283 int ret; 2284 2285 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2286 2287 if (!input_name || !strlen(input_name)) { 2288 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2289 input_name = "-"; 2290 else 2291 input_name = "perf.data"; 2292 } 2293 2294 perf_stat.file.path = input_name; 2295 perf_stat.file.mode = PERF_DATA_MODE_READ; 2296 2297 session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); 2298 if (session == NULL) 2299 return -1; 2300 2301 perf_stat.session = session; 2302 stat_config.output = stderr; 2303 evsel_list = session->evlist; 2304 2305 ret = perf_session__process_events(session); 2306 if (ret) 2307 return ret; 2308 2309 perf_session__delete(session); 2310 return 0; 2311 } 2312 2313 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) 2314 { 2315 const char * const stat_usage[] = { 2316 "perf stat [<options>] [<command>]", 2317 NULL 2318 }; 2319 int status = -EINVAL, run_idx; 2320 const char *mode; 2321 FILE *output = stderr; 2322 unsigned int interval; 2323 const char * const stat_subcommands[] = { "record", "report" }; 2324 2325 setlocale(LC_ALL, ""); 2326 2327 evsel_list = perf_evlist__new(); 2328 if (evsel_list == NULL) 2329 return -ENOMEM; 2330 2331 parse_events__shrink_config_terms(); 2332 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2333 (const char **) stat_usage, 2334 PARSE_OPT_STOP_AT_NON_OPTION); 2335 perf_stat__init_shadow_stats(); 2336 2337 if (csv_sep) { 2338 csv_output = true; 2339 if (!strcmp(csv_sep, "\\t")) 2340 csv_sep = "\t"; 2341 } else 2342 csv_sep = DEFAULT_SEPARATOR; 2343 2344 if (argc && !strncmp(argv[0], "rec", 3)) { 2345 argc = __cmd_record(argc, argv); 2346 if (argc < 0) 2347 return -1; 2348 } else if (argc && !strncmp(argv[0], "rep", 3)) 2349 return __cmd_report(argc, argv); 2350 2351 interval = stat_config.interval; 2352 2353 /* 2354 * For record command the -o is already taken care of. 2355 */ 2356 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2357 output = NULL; 2358 2359 if (output_name && output_fd) { 2360 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2361 parse_options_usage(stat_usage, stat_options, "o", 1); 2362 parse_options_usage(NULL, stat_options, "log-fd", 0); 2363 goto out; 2364 } 2365 2366 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2367 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2368 goto out; 2369 } 2370 2371 if (metric_only && run_count > 1) { 2372 fprintf(stderr, "--metric-only is not supported with -r\n"); 2373 goto out; 2374 } 2375 2376 if (output_fd < 0) { 2377 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2378 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2379 goto out; 2380 } 2381 2382 if (!output) { 2383 struct timespec tm; 2384 mode = append_file ? "a" : "w"; 2385 2386 output = fopen(output_name, mode); 2387 if (!output) { 2388 perror("failed to create output file"); 2389 return -1; 2390 } 2391 clock_gettime(CLOCK_REALTIME, &tm); 2392 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2393 } else if (output_fd > 0) { 2394 mode = append_file ? "a" : "w"; 2395 output = fdopen(output_fd, mode); 2396 if (!output) { 2397 perror("Failed opening logfd"); 2398 return -errno; 2399 } 2400 } 2401 2402 stat_config.output = output; 2403 2404 /* 2405 * let the spreadsheet do the pretty-printing 2406 */ 2407 if (csv_output) { 2408 /* User explicitly passed -B? */ 2409 if (big_num_opt == 1) { 2410 fprintf(stderr, "-B option not supported with -x\n"); 2411 parse_options_usage(stat_usage, stat_options, "B", 1); 2412 parse_options_usage(NULL, stat_options, "x", 1); 2413 goto out; 2414 } else /* Nope, so disable big number formatting */ 2415 big_num = false; 2416 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2417 big_num = false; 2418 2419 if (!argc && target__none(&target)) 2420 usage_with_options(stat_usage, stat_options); 2421 2422 if (run_count < 0) { 2423 pr_err("Run count must be a positive number\n"); 2424 parse_options_usage(stat_usage, stat_options, "r", 1); 2425 goto out; 2426 } else if (run_count == 0) { 2427 forever = true; 2428 run_count = 1; 2429 } 2430 2431 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 2432 fprintf(stderr, "The --per-thread option is only available " 2433 "when monitoring via -p -t options.\n"); 2434 parse_options_usage(NULL, stat_options, "p", 1); 2435 parse_options_usage(NULL, stat_options, "t", 1); 2436 goto out; 2437 } 2438 2439 /* 2440 * no_aggr, cgroup are for system-wide only 2441 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2442 */ 2443 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2444 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2445 !target__has_cpu(&target)) { 2446 fprintf(stderr, "both cgroup and no-aggregation " 2447 "modes only available in system-wide mode\n"); 2448 2449 parse_options_usage(stat_usage, stat_options, "G", 1); 2450 parse_options_usage(NULL, stat_options, "A", 1); 2451 parse_options_usage(NULL, stat_options, "a", 1); 2452 goto out; 2453 } 2454 2455 if (add_default_attributes()) 2456 goto out; 2457 2458 target__validate(&target); 2459 2460 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 2461 if (target__has_task(&target)) { 2462 pr_err("Problems finding threads of monitor\n"); 2463 parse_options_usage(stat_usage, stat_options, "p", 1); 2464 parse_options_usage(NULL, stat_options, "t", 1); 2465 } else if (target__has_cpu(&target)) { 2466 perror("failed to parse CPUs map"); 2467 parse_options_usage(stat_usage, stat_options, "C", 1); 2468 parse_options_usage(NULL, stat_options, "a", 1); 2469 } 2470 goto out; 2471 } 2472 2473 /* 2474 * Initialize thread_map with comm names, 2475 * so we could print it out on output. 2476 */ 2477 if (stat_config.aggr_mode == AGGR_THREAD) 2478 thread_map__read_comms(evsel_list->threads); 2479 2480 if (interval && interval < 100) { 2481 if (interval < 10) { 2482 pr_err("print interval must be >= 10ms\n"); 2483 parse_options_usage(stat_usage, stat_options, "I", 1); 2484 goto out; 2485 } else 2486 pr_warning("print interval < 100ms. " 2487 "The overhead percentage could be high in some cases. " 2488 "Please proceed with caution.\n"); 2489 } 2490 2491 if (perf_evlist__alloc_stats(evsel_list, interval)) 2492 goto out; 2493 2494 if (perf_stat_init_aggr_mode()) 2495 goto out; 2496 2497 /* 2498 * We dont want to block the signals - that would cause 2499 * child tasks to inherit that and Ctrl-C would not work. 2500 * What we want is for Ctrl-C to work in the exec()-ed 2501 * task, but being ignored by perf stat itself: 2502 */ 2503 atexit(sig_atexit); 2504 if (!forever) 2505 signal(SIGINT, skip_signal); 2506 signal(SIGCHLD, skip_signal); 2507 signal(SIGALRM, skip_signal); 2508 signal(SIGABRT, skip_signal); 2509 2510 status = 0; 2511 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 2512 if (run_count != 1 && verbose) 2513 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 2514 run_idx + 1); 2515 2516 status = run_perf_stat(argc, argv); 2517 if (forever && status != -1) { 2518 print_counters(NULL, argc, argv); 2519 perf_stat__reset_stats(); 2520 } 2521 } 2522 2523 if (!forever && status != -1 && !interval) 2524 print_counters(NULL, argc, argv); 2525 2526 if (STAT_RECORD) { 2527 /* 2528 * We synthesize the kernel mmap record just so that older tools 2529 * don't emit warnings about not being able to resolve symbols 2530 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2531 * a saner message about no samples being in the perf.data file. 2532 * 2533 * This also serves to suppress a warning about f_header.data.size == 0 2534 * in header.c at the moment 'perf stat record' gets introduced, which 2535 * is not really needed once we start adding the stat specific PERF_RECORD_ 2536 * records, but the need to suppress the kptr_restrict messages in older 2537 * tools remain -acme 2538 */ 2539 int fd = perf_data_file__fd(&perf_stat.file); 2540 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2541 process_synthesized_event, 2542 &perf_stat.session->machines.host); 2543 if (err) { 2544 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 2545 "older tools may produce warnings about this file\n."); 2546 } 2547 2548 if (!interval) { 2549 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 2550 pr_err("failed to write stat round event\n"); 2551 } 2552 2553 if (!perf_stat.file.is_pipe) { 2554 perf_stat.session->header.data_size += perf_stat.bytes_written; 2555 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2556 } 2557 2558 perf_session__delete(perf_stat.session); 2559 } 2560 2561 perf_stat__exit_aggr_mode(); 2562 perf_evlist__free_stats(evsel_list); 2563 out: 2564 perf_evlist__delete(evsel_list); 2565 return status; 2566 } 2567