/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counter summary
 * overview of any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"

#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>

#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr_socket(char *prefix);

static struct perf_evlist	*evsel_list;

static struct perf_target	target = {
	.uid	= UINT_MAX,
};

static int			run_count	= 1;
static bool			no_inherit	= false;
static bool			scale		= true;
static bool			no_aggr		= false;
static bool			aggr_socket	= false;
static pid_t			child_pid	= -1;
static bool			null_run	= false;
static int			detailed_run	= 0;
static bool			big_num		= true;
static int			big_num_opt	= -1;
static const char		*csv_sep	= NULL;
static bool			csv_output	= false;
static bool			group		= false;
static FILE			*output		= NULL;
static const char		*pre_cmd	= NULL;
static const char		*post_cmd	= NULL;
static bool			sync_run	= false;
static unsigned int		interval	= 0;
static struct timespec		ref_time;
static struct cpu_map		*sock_map;

static volatile int done = 0;

struct perf_stat {
	struct stats	  res_stats[3];
};
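/*
 * A worked example of the borrow below (illustrative values): with
 * a = {2, 100000000} and b = {1, 900000000}, a->tv_nsec < b->tv_nsec,
 * so r->tv_nsec = 100000000 + 1000000000 - 900000000 = 200000000 and
 * r->tv_sec = (2 - 1) - 1 = 0, i.e. r = 0.2s, keeping tv_nsec
 * normalized to [0, 1e9).
 */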
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	if (a->tv_nsec < b->tv_nsec) {
		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
		r->tv_sec--;
	} else {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	}
}

static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
{
	return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
}

static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
{
	return perf_evsel__cpus(evsel)->nr;
}

static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
	evsel->priv = zalloc(sizeof(struct perf_stat));
	return evsel->priv == NULL ? -ENOMEM : 0;
}

static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
	free(evsel->priv);
	evsel->priv = NULL;
}

static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
{
	void *addr;
	size_t sz;

	sz = sizeof(*evsel->counts) +
	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));

	addr = zalloc(sz);
	if (!addr)
		return -ENOMEM;

	evsel->prev_raw_counts = addr;

	return 0;
}

static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
{
	free(evsel->prev_raw_counts);
	evsel->prev_raw_counts = NULL;
}

static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
static struct stats walltime_nsecs_stats;

static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;

	if (scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

	if (perf_target__has_cpu(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	if (!perf_target__has_task(&target) &&
	    perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}
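/*
 * Note on the attr setup above (a sketch of the perf_event semantics):
 * opening the counter with disabled = 1 and enable_on_exec = 1 makes the
 * kernel arm it only when the forked workload calls exec(), so perf
 * stat's own fork/startup overhead is excluded from the counts.
 */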
/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[0], count[0]);
}

/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 */
static int read_counter_aggr(struct perf_evsel *counter)
{
	struct perf_stat *ps = counter->priv;
	u64 *count = counter->counts->aggr.values;
	int i;

	if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
			       evsel_list->threads->nr, scale) < 0)
		return -1;

	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose) {
		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			perf_evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	update_shadow_stats(counter, count);

	return 0;
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	u64 *count;
	int cpu;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
			return -1;

		count = counter->counts->cpu[cpu].values;

		update_shadow_stats(counter, count);
	}

	return 0;
}
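/*
 * Sketch of what the two readers above get back from the kernel: with
 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING set in
 * create_perf_stat_counter(), each read yields three u64 values:
 *
 *	count[0] = raw event count
 *	count[1] = time the event was enabled
 *	count[2] = time the event was actually on the PMU
 *
 * When counters are multiplexed (count[2] < count[1]) and scale is set,
 * the read helpers extrapolate: count[0] * count[1] / count[2].
 */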
static void print_interval(void)
{
	static int num_print_interval;
	struct perf_evsel *counter;
	struct perf_stat *ps;
	struct timespec ts, rs;
	char prefix[64];

	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			read_counter(counter);
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			read_counter_aggr(counter);
		}
	}
	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);
	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
		if (aggr_socket)
			fprintf(output, "#           time socket cpus             counts events\n");
		else if (no_aggr)
			fprintf(output, "#           time CPU                 counts events\n");
		else
			fprintf(output, "#           time             counts events\n");
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;

	if (aggr_socket)
		print_aggr_socket(prefix);
	else if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter, prefix);
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter_aggr(counter, prefix);
	}
}
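/*
 * Illustrative -I output (the counts are made up; the time prefix is the
 * "%6lu.%09lu%s" string built in print_interval() above):
 *
 *	#           time             counts events
 *	     1.000105926      1,234,567,890 cycles
 *	     2.000212468      1,201,345,678 cycles
 */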
static int __run_perf_stat(int argc __maybe_unused, const char **argv)
{
	char msg[512];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	int status = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = (argc > 0);
	char buf;

	if (interval) {
		ts.tv_sec = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec = 1;
		ts.tv_nsec = 0;
	}

	if (aggr_socket &&
	    cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
		perror("cannot build socket map");
		return -1;
	}

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		return -1;
	}

	if (forks) {
		if ((child_pid = fork()) < 0)
			perror("failed to fork");

		if (!child_pid) {
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			exit(-1);
		}

		if (perf_target__none(&target))
			evsel_list->threads->map[0] = child_pid;

		/*
		 * Wait for the child to be ready to exec.
		 */
		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		if (read(child_ready_pipe[0], &buf, 1) == -1)
			perror("unable to read pipe");
		close(child_ready_pipe[0]);
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	list_for_each_entry(counter, &evsel_list->entries, node) {
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;
				continue;
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;
	}

	if (perf_evlist__apply_filters(evsel_list)) {
		error("failed to set filter with %d (%s)\n", errno,
		      strerror(errno));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		close(go_pipe[1]);
		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				print_interval();
			}
		}
		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				print_interval();
		}
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     evsel_list->threads->nr);
		}
	}

	return WEXITSTATUS(status);
}

static int run_perf_stat(int argc __maybe_unused, const char **argv)
{
	int ret;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv);
	if (ret)
		return ret;

	if (post_cmd) {
		ret = system(post_cmd);
		if (ret)
			return ret;
	}

	return ret;
}

static void print_noise_pct(double total, double avg)
{
	double pct = rel_stddev_stats(total, avg);

	if (csv_output)
		fprintf(output, "%s%.2f%%", csv_sep, pct);
	else if (pct)
		fprintf(output, "  ( +-%6.2f%% )", pct);
}

static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat *ps;

	if (run_count == 1)
		return;

	ps = evsel->priv;
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}

static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double msecs = avg / 1e6;
	char cpustr[16] = { '\0', };
	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";

	if (aggr_socket)
		sprintf(cpustr, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			cpu,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
	else if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[cpu], csv_sep);

	fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output || interval)
		return;

	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
	else
		fprintf(output, "                                   ");
}
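/*
 * The ratio coloring below works off fixed thresholds (see grc_table):
 * e.g. for frontend stall ratios, above 50% prints red, above 30%
 * magenta, above 10% yellow, and anything lower stays uncolored.
 */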
/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0,  5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel
					  __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " frontend cycles idle   ");
}

static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel
					 __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " backend  cycles idle   ");
}

static void print_branch_misses(int cpu,
				struct perf_evsel *evsel __maybe_unused,
				double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_branches_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all branches        ");
}

static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_dcache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-dcache hits  ");
}

static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_icache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-icache hits  ");
}

static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all dTLB cache hits ");
}

static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_itlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all iTLB cache hits ");
}

static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel __maybe_unused,
				  double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_ll_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all LL-cache hits   ");
}

static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double total, ratio = 0.0;
	char cpustr[16] = { '\0', };
	const char *fmt;

	if (csv_output)
		fmt = "%s%.0f%s%s";
	else if (big_num)
		fmt = "%s%'18.0f%s%-25s";
	else
		fmt = "%s%18.0f%s%-25s";

	if (aggr_socket)
		sprintf(cpustr, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			cpu,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
	else if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
	else
		cpu = 0;

	fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output || interval)
		return;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = avg_stats(&runtime_cycles_stats[cpu]);
		if (total)
			ratio = avg / total;

		fprintf(output, " #   %5.2f  insns per cycle        ", ratio);

		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));

		if (total && avg) {
			ratio = total / avg;
			fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
		}

	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
			runtime_branches_stats[cpu].n != 0) {
		print_branch_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_dcache_stats[cpu].n != 0) {
		print_l1_dcache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_icache_stats[cpu].n != 0) {
		print_l1_icache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_dtlb_cache_stats[cpu].n != 0) {
		print_dtlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_itlb_cache_stats[cpu].n != 0) {
		print_itlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_ll_cache_stats[cpu].n != 0) {
		print_ll_cache_misses(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
			runtime_cacherefs_stats[cpu].n != 0) {
		total = avg_stats(&runtime_cacherefs_stats[cpu]);

		if (total)
			ratio = avg * 100 / total;

		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);

	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1.0 * avg / total;

		fprintf(output, " # %8.3f GHz                    ", ratio);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}

		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
	} else {
		fprintf(output, "                                   ");
	}
}
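/*
 * The PERF_TYPE_HW_CACHE comparisons above use the cache event encoding
 * from perf_event.h:
 *
 *	config = cache_id | (op_id << 8) | (result_id << 16)
 *
 * e.g. L1-dcache read misses are PERF_COUNT_HW_CACHE_L1D |
 * (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16).
 */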
static void print_aggr_socket(char *prefix)
{
	struct perf_evsel *counter;
	u64 ena, run, val;
	int cpu, s, s2, sock, nr;

	if (!sock_map)
		return;

	for (s = 0; s < sock_map->nr; s++) {
		sock = cpu_map__socket(sock_map, s);
		list_for_each_entry(counter, &evsel_list->entries, node) {
			val = ena = run = 0;
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
				if (s2 != sock)
					continue;
				val += counter->counts->cpu[cpu].val;
				ena += counter->counts->cpu[cpu].ena;
				run += counter->counts->cpu[cpu].run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
				fprintf(output, "S%*d%s%*d%s%*s%s%*s",
					csv_output ? 0 : -5,
					s,
					csv_sep,
					csv_output ? 0 : 4,
					nr,
					csv_sep,
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep,
					csv_output ? 0 : -24,
					perf_evsel__name(counter));
				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				fputc('\n', output);
				continue;
			}

			if (nsec_counter(counter))
				nsec_printout(sock, nr, counter, val);
			else
				abs_printout(sock, nr, counter, val);

			if (!csv_output) {
				print_noise(counter, 1.0);

				if (run != ena)
					fprintf(output, "  (%.2f%%)",
						100.0 * run / ena);
			}
			fputc('\n', output);
		}
	}
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
	struct perf_stat *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
	int scaled = counter->counts->scaled;

	if (prefix)
		fprintf(output, "%s", prefix);

	if (scaled == -1) {
		fprintf(output, "%*s%s%*s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep,
			csv_output ? 0 : -24,
			perf_evsel__name(counter));

		if (counter->cgrp)
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);

		fputc('\n', output);
		return;
	}

	if (nsec_counter(counter))
		nsec_printout(-1, 0, counter, avg);
	else
		abs_printout(-1, 0, counter, avg);

	print_noise(counter, avg);

	if (csv_output) {
		fputc('\n', output);
		return;
	}

	if (scaled) {
		double avg_enabled, avg_running;

		avg_enabled = avg_stats(&ps->res_stats[1]);
		avg_running = avg_stats(&ps->res_stats[2]);

		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
	}
	fprintf(output, "\n");
}

/*
 * Print out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static void print_counter(struct perf_evsel *counter, char *prefix)
{
	u64 ena, run, val;
	int cpu;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		val = counter->counts->cpu[cpu].val;
		ena = counter->counts->cpu[cpu].ena;
		run = counter->counts->cpu[cpu].run;

		if (prefix)
			fprintf(output, "%s", prefix);

		if (run == 0 || ena == 0) {
			fprintf(output, "CPU%*d%s%*s%s%*s",
				csv_output ? 0 : -4,
				perf_evsel__cpus(counter)->map[cpu], csv_sep,
				csv_output ? 0 : 18,
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
				csv_sep,
				csv_output ? 0 : -24,
				perf_evsel__name(counter));

			if (counter->cgrp)
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);

			fputc('\n', output);
			continue;
		}

		if (nsec_counter(counter))
			nsec_printout(cpu, 0, counter, val);
		else
			abs_printout(cpu, 0, counter, val);

		if (!csv_output) {
			print_noise(counter, 1.0);

			if (run != ena)
				fprintf(output, "  (%.2f%%)",
					100.0 * run / ena);
		}
		fputc('\n', output);
	}
}

static void print_stat(int argc, const char **argv)
{
	struct perf_evsel *counter;
	int i;

	fflush(stdout);

	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		if (!perf_target__has_task(&target)) {
			fprintf(output, "\'%s", argv[0]);
			for (i = 1; i < argc; i++)
				fprintf(output, " %s", argv[i]);
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
		else
			fprintf(output, "thread id \'%s", target.tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}

	if (aggr_socket)
		print_aggr_socket(NULL);
	else if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter, NULL);
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter_aggr(counter, NULL);
	}

	if (!csv_output) {
		if (!null_run)
			fprintf(output, "\n");
		fprintf(output, " %17.9f seconds time elapsed",
			avg_stats(&walltime_nsecs_stats)/1e9);
		if (run_count > 1) {
			fprintf(output, "                                        ");
			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
					avg_stats(&walltime_nsecs_stats));
		}
		fprintf(output, "\n\n");
	}
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
	if ((child_pid == -1) || interval)
		done = 1;

	signr = signo;
}

static void sig_atexit(void)
{
	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
	struct perf_event_attr default_attrs[] = {

	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK	},
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS	},
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS	},

	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES	},
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS	},
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES	},

	};

	/*
	 * Detailed stats (-d), covering the L1 and last level data caches:
	 */
	struct perf_event_attr detailed_attrs[] = {

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_L1D		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)	},

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_L1D		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)	},

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_LL			<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)	},

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_LL			<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)	},
	};

	/*
	 * Very detailed stats (-d -d), covering the instruction cache and the
	 * TLB caches:
	 */
	struct perf_event_attr very_detailed_attrs[] = {

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_L1I		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)	},

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_L1I		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)	},

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)	},

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)	},

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)	},

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)	},

	};

	/*
	 * Very, very detailed stats (-d -d -d), adding prefetch events:
	 */
	struct perf_event_attr very_very_detailed_attrs[] = {

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_L1D		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)	},

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		 PERF_COUNT_HW_CACHE_L1D		<<  0  |
		(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)	},
	};

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (!evsel_list->nr_entries) {
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run < 1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
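/*
 * Summary of the -d levels wired up above: one -d appends the L1-dcache
 * and LL-cache access/miss pairs, -d -d additionally appends the
 * L1-icache, dTLB and iTLB pairs, and -d -d -d adds the L1-dcache
 * prefetch pair on top.
 */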
"interval-print", &interval, 1325 "print counts at regular interval in ms (>= 100)"), 1326 OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"), 1327 OPT_END() 1328 }; 1329 const char * const stat_usage[] = { 1330 "perf stat [<options>] [<command>]", 1331 NULL 1332 }; 1333 struct perf_evsel *pos; 1334 int status = -ENOMEM, run_idx; 1335 const char *mode; 1336 1337 setlocale(LC_ALL, ""); 1338 1339 evsel_list = perf_evlist__new(NULL, NULL); 1340 if (evsel_list == NULL) 1341 return -ENOMEM; 1342 1343 argc = parse_options(argc, argv, options, stat_usage, 1344 PARSE_OPT_STOP_AT_NON_OPTION); 1345 1346 output = stderr; 1347 if (output_name && strcmp(output_name, "-")) 1348 output = NULL; 1349 1350 if (output_name && output_fd) { 1351 fprintf(stderr, "cannot use both --output and --log-fd\n"); 1352 usage_with_options(stat_usage, options); 1353 } 1354 1355 if (output_fd < 0) { 1356 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 1357 usage_with_options(stat_usage, options); 1358 } 1359 1360 if (!output) { 1361 struct timespec tm; 1362 mode = append_file ? "a" : "w"; 1363 1364 output = fopen(output_name, mode); 1365 if (!output) { 1366 perror("failed to create output file"); 1367 return -1; 1368 } 1369 clock_gettime(CLOCK_REALTIME, &tm); 1370 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 1371 } else if (output_fd > 0) { 1372 mode = append_file ? "a" : "w"; 1373 output = fdopen(output_fd, mode); 1374 if (!output) { 1375 perror("Failed opening logfd"); 1376 return -errno; 1377 } 1378 } 1379 1380 if (csv_sep) { 1381 csv_output = true; 1382 if (!strcmp(csv_sep, "\\t")) 1383 csv_sep = "\t"; 1384 } else 1385 csv_sep = DEFAULT_SEPARATOR; 1386 1387 /* 1388 * let the spreadsheet do the pretty-printing 1389 */ 1390 if (csv_output) { 1391 /* User explicitly passed -B? 
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			usage_with_options(stat_usage, options);
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	if (!argc && !perf_target__has_task(&target))
		usage_with_options(stat_usage, options);
	if (run_count <= 0)
		usage_with_options(stat_usage, options);

	/* no_aggr, cgroup are for system-wide only */
	if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		usage_with_options(stat_usage, options);
	}

	if (aggr_socket) {
		if (!perf_target__has_cpu(&target)) {
			fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
			usage_with_options(stat_usage, options);
		}
		no_aggr = true;
	}

	if (add_default_attributes())
		goto out;

	perf_target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (perf_target__has_task(&target))
			pr_err("Problems finding threads to monitor\n");
		if (perf_target__has_cpu(&target))
			perror("failed to parse CPUs map");

		usage_with_options(stat_usage, options);
		return -1;
	}
	if (interval && interval < 100) {
		pr_err("print interval must be >= 100ms\n");
		usage_with_options(stat_usage, options);
		return -1;
	}

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
		    perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
			goto out_free_fd;
	}
	if (interval) {
		list_for_each_entry(pos, &evsel_list->entries, node) {
			if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
				goto out_free_fd;
		}
	}

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	signal(SIGINT,  skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		status = run_perf_stat(argc, argv);
	}

	if (status != -1 && !interval)
		print_stat(argc, argv);
out_free_fd:
	list_for_each_entry(pos, &evsel_list->entries, node) {
		perf_evsel__free_stat_priv(pos);
		perf_evsel__free_counts(pos);
		perf_evsel__free_prev_raw_counts(pos);
	}
	perf_evlist__delete_maps(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}
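/*
 * Illustrative invocations of the options handled above (examples, not
 * exhaustive):
 *
 *	perf stat ./hackbench 10		 # default event set
 *	perf stat -r 5 -d ./hackbench 10	 # 5 runs, detailed cache stats
 *	perf stat -a -I 1000 sleep 10		 # system-wide, printed every second
 *	perf stat -a --aggr-socket sleep 1	 # counts aggregated per socket
 */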