1 // SPDX-License-Identifier: GPL-2.0 2 #include <stdio.h> 3 #include "evsel.h" 4 #include "stat.h" 5 #include "color.h" 6 #include "pmu.h" 7 #include "rblist.h" 8 #include "evlist.h" 9 #include "expr.h" 10 #include "metricgroup.h" 11 12 /* 13 * AGGR_GLOBAL: Use CPU 0 14 * AGGR_SOCKET: Use first CPU of socket 15 * AGGR_DIE: Use first CPU of die 16 * AGGR_CORE: Use first CPU of core 17 * AGGR_NONE: Use matching CPU 18 * AGGR_THREAD: Not supported? 19 */ 20 static bool have_frontend_stalled; 21 22 struct runtime_stat rt_stat; 23 struct stats walltime_nsecs_stats; 24 25 struct saved_value { 26 struct rb_node rb_node; 27 struct perf_evsel *evsel; 28 enum stat_type type; 29 int ctx; 30 int cpu; 31 struct runtime_stat *stat; 32 struct stats stats; 33 }; 34 35 static int saved_value_cmp(struct rb_node *rb_node, const void *entry) 36 { 37 struct saved_value *a = container_of(rb_node, 38 struct saved_value, 39 rb_node); 40 const struct saved_value *b = entry; 41 42 if (a->cpu != b->cpu) 43 return a->cpu - b->cpu; 44 45 /* 46 * Previously the rbtree was used to link generic metrics. 47 * The keys were evsel/cpu. Now the rbtree is extended to support 48 * per-thread shadow stats. For shadow stats case, the keys 49 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics 50 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). 51 */ 52 if (a->type != b->type) 53 return a->type - b->type; 54 55 if (a->ctx != b->ctx) 56 return a->ctx - b->ctx; 57 58 if (a->evsel == NULL && b->evsel == NULL) { 59 if (a->stat == b->stat) 60 return 0; 61 62 if ((char *)a->stat < (char *)b->stat) 63 return -1; 64 65 return 1; 66 } 67 68 if (a->evsel == b->evsel) 69 return 0; 70 if ((char *)a->evsel < (char *)b->evsel) 71 return -1; 72 return +1; 73 } 74 75 static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, 76 const void *entry) 77 { 78 struct saved_value *nd = malloc(sizeof(struct saved_value)); 79 80 if (!nd) 81 return NULL; 82 memcpy(nd, entry, sizeof(struct saved_value)); 83 return &nd->rb_node; 84 } 85 86 static void saved_value_delete(struct rblist *rblist __maybe_unused, 87 struct rb_node *rb_node) 88 { 89 struct saved_value *v; 90 91 BUG_ON(!rb_node); 92 v = container_of(rb_node, struct saved_value, rb_node); 93 free(v); 94 } 95 96 static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, 97 int cpu, 98 bool create, 99 enum stat_type type, 100 int ctx, 101 struct runtime_stat *st) 102 { 103 struct rblist *rblist; 104 struct rb_node *nd; 105 struct saved_value dm = { 106 .cpu = cpu, 107 .evsel = evsel, 108 .type = type, 109 .ctx = ctx, 110 .stat = st, 111 }; 112 113 rblist = &st->value_list; 114 115 nd = rblist__find(rblist, &dm); 116 if (nd) 117 return container_of(nd, struct saved_value, rb_node); 118 if (create) { 119 rblist__add_node(rblist, &dm); 120 nd = rblist__find(rblist, &dm); 121 if (nd) 122 return container_of(nd, struct saved_value, rb_node); 123 } 124 return NULL; 125 } 126 127 void runtime_stat__init(struct runtime_stat *st) 128 { 129 struct rblist *rblist = &st->value_list; 130 131 rblist__init(rblist); 132 rblist->node_cmp = saved_value_cmp; 133 rblist->node_new = saved_value_new; 134 rblist->node_delete = saved_value_delete; 135 } 136 137 void runtime_stat__exit(struct runtime_stat *st) 138 { 139 rblist__exit(&st->value_list); 140 } 141 142 void perf_stat__init_shadow_stats(void) 143 { 144 have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); 145 runtime_stat__init(&rt_stat); 146 } 147 148 static int evsel_context(struct perf_evsel *evsel) 149 { 150 int ctx = 0; 151 152 if (evsel->attr.exclude_kernel) 153 ctx |= CTX_BIT_KERNEL; 154 if (evsel->attr.exclude_user) 155 ctx |= CTX_BIT_USER; 156 if (evsel->attr.exclude_hv) 157 ctx |= CTX_BIT_HV; 158 if (evsel->attr.exclude_host) 159 ctx |= CTX_BIT_HOST; 160 if (evsel->attr.exclude_idle) 161 ctx |= CTX_BIT_IDLE; 162 163 return ctx; 164 } 165 166 static void reset_stat(struct runtime_stat *st) 167 { 168 struct rblist *rblist; 169 struct rb_node *pos, *next; 170 171 rblist = &st->value_list; 172 next = rb_first_cached(&rblist->entries); 173 while (next) { 174 pos = next; 175 next = rb_next(pos); 176 memset(&container_of(pos, struct saved_value, rb_node)->stats, 177 0, 178 sizeof(struct stats)); 179 } 180 } 181 182 void perf_stat__reset_shadow_stats(void) 183 { 184 reset_stat(&rt_stat); 185 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 186 } 187 188 void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) 189 { 190 reset_stat(st); 191 } 192 193 static void update_runtime_stat(struct runtime_stat *st, 194 enum stat_type type, 195 int ctx, int cpu, u64 count) 196 { 197 struct saved_value *v = saved_value_lookup(NULL, cpu, true, 198 type, ctx, st); 199 200 if (v) 201 update_stats(&v->stats, count); 202 } 203 204 /* 205 * Update various tracking values we maintain to print 206 * more semantic information such as miss/hit ratios, 207 * instruction rates, etc: 208 */ 209 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, 210 int cpu, struct runtime_stat *st) 211 { 212 int ctx = evsel_context(counter); 213 u64 count_ns = count; 214 215 count *= counter->scale; 216 217 if (perf_evsel__is_clock(counter)) 218 update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); 219 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 220 update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); 221 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) 222 update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); 223 else if (perf_stat_evsel__is(counter, TRANSACTION_START)) 224 update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); 225 else if (perf_stat_evsel__is(counter, ELISION_START)) 226 update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); 227 else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) 228 update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, 229 ctx, cpu, count); 230 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) 231 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, 232 ctx, cpu, count); 233 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) 234 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, 235 ctx, cpu, count); 236 else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) 237 update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, 238 ctx, cpu, count); 239 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) 240 update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, 241 ctx, cpu, count); 242 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 243 update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, 244 ctx, cpu, count); 245 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 246 update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, 247 ctx, cpu, count); 248 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 249 update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); 250 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 251 update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); 252 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 253 update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); 254 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) 255 update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); 256 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) 257 update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); 258 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) 259 update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); 260 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 261 update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); 262 else if (perf_stat_evsel__is(counter, SMI_NUM)) 263 update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); 264 else if (perf_stat_evsel__is(counter, APERF)) 265 update_runtime_stat(st, STAT_APERF, ctx, cpu, count); 266 267 if (counter->collect_stat) { 268 struct saved_value *v = saved_value_lookup(counter, cpu, true, 269 STAT_NONE, 0, st); 270 update_stats(&v->stats, count); 271 } 272 } 273 274 /* used for get_ratio_color() */ 275 enum grc_type { 276 GRC_STALLED_CYCLES_FE, 277 GRC_STALLED_CYCLES_BE, 278 GRC_CACHE_MISSES, 279 GRC_MAX_NR 280 }; 281 282 static const char *get_ratio_color(enum grc_type type, double ratio) 283 { 284 static const double grc_table[GRC_MAX_NR][3] = { 285 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, 286 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, 287 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, 288 }; 289 const char *color = PERF_COLOR_NORMAL; 290 291 if (ratio > grc_table[type][0]) 292 color = PERF_COLOR_RED; 293 else if (ratio > grc_table[type][1]) 294 color = PERF_COLOR_MAGENTA; 295 else if (ratio > grc_table[type][2]) 296 color = PERF_COLOR_YELLOW; 297 298 return color; 299 } 300 301 static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list, 302 const char *name) 303 { 304 struct perf_evsel *c2; 305 306 evlist__for_each_entry (evsel_list, c2) { 307 if (!strcasecmp(c2->name, name) && !c2->collect_stat) 308 return c2; 309 } 310 return NULL; 311 } 312 313 /* Mark MetricExpr target events and link events using them to them. */ 314 void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) 315 { 316 struct perf_evsel *counter, *leader, **metric_events, *oc; 317 bool found; 318 const char **metric_names; 319 int i; 320 int num_metric_names; 321 322 evlist__for_each_entry(evsel_list, counter) { 323 bool invalid = false; 324 325 leader = counter->leader; 326 if (!counter->metric_expr) 327 continue; 328 metric_events = counter->metric_events; 329 if (!metric_events) { 330 if (expr__find_other(counter->metric_expr, counter->name, 331 &metric_names, &num_metric_names) < 0) 332 continue; 333 334 metric_events = calloc(sizeof(struct perf_evsel *), 335 num_metric_names + 1); 336 if (!metric_events) 337 return; 338 counter->metric_events = metric_events; 339 } 340 341 for (i = 0; i < num_metric_names; i++) { 342 found = false; 343 if (leader) { 344 /* Search in group */ 345 for_each_group_member (oc, leader) { 346 if (!strcasecmp(oc->name, metric_names[i]) && 347 !oc->collect_stat) { 348 found = true; 349 break; 350 } 351 } 352 } 353 if (!found) { 354 /* Search ignoring groups */ 355 oc = perf_stat__find_event(evsel_list, metric_names[i]); 356 } 357 if (!oc) { 358 /* Deduping one is good enough to handle duplicated PMUs. */ 359 static char *printed; 360 361 /* 362 * Adding events automatically would be difficult, because 363 * it would risk creating groups that are not schedulable. 364 * perf stat doesn't understand all the scheduling constraints 365 * of events. So we ask the user instead to add the missing 366 * events. 367 */ 368 if (!printed || strcasecmp(printed, metric_names[i])) { 369 fprintf(stderr, 370 "Add %s event to groups to get metric expression for %s\n", 371 metric_names[i], 372 counter->name); 373 printed = strdup(metric_names[i]); 374 } 375 invalid = true; 376 continue; 377 } 378 metric_events[i] = oc; 379 oc->collect_stat = true; 380 } 381 metric_events[i] = NULL; 382 free(metric_names); 383 if (invalid) { 384 free(metric_events); 385 counter->metric_events = NULL; 386 counter->metric_expr = NULL; 387 } 388 } 389 } 390 391 static double runtime_stat_avg(struct runtime_stat *st, 392 enum stat_type type, int ctx, int cpu) 393 { 394 struct saved_value *v; 395 396 v = saved_value_lookup(NULL, cpu, false, type, ctx, st); 397 if (!v) 398 return 0.0; 399 400 return avg_stats(&v->stats); 401 } 402 403 static double runtime_stat_n(struct runtime_stat *st, 404 enum stat_type type, int ctx, int cpu) 405 { 406 struct saved_value *v; 407 408 v = saved_value_lookup(NULL, cpu, false, type, ctx, st); 409 if (!v) 410 return 0.0; 411 412 return v->stats.n; 413 } 414 415 static void print_stalled_cycles_frontend(struct perf_stat_config *config, 416 int cpu, 417 struct perf_evsel *evsel, double avg, 418 struct perf_stat_output_ctx *out, 419 struct runtime_stat *st) 420 { 421 double total, ratio = 0.0; 422 const char *color; 423 int ctx = evsel_context(evsel); 424 425 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 426 427 if (total) 428 ratio = avg / total * 100.0; 429 430 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); 431 432 if (ratio) 433 out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle", 434 ratio); 435 else 436 out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0); 437 } 438 439 static void print_stalled_cycles_backend(struct perf_stat_config *config, 440 int cpu, 441 struct perf_evsel *evsel, double avg, 442 struct perf_stat_output_ctx *out, 443 struct runtime_stat *st) 444 { 445 double total, ratio = 0.0; 446 const char *color; 447 int ctx = evsel_context(evsel); 448 449 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 450 451 if (total) 452 ratio = avg / total * 100.0; 453 454 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); 455 456 out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); 457 } 458 459 static void print_branch_misses(struct perf_stat_config *config, 460 int cpu, 461 struct perf_evsel *evsel, 462 double avg, 463 struct perf_stat_output_ctx *out, 464 struct runtime_stat *st) 465 { 466 double total, ratio = 0.0; 467 const char *color; 468 int ctx = evsel_context(evsel); 469 470 total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); 471 472 if (total) 473 ratio = avg / total * 100.0; 474 475 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 476 477 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio); 478 } 479 480 static void print_l1_dcache_misses(struct perf_stat_config *config, 481 int cpu, 482 struct perf_evsel *evsel, 483 double avg, 484 struct perf_stat_output_ctx *out, 485 struct runtime_stat *st) 486 487 { 488 double total, ratio = 0.0; 489 const char *color; 490 int ctx = evsel_context(evsel); 491 492 total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); 493 494 if (total) 495 ratio = avg / total * 100.0; 496 497 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 498 499 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); 500 } 501 502 static void print_l1_icache_misses(struct perf_stat_config *config, 503 int cpu, 504 struct perf_evsel *evsel, 505 double avg, 506 struct perf_stat_output_ctx *out, 507 struct runtime_stat *st) 508 509 { 510 double total, ratio = 0.0; 511 const char *color; 512 int ctx = evsel_context(evsel); 513 514 total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); 515 516 if (total) 517 ratio = avg / total * 100.0; 518 519 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 520 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); 521 } 522 523 static void print_dtlb_cache_misses(struct perf_stat_config *config, 524 int cpu, 525 struct perf_evsel *evsel, 526 double avg, 527 struct perf_stat_output_ctx *out, 528 struct runtime_stat *st) 529 { 530 double total, ratio = 0.0; 531 const char *color; 532 int ctx = evsel_context(evsel); 533 534 total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); 535 536 if (total) 537 ratio = avg / total * 100.0; 538 539 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 540 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); 541 } 542 543 static void print_itlb_cache_misses(struct perf_stat_config *config, 544 int cpu, 545 struct perf_evsel *evsel, 546 double avg, 547 struct perf_stat_output_ctx *out, 548 struct runtime_stat *st) 549 { 550 double total, ratio = 0.0; 551 const char *color; 552 int ctx = evsel_context(evsel); 553 554 total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); 555 556 if (total) 557 ratio = avg / total * 100.0; 558 559 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 560 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); 561 } 562 563 static void print_ll_cache_misses(struct perf_stat_config *config, 564 int cpu, 565 struct perf_evsel *evsel, 566 double avg, 567 struct perf_stat_output_ctx *out, 568 struct runtime_stat *st) 569 { 570 double total, ratio = 0.0; 571 const char *color; 572 int ctx = evsel_context(evsel); 573 574 total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); 575 576 if (total) 577 ratio = avg / total * 100.0; 578 579 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 580 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); 581 } 582 583 /* 584 * High level "TopDown" CPU core pipe line bottleneck break down. 585 * 586 * Basic concept following 587 * Yasin, A Top Down Method for Performance analysis and Counter architecture 588 * ISPASS14 589 * 590 * The CPU pipeline is divided into 4 areas that can be bottlenecks: 591 * 592 * Frontend -> Backend -> Retiring 593 * BadSpeculation in addition means out of order execution that is thrown away 594 * (for example branch mispredictions) 595 * Frontend is instruction decoding. 596 * Backend is execution, like computation and accessing data in memory 597 * Retiring is good execution that is not directly bottlenecked 598 * 599 * The formulas are computed in slots. 600 * A slot is an entry in the pipeline each for the pipeline width 601 * (for example a 4-wide pipeline has 4 slots for each cycle) 602 * 603 * Formulas: 604 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / 605 * TotalSlots 606 * Retiring = SlotsRetired / TotalSlots 607 * FrontendBound = FetchBubbles / TotalSlots 608 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound 609 * 610 * The kernel provides the mapping to the low level CPU events and any scaling 611 * needed for the CPU pipeline width, for example: 612 * 613 * TotalSlots = Cycles * 4 614 * 615 * The scaling factor is communicated in the sysfs unit. 616 * 617 * In some cases the CPU may not be able to measure all the formulas due to 618 * missing events. In this case multiple formulas are combined, as possible. 619 * 620 * Full TopDown supports more levels to sub-divide each area: for example 621 * BackendBound into computing bound and memory bound. For now we only 622 * support Level 1 TopDown. 623 */ 624 625 static double sanitize_val(double x) 626 { 627 if (x < 0 && x >= -0.02) 628 return 0.0; 629 return x; 630 } 631 632 static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) 633 { 634 return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); 635 } 636 637 static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) 638 { 639 double bad_spec = 0; 640 double total_slots; 641 double total; 642 643 total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - 644 runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + 645 runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); 646 647 total_slots = td_total_slots(ctx, cpu, st); 648 if (total_slots) 649 bad_spec = total / total_slots; 650 return sanitize_val(bad_spec); 651 } 652 653 static double td_retiring(int ctx, int cpu, struct runtime_stat *st) 654 { 655 double retiring = 0; 656 double total_slots = td_total_slots(ctx, cpu, st); 657 double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, 658 ctx, cpu); 659 660 if (total_slots) 661 retiring = ret_slots / total_slots; 662 return retiring; 663 } 664 665 static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) 666 { 667 double fe_bound = 0; 668 double total_slots = td_total_slots(ctx, cpu, st); 669 double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, 670 ctx, cpu); 671 672 if (total_slots) 673 fe_bound = fetch_bub / total_slots; 674 return fe_bound; 675 } 676 677 static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) 678 { 679 double sum = (td_fe_bound(ctx, cpu, st) + 680 td_bad_spec(ctx, cpu, st) + 681 td_retiring(ctx, cpu, st)); 682 if (sum == 0) 683 return 0; 684 return sanitize_val(1.0 - sum); 685 } 686 687 static void print_smi_cost(struct perf_stat_config *config, 688 int cpu, struct perf_evsel *evsel, 689 struct perf_stat_output_ctx *out, 690 struct runtime_stat *st) 691 { 692 double smi_num, aperf, cycles, cost = 0.0; 693 int ctx = evsel_context(evsel); 694 const char *color = NULL; 695 696 smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); 697 aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); 698 cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 699 700 if ((cycles == 0) || (aperf == 0)) 701 return; 702 703 if (smi_num) 704 cost = (aperf - cycles) / aperf * 100.00; 705 706 if (cost > 10) 707 color = PERF_COLOR_RED; 708 out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost); 709 out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num); 710 } 711 712 static void generic_metric(struct perf_stat_config *config, 713 const char *metric_expr, 714 struct perf_evsel **metric_events, 715 char *name, 716 const char *metric_name, 717 double avg, 718 int cpu, 719 struct perf_stat_output_ctx *out, 720 struct runtime_stat *st) 721 { 722 print_metric_t print_metric = out->print_metric; 723 struct parse_ctx pctx; 724 double ratio; 725 int i; 726 void *ctxp = out->ctx; 727 char *n, *pn; 728 729 expr__ctx_init(&pctx); 730 expr__add_id(&pctx, name, avg); 731 for (i = 0; metric_events[i]; i++) { 732 struct saved_value *v; 733 struct stats *stats; 734 double scale; 735 736 if (!strcmp(metric_events[i]->name, "duration_time")) { 737 stats = &walltime_nsecs_stats; 738 scale = 1e-9; 739 } else { 740 v = saved_value_lookup(metric_events[i], cpu, false, 741 STAT_NONE, 0, st); 742 if (!v) 743 break; 744 stats = &v->stats; 745 scale = 1.0; 746 } 747 748 n = strdup(metric_events[i]->name); 749 if (!n) 750 return; 751 /* 752 * This display code with --no-merge adds [cpu] postfixes. 753 * These are not supported by the parser. Remove everything 754 * after the space. 755 */ 756 pn = strchr(n, ' '); 757 if (pn) 758 *pn = 0; 759 expr__add_id(&pctx, n, avg_stats(stats)*scale); 760 } 761 if (!metric_events[i]) { 762 const char *p = metric_expr; 763 764 if (expr__parse(&ratio, &pctx, &p) == 0) 765 print_metric(config, ctxp, NULL, "%8.1f", 766 metric_name ? 767 metric_name : 768 out->force_header ? name : "", 769 ratio); 770 else 771 print_metric(config, ctxp, NULL, NULL, 772 out->force_header ? 773 (metric_name ? metric_name : name) : "", 0); 774 } else 775 print_metric(config, ctxp, NULL, NULL, "", 0); 776 777 for (i = 1; i < pctx.num_ids; i++) 778 free((void *)pctx.ids[i].name); 779 } 780 781 void perf_stat__print_shadow_stats(struct perf_stat_config *config, 782 struct perf_evsel *evsel, 783 double avg, int cpu, 784 struct perf_stat_output_ctx *out, 785 struct rblist *metric_events, 786 struct runtime_stat *st) 787 { 788 void *ctxp = out->ctx; 789 print_metric_t print_metric = out->print_metric; 790 double total, ratio = 0.0, total2; 791 const char *color = NULL; 792 int ctx = evsel_context(evsel); 793 struct metric_event *me; 794 int num = 1; 795 796 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 797 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 798 799 if (total) { 800 ratio = avg / total; 801 print_metric(config, ctxp, NULL, "%7.2f ", 802 "insn per cycle", ratio); 803 } else { 804 print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); 805 } 806 807 total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, 808 ctx, cpu); 809 810 total = max(total, runtime_stat_avg(st, 811 STAT_STALLED_CYCLES_BACK, 812 ctx, cpu)); 813 814 if (total && avg) { 815 out->new_line(config, ctxp); 816 ratio = total / avg; 817 print_metric(config, ctxp, NULL, "%7.2f ", 818 "stalled cycles per insn", 819 ratio); 820 } else if (have_frontend_stalled) { 821 print_metric(config, ctxp, NULL, NULL, 822 "stalled cycles per insn", 0); 823 } 824 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { 825 if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) 826 print_branch_misses(config, cpu, evsel, avg, out, st); 827 else 828 print_metric(config, ctxp, NULL, NULL, "of all branches", 0); 829 } else if ( 830 evsel->attr.type == PERF_TYPE_HW_CACHE && 831 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | 832 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 833 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 834 835 if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) 836 print_l1_dcache_misses(config, cpu, evsel, avg, out, st); 837 else 838 print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0); 839 } else if ( 840 evsel->attr.type == PERF_TYPE_HW_CACHE && 841 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | 842 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 843 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 844 845 if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) 846 print_l1_icache_misses(config, cpu, evsel, avg, out, st); 847 else 848 print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0); 849 } else if ( 850 evsel->attr.type == PERF_TYPE_HW_CACHE && 851 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | 852 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 853 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 854 855 if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) 856 print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); 857 else 858 print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0); 859 } else if ( 860 evsel->attr.type == PERF_TYPE_HW_CACHE && 861 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | 862 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 863 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 864 865 if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) 866 print_itlb_cache_misses(config, cpu, evsel, avg, out, st); 867 else 868 print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0); 869 } else if ( 870 evsel->attr.type == PERF_TYPE_HW_CACHE && 871 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | 872 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 873 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 874 875 if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) 876 print_ll_cache_misses(config, cpu, evsel, avg, out, st); 877 else 878 print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); 879 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { 880 total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); 881 882 if (total) 883 ratio = avg * 100 / total; 884 885 if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) 886 print_metric(config, ctxp, NULL, "%8.3f %%", 887 "of all cache refs", ratio); 888 else 889 print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); 890 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { 891 print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); 892 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { 893 print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); 894 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { 895 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 896 897 if (total) { 898 ratio = avg / total; 899 print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio); 900 } else { 901 print_metric(config, ctxp, NULL, NULL, "Ghz", 0); 902 } 903 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { 904 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 905 906 if (total) 907 print_metric(config, ctxp, NULL, 908 "%7.2f%%", "transactional cycles", 909 100.0 * (avg / total)); 910 else 911 print_metric(config, ctxp, NULL, NULL, "transactional cycles", 912 0); 913 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { 914 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 915 total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); 916 917 if (total2 < avg) 918 total2 = avg; 919 if (total) 920 print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles", 921 100.0 * ((total2-avg) / total)); 922 else 923 print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); 924 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { 925 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 926 ctx, cpu); 927 928 if (avg) 929 ratio = total / avg; 930 931 if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) 932 print_metric(config, ctxp, NULL, "%8.0f", 933 "cycles / transaction", ratio); 934 else 935 print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 936 0); 937 } else if (perf_stat_evsel__is(evsel, ELISION_START)) { 938 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 939 ctx, cpu); 940 941 if (avg) 942 ratio = total / avg; 943 944 print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); 945 } else if (perf_evsel__is_clock(evsel)) { 946 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) 947 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", 948 avg / (ratio * evsel->scale)); 949 else 950 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); 951 } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { 952 double fe_bound = td_fe_bound(ctx, cpu, st); 953 954 if (fe_bound > 0.2) 955 color = PERF_COLOR_RED; 956 print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", 957 fe_bound * 100.); 958 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { 959 double retiring = td_retiring(ctx, cpu, st); 960 961 if (retiring > 0.7) 962 color = PERF_COLOR_GREEN; 963 print_metric(config, ctxp, color, "%8.1f%%", "retiring", 964 retiring * 100.); 965 } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { 966 double bad_spec = td_bad_spec(ctx, cpu, st); 967 968 if (bad_spec > 0.1) 969 color = PERF_COLOR_RED; 970 print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", 971 bad_spec * 100.); 972 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { 973 double be_bound = td_be_bound(ctx, cpu, st); 974 const char *name = "backend bound"; 975 static int have_recovery_bubbles = -1; 976 977 /* In case the CPU does not support topdown-recovery-bubbles */ 978 if (have_recovery_bubbles < 0) 979 have_recovery_bubbles = pmu_have_event("cpu", 980 "topdown-recovery-bubbles"); 981 if (!have_recovery_bubbles) 982 name = "backend bound/bad spec"; 983 984 if (be_bound > 0.2) 985 color = PERF_COLOR_RED; 986 if (td_total_slots(ctx, cpu, st) > 0) 987 print_metric(config, ctxp, color, "%8.1f%%", name, 988 be_bound * 100.); 989 else 990 print_metric(config, ctxp, NULL, NULL, name, 0); 991 } else if (evsel->metric_expr) { 992 generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, 993 evsel->metric_name, avg, cpu, out, st); 994 } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { 995 char unit = 'M'; 996 char unit_buf[10]; 997 998 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 999 1000 if (total) 1001 ratio = 1000.0 * avg / total; 1002 if (ratio < 0.001) { 1003 ratio *= 1000; 1004 unit = 'K'; 1005 } 1006 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 1007 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); 1008 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { 1009 print_smi_cost(config, cpu, evsel, out, st); 1010 } else { 1011 num = 0; 1012 } 1013 1014 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { 1015 struct metric_expr *mexp; 1016 1017 list_for_each_entry (mexp, &me->head, nd) { 1018 if (num++ > 0) 1019 out->new_line(config, ctxp); 1020 generic_metric(config, mexp->metric_expr, mexp->metric_events, 1021 evsel->name, mexp->metric_name, 1022 avg, cpu, out, st); 1023 } 1024 } 1025 if (num == 0) 1026 print_metric(config, ctxp, NULL, NULL, NULL, 0); 1027 } 1028