// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static bool have_frontend_stalled;

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	runtime_stat__init(&rt_stat);
}
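
/*
 * Events can be opened with different exclusion modes (kernel, user, hv,
 * host, idle).  Fold the attr exclude bits into a small ctx bitmask so
 * that shadow counts are only combined with counts collected in the same
 * context when ratios are computed.
 */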
static int evsel_context(struct perf_evsel *evsel)
{
	int ctx = 0;

	if (evsel->attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}
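
/*
 * Record one counter value in the saved_value keyed by (type, ctx, cpu),
 * creating the node on first use.  These running stats feed the ratio
 * computations done at print time via runtime_stat_avg()/runtime_stat_n().
 */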
static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);
	u64 count_ns = count;

	count *= counter->scale;

	if (perf_evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		struct saved_value *v = saved_value_lookup(counter, cpu, true,
							   STAT_NONE, 0, st);
		/* The lookup can fail on allocation failure. */
		if (v)
			update_stats(&v->stats, count);
	}
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
						const char *name)
{
	struct perf_evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name))
			return c2;
	}
	return NULL;
}

/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
	struct perf_evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(sizeof(struct perf_evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i])) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}

static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu,
					  struct perf_evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu,
					 struct perf_evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}
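
/*
 * The print_*_misses() helpers below all follow the same pattern: express
 * the miss count as a percentage of the matching reference count (branches,
 * L1-dcache accesses, ...) and color it using the GRC_CACHE_MISSES
 * thresholds from get_ratio_color().
 */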
static void print_branch_misses(struct perf_stat_config *config,
				int cpu,
				struct perf_evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu,
				  struct perf_evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory.
 * Retiring is good execution that is not directly bottlenecked.
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline each for the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
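
/*
 * Illustrative example with made-up numbers (not taken from any particular
 * CPU): a 4-wide core measured over 1000 cycles has TotalSlots = 4000.
 * With SlotsIssued = 3000, SlotsRetired = 2500, RecoveryBubbles = 100 and
 * FetchBubbles = 600, the Level 1 breakdown is:
 *
 *   BadSpeculation = (3000 - 2500 + 100) / 4000  = 0.15
 *   Retiring       = 2500 / 4000                 = 0.625
 *   FrontendBound  = 600 / 4000                  = 0.15
 *   BackendBound   = 1.0 - 0.15 - 0.625 - 0.15   = 0.075
 */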

static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}

static void print_smi_cost(struct perf_stat_config *config,
			   int cpu, struct perf_evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
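
/*
 * Evaluate a MetricExpr for one event: seed the expression parser with the
 * triggering event's average and with the averages of all referenced events
 * (duration_time comes from walltime_nsecs_stats, scaled to seconds), then
 * print the resulting value.  If a referenced event has no saved value the
 * metric is printed as empty.
 */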
static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct perf_evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio;
	int i;
	void *ctxp = out->ctx;

	expr__ctx_init(&pctx);
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		double scale;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;
		}
		expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats) * scale);
	}
	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0)
			print_metric(config, ctxp, NULL, "%8.1f",
				     metric_name ?
				     metric_name :
				     out->force_header ? name : "",
				     ratio);
		else
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
	} else
		print_metric(config, ctxp, NULL, NULL, "", 0);
}
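
/*
 * Print the shadow metric(s) that go with one counter: hard-coded ratios
 * such as IPC, miss rates, GHz and TopDown Level 1, a generic per-second
 * rate as fallback, plus any metric expressions attached to the event via
 * the metric_events rblist.
 */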
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn",
				     ratio);
		} else if (have_frontend_stalled) {
			print_metric(config, ctxp, NULL, NULL,
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_L1D |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_L1I |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_DTLB |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_ITLB |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_LL |
				       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
				       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				     100.0 * ((total2 - avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				     0);
NULL, "cycles / transaction", 919 0); 920 } else if (perf_stat_evsel__is(evsel, ELISION_START)) { 921 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 922 ctx, cpu); 923 924 if (avg) 925 ratio = total / avg; 926 927 print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); 928 } else if (perf_evsel__is_clock(evsel)) { 929 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) 930 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", 931 avg / (ratio * evsel->scale)); 932 else 933 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); 934 } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { 935 double fe_bound = td_fe_bound(ctx, cpu, st); 936 937 if (fe_bound > 0.2) 938 color = PERF_COLOR_RED; 939 print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", 940 fe_bound * 100.); 941 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { 942 double retiring = td_retiring(ctx, cpu, st); 943 944 if (retiring > 0.7) 945 color = PERF_COLOR_GREEN; 946 print_metric(config, ctxp, color, "%8.1f%%", "retiring", 947 retiring * 100.); 948 } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { 949 double bad_spec = td_bad_spec(ctx, cpu, st); 950 951 if (bad_spec > 0.1) 952 color = PERF_COLOR_RED; 953 print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", 954 bad_spec * 100.); 955 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { 956 double be_bound = td_be_bound(ctx, cpu, st); 957 const char *name = "backend bound"; 958 static int have_recovery_bubbles = -1; 959 960 /* In case the CPU does not support topdown-recovery-bubbles */ 961 if (have_recovery_bubbles < 0) 962 have_recovery_bubbles = pmu_have_event("cpu", 963 "topdown-recovery-bubbles"); 964 if (!have_recovery_bubbles) 965 name = "backend bound/bad spec"; 966 967 if (be_bound > 0.2) 968 color = PERF_COLOR_RED; 969 if (td_total_slots(ctx, cpu, st) > 0) 970 print_metric(config, ctxp, color, "%8.1f%%", name, 971 be_bound * 100.); 972 else 973 print_metric(config, ctxp, NULL, NULL, name, 0); 974 } else if (evsel->metric_expr) { 975 generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, 976 evsel->metric_name, avg, cpu, out, st); 977 } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { 978 char unit = 'M'; 979 char unit_buf[10]; 980 981 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 982 983 if (total) 984 ratio = 1000.0 * avg / total; 985 if (ratio < 0.001) { 986 ratio *= 1000; 987 unit = 'K'; 988 } 989 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 990 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); 991 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { 992 print_smi_cost(config, cpu, evsel, out, st); 993 } else { 994 num = 0; 995 } 996 997 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { 998 struct metric_expr *mexp; 999 1000 list_for_each_entry (mexp, &me->head, nd) { 1001 if (num++ > 0) 1002 out->new_line(config, ctxp); 1003 generic_metric(config, mexp->metric_expr, mexp->metric_events, 1004 evsel->name, mexp->metric_name, 1005 avg, cpu, out, st); 1006 } 1007 } 1008 if (num == 0) 1009 print_metric(config, ctxp, NULL, NULL, NULL, 0); 1010 } 1011