// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include <linux/zalloc.h>

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static bool have_frontend_stalled;

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
	u64 metric_total;
	int metric_other;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

static struct saved_value *saved_value_lookup(struct evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	runtime_stat__init(&rt_stat);
}

static int evsel_context(struct evsel *evsel)
{
	int ctx = 0;

	if (evsel->core.attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->core.attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->core.attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->core.attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->core.attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}

static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);
	u64 count_ns = count;
	struct saved_value *v;

	count *= counter->scale;

	if (perf_evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
		update_stats(&v->stats, count);
		if (counter->metric_leader)
			v->metric_total += count;
	} else if (counter->metric_leader) {
		v = saved_value_lookup(counter->metric_leader,
				       cpu, true, STAT_NONE, 0, st);
		v->metric_total += count;
		v->metric_other++;
	}
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
					   const char *name)
{
	struct evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
			return c2;
	}
	return NULL;
}

/* Mark MetricExpr target events and link them to the events whose metric expressions use them. */
void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
	struct evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(sizeof(struct evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i]) &&
					    !oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}

static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu,
					  struct evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu,
					 struct evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(struct perf_stat_config *config,
				int cpu,
				struct evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)

{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)

{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu,
				  struct evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck break down.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline each for the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
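 *
 * As a worked example with purely hypothetical counts (assuming a 4-wide
 * pipeline active for 1000 cycles):
 *
 *	TotalSlots	= 1000 * 4 = 4000
 *	SlotsIssued	= 3000
 *	SlotsRetired	= 2400
 *	RecoveryBubbles	= 200
 *	FetchBubbles	= 400
 *
 *	BadSpeculation	= ((3000 - 2400) + 200) / 4000	= 0.20
 *	Retiring	= 2400 / 4000			= 0.60
 *	FrontendBound	= 400 / 4000			= 0.10
 *	BackendBound	= 1.0 - 0.20 - 0.60 - 0.10	= 0.10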
 */

static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}

static void print_smi_cost(struct perf_stat_config *config,
			   int cpu, struct evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}

static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   const char *metric_unit,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio, scale;
	int i;
	void *ctxp = out->ctx;
	char *n, *pn;

	expr__ctx_init(&pctx);
	/* Must be first id entry */
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		u64 metric_total = 0;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;

			if (v->metric_other)
				metric_total = v->metric_total;
		}

		n = strdup(metric_events[i]->name);
		if (!n)
			return;
		/*
		 * This display code with --no-merge adds [cpu] postfixes.
		 * These are not supported by the parser. Remove everything
		 * after the space.
		 */
		pn = strchr(n, ' ');
		if (pn)
			*pn = 0;

		if (metric_total)
			expr__add_id(&pctx, n, metric_total);
		else
			expr__add_id(&pctx, n, avg_stats(stats)*scale);
	}

	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0) {
			char *unit;
			char metric_bf[64];

			if (metric_unit && metric_name) {
				if (perf_pmu__convert_scale(metric_unit,
					&unit, &scale) >= 0) {
					ratio *= scale;
				}

				scnprintf(metric_bf, sizeof(metric_bf),
					  "%s %s", unit, metric_name);
				print_metric(config, ctxp, NULL, "%8.1f",
					     metric_bf, ratio);
			} else {
				print_metric(config, ctxp, NULL, "%8.1f",
					metric_name ?
					metric_name :
					out->force_header ? name : "",
					ratio);
			}
		} else {
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
		}
	} else
		print_metric(config, ctxp, NULL, NULL, "", 0);

	for (i = 1; i < pctx.num_ids; i++)
		zfree(&pctx.ids[i].name);
}

void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn",
				     ratio);
		} else if (have_frontend_stalled) {
			out->new_line(config, ctxp);
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				     100.0 * ((total2-avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__is_clock(evsel)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
			     fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
			     retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
			     bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(config, ctxp, color, "%8.1f%%", name,
				     be_bound * 100.);
		else
			print_metric(config, ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
			       evsel->metric_name, NULL, avg, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(config, cpu, evsel, out, st);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(config, ctxp);
			generic_metric(config, mexp->metric_expr, mexp->metric_events,
				       evsel->name, mexp->metric_name,
				       mexp->metric_unit, avg, cpu, out, st);
		}
	}
	if (num == 0)
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}
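
/*
 * Typical call sequence, as a simplified sketch based only on the functions
 * defined above (the actual callers live elsewhere in the perf stat code and
 * pass real config/output contexts):
 *
 *	perf_stat__init_shadow_stats();
 *	// for each value read for a counter on a given CPU:
 *	perf_stat__update_shadow_stats(counter, count, cpu, &rt_stat);
 *	// when printing the line for that counter:
 *	perf_stat__print_shadow_stats(config, counter, avg, cpu,
 *				      &out, metric_events, &rt_stat);
 *	// between repeated runs:
 *	perf_stat__reset_shadow_stats();
 */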