1 #include <stdio.h> 2 #include "evsel.h" 3 #include "stat.h" 4 #include "color.h" 5 6 enum { 7 CTX_BIT_USER = 1 << 0, 8 CTX_BIT_KERNEL = 1 << 1, 9 CTX_BIT_HV = 1 << 2, 10 CTX_BIT_HOST = 1 << 3, 11 CTX_BIT_IDLE = 1 << 4, 12 CTX_BIT_MAX = 1 << 5, 13 }; 14 15 #define NUM_CTX CTX_BIT_MAX 16 17 static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 18 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; 19 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; 20 static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; 21 static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; 22 static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; 23 static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; 24 static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; 25 static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; 26 static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; 27 static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; 28 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; 29 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; 30 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; 31 32 struct stats walltime_nsecs_stats; 33 34 static int evsel_context(struct perf_evsel *evsel) 35 { 36 int ctx = 0; 37 38 if (evsel->attr.exclude_kernel) 39 ctx |= CTX_BIT_KERNEL; 40 if (evsel->attr.exclude_user) 41 ctx |= CTX_BIT_USER; 42 if (evsel->attr.exclude_hv) 43 ctx |= CTX_BIT_HV; 44 if (evsel->attr.exclude_host) 45 ctx |= CTX_BIT_HOST; 46 if (evsel->attr.exclude_idle) 47 ctx |= CTX_BIT_IDLE; 48 49 return ctx; 50 } 51 52 void perf_stat__reset_shadow_stats(void) 53 { 54 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); 55 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); 56 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); 57 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); 58 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); 59 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); 60 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); 61 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); 62 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); 63 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); 64 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); 65 memset(runtime_cycles_in_tx_stats, 0, 66 sizeof(runtime_cycles_in_tx_stats)); 67 memset(runtime_transaction_stats, 0, 68 sizeof(runtime_transaction_stats)); 69 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); 70 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 71 } 72 73 /* 74 * Update various tracking values we maintain to print 75 * more semantic information such as miss/hit ratios, 76 * instruction rates, etc: 77 */ 78 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, 79 int cpu) 80 { 81 int ctx = evsel_context(counter); 82 83 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) 84 update_stats(&runtime_nsecs_stats[cpu], count[0]); 85 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 86 update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); 87 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) 88 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); 89 else if (perf_stat_evsel__is(counter, TRANSACTION_START)) 90 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); 91 else if (perf_stat_evsel__is(counter, ELISION_START)) 92 update_stats(&runtime_elision_stats[ctx][cpu], count[0]); 93 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 94 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); 95 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 96 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); 97 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 98 update_stats(&runtime_branches_stats[ctx][cpu], count[0]); 99 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 100 update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); 101 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 102 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); 103 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) 104 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); 105 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) 106 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); 107 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) 108 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); 109 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 110 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); 111 } 112 113 /* used for get_ratio_color() */ 114 enum grc_type { 115 GRC_STALLED_CYCLES_FE, 116 GRC_STALLED_CYCLES_BE, 117 GRC_CACHE_MISSES, 118 GRC_MAX_NR 119 }; 120 121 static const char *get_ratio_color(enum grc_type type, double ratio) 122 { 123 static const double grc_table[GRC_MAX_NR][3] = { 124 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, 125 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, 126 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, 127 }; 128 const char *color = PERF_COLOR_NORMAL; 129 130 if (ratio > grc_table[type][0]) 131 color = PERF_COLOR_RED; 132 else if (ratio > grc_table[type][1]) 133 color = PERF_COLOR_MAGENTA; 134 else if (ratio > grc_table[type][2]) 135 color = PERF_COLOR_YELLOW; 136 137 return color; 138 } 139 140 static void print_stalled_cycles_frontend(FILE *out, int cpu, 141 struct perf_evsel *evsel 142 __maybe_unused, double avg) 143 { 144 double total, ratio = 0.0; 145 const char *color; 146 int ctx = evsel_context(evsel); 147 148 total = avg_stats(&runtime_cycles_stats[ctx][cpu]); 149 150 if (total) 151 ratio = avg / total * 100.0; 152 153 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); 154 155 fprintf(out, " # "); 156 color_fprintf(out, color, "%6.2f%%", ratio); 157 fprintf(out, " frontend cycles idle "); 158 } 159 160 static void print_stalled_cycles_backend(FILE *out, int cpu, 161 struct perf_evsel *evsel 162 __maybe_unused, double avg) 163 { 164 double total, ratio = 0.0; 165 const char *color; 166 int ctx = evsel_context(evsel); 167 168 total = avg_stats(&runtime_cycles_stats[ctx][cpu]); 169 170 if (total) 171 ratio = avg / total * 100.0; 172 173 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); 174 175 fprintf(out, " # "); 176 color_fprintf(out, color, "%6.2f%%", ratio); 177 fprintf(out, " backend cycles idle "); 178 } 179 180 static void print_branch_misses(FILE *out, int cpu, 181 struct perf_evsel *evsel __maybe_unused, 182 double avg) 183 { 184 double total, ratio = 0.0; 185 const char *color; 186 int ctx = evsel_context(evsel); 187 188 total = avg_stats(&runtime_branches_stats[ctx][cpu]); 189 190 if (total) 191 ratio = avg / total * 100.0; 192 193 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 194 195 fprintf(out, " # "); 196 color_fprintf(out, color, "%6.2f%%", ratio); 197 fprintf(out, " of all branches "); 198 } 199 200 static void print_l1_dcache_misses(FILE *out, int cpu, 201 struct perf_evsel *evsel __maybe_unused, 202 double avg) 203 { 204 double total, ratio = 0.0; 205 const char *color; 206 int ctx = evsel_context(evsel); 207 208 total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); 209 210 if (total) 211 ratio = avg / total * 100.0; 212 213 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 214 215 fprintf(out, " # "); 216 color_fprintf(out, color, "%6.2f%%", ratio); 217 fprintf(out, " of all L1-dcache hits "); 218 } 219 220 static void print_l1_icache_misses(FILE *out, int cpu, 221 struct perf_evsel *evsel __maybe_unused, 222 double avg) 223 { 224 double total, ratio = 0.0; 225 const char *color; 226 int ctx = evsel_context(evsel); 227 228 total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); 229 230 if (total) 231 ratio = avg / total * 100.0; 232 233 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 234 235 fprintf(out, " # "); 236 color_fprintf(out, color, "%6.2f%%", ratio); 237 fprintf(out, " of all L1-icache hits "); 238 } 239 240 static void print_dtlb_cache_misses(FILE *out, int cpu, 241 struct perf_evsel *evsel __maybe_unused, 242 double avg) 243 { 244 double total, ratio = 0.0; 245 const char *color; 246 int ctx = evsel_context(evsel); 247 248 total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); 249 250 if (total) 251 ratio = avg / total * 100.0; 252 253 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 254 255 fprintf(out, " # "); 256 color_fprintf(out, color, "%6.2f%%", ratio); 257 fprintf(out, " of all dTLB cache hits "); 258 } 259 260 static void print_itlb_cache_misses(FILE *out, int cpu, 261 struct perf_evsel *evsel __maybe_unused, 262 double avg) 263 { 264 double total, ratio = 0.0; 265 const char *color; 266 int ctx = evsel_context(evsel); 267 268 total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); 269 270 if (total) 271 ratio = avg / total * 100.0; 272 273 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 274 275 fprintf(out, " # "); 276 color_fprintf(out, color, "%6.2f%%", ratio); 277 fprintf(out, " of all iTLB cache hits "); 278 } 279 280 static void print_ll_cache_misses(FILE *out, int cpu, 281 struct perf_evsel *evsel __maybe_unused, 282 double avg) 283 { 284 double total, ratio = 0.0; 285 const char *color; 286 int ctx = evsel_context(evsel); 287 288 total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); 289 290 if (total) 291 ratio = avg / total * 100.0; 292 293 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 294 295 fprintf(out, " # "); 296 color_fprintf(out, color, "%6.2f%%", ratio); 297 fprintf(out, " of all LL-cache hits "); 298 } 299 300 void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, 301 double avg, int cpu, enum aggr_mode aggr) 302 { 303 double total, ratio = 0.0, total2; 304 int ctx = evsel_context(evsel); 305 306 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 307 total = avg_stats(&runtime_cycles_stats[ctx][cpu]); 308 if (total) { 309 ratio = avg / total; 310 fprintf(out, " # %5.2f insns per cycle ", ratio); 311 } else { 312 fprintf(out, " "); 313 } 314 total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); 315 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); 316 317 if (total && avg) { 318 ratio = total / avg; 319 fprintf(out, "\n"); 320 if (aggr == AGGR_NONE) 321 fprintf(out, " "); 322 fprintf(out, " # %5.2f stalled cycles per insn", ratio); 323 } 324 325 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && 326 runtime_branches_stats[ctx][cpu].n != 0) { 327 print_branch_misses(out, cpu, evsel, avg); 328 } else if ( 329 evsel->attr.type == PERF_TYPE_HW_CACHE && 330 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | 331 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 332 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 333 runtime_l1_dcache_stats[ctx][cpu].n != 0) { 334 print_l1_dcache_misses(out, cpu, evsel, avg); 335 } else if ( 336 evsel->attr.type == PERF_TYPE_HW_CACHE && 337 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | 338 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 339 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 340 runtime_l1_icache_stats[ctx][cpu].n != 0) { 341 print_l1_icache_misses(out, cpu, evsel, avg); 342 } else if ( 343 evsel->attr.type == PERF_TYPE_HW_CACHE && 344 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | 345 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 346 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 347 runtime_dtlb_cache_stats[ctx][cpu].n != 0) { 348 print_dtlb_cache_misses(out, cpu, evsel, avg); 349 } else if ( 350 evsel->attr.type == PERF_TYPE_HW_CACHE && 351 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | 352 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 353 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 354 runtime_itlb_cache_stats[ctx][cpu].n != 0) { 355 print_itlb_cache_misses(out, cpu, evsel, avg); 356 } else if ( 357 evsel->attr.type == PERF_TYPE_HW_CACHE && 358 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | 359 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 360 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 361 runtime_ll_cache_stats[ctx][cpu].n != 0) { 362 print_ll_cache_misses(out, cpu, evsel, avg); 363 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && 364 runtime_cacherefs_stats[ctx][cpu].n != 0) { 365 total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); 366 367 if (total) 368 ratio = avg * 100 / total; 369 370 fprintf(out, " # %8.3f %% of all cache refs ", ratio); 371 372 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { 373 print_stalled_cycles_frontend(out, cpu, evsel, avg); 374 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { 375 print_stalled_cycles_backend(out, cpu, evsel, avg); 376 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { 377 total = avg_stats(&runtime_nsecs_stats[cpu]); 378 379 if (total) { 380 ratio = avg / total; 381 fprintf(out, " # %8.3f GHz ", ratio); 382 } else { 383 fprintf(out, " "); 384 } 385 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { 386 total = avg_stats(&runtime_cycles_stats[ctx][cpu]); 387 if (total) 388 fprintf(out, 389 " # %5.2f%% transactional cycles ", 390 100.0 * (avg / total)); 391 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { 392 total = avg_stats(&runtime_cycles_stats[ctx][cpu]); 393 total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); 394 if (total2 < avg) 395 total2 = avg; 396 if (total) 397 fprintf(out, 398 " # %5.2f%% aborted cycles ", 399 100.0 * ((total2-avg) / total)); 400 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && 401 avg > 0 && 402 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { 403 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); 404 405 if (total) 406 ratio = total / avg; 407 408 fprintf(out, " # %8.0f cycles / transaction ", ratio); 409 } else if (perf_stat_evsel__is(evsel, ELISION_START) && 410 avg > 0 && 411 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { 412 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); 413 414 if (total) 415 ratio = total / avg; 416 417 fprintf(out, " # %8.0f cycles / elision ", ratio); 418 } else if (runtime_nsecs_stats[cpu].n != 0) { 419 char unit = 'M'; 420 421 total = avg_stats(&runtime_nsecs_stats[cpu]); 422 423 if (total) 424 ratio = 1000.0 * avg / total; 425 if (ratio < 0.001) { 426 ratio *= 1000; 427 unit = 'K'; 428 } 429 430 fprintf(out, " # %8.3f %c/sec ", ratio, unit); 431 } else { 432 fprintf(out, " "); 433 } 434 } 435