#!/usr/bin/env python3
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
import argparse
import math
import os
from typing import Optional
from common_metrics import Cycles
# NOTE: `max` below is metric.max and shadows the builtin for the whole file;
# every max(...) call in this file builds a metric expression.
from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric,
                    JsonEncodeMetricGroupDescriptions, Literal, LoadEvents,
                    Metric, MetricGroup, Select)

# Global command line arguments.
_args = None
# Zen generation number. main() overwrites this from the "model" argument
# before any of the metric builders below are called.
_zen_model: int = 1
interval_sec = Event("duration_time")
ins = Event("instructions")
cycles = Event("cycles")
# Number of CPU cycles scaled for SMT.
smt_cycles = Select(cycles / 2, Literal("#smt_on"), cycles)


def AmdBr() -> MetricGroup:
    """Build the "lpm_br" group: a breakdown of retired branch instructions.

    Each nested helper builds one sub-group (total, taken, conditional,
    fused, far). Where Event() is given several names they are alternative
    spellings of the same counter -- presumably the first one known to the
    loaded model is used; confirm against metric.Event.
    """

    def Total() -> MetricGroup:
        br = Event("ex_ret_brn")
        br_m_all = Event("ex_ret_brn_misp")
        br_clr = Event("ex_ret_brn_cond_misp",
                       "ex_ret_msprd_brnch_instr_dir_msmtch",
                       "ex_ret_brn_resync")

        br_r = d_ratio(br, interval_sec)
        ins_r = d_ratio(ins, br)
        misp_r = d_ratio(br_m_all, br)
        clr_r = d_ratio(br_clr, interval_sec)

        return MetricGroup("lpm_br_total", [
            Metric("lpm_br_total_retired",
                   "The number of branch instructions retired per second.",
                   br_r, "insn/s"),
            Metric(
                "lpm_br_total_mispred",
                "The number of branch instructions retired, of any type, that "
                "were not correctly predicted as a percentage of all branch "
                "instructions.",
                misp_r, "100%"),
            Metric("lpm_br_total_insn_between_branches",
                   "The number of instructions divided by the number of "
                   "branches.",
                   ins_r, "insn"),
            Metric("lpm_br_total_insn_fe_resteers",
                   "The number of resync branches per second.", clr_r,
                   "req/s")
        ])

    def Taken() -> MetricGroup:
        br = Event("ex_ret_brn_tkn")
        br_m_tk = Event("ex_ret_brn_tkn_misp")
        br_r = d_ratio(br, interval_sec)
        ins_r = d_ratio(ins, br)
        misp_r = d_ratio(br_m_tk, br)
        return MetricGroup("lpm_br_taken", [
            Metric("lpm_br_taken_retired",
                   "The number of taken branches that were retired per "
                   "second.",
                   br_r, "insn/s"),
            Metric(
                "lpm_br_taken_mispred",
                "The number of retired taken branch instructions that were "
                "mispredicted as a percentage of all taken branches.",
                misp_r, "100%"),
            Metric(
                "lpm_br_taken_insn_between_branches",
                "The number of instructions divided by the number of taken "
                "branches.",
                ins_r, "insn"),
        ])

    def Conditional() -> MetricGroup:
        br = Event("ex_ret_brn_cond", "ex_ret_cond")
        br_r = d_ratio(br, interval_sec)
        ins_r = d_ratio(ins, br)

        metrics = [
            Metric("lpm_br_cond_retired",
                   "Retired conditional branch instructions.",
                   br_r, "insn/s"),
            Metric("lpm_br_cond_insn_between_branches",
                   "The number of instructions divided by the number of "
                   "conditional branches.",
                   ins_r, "insn"),
        ]
        # The conditional mispredict metric is only emitted for model 2.
        if _zen_model == 2:
            br_m_cond = Event("ex_ret_cond_misp")
            misp_r = d_ratio(br_m_cond, br)
            metrics += [
                Metric("lpm_br_cond_mispred",
                       "Retired conditional branch instructions mispredicted "
                       "as a percentage of all conditional branches.",
                       misp_r, "100%"),
            ]

        return MetricGroup("lpm_br_cond", metrics)

    def Fused() -> MetricGroup:
        br = Event("ex_ret_fused_instr", "ex_ret_fus_brnch_inst")
        br_r = d_ratio(br, interval_sec)
        ins_r = d_ratio(ins, br)
        # The group was previously named "lpm_br_cond" (copy-paste from
        # Conditional()), which filed the fused metrics under the wrong group.
        return MetricGroup("lpm_br_fused", [
            Metric("lpm_br_fused_retired",
                   "Retired fused branch instructions per second.", br_r,
                   "insn/s"),
            Metric(
                "lpm_br_fused_insn_between_branches",
                "The number of instructions divided by the number of fused "
                "branches.",
                ins_r, "insn"),
        ])

    def Far() -> MetricGroup:
        br = Event("ex_ret_brn_far")
        br_r = d_ratio(br, interval_sec)
        ins_r = d_ratio(ins, br)
        return MetricGroup("lpm_br_far", [
            Metric("lpm_br_far_retired",
                   "Retired far control transfers per second.",
                   br_r, "insn/s"),
            Metric(
                "lpm_br_far_insn_between_branches",
                "The number of instructions divided by the number of far "
                "branches.",
                ins_r, "insn"),
        ])

    return MetricGroup("lpm_br",
                       [Total(), Taken(), Conditional(), Fused(), Far()],
                       description="breakdown of retired branch instructions")


def AmdCtxSw() -> MetricGroup:
    """Build the "lpm_cs" group: context switch rate and per-switch ratios."""
    cs = Event("context\\-switches")
    metrics = [
        Metric("lpm_cs_rate", "Context switches per second",
               d_ratio(cs, interval_sec), "ctxsw/s")
    ]

    ev = Event("instructions")
    metrics.append(Metric("lpm_cs_instr", "Instructions per context switch",
                          d_ratio(ev, cs), "instr/cs"))

    ev = Event("cycles")
    metrics.append(Metric("lpm_cs_cycles", "Cycles per context switch",
                          d_ratio(ev, cs), "cycles/cs"))

    ev = Event("ls_dispatch.pure_ld", "ls_dispatch.ld_dispatch")
    metrics.append(Metric("lpm_cs_loads", "Loads per context switch",
                          d_ratio(ev, cs), "loads/cs"))

    ev = Event("ls_dispatch.pure_st", "ls_dispatch.store_dispatch")
    metrics.append(Metric("lpm_cs_stores", "Stores per context switch",
                          d_ratio(ev, cs), "stores/cs"))

    ev = Event("ex_ret_brn_tkn")
    metrics.append(Metric("lpm_cs_br_taken",
                          "Branches taken per context switch",
                          d_ratio(ev, cs), "br_taken/cs"))

    return MetricGroup("lpm_cs", metrics,
                       description=("Number of context switches per second, "
                                    "instructions retired & core cycles "
                                    "between context switches"))


def AmdDtlb() -> Optional[MetricGroup]:
    """Build the "lpm_dtlb" group of data TLB metrics.

    Returns:
        The metric group, or None when _zen_model is 4 or greater.
    """
    if _zen_model >= 4:
        return None

    # From here on _zen_model <= 3, so the events below need no further
    # model check; only the coalesced-page events depend on the model.
    has_coalesced = _zen_model >= 2

    d_dat = Event("ls_dc_accesses")
    d_h4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit")
    d_hcoal = (Event("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit")
               if has_coalesced else 0)
    d_h2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit")
    d_h1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit")

    d_m4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss")
    d_mcoal = (Event("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss")
               if has_coalesced else 0)
    d_m2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss")
    d_m1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss")

    d_w0 = Event("ls_tablewalker.dc_type0")
    d_w1 = Event("ls_tablewalker.dc_type1")
    walks = d_w0 + d_w1
    walks_r = d_ratio(walks, interval_sec)
    ins_w = d_ratio(ins, walks)
    l1 = d_dat
    l1_r = d_ratio(l1, interval_sec)
    l2_hits = d_h4k + d_hcoal + d_h2m + d_h1g
    l2_miss = d_m4k + d_mcoal + d_m2m + d_m1g
    l2_r = d_ratio(l2_hits + l2_miss, interval_sec)
    # L1 misses are counted as every L2 lookup plus the page table walks.
    l1_miss = l2_hits + l2_miss + walks
    # Clamp at zero so the hit count cannot go negative.
    l1_hits = max(l1 - l1_miss, 0)
    ins_l = d_ratio(ins, l1_miss)

    return MetricGroup("lpm_dtlb", [
        MetricGroup("lpm_dtlb_ov", [
            Metric("lpm_dtlb_ov_insn_bt_l1_miss",
                   "DTLB overview: instructions between l1 misses.", ins_l,
                   "insns"),
            Metric("lpm_dtlb_ov_insn_bt_walks",
                   "DTLB overview: instructions between dtlb page table "
                   "walks.",
                   ins_w, "insns"),
        ]),
        MetricGroup("lpm_dtlb_l1", [
            Metric("lpm_dtlb_l1_hits",
                   "DTLB L1 hits as percentage of all DTLB L1 accesses.",
                   d_ratio(l1_hits, l1), "100%"),
            Metric("lpm_dtlb_l1_miss",
                   "DTLB L1 misses as percentage of all DTLB L1 accesses.",
                   d_ratio(l1_miss, l1), "100%"),
            Metric("lpm_dtlb_l1_reqs", "DTLB L1 accesses per second.", l1_r,
                   "insns/s"),
        ]),
        MetricGroup("lpm_dtlb_l2", [
            Metric("lpm_dtlb_l2_hits",
                   "DTLB L2 hits as percentage of all DTLB L2 accesses.",
                   d_ratio(l2_hits, l2_hits + l2_miss), "100%"),
            Metric("lpm_dtlb_l2_miss",
                   "DTLB L2 misses as percentage of all DTLB L2 accesses.",
                   d_ratio(l2_miss, l2_hits + l2_miss), "100%"),
            Metric("lpm_dtlb_l2_reqs", "DTLB L2 accesses per second.", l2_r,
                   "insns/s"),
            MetricGroup("lpm_dtlb_l2_4kb", [
                Metric(
                    "lpm_dtlb_l2_4kb_hits",
                    "DTLB L2 4kb page size hits as percentage of all DTLB "
                    "L2 4kb accesses.",
                    d_ratio(d_h4k, d_h4k + d_m4k), "100%"),
                Metric(
                    "lpm_dtlb_l2_4kb_miss",
                    "DTLB L2 4kb page size misses as percentage of all DTLB "
                    "L2 4kb accesses.",
                    d_ratio(d_m4k, d_h4k + d_m4k), "100%")
            ]),
            MetricGroup("lpm_dtlb_l2_coalesced", [
                Metric(
                    "lpm_dtlb_l2_coal_hits",
                    "DTLB L2 coalesced page (16kb) hits as percentage of all "
                    "DTLB L2 coalesced accesses.",
                    d_ratio(d_hcoal, d_hcoal + d_mcoal), "100%"),
                Metric(
                    "lpm_dtlb_l2_coal_miss",
                    "DTLB L2 coalesced page (16kb) misses as percentage of "
                    "all DTLB L2 coalesced accesses.",
                    d_ratio(d_mcoal, d_hcoal + d_mcoal), "100%")
            ]),
            MetricGroup("lpm_dtlb_l2_2mb", [
                Metric(
                    "lpm_dtlb_l2_2mb_hits",
                    "DTLB L2 2mb page size hits as percentage of all DTLB "
                    "L2 2mb accesses.",
                    d_ratio(d_h2m, d_h2m + d_m2m), "100%"),
                Metric(
                    "lpm_dtlb_l2_2mb_miss",
                    "DTLB L2 2mb page size misses as percentage of all DTLB "
                    "L2 2mb accesses.",
                    d_ratio(d_m2m, d_h2m + d_m2m), "100%")
            ]),
            MetricGroup("lpm_dtlb_l2_1g", [
                Metric(
                    "lpm_dtlb_l2_1g_hits",
                    "DTLB L2 1gb page size hits as percentage of all DTLB "
                    "L2 1gb accesses.",
                    d_ratio(d_h1g, d_h1g + d_m1g), "100%"),
                Metric(
                    "lpm_dtlb_l2_1g_miss",
                    "DTLB L2 1gb page size misses as percentage of all DTLB "
                    "L2 1gb accesses.",
                    d_ratio(d_m1g, d_h1g + d_m1g), "100%")
            ]),
        ]),
        MetricGroup("lpm_dtlb_walks", [
            Metric("lpm_dtlb_walks_reqs", "DTLB page table walks per second.",
                   walks_r, "walks/s"),
        ]),
    ], description="Data TLB metrics")


def AmdItlb() -> MetricGroup:
    """Build the "lpm_itlb" group: an instruction TLB breakdown.

    The L1 sub-group is only built when _zen_model <= 3; otherwise None is
    placed in the group's metric list.
    NOTE(review): this relies on MetricGroup tolerating None entries --
    confirm against metric.MetricGroup.
    """
    l2h = Event("bp_l1_tlb_miss_l2_tlb_hit", "bp_l1_tlb_miss_l2_hit")
    l2m = Event("l2_itlb_misses")
    l2r = l2h + l2m

    itlb_l1_mg = None
    # Every L2 ITLB request is treated as an L1 ITLB miss.
    l1m = l2r
    if _zen_model <= 3:
        l1r = Event("ic_fw32")
        l1h = max(l1r - l1m, 0)
        itlb_l1_mg = MetricGroup("lpm_itlb_l1", [
            Metric("lpm_itlb_l1_hits",
                   "L1 ITLB hits as a percentage of L1 ITLB accesses.",
                   d_ratio(l1h, l1h + l1m), "100%"),
            Metric("lpm_itlb_l1_miss",
                   "L1 ITLB misses as a percentage of L1 ITLB accesses.",
                   d_ratio(l1m, l1h + l1m), "100%"),
            Metric("lpm_itlb_l1_reqs",
                   "The number of 32B fetch windows transferred from IC pipe "
                   "to DE instruction decoder per second.",
                   d_ratio(l1r, interval_sec),
                   "windows/sec"),
        ])

    return MetricGroup("lpm_itlb", [
        MetricGroup("lpm_itlb_ov", [
            Metric("lpm_itlb_ov_insn_bt_l1_miss",
                   "Number of instructions between l1 misses",
                   d_ratio(ins, l1m), "insns"),
            Metric("lpm_itlb_ov_insn_bt_l2_miss",
                   "Number of instructions between l2 misses",
                   d_ratio(ins, l2m), "insns"),
        ]),
        itlb_l1_mg,
        MetricGroup("lpm_itlb_l2", [
            Metric("lpm_itlb_l2_hits",
                   "L2 ITLB hits as a percentage of all L2 ITLB accesses.",
                   d_ratio(l2h, l2r), "100%"),
            Metric("lpm_itlb_l2_miss",
                   "L2 ITLB misses as a percentage of all L2 ITLB accesses.",
                   d_ratio(l2m, l2r), "100%"),
            Metric("lpm_itlb_l2_reqs", "ITLB accesses per second.",
                   d_ratio(l2r, interval_sec), "accesses/sec"),
        ]),
    ], description="Instruction TLB breakdown")


def AmdLdSt() -> MetricGroup:
    """Build the "lpm_ldst" group: a breakdown of load/store instructions."""
    ldst_ld = Event("ls_dispatch.pure_ld", "ls_dispatch.ld_dispatch")
    ldst_st = Event("ls_dispatch.pure_st", "ls_dispatch.store_dispatch")
    # The same dispatch events with a cmask=N modifier appended.
    ldst_ldc1 = Event(f"{ldst_ld}/cmask=1/")
    ldst_stc1 = Event(f"{ldst_st}/cmask=1/")
    ldst_ldc2 = Event(f"{ldst_ld}/cmask=2/")
    ldst_stc2 = Event(f"{ldst_st}/cmask=2/")
    ldst_ldc3 = Event(f"{ldst_ld}/cmask=3/")
    ldst_stc3 = Event(f"{ldst_st}/cmask=3/")
    ldst_cyc = Event("ls_not_halted_cyc")

    ld_rate = d_ratio(ldst_ld, interval_sec)
    st_rate = d_ratio(ldst_st, interval_sec)

    # Differences of adjacent cmask counts, clamped at zero; the cmask=3
    # count is used as-is for the last bucket.
    ld_v1 = max(ldst_ldc1 - ldst_ldc2, 0)
    ld_v2 = max(ldst_ldc2 - ldst_ldc3, 0)
    ld_v3 = ldst_ldc3

    st_v1 = max(ldst_stc1 - ldst_stc2, 0)
    st_v2 = max(ldst_stc2 - ldst_stc3, 0)
    st_v3 = ldst_stc3

    return MetricGroup("lpm_ldst", [
        MetricGroup("lpm_ldst_total", [
            Metric("lpm_ldst_total_ld",
                   "Number of loads dispatched per second.",
                   ld_rate, "insns/sec"),
            Metric("lpm_ldst_total_st",
                   "Number of stores dispatched per second.",
                   st_rate, "insns/sec"),
        ]),
        MetricGroup("lpm_ldst_percent_insn", [
            Metric("lpm_ldst_percent_insn_ld",
                   "Load instructions as a percentage of all instructions.",
                   d_ratio(ldst_ld, ins), "100%"),
            Metric("lpm_ldst_percent_insn_st",
                   "Store instructions as a percentage of all instructions.",
                   d_ratio(ldst_st, ins), "100%"),
        ]),
        MetricGroup("lpm_ldst_ret_loads_per_cycle", [
            Metric(
                "lpm_ldst_ret_loads_per_cycle_1",
                "Load instructions retiring in 1 cycle as a percentage of "
                "all unhalted cycles.",
                d_ratio(ld_v1, ldst_cyc), "100%"),
            Metric(
                "lpm_ldst_ret_loads_per_cycle_2",
                "Load instructions retiring in 2 cycles as a percentage of "
                "all unhalted cycles.",
                d_ratio(ld_v2, ldst_cyc), "100%"),
            Metric(
                "lpm_ldst_ret_loads_per_cycle_3",
                "Load instructions retiring in 3 or more cycles as a "
                "percentage of all unhalted cycles.",
                d_ratio(ld_v3, ldst_cyc), "100%"),
        ]),
        MetricGroup("lpm_ldst_ret_stores_per_cycle", [
            Metric(
                "lpm_ldst_ret_stores_per_cycle_1",
                "Store instructions retiring in 1 cycle as a percentage of "
                "all unhalted cycles.",
                d_ratio(st_v1, ldst_cyc), "100%"),
            Metric(
                "lpm_ldst_ret_stores_per_cycle_2",
                "Store instructions retiring in 2 cycles as a percentage of "
                "all unhalted cycles.",
                d_ratio(st_v2, ldst_cyc), "100%"),
            Metric(
                "lpm_ldst_ret_stores_per_cycle_3",
                "Store instructions retiring in 3 or more cycles as a "
                "percentage of all unhalted cycles.",
                d_ratio(st_v3, ldst_cyc), "100%"),
        ]),
        MetricGroup("lpm_ldst_insn_bt", [
            Metric("lpm_ldst_insn_bt_ld",
                   "Number of instructions between loads.",
                   d_ratio(ins, ldst_ld), "insns"),
            Metric("lpm_ldst_insn_bt_st",
                   "Number of instructions between stores.",
                   d_ratio(ins, ldst_st), "insns"),
        ])
    ], description="Breakdown of load/store instructions")

def AmdUpc() -> Metric:
    """Build the "lpm_upc" metric: retired micro-ops per SMT-scaled cycle."""
    ops = Event("ex_ret_ops", "ex_ret_cops")
    upc = d_ratio(ops, smt_cycles)
    return Metric("lpm_upc",
                  "Micro-ops retired per core cycle (higher is better)",
                  upc, "uops/cycle")


def Idle() -> Metric:
    """Build the "lpm_idle" metric from the msr/mperf/ and msr/tsc/ events.

    The metric is (tsc - mperf), clamped at zero, divided by tsc.
    """
    cyc = Event("msr/mperf/")
    tsc = Event("msr/tsc/")
    # `max` is the metric-expression max imported from metric, not the
    # builtin.
    low = max(tsc - cyc, 0)
    return Metric(
        "lpm_idle",
        "Percentage of total wallclock cycles where CPUs are in low power "
        "state (C1 or deeper sleep state)",
        d_ratio(low, tsc), "100%")


def Rapl() -> MetricGroup:
    """Processor socket power consumption estimate.

    Use events from the running average power limit (RAPL) driver.
    """
    # Watts = joules/second
    # Currently only energy-pkg is supported by AMD:
    # https://lore.kernel.org/lkml/20220105185659.643355-1-eranian@google.com/
    pkg = Event("power/energy\\-pkg/")
    # Evaluate to NaN when the energy-pkg event is not available.
    cond_pkg = Select(pkg, has_event(pkg), math.nan)
    # Numerically equal to 2**-32.
    # NOTE(review): presumably converts the raw counter value to joules --
    # confirm against the RAPL driver's reported scale.
    scale = 2.3283064365386962890625e-10
    metrics = [
        Metric("lpm_cpu_power_pkg", "",
               d_ratio(cond_pkg * scale, interval_sec), "Watts"),
    ]

    return MetricGroup("lpm_cpu_power", metrics,
                       description="Processor socket power consumption "
                       "estimates")


def UncoreL3() -> MetricGroup:
    """Build the "lpm_l3" group: L3 access rate, hit rate and miss rate."""
    acc = Event("l3_lookup_state.all_coherent_accesses_to_l3",
                "l3_lookup_state.all_l3_req_typs")
    miss = Event("l3_lookup_state.l3_miss",
                 "l3_comb_clstr_state.request_miss")
    # Take the larger of accesses and misses so that hits = acc - miss
    # cannot go negative.
    acc = max(acc, miss)
    hits = acc - miss

    return MetricGroup("lpm_l3", [
        Metric("lpm_l3_accesses", "L3 victim cache accesses",
               d_ratio(acc, interval_sec), "accesses/sec"),
        Metric("lpm_l3_hits", "L3 victim cache hit rate",
               d_ratio(hits, acc), "100%"),
        Metric("lpm_l3_miss", "L3 victim cache miss rate",
               d_ratio(miss, acc), "100%"),
    ], description="L3 cache breakdown per CCX")


def main() -> None:
    """Parse the command line, build all metric groups and print JSON.

    Sets the module globals _args and _zen_model. A model argument whose
    text after the first six characters is not an integer is reported as a
    usage error through argparse instead of surfacing as a ValueError
    traceback.
    """
    global _args
    global _zen_model

    def dir_path(path: str) -> str:
        """Validate path is a directory for argparse."""
        if os.path.isdir(path):
            return path
        raise argparse.ArgumentTypeError(
            f"'{path}' is not a valid directory")

    parser = argparse.ArgumentParser(description="AMD perf json generator")
    parser.add_argument(
        "-metricgroups", help="Generate metricgroups data",
        action='store_true')
    parser.add_argument("model", help="e.g. amdzen[123]")
    parser.add_argument(
        'events_path',
        type=dir_path,
        help='Root of tree containing architecture directories containing '
        'json files'
    )
    _args = parser.parse_args()

    # Everything after the six-character prefix (e.g. "amdzen") is the Zen
    # generation number. Validate it before touching the file system so a
    # mistyped model yields a usage message rather than a traceback.
    try:
        _zen_model = int(_args.model[6:])
    except ValueError:
        parser.error(
            f"invalid model '{_args.model}': expected e.g. amdzen1")

    directory = f"{_args.events_path}/x86/{_args.model}/"
    LoadEvents(directory)

    # NOTE(review): AmdDtlb() may return None; this relies on MetricGroup
    # tolerating None entries -- confirm against metric.MetricGroup.
    all_metrics = MetricGroup("", [
        AmdBr(),
        AmdCtxSw(),
        AmdDtlb(),
        AmdItlb(),
        AmdLdSt(),
        AmdUpc(),
        Cycles(),
        Idle(),
        Rapl(),
        UncoreL3(),
    ])

    if _args.metricgroups:
        print(JsonEncodeMetricGroupDescriptions(all_metrics))
    else:
        print(JsonEncodeMetric(all_metrics))


if __name__ == '__main__':
    main()