1#!/usr/bin/env python3 2# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 3import argparse 4import json 5import math 6import os 7import re 8from typing import Optional 9from common_metrics import Cycles 10from metric import (d_ratio, has_event, max, source_count, CheckPmu, Event, 11 JsonEncodeMetric, JsonEncodeMetricGroupDescriptions, 12 Literal, LoadEvents, Metric, MetricConstraint, MetricGroup, 13 MetricRef, Select) 14 15# Global command line arguments. 16_args = None 17interval_sec = Event("duration_time") 18 19 20def Idle() -> Metric: 21 cyc = Event("msr/mperf/") 22 tsc = Event("msr/tsc/") 23 low = max(tsc - cyc, 0) 24 return Metric( 25 "lpm_idle", 26 "Percentage of total wallclock cycles where CPUs are in low power state (C1 or deeper sleep state)", 27 d_ratio(low, tsc), "100%") 28 29 30def Rapl() -> MetricGroup: 31 """Processor power consumption estimate. 32 33 Use events from the running average power limit (RAPL) driver. 34 """ 35 # Watts = joules/second 36 pkg = Event("power/energy\\-pkg/") 37 cond_pkg = Select(pkg, has_event(pkg), math.nan) 38 cores = Event("power/energy\\-cores/") 39 cond_cores = Select(cores, has_event(cores), math.nan) 40 ram = Event("power/energy\\-ram/") 41 cond_ram = Select(ram, has_event(ram), math.nan) 42 gpu = Event("power/energy\\-gpu/") 43 cond_gpu = Select(gpu, has_event(gpu), math.nan) 44 psys = Event("power/energy\\-psys/") 45 cond_psys = Select(psys, has_event(psys), math.nan) 46 scale = 2.3283064365386962890625e-10 47 metrics = [ 48 Metric("lpm_cpu_power_pkg", "", 49 d_ratio(cond_pkg * scale, interval_sec), "Watts"), 50 Metric("lpm_cpu_power_cores", "", 51 d_ratio(cond_cores * scale, interval_sec), "Watts"), 52 Metric("lpm_cpu_power_ram", "", 53 d_ratio(cond_ram * scale, interval_sec), "Watts"), 54 Metric("lpm_cpu_power_gpu", "", 55 d_ratio(cond_gpu * scale, interval_sec), "Watts"), 56 Metric("lpm_cpu_power_psys", "", 57 d_ratio(cond_psys * scale, interval_sec), "Watts"), 58 ] 59 60 return MetricGroup("lpm_cpu_power", metrics, 61 description="Running Average Power Limit (RAPL) power consumption estimates") 62 63 64def Smi() -> MetricGroup: 65 pmu = "<cpu_core or cpu_atom>" if CheckPmu("cpu_core") else "cpu" 66 aperf = Event('msr/aperf/') 67 cycles = Event('cycles') 68 smi_num = Event('msr/smi/') 69 smi_cycles = Select(Select((aperf - cycles) / aperf, smi_num > 0, 0), 70 has_event(aperf), 71 0) 72 return MetricGroup('smi', [ 73 Metric('smi_num', 'Number of SMI interrupts.', 74 Select(smi_num, has_event(smi_num), 0), 'SMI#'), 75 # Note, the smi_cycles "Event" is really a reference to the metric. 76 Metric('smi_cycles', 77 'Percentage of cycles spent in System Management Interrupts. ' 78 f'Requires /sys/bus/event_source/devices/{pmu}/freeze_on_smi to be 1.', 79 smi_cycles, '100%', threshold=(MetricRef('smi_cycles') > 0.10)) 80 ], description='System Management Interrupt metrics') 81 82 83def Tsx() -> Optional[MetricGroup]: 84 pmu = "cpu_core" if CheckPmu("cpu_core") else "cpu" 85 cycles = Event('cycles') 86 cycles_in_tx = Event(f'{pmu}/cycles\\-t/') 87 cycles_in_tx_cp = Event(f'{pmu}/cycles\\-ct/') 88 try: 89 # Test if the tsx event is present in the json, prefer the 90 # sysfs version so that we can detect its presence at runtime. 91 transaction_start = Event("RTM_RETIRED.START") 92 transaction_start = Event(f'{pmu}/tx\\-start/') 93 except: 94 return None 95 96 elision_start = None 97 try: 98 # Elision start isn't supported by all models, but we'll not 99 # generate the tsx_cycles_per_elision metric in that 100 # case. Again, prefer the sysfs encoding of the event. 101 elision_start = Event("HLE_RETIRED.START") 102 elision_start = Event(f'{pmu}/el\\-start/') 103 except: 104 pass 105 106 return MetricGroup('transaction', [ 107 Metric('tsx_transactional_cycles', 108 'Percentage of cycles within a transaction region.', 109 Select(cycles_in_tx / cycles, has_event(cycles_in_tx), 0), 110 '100%'), 111 Metric('tsx_aborted_cycles', 'Percentage of cycles in aborted transactions.', 112 Select(max(cycles_in_tx - cycles_in_tx_cp, 0) / cycles, 113 has_event(cycles_in_tx), 114 0), 115 '100%'), 116 Metric('tsx_cycles_per_transaction', 117 'Number of cycles within a transaction divided by the number of transactions.', 118 Select(cycles_in_tx / transaction_start, 119 has_event(cycles_in_tx), 120 0), 121 "cycles / transaction"), 122 Metric('tsx_cycles_per_elision', 123 'Number of cycles within a transaction divided by the number of elisions.', 124 Select(cycles_in_tx / elision_start, 125 has_event(elision_start), 126 0), 127 "cycles / elision") if elision_start else None, 128 ], description="Breakdown of transactional memory statistics") 129 130 131def IntelBr(): 132 ins = Event("instructions") 133 134 def Total() -> MetricGroup: 135 br_all = Event("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY") 136 br_m_all = Event("BR_MISP_RETIRED.ALL_BRANCHES", 137 "BR_INST_RETIRED.MISPRED", 138 "BR_MISP_EXEC.ANY") 139 br_clr = None 140 try: 141 br_clr = Event("BACLEARS.ANY", "BACLEARS.ALL") 142 except: 143 pass 144 145 br_r = d_ratio(br_all, interval_sec) 146 ins_r = d_ratio(ins, br_all) 147 misp_r = d_ratio(br_m_all, br_all) 148 clr_r = d_ratio(br_clr, interval_sec) if br_clr else None 149 150 return MetricGroup("lpm_br_total", [ 151 Metric("lpm_br_total_retired", 152 "The number of branch instructions retired per second.", br_r, 153 "insn/s"), 154 Metric( 155 "lpm_br_total_mispred", 156 "The number of branch instructions retired, of any type, that were " 157 "not correctly predicted as a percentage of all branch instrucions.", 158 misp_r, "100%"), 159 Metric("lpm_br_total_insn_between_branches", 160 "The number of instructions divided by the number of branches.", 161 ins_r, "insn"), 162 Metric("lpm_br_total_insn_fe_resteers", 163 "The number of resync branches per second.", clr_r, "req/s" 164 ) if clr_r else None 165 ]) 166 167 def Taken() -> MetricGroup: 168 br_all = Event("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY") 169 br_m_tk = None 170 try: 171 br_m_tk = Event("BR_MISP_RETIRED.NEAR_TAKEN", 172 "BR_MISP_RETIRED.TAKEN_JCC", 173 "BR_INST_RETIRED.MISPRED_TAKEN") 174 except: 175 pass 176 br_r = d_ratio(br_all, interval_sec) 177 ins_r = d_ratio(ins, br_all) 178 misp_r = d_ratio(br_m_tk, br_all) if br_m_tk else None 179 return MetricGroup("lpm_br_taken", [ 180 Metric("lpm_br_taken_retired", 181 "The number of taken branches that were retired per second.", 182 br_r, "insn/s"), 183 Metric( 184 "lpm_br_taken_mispred", 185 "The number of retired taken branch instructions that were " 186 "mispredicted as a percentage of all taken branches.", misp_r, 187 "100%") if misp_r else None, 188 Metric( 189 "lpm_br_taken_insn_between_branches", 190 "The number of instructions divided by the number of taken branches.", 191 ins_r, "insn"), 192 ]) 193 194 def Conditional() -> Optional[MetricGroup]: 195 try: 196 br_cond = Event("BR_INST_RETIRED.COND", 197 "BR_INST_RETIRED.CONDITIONAL", 198 "BR_INST_RETIRED.TAKEN_JCC") 199 br_m_cond = Event("BR_MISP_RETIRED.COND", 200 "BR_MISP_RETIRED.CONDITIONAL", 201 "BR_MISP_RETIRED.TAKEN_JCC") 202 except: 203 return None 204 205 br_cond_nt = None 206 br_m_cond_nt = None 207 try: 208 br_cond_nt = Event("BR_INST_RETIRED.COND_NTAKEN") 209 br_m_cond_nt = Event("BR_MISP_RETIRED.COND_NTAKEN") 210 except: 211 pass 212 br_r = d_ratio(br_cond, interval_sec) 213 ins_r = d_ratio(ins, br_cond) 214 misp_r = d_ratio(br_m_cond, br_cond) 215 taken_metrics = [ 216 Metric("lpm_br_cond_retired", "Retired conditional branch instructions.", 217 br_r, "insn/s"), 218 Metric("lpm_br_cond_insn_between_branches", 219 "The number of instructions divided by the number of conditional " 220 "branches.", ins_r, "insn"), 221 Metric("lpm_br_cond_mispred", 222 "Retired conditional branch instructions mispredicted as a " 223 "percentage of all conditional branches.", misp_r, "100%"), 224 ] 225 if not br_m_cond_nt: 226 return MetricGroup("lpm_br_cond", taken_metrics) 227 228 br_r = d_ratio(br_cond_nt, interval_sec) 229 ins_r = d_ratio(ins, br_cond_nt) 230 misp_r = d_ratio(br_m_cond_nt, br_cond_nt) 231 232 not_taken_metrics = [ 233 Metric("lpm_br_cond_retired", "Retired conditional not taken branch instructions.", 234 br_r, "insn/s"), 235 Metric("lpm_br_cond_insn_between_branches", 236 "The number of instructions divided by the number of not taken conditional " 237 "branches.", ins_r, "insn"), 238 Metric("lpm_br_cond_mispred", 239 "Retired not taken conditional branch instructions mispredicted as a " 240 "percentage of all not taken conditional branches.", misp_r, "100%"), 241 ] 242 return MetricGroup("lpm_br_cond", [ 243 MetricGroup("lpm_br_cond_nt", not_taken_metrics), 244 MetricGroup("lpm_br_cond_tkn", taken_metrics), 245 ]) 246 247 def Far() -> Optional[MetricGroup]: 248 try: 249 br_far = Event("BR_INST_RETIRED.FAR_BRANCH") 250 except: 251 return None 252 253 br_r = d_ratio(br_far, interval_sec) 254 ins_r = d_ratio(ins, br_far) 255 return MetricGroup("lpm_br_far", [ 256 Metric("lpm_br_far_retired", "Retired far control transfers per second.", 257 br_r, "insn/s"), 258 Metric( 259 "lpm_br_far_insn_between_branches", 260 "The number of instructions divided by the number of far branches.", 261 ins_r, "insn"), 262 ]) 263 264 return MetricGroup("lpm_br", [Total(), Taken(), Conditional(), Far()], 265 description="breakdown of retired branch instructions") 266 267 268def IntelCtxSw() -> MetricGroup: 269 cs = Event("context\\-switches") 270 metrics = [ 271 Metric("lpm_cs_rate", "Context switches per second", 272 d_ratio(cs, interval_sec), "ctxsw/s") 273 ] 274 275 ev = Event("instructions") 276 metrics.append(Metric("lpm_cs_instr", "Instructions per context switch", 277 d_ratio(ev, cs), "instr/cs")) 278 279 ev = Event("cycles") 280 metrics.append(Metric("lpm_cs_cycles", "Cycles per context switch", 281 d_ratio(ev, cs), "cycles/cs")) 282 283 try: 284 ev = Event("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS") 285 metrics.append(Metric("lpm_cs_loads", "Loads per context switch", 286 d_ratio(ev, cs), "loads/cs")) 287 except: 288 pass 289 290 try: 291 ev = Event("MEM_INST_RETIRED.ALL_STORES", 292 "MEM_UOPS_RETIRED.ALL_STORES") 293 metrics.append(Metric("lpm_cs_stores", "Stores per context switch", 294 d_ratio(ev, cs), "stores/cs")) 295 except: 296 pass 297 298 try: 299 ev = Event("BR_INST_RETIRED.NEAR_TAKEN", "BR_INST_RETIRED.TAKEN_JCC") 300 metrics.append(Metric("lpm_cs_br_taken", "Branches taken per context switch", 301 d_ratio(ev, cs), "br_taken/cs")) 302 except: 303 pass 304 305 try: 306 l2_misses = (Event("L2_RQSTS.DEMAND_DATA_RD_MISS") + 307 Event("L2_RQSTS.RFO_MISS") + 308 Event("L2_RQSTS.CODE_RD_MISS")) 309 try: 310 l2_misses += Event("L2_RQSTS.HWPF_MISS", 311 "L2_RQSTS.L2_PF_MISS", "L2_RQSTS.PF_MISS") 312 except: 313 pass 314 315 metrics.append(Metric("lpm_cs_l2_misses", "L2 misses per context switch", 316 d_ratio(l2_misses, cs), "l2_misses/cs")) 317 except: 318 pass 319 320 return MetricGroup("lpm_cs", metrics, 321 description=("Number of context switches per second, instructions " 322 "retired & core cycles between context switches")) 323 324 325def IntelFpu() -> Optional[MetricGroup]: 326 cyc = Event("cycles") 327 try: 328 s_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_SINGLE", 329 "SIMD_INST_RETIRED.SCALAR_SINGLE") 330 except: 331 return None 332 d_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", 333 "SIMD_INST_RETIRED.SCALAR_DOUBLE") 334 s_128 = Event("FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", 335 "SIMD_INST_RETIRED.PACKED_SINGLE") 336 337 flop = s_64 + d_64 + 4 * s_128 338 339 d_128 = None 340 s_256 = None 341 d_256 = None 342 s_512 = None 343 d_512 = None 344 try: 345 d_128 = Event("FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE") 346 flop += 2 * d_128 347 s_256 = Event("FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE") 348 flop += 8 * s_256 349 d_256 = Event("FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE") 350 flop += 4 * d_256 351 s_512 = Event("FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE") 352 flop += 16 * s_512 353 d_512 = Event("FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE") 354 flop += 8 * d_512 355 except: 356 pass 357 358 f_assist = Event("ASSISTS.FP", "FP_ASSIST.ANY", "FP_ASSIST.S") 359 if f_assist in [ 360 "ASSISTS.FP", 361 "FP_ASSIST.S", 362 ]: 363 f_assist += "/cmask=1/" 364 365 flop_r = d_ratio(flop, interval_sec) 366 flop_c = d_ratio(flop, cyc) 367 nmi_constraint = MetricConstraint.GROUPED_EVENTS 368 if f_assist.name == "ASSISTS.FP": # Icelake+ 369 nmi_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI 370 371 def FpuMetrics(group: str, fl: Optional[Event], mult: int, desc: str) -> Optional[MetricGroup]: 372 if not fl: 373 return None 374 375 f = fl * mult 376 fl_r = d_ratio(f, interval_sec) 377 r_s = d_ratio(fl, interval_sec) 378 return MetricGroup(group, [ 379 Metric(f"{group}_of_total", desc + " floating point operations per second", 380 d_ratio(f, flop), "100%"), 381 Metric(f"{group}_flops", desc + " floating point operations per second", 382 fl_r, "flops/s"), 383 Metric(f"{group}_ops", desc + " operations per second", 384 r_s, "ops/s"), 385 ]) 386 387 return MetricGroup("lpm_fpu", [ 388 MetricGroup("lpm_fpu_total", [ 389 Metric("lpm_fpu_total_flops", "Floating point operations per second", 390 flop_r, "flops/s"), 391 Metric("lpm_fpu_total_flopc", "Floating point operations per cycle", 392 flop_c, "flops/cycle", constraint=nmi_constraint), 393 ]), 394 MetricGroup("lpm_fpu_64", [ 395 FpuMetrics("lpm_fpu_64_single", s_64, 1, "64-bit single"), 396 FpuMetrics("lpm_fpu_64_double", d_64, 1, "64-bit double"), 397 ]), 398 MetricGroup("lpm_fpu_128", [ 399 FpuMetrics("lpm_fpu_128_single", s_128, 400 4, "128-bit packed single"), 401 FpuMetrics("lpm_fpu_128_double", d_128, 402 2, "128-bit packed double"), 403 ]), 404 MetricGroup("lpm_fpu_256", [ 405 FpuMetrics("lpm_fpu_256_single", s_256, 406 8, "128-bit packed single"), 407 FpuMetrics("lpm_fpu_256_double", d_256, 408 4, "128-bit packed double"), 409 ]), 410 MetricGroup("lpm_fpu_512", [ 411 FpuMetrics("lpm_fpu_512_single", s_512, 412 16, "128-bit packed single"), 413 FpuMetrics("lpm_fpu_512_double", d_512, 414 8, "128-bit packed double"), 415 ]), 416 Metric("lpm_fpu_assists", "FP assists as a percentage of cycles", 417 d_ratio(f_assist, cyc), "100%"), 418 ]) 419 420 421def IntelIlp() -> MetricGroup: 422 tsc = Event("msr/tsc/") 423 c0 = Event("msr/mperf/") 424 low = tsc - c0 425 inst_ret = Event("INST_RETIRED.ANY_P") 426 inst_ret_c = [Event(f"{inst_ret.name}/cmask={x}/") for x in range(1, 6)] 427 core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY", 428 "CPU_CLK_UNHALTED.DISTRIBUTED", 429 "cycles") 430 ilp = [d_ratio(max(inst_ret_c[x] - inst_ret_c[x + 1], 0), core_cycles) 431 for x in range(0, 4)] 432 ilp.append(d_ratio(inst_ret_c[4], core_cycles)) 433 ilp0 = 1 434 for x in ilp: 435 ilp0 -= x 436 return MetricGroup("lpm_ilp", [ 437 Metric("lpm_ilp_idle", "Lower power cycles as a percentage of all cycles", 438 d_ratio(low, tsc), "100%"), 439 Metric("lpm_ilp_inst_ret_0", 440 "Instructions retired in 0 cycles as a percentage of all cycles", 441 ilp0, "100%"), 442 Metric("lpm_ilp_inst_ret_1", 443 "Instructions retired in 1 cycles as a percentage of all cycles", 444 ilp[0], "100%"), 445 Metric("lpm_ilp_inst_ret_2", 446 "Instructions retired in 2 cycles as a percentage of all cycles", 447 ilp[1], "100%"), 448 Metric("lpm_ilp_inst_ret_3", 449 "Instructions retired in 3 cycles as a percentage of all cycles", 450 ilp[2], "100%"), 451 Metric("lpm_ilp_inst_ret_4", 452 "Instructions retired in 4 cycles as a percentage of all cycles", 453 ilp[3], "100%"), 454 Metric("lpm_ilp_inst_ret_5", 455 "Instructions retired in 5 or more cycles as a percentage of all cycles", 456 ilp[4], "100%"), 457 ]) 458 459 460def IntelL2() -> Optional[MetricGroup]: 461 try: 462 DC_HIT = Event("L2_RQSTS.DEMAND_DATA_RD_HIT") 463 except: 464 return None 465 try: 466 DC_MISS = Event("L2_RQSTS.DEMAND_DATA_RD_MISS") 467 l2_dmnd_miss = DC_MISS 468 l2_dmnd_rd_all = DC_MISS + DC_HIT 469 except: 470 DC_ALL = Event("L2_RQSTS.ALL_DEMAND_DATA_RD") 471 l2_dmnd_miss = DC_ALL - DC_HIT 472 l2_dmnd_rd_all = DC_ALL 473 l2_dmnd_mrate = d_ratio(l2_dmnd_miss, interval_sec) 474 l2_dmnd_rrate = d_ratio(l2_dmnd_rd_all, interval_sec) 475 476 DC_PFH = None 477 DC_PFM = None 478 l2_pf_all = None 479 l2_pf_mrate = None 480 l2_pf_rrate = None 481 try: 482 DC_PFH = Event("L2_RQSTS.PF_HIT") 483 DC_PFM = Event("L2_RQSTS.PF_MISS") 484 l2_pf_all = DC_PFH + DC_PFM 485 l2_pf_mrate = d_ratio(DC_PFM, interval_sec) 486 l2_pf_rrate = d_ratio(l2_pf_all, interval_sec) 487 except: 488 pass 489 490 DC_RFOH = None 491 DC_RFOM = None 492 l2_rfo_all = None 493 l2_rfo_mrate = None 494 l2_rfo_rrate = None 495 try: 496 DC_RFOH = Event("L2_RQSTS.RFO_HIT") 497 DC_RFOM = Event("L2_RQSTS.RFO_MISS") 498 l2_rfo_all = DC_RFOH + DC_RFOM 499 l2_rfo_mrate = d_ratio(DC_RFOM, interval_sec) 500 l2_rfo_rrate = d_ratio(l2_rfo_all, interval_sec) 501 except: 502 pass 503 504 DC_CH = None 505 try: 506 DC_CH = Event("L2_RQSTS.CODE_RD_HIT") 507 except: 508 pass 509 DC_CM = Event("L2_RQSTS.CODE_RD_MISS") 510 DC_IN = Event("L2_LINES_IN.ALL") 511 DC_OUT_NS = None 512 DC_OUT_S = None 513 l2_lines_out = None 514 l2_out_rate = None 515 wbn = None 516 isd = None 517 try: 518 DC_OUT_NS = Event("L2_LINES_OUT.NON_SILENT", 519 "L2_LINES_OUT.DEMAND_DIRTY", 520 "L2_LINES_IN.S") 521 DC_OUT_S = Event("L2_LINES_OUT.SILENT", 522 "L2_LINES_OUT.DEMAND_CLEAN", 523 "L2_LINES_IN.I") 524 if DC_OUT_S.name == "L2_LINES_OUT.SILENT" and ( 525 args.model.startswith("skylake") or 526 args.model == "cascadelakex"): 527 DC_OUT_S.name = "L2_LINES_OUT.SILENT/any/" 528 # bring is back to per-CPU 529 l2_s = Select(DC_OUT_S / 2, Literal("#smt_on"), DC_OUT_S) 530 l2_ns = DC_OUT_NS 531 l2_lines_out = l2_s + l2_ns 532 l2_out_rate = d_ratio(l2_lines_out, interval_sec) 533 nlr = max(l2_ns - DC_WB_U - DC_WB_D, 0) 534 wbn = d_ratio(nlr, interval_sec) 535 isd = d_ratio(l2_s, interval_sec) 536 except: 537 pass 538 DC_OUT_U = None 539 l2_pf_useless = None 540 l2_useless_rate = None 541 try: 542 DC_OUT_U = Event("L2_LINES_OUT.USELESS_HWPF") 543 l2_pf_useless = DC_OUT_U 544 l2_useless_rate = d_ratio(l2_pf_useless, interval_sec) 545 except: 546 pass 547 DC_WB_U = None 548 DC_WB_D = None 549 wbu = None 550 wbd = None 551 try: 552 DC_WB_U = Event("IDI_MISC.WB_UPGRADE") 553 DC_WB_D = Event("IDI_MISC.WB_DOWNGRADE") 554 wbu = d_ratio(DC_WB_U, interval_sec) 555 wbd = d_ratio(DC_WB_D, interval_sec) 556 except: 557 pass 558 559 l2_lines_in = DC_IN 560 l2_code_all = (DC_CH + DC_CM) if DC_CH else None 561 l2_code_rate = d_ratio(l2_code_all, interval_sec) if DC_CH else None 562 l2_code_miss_rate = d_ratio(DC_CM, interval_sec) 563 l2_in_rate = d_ratio(l2_lines_in, interval_sec) 564 565 return MetricGroup("lpm_l2", [ 566 MetricGroup("lpm_l2_totals", [ 567 Metric("lpm_l2_totals_in", "L2 cache total in per second", 568 l2_in_rate, "In/s"), 569 Metric("lpm_l2_totals_out", "L2 cache total out per second", 570 l2_out_rate, "Out/s") if l2_out_rate else None, 571 ]), 572 MetricGroup("lpm_l2_rd", [ 573 Metric("lpm_l2_rd_hits", "L2 cache data read hits", 574 d_ratio(DC_HIT, l2_dmnd_rd_all), "100%"), 575 Metric("lpm_l2_rd_hits", "L2 cache data read hits", 576 d_ratio(l2_dmnd_miss, l2_dmnd_rd_all), "100%"), 577 Metric("lpm_l2_rd_requests", "L2 cache data read requests per second", 578 l2_dmnd_rrate, "requests/s"), 579 Metric("lpm_l2_rd_misses", "L2 cache data read misses per second", 580 l2_dmnd_mrate, "misses/s"), 581 ]), 582 MetricGroup("lpm_l2_hwpf", [ 583 Metric("lpm_l2_hwpf_hits", "L2 cache hardware prefetcher hits", 584 d_ratio(DC_PFH, l2_pf_all), "100%"), 585 Metric("lpm_l2_hwpf_misses", "L2 cache hardware prefetcher misses", 586 d_ratio(DC_PFM, l2_pf_all), "100%"), 587 Metric("lpm_l2_hwpf_useless", "L2 cache hardware prefetcher useless prefetches per second", 588 l2_useless_rate, "100%") if l2_useless_rate else None, 589 Metric("lpm_l2_hwpf_requests", "L2 cache hardware prefetcher requests per second", 590 l2_pf_rrate, "100%"), 591 Metric("lpm_l2_hwpf_misses", "L2 cache hardware prefetcher misses per second", 592 l2_pf_mrate, "100%"), 593 ]) if DC_PFH else None, 594 MetricGroup("lpm_l2_rfo", [ 595 Metric("lpm_l2_rfo_hits", "L2 cache request for ownership (RFO) hits", 596 d_ratio(DC_RFOH, l2_rfo_all), "100%"), 597 Metric("lpm_l2_rfo_misses", "L2 cache request for ownership (RFO) misses", 598 d_ratio(DC_RFOM, l2_rfo_all), "100%"), 599 Metric("lpm_l2_rfo_requests", "L2 cache request for ownership (RFO) requests per second", 600 l2_rfo_rrate, "requests/s"), 601 Metric("lpm_l2_rfo_misses", "L2 cache request for ownership (RFO) misses per second", 602 l2_rfo_mrate, "misses/s"), 603 ]) if DC_RFOH else None, 604 MetricGroup("lpm_l2_code", [ 605 Metric("lpm_l2_code_hits", "L2 cache code hits", 606 d_ratio(DC_CH, l2_code_all), "100%") if DC_CH else None, 607 Metric("lpm_l2_code_misses", "L2 cache code misses", 608 d_ratio(DC_CM, l2_code_all), "100%") if DC_CH else None, 609 Metric("lpm_l2_code_requests", "L2 cache code requests per second", 610 l2_code_rate, "requests/s") if DC_CH else None, 611 Metric("lpm_l2_code_misses", "L2 cache code misses per second", 612 l2_code_miss_rate, "misses/s"), 613 ]), 614 MetricGroup("lpm_l2_evict", [ 615 MetricGroup("lpm_l2_evict_mef_lines", [ 616 Metric("lpm_l2_evict_mef_lines_l3_hot_lru", "L2 evictions M/E/F lines L3 hot LRU per second", 617 wbu, "HotLRU/s") if wbu else None, 618 Metric("lpm_l2_evict_mef_lines_l3_norm_lru", "L2 evictions M/E/F lines L3 normal LRU per second", 619 wbn, "NormLRU/s") if wbn else None, 620 Metric("lpm_l2_evict_mef_lines_dropped", "L2 evictions M/E/F lines dropped per second", 621 wbd, "dropped/s") if wbd else None, 622 Metric("lpm_l2_evict_is_lines_dropped", "L2 evictions I/S lines dropped per second", 623 isd, "dropped/s") if isd else None, 624 ]), 625 ]), 626 ], description="L2 data cache analysis") 627 628 629def IntelMissLat() -> Optional[MetricGroup]: 630 try: 631 ticks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS") 632 data_rd_loc_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", 633 "UNC_CHA_TOR_OCCUPANCY.IA_MISS", 634 "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE", 635 "UNC_C_TOR_OCCUPANCY.MISS_OPCODE") 636 data_rd_loc_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL", 637 "UNC_CHA_TOR_INSERTS.IA_MISS", 638 "UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE", 639 "UNC_C_TOR_INSERTS.MISS_OPCODE") 640 data_rd_rem_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE", 641 "UNC_CHA_TOR_OCCUPANCY.IA_MISS", 642 "UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE", 643 "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE") 644 data_rd_rem_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE", 645 "UNC_CHA_TOR_INSERTS.IA_MISS", 646 "UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE", 647 "UNC_C_TOR_INSERTS.NID_MISS_OPCODE") 648 except: 649 return None 650 651 if (data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE" or 652 data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_OPCODE"): 653 data_rd = 0x182 654 for e in [data_rd_loc_occ, data_rd_loc_ins, data_rd_rem_occ, data_rd_rem_ins]: 655 e.name += f"/filter_opc={hex(data_rd)}/" 656 elif data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS": 657 # Demand Data Read - Full cache-line read requests from core for 658 # lines to be cached in S or E, typically for data 659 demand_data_rd = 0x202 660 # LLC Prefetch Data - Uncore will first look up the line in the 661 # LLC; for a cache hit, the LRU will be updated, on a miss, the 662 # DRd will be initiated 663 llc_prefetch_data = 0x25a 664 local_filter = (f"/filter_opc0={hex(demand_data_rd)}," 665 f"filter_opc1={hex(llc_prefetch_data)}," 666 "filter_loc,filter_nm,filter_not_nm/") 667 remote_filter = (f"/filter_opc0={hex(demand_data_rd)}," 668 f"filter_opc1={hex(llc_prefetch_data)}," 669 "filter_rem,filter_nm,filter_not_nm/") 670 for e in [data_rd_loc_occ, data_rd_loc_ins]: 671 e.name += local_filter 672 for e in [data_rd_rem_occ, data_rd_rem_ins]: 673 e.name += remote_filter 674 else: 675 assert data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", data_rd_loc_occ 676 677 ticks_per_cha = ticks / source_count(data_rd_loc_ins) 678 loc_lat = interval_sec * 1e9 * data_rd_loc_occ / \ 679 (ticks_per_cha * data_rd_loc_ins) 680 ticks_per_cha = ticks / source_count(data_rd_rem_ins) 681 rem_lat = interval_sec * 1e9 * data_rd_rem_occ / \ 682 (ticks_per_cha * data_rd_rem_ins) 683 return MetricGroup("lpm_miss_lat", [ 684 Metric("lpm_miss_lat_loc", "Local to a socket miss latency in nanoseconds", 685 loc_lat, "ns"), 686 Metric("lpm_miss_lat_rem", "Remote to a socket miss latency in nanoseconds", 687 rem_lat, "ns"), 688 ]) 689 690 691def IntelMlp() -> Optional[Metric]: 692 try: 693 l1d = Event("L1D_PEND_MISS.PENDING") 694 l1dc = Event("L1D_PEND_MISS.PENDING_CYCLES") 695 except: 696 return None 697 698 l1dc = Select(l1dc / 2, Literal("#smt_on"), l1dc) 699 ml = d_ratio(l1d, l1dc) 700 return Metric("lpm_mlp", 701 "Miss level parallelism - number of outstanding load misses per cycle (higher is better)", 702 ml, "load_miss_pending/cycle") 703 704 705def IntelPorts() -> Optional[MetricGroup]: 706 pipeline_events = json.load( 707 open(f"{_args.events_path}/x86/{_args.model}/pipeline.json")) 708 709 core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY", 710 "CPU_CLK_UNHALTED.DISTRIBUTED", 711 "cycles") 712 # Number of CPU cycles scaled for SMT. 713 smt_cycles = Select(core_cycles / 2, Literal("#smt_on"), core_cycles) 714 715 metrics = [] 716 for x in pipeline_events: 717 if "EventName" in x and re.search("^UOPS_DISPATCHED.PORT", x["EventName"]): 718 name = x["EventName"] 719 port = re.search(r"(PORT_[0-9].*)", name).group(0).lower() 720 if name.endswith("_CORE"): 721 cyc = core_cycles 722 else: 723 cyc = smt_cycles 724 metrics.append(Metric(f"lpm_{port}", f"{port} utilization (higher is better)", 725 d_ratio(Event(name), cyc), "100%")) 726 if len(metrics) == 0: 727 return None 728 729 return MetricGroup("lpm_ports", metrics, "functional unit (port) utilization -- " 730 "fraction of cycles each port is utilized (higher is better)") 731 732 733def IntelSwpf() -> Optional[MetricGroup]: 734 ins = Event("instructions") 735 try: 736 s_ld = Event("MEM_INST_RETIRED.ALL_LOADS", 737 "MEM_UOPS_RETIRED.ALL_LOADS") 738 s_nta = Event("SW_PREFETCH_ACCESS.NTA") 739 s_t0 = Event("SW_PREFETCH_ACCESS.T0") 740 s_t1 = Event("SW_PREFETCH_ACCESS.T1_T2") 741 s_w = Event("SW_PREFETCH_ACCESS.PREFETCHW") 742 except: 743 return None 744 745 all_sw = s_nta + s_t0 + s_t1 + s_w 746 swp_r = d_ratio(all_sw, interval_sec) 747 ins_r = d_ratio(ins, all_sw) 748 ld_r = d_ratio(s_ld, all_sw) 749 750 return MetricGroup("lpm_swpf", [ 751 MetricGroup("lpm_swpf_totals", [ 752 Metric("lpm_swpf_totals_exec", "Software prefetch instructions per second", 753 swp_r, "swpf/s"), 754 Metric("lpm_swpf_totals_insn_per_pf", 755 "Average number of instructions between software prefetches", 756 ins_r, "insn/swpf"), 757 Metric("lpm_swpf_totals_loads_per_pf", 758 "Average number of loads between software prefetches", 759 ld_r, "loads/swpf"), 760 ]), 761 MetricGroup("lpm_swpf_bkdwn", [ 762 MetricGroup("lpm_swpf_bkdwn_nta", [ 763 Metric("lpm_swpf_bkdwn_nta_per_swpf", 764 "Software prefetch NTA instructions as a percent of all prefetch instructions", 765 d_ratio(s_nta, all_sw), "100%"), 766 Metric("lpm_swpf_bkdwn_nta_rate", 767 "Software prefetch NTA instructions per second", 768 d_ratio(s_nta, interval_sec), "insn/s"), 769 ]), 770 MetricGroup("lpm_swpf_bkdwn_t0", [ 771 Metric("lpm_swpf_bkdwn_t0_per_swpf", 772 "Software prefetch T0 instructions as a percent of all prefetch instructions", 773 d_ratio(s_t0, all_sw), "100%"), 774 Metric("lpm_swpf_bkdwn_t0_rate", 775 "Software prefetch T0 instructions per second", 776 d_ratio(s_t0, interval_sec), "insn/s"), 777 ]), 778 MetricGroup("lpm_swpf_bkdwn_t1_t2", [ 779 Metric("lpm_swpf_bkdwn_t1_t2_per_swpf", 780 "Software prefetch T1 or T2 instructions as a percent of all prefetch instructions", 781 d_ratio(s_t1, all_sw), "100%"), 782 Metric("lpm_swpf_bkdwn_t1_t2_rate", 783 "Software prefetch T1 or T2 instructions per second", 784 d_ratio(s_t1, interval_sec), "insn/s"), 785 ]), 786 MetricGroup("lpm_swpf_bkdwn_w", [ 787 Metric("lpm_swpf_bkdwn_w_per_swpf", 788 "Software prefetch W instructions as a percent of all prefetch instructions", 789 d_ratio(s_w, all_sw), "100%"), 790 Metric("lpm_swpf_bkdwn_w_rate", 791 "Software prefetch W instructions per second", 792 d_ratio(s_w, interval_sec), "insn/s"), 793 ]), 794 ]), 795 ], description="Software prefetch instruction breakdown") 796 797 798def IntelLdSt() -> Optional[MetricGroup]: 799 if _args.model in [ 800 "bonnell", 801 "nehalemep", 802 "nehalemex", 803 "westmereep-dp", 804 "westmereep-sp", 805 "westmereex", 806 ]: 807 return None 808 LDST_LD = Event("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS") 809 LDST_ST = Event("MEM_INST_RETIRED.ALL_STORES", 810 "MEM_UOPS_RETIRED.ALL_STORES") 811 LDST_LDC1 = Event(f"{LDST_LD.name}/cmask=1/") 812 LDST_STC1 = Event(f"{LDST_ST.name}/cmask=1/") 813 LDST_LDC2 = Event(f"{LDST_LD.name}/cmask=2/") 814 LDST_STC2 = Event(f"{LDST_ST.name}/cmask=2/") 815 LDST_LDC3 = Event(f"{LDST_LD.name}/cmask=3/") 816 LDST_STC3 = Event(f"{LDST_ST.name}/cmask=3/") 817 ins = Event("instructions") 818 LDST_CYC = Event("CPU_CLK_UNHALTED.THREAD", 819 "CPU_CLK_UNHALTED.CORE_P", 820 "CPU_CLK_UNHALTED.THREAD_P") 821 LDST_PRE = None 822 try: 823 LDST_PRE = Event("LOAD_HIT_PREFETCH.SWPF", "LOAD_HIT_PRE.SW_PF") 824 except: 825 pass 826 LDST_AT = None 827 try: 828 LDST_AT = Event("MEM_INST_RETIRED.LOCK_LOADS") 829 except: 830 pass 831 cyc = LDST_CYC 832 833 ld_rate = d_ratio(LDST_LD, interval_sec) 834 st_rate = d_ratio(LDST_ST, interval_sec) 835 pf_rate = d_ratio(LDST_PRE, interval_sec) if LDST_PRE else None 836 at_rate = d_ratio(LDST_AT, interval_sec) if LDST_AT else None 837 838 ldst_ret_constraint = MetricConstraint.GROUPED_EVENTS 839 if LDST_LD.name == "MEM_UOPS_RETIRED.ALL_LOADS": 840 ldst_ret_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI 841 842 return MetricGroup("lpm_ldst", [ 843 MetricGroup("lpm_ldst_total", [ 844 Metric("lpm_ldst_total_loads", "Load/store instructions total loads", 845 ld_rate, "loads"), 846 Metric("lpm_ldst_total_stores", "Load/store instructions total stores", 847 st_rate, "stores"), 848 ]), 849 MetricGroup("lpm_ldst_prcnt", [ 850 Metric("lpm_ldst_prcnt_loads", "Percent of all instructions that are loads", 851 d_ratio(LDST_LD, ins), "100%"), 852 Metric("lpm_ldst_prcnt_stores", "Percent of all instructions that are stores", 853 d_ratio(LDST_ST, ins), "100%"), 854 ]), 855 MetricGroup("lpm_ldst_ret_lds", [ 856 Metric("lpm_ldst_ret_lds_1", "Retired loads in 1 cycle", 857 d_ratio(max(LDST_LDC1 - LDST_LDC2, 0), cyc), "100%", 858 constraint=ldst_ret_constraint), 859 Metric("lpm_ldst_ret_lds_2", "Retired loads in 2 cycles", 860 d_ratio(max(LDST_LDC2 - LDST_LDC3, 0), cyc), "100%", 861 constraint=ldst_ret_constraint), 862 Metric("lpm_ldst_ret_lds_3", "Retired loads in 3 or more cycles", 863 d_ratio(LDST_LDC3, cyc), "100%"), 864 ]), 865 MetricGroup("lpm_ldst_ret_sts", [ 866 Metric("lpm_ldst_ret_sts_1", "Retired stores in 1 cycle", 867 d_ratio(max(LDST_STC1 - LDST_STC2, 0), cyc), "100%", 868 constraint=ldst_ret_constraint), 869 Metric("lpm_ldst_ret_sts_2", "Retired stores in 2 cycles", 870 d_ratio(max(LDST_STC2 - LDST_STC3, 0), cyc), "100%", 871 constraint=ldst_ret_constraint), 872 Metric("lpm_ldst_ret_sts_3", "Retired stores in 3 more cycles", 873 d_ratio(LDST_STC3, cyc), "100%"), 874 ]), 875 Metric("lpm_ldst_ld_hit_swpf", "Load hit software prefetches per second", 876 pf_rate, "swpf/s") if pf_rate else None, 877 Metric("lpm_ldst_atomic_lds", "Atomic loads per second", 878 at_rate, "loads/s") if at_rate else None, 879 ], description="Breakdown of load/store instructions") 880 881 882def UncoreCState() -> Optional[MetricGroup]: 883 try: 884 pcu_ticks = Event("UNC_P_CLOCKTICKS") 885 c0 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C0") 886 c3 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C3") 887 c6 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C6") 888 except: 889 return None 890 891 num_cores = Literal("#num_cores") / Literal("#num_packages") 892 893 max_cycles = pcu_ticks * num_cores 894 total_cycles = c0 + c3 + c6 895 896 # remove fused-off cores which show up in C6/C7. 897 c6 = Select(max(c6 - (total_cycles - max_cycles), 0), 898 total_cycles > max_cycles, 899 c6) 900 901 return MetricGroup("lpm_cstate", [ 902 Metric("lpm_cstate_c0", "C-State cores in C0/C1", 903 d_ratio(c0, pcu_ticks), "cores"), 904 Metric("lpm_cstate_c3", "C-State cores in C3", 905 d_ratio(c3, pcu_ticks), "cores"), 906 Metric("lpm_cstate_c6", "C-State cores in C6/C7", 907 d_ratio(c6, pcu_ticks), "cores"), 908 ]) 909 910 911def UncoreDir() -> Optional[MetricGroup]: 912 try: 913 m2m_upd = Event("UNC_M2M_DIRECTORY_UPDATE.ANY") 914 m2m_hits = Event("UNC_M2M_DIRECTORY_HIT.DIRTY_I") 915 # Turn the umask into a ANY rather than DIRTY_I filter. 916 m2m_hits.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_HIT.ANY/" 917 m2m_miss = Event("UNC_M2M_DIRECTORY_MISS.DIRTY_I") 918 # Turn the umask into a ANY rather than DIRTY_I filter. 919 m2m_miss.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_MISS.ANY/" 920 cha_upd = Event("UNC_CHA_DIR_UPDATE.HA") 921 # Turn the umask into a ANY rather than HA filter. 922 cha_upd.name += "/umask=3,name=UNC_CHA_DIR_UPDATE.ANY/" 923 except: 924 return None 925 926 m2m_total = m2m_hits + m2m_miss 927 upd = m2m_upd + cha_upd # in cache lines 928 upd_r = upd / interval_sec 929 look_r = m2m_total / interval_sec 930 931 scale = 64 / 1_000_000 # Cache lines to MB 932 return MetricGroup("lpm_dir", [ 933 Metric("lpm_dir_lookup_rate", "", 934 d_ratio(m2m_total, interval_sec), "requests/s"), 935 Metric("lpm_dir_lookup_hits", "", 936 d_ratio(m2m_hits, m2m_total), "100%"), 937 Metric("lpm_dir_lookup_misses", "", 938 d_ratio(m2m_miss, m2m_total), "100%"), 939 Metric("lpm_dir_update_requests", "", 940 d_ratio(m2m_upd + cha_upd, interval_sec), "requests/s"), 941 Metric("lpm_dir_update_bw", "", 942 d_ratio(m2m_upd + cha_upd, interval_sec), f"{scale}MB/s"), 943 ]) 944 945 946def UncoreMem() -> Optional[MetricGroup]: 947 try: 948 loc_rds = Event("UNC_CHA_REQUESTS.READS_LOCAL", 949 "UNC_H_REQUESTS.READS_LOCAL") 950 rem_rds = Event("UNC_CHA_REQUESTS.READS_REMOTE", 951 "UNC_H_REQUESTS.READS_REMOTE") 952 loc_wrs = Event("UNC_CHA_REQUESTS.WRITES_LOCAL", 953 "UNC_H_REQUESTS.WRITES_LOCAL") 954 rem_wrs = Event("UNC_CHA_REQUESTS.WRITES_REMOTE", 955 "UNC_H_REQUESTS.WRITES_REMOTE") 956 except: 957 return None 958 959 scale = 64 / 1_000_000 960 return MetricGroup("lpm_mem", [ 961 MetricGroup("lpm_mem_local", [ 962 Metric("lpm_mem_local_read", "Local memory read bandwidth not including directory updates", 963 d_ratio(loc_rds, interval_sec), f"{scale}MB/s"), 964 Metric("lpm_mem_local_write", "Local memory write bandwidth not including directory updates", 965 d_ratio(loc_wrs, interval_sec), f"{scale}MB/s"), 966 ]), 967 MetricGroup("lpm_mem_remote", [ 968 Metric("lpm_mem_remote_read", "Remote memory read bandwidth not including directory updates", 969 d_ratio(rem_rds, interval_sec), f"{scale}MB/s"), 970 Metric("lpm_mem_remote_write", "Remote memory write bandwidth not including directory updates", 971 d_ratio(rem_wrs, interval_sec), f"{scale}MB/s"), 972 ]), 973 ], description="Memory Bandwidth breakdown local vs. remote (remote requests in). directory updates not included") 974 975 976def UncoreMemBw() -> Optional[MetricGroup]: 977 mem_events = [] 978 try: 979 mem_events = json.load(open(f"{os.path.dirname(os.path.realpath(__file__))}" 980 f"/arch/x86/{args.model}/uncore-memory.json")) 981 except: 982 pass 983 984 ddr_rds = 0 985 ddr_wrs = 0 986 ddr_total = 0 987 for x in mem_events: 988 if "EventName" in x: 989 name = x["EventName"] 990 if re.search("^UNC_MC[0-9]+_RDCAS_COUNT_FREERUN", name): 991 ddr_rds += Event(name) 992 elif re.search("^UNC_MC[0-9]+_WRCAS_COUNT_FREERUN", name): 993 ddr_wrs += Event(name) 994 # elif re.search("^UNC_MC[0-9]+_TOTAL_REQCOUNT_FREERUN", name): 995 # ddr_total += Event(name) 996 997 if ddr_rds == 0: 998 try: 999 ddr_rds = Event("UNC_M_CAS_COUNT.RD") 1000 ddr_wrs = Event("UNC_M_CAS_COUNT.WR") 1001 except: 1002 return None 1003 1004 ddr_total = ddr_rds + ddr_wrs 1005 1006 pmm_rds = 0 1007 pmm_wrs = 0 1008 try: 1009 pmm_rds = Event("UNC_M_PMM_RPQ_INSERTS") 1010 pmm_wrs = Event("UNC_M_PMM_WPQ_INSERTS") 1011 except: 1012 pass 1013 1014 pmm_total = pmm_rds + pmm_wrs 1015 1016 scale = 64 / 1_000_000 1017 return MetricGroup("lpm_mem_bw", [ 1018 MetricGroup("lpm_mem_bw_ddr", [ 1019 Metric("lpm_mem_bw_ddr_read", "DDR memory read bandwidth", 1020 d_ratio(ddr_rds, interval_sec), f"{scale}MB/s"), 1021 Metric("lpm_mem_bw_ddr_write", "DDR memory write bandwidth", 1022 d_ratio(ddr_wrs, interval_sec), f"{scale}MB/s"), 1023 Metric("lpm_mem_bw_ddr_total", "DDR memory write bandwidth", 1024 d_ratio(ddr_total, interval_sec), f"{scale}MB/s"), 1025 ], description="DDR Memory Bandwidth"), 1026 MetricGroup("lpm_mem_bw_pmm", [ 1027 Metric("lpm_mem_bw_pmm_read", "PMM memory read bandwidth", 1028 d_ratio(pmm_rds, interval_sec), f"{scale}MB/s"), 1029 Metric("lpm_mem_bw_pmm_write", "PMM memory write bandwidth", 1030 d_ratio(pmm_wrs, interval_sec), f"{scale}MB/s"), 1031 Metric("lpm_mem_bw_pmm_total", "PMM memory write bandwidth", 1032 d_ratio(pmm_total, interval_sec), f"{scale}MB/s"), 1033 ], description="PMM Memory Bandwidth") if pmm_rds != 0 else None, 1034 ], description="Memory Bandwidth") 1035 1036 1037def UncoreMemSat() -> Optional[Metric]: 1038 try: 1039 clocks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS") 1040 sat = Event("UNC_CHA_DISTRESS_ASSERTED.VERT", "UNC_CHA_FAST_ASSERTED.VERT", 1041 "UNC_C_FAST_ASSERTED") 1042 except: 1043 return None 1044 1045 desc = ("Mesh Bandwidth saturation (% CBOX cycles with FAST signal asserted, " 1046 "include QPI bandwidth saturation), lower is better") 1047 if "UNC_CHA_" in sat.name: 1048 desc = ("Mesh Bandwidth saturation (% CHA cycles with FAST signal asserted, " 1049 "include UPI bandwidth saturation), lower is better") 1050 return Metric("lpm_mem_sat", desc, d_ratio(sat, clocks), "100%") 1051 1052 1053def UncoreUpiBw() -> Optional[MetricGroup]: 1054 try: 1055 upi_rds = Event("UNC_UPI_RxL_FLITS.ALL_DATA") 1056 upi_wrs = Event("UNC_UPI_TxL_FLITS.ALL_DATA") 1057 except: 1058 return None 1059 1060 upi_total = upi_rds + upi_wrs 1061 1062 # From "Uncore Performance Monitoring": When measuring the amount of 1063 # bandwidth consumed by transmission of the data (i.e. NOT including 1064 # the header), it should be .ALL_DATA / 9 * 64B. 1065 scale = (64 / 9) / 1_000_000 1066 return MetricGroup("lpm_upi_bw", [ 1067 Metric("lpm_upi_bw_read", "UPI read bandwidth", 1068 d_ratio(upi_rds, interval_sec), f"{scale}MB/s"), 1069 Metric("lpm_upi_bw_write", "DDR memory write bandwidth", 1070 d_ratio(upi_wrs, interval_sec), f"{scale}MB/s"), 1071 ], description="UPI Bandwidth") 1072 1073 1074def main() -> None: 1075 global _args 1076 1077 def dir_path(path: str) -> str: 1078 """Validate path is a directory for argparse.""" 1079 if os.path.isdir(path): 1080 return path 1081 raise argparse.ArgumentTypeError( 1082 f'\'{path}\' is not a valid directory') 1083 1084 parser = argparse.ArgumentParser(description="Intel perf json generator") 1085 parser.add_argument( 1086 "-metricgroups", help="Generate metricgroups data", action='store_true') 1087 parser.add_argument("model", help="e.g. skylakex") 1088 parser.add_argument( 1089 'events_path', 1090 type=dir_path, 1091 help='Root of tree containing architecture directories containing json files' 1092 ) 1093 _args = parser.parse_args() 1094 1095 directory = f"{_args.events_path}/x86/{_args.model}/" 1096 LoadEvents(directory) 1097 1098 all_metrics = MetricGroup("", [ 1099 Cycles(), 1100 Idle(), 1101 Rapl(), 1102 Smi(), 1103 Tsx(), 1104 IntelBr(), 1105 IntelCtxSw(), 1106 IntelFpu(), 1107 IntelIlp(), 1108 IntelL2(), 1109 IntelLdSt(), 1110 IntelMissLat(), 1111 IntelMlp(), 1112 IntelPorts(), 1113 IntelSwpf(), 1114 UncoreCState(), 1115 UncoreDir(), 1116 UncoreMem(), 1117 UncoreMemBw(), 1118 UncoreMemSat(), 1119 UncoreUpiBw(), 1120 ]) 1121 1122 if _args.metricgroups: 1123 print(JsonEncodeMetricGroupDescriptions(all_metrics)) 1124 else: 1125 print(JsonEncodeMetric(all_metrics)) 1126 1127 1128if __name__ == '__main__': 1129 main() 1130