119eab0efSIan Rogers#!/usr/bin/env python3 219eab0efSIan Rogers# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 319eab0efSIan Rogersimport argparse 4cd1c6a48SIan Rogersimport json 5bab90b3bSIan Rogersimport math 619eab0efSIan Rogersimport os 7cd1c6a48SIan Rogersimport re 88c345f35SIan Rogersfrom typing import Optional 9*82e53e7aSIan Rogersfrom common_metrics import Cycles 106ec3058eSIan Rogersfrom metric import (d_ratio, has_event, max, source_count, CheckPmu, Event, 116ec3058eSIan Rogers JsonEncodeMetric, JsonEncodeMetricGroupDescriptions, 126ec3058eSIan Rogers Literal, LoadEvents, Metric, MetricConstraint, MetricGroup, 136ec3058eSIan Rogers MetricRef, Select) 1419eab0efSIan Rogers 1519eab0efSIan Rogers# Global command line arguments. 1619eab0efSIan Rogers_args = None 17bab90b3bSIan Rogersinterval_sec = Event("duration_time") 18bab90b3bSIan Rogers 19bab90b3bSIan Rogers 201d519e5aSIan Rogersdef Idle() -> Metric: 211d519e5aSIan Rogers cyc = Event("msr/mperf/") 221d519e5aSIan Rogers tsc = Event("msr/tsc/") 231d519e5aSIan Rogers low = max(tsc - cyc, 0) 241d519e5aSIan Rogers return Metric( 251d519e5aSIan Rogers "lpm_idle", 261d519e5aSIan Rogers "Percentage of total wallclock cycles where CPUs are in low power state (C1 or deeper sleep state)", 271d519e5aSIan Rogers d_ratio(low, tsc), "100%") 281d519e5aSIan Rogers 291d519e5aSIan Rogers 30bab90b3bSIan Rogersdef Rapl() -> MetricGroup: 31bab90b3bSIan Rogers """Processor power consumption estimate. 32bab90b3bSIan Rogers 33bab90b3bSIan Rogers Use events from the running average power limit (RAPL) driver. 34bab90b3bSIan Rogers """ 35bab90b3bSIan Rogers # Watts = joules/second 36bab90b3bSIan Rogers pkg = Event("power/energy\\-pkg/") 37bab90b3bSIan Rogers cond_pkg = Select(pkg, has_event(pkg), math.nan) 38bab90b3bSIan Rogers cores = Event("power/energy\\-cores/") 39bab90b3bSIan Rogers cond_cores = Select(cores, has_event(cores), math.nan) 40bab90b3bSIan Rogers ram = Event("power/energy\\-ram/") 41bab90b3bSIan Rogers cond_ram = Select(ram, has_event(ram), math.nan) 42bab90b3bSIan Rogers gpu = Event("power/energy\\-gpu/") 43bab90b3bSIan Rogers cond_gpu = Select(gpu, has_event(gpu), math.nan) 44bab90b3bSIan Rogers psys = Event("power/energy\\-psys/") 45bab90b3bSIan Rogers cond_psys = Select(psys, has_event(psys), math.nan) 46bab90b3bSIan Rogers scale = 2.3283064365386962890625e-10 47bab90b3bSIan Rogers metrics = [ 48bab90b3bSIan Rogers Metric("lpm_cpu_power_pkg", "", 49bab90b3bSIan Rogers d_ratio(cond_pkg * scale, interval_sec), "Watts"), 50bab90b3bSIan Rogers Metric("lpm_cpu_power_cores", "", 51bab90b3bSIan Rogers d_ratio(cond_cores * scale, interval_sec), "Watts"), 52bab90b3bSIan Rogers Metric("lpm_cpu_power_ram", "", 53bab90b3bSIan Rogers d_ratio(cond_ram * scale, interval_sec), "Watts"), 54bab90b3bSIan Rogers Metric("lpm_cpu_power_gpu", "", 55bab90b3bSIan Rogers d_ratio(cond_gpu * scale, interval_sec), "Watts"), 56bab90b3bSIan Rogers Metric("lpm_cpu_power_psys", "", 57bab90b3bSIan Rogers d_ratio(cond_psys * scale, interval_sec), "Watts"), 58bab90b3bSIan Rogers ] 59bab90b3bSIan Rogers 60bab90b3bSIan Rogers return MetricGroup("lpm_cpu_power", metrics, 61bab90b3bSIan Rogers description="Running Average Power Limit (RAPL) power consumption estimates") 6219eab0efSIan Rogers 6319eab0efSIan Rogers 6417d616b7SIan Rogersdef Smi() -> MetricGroup: 6517d616b7SIan Rogers pmu = "<cpu_core or cpu_atom>" if CheckPmu("cpu_core") else "cpu" 6617d616b7SIan Rogers aperf = Event('msr/aperf/') 6717d616b7SIan Rogers cycles = Event('cycles') 6817d616b7SIan Rogers smi_num = Event('msr/smi/') 6917d616b7SIan Rogers smi_cycles = Select(Select((aperf - cycles) / aperf, smi_num > 0, 0), 7017d616b7SIan Rogers has_event(aperf), 7117d616b7SIan Rogers 0) 7217d616b7SIan Rogers return MetricGroup('smi', [ 7317d616b7SIan Rogers Metric('smi_num', 'Number of SMI interrupts.', 7417d616b7SIan Rogers Select(smi_num, has_event(smi_num), 0), 'SMI#'), 7517d616b7SIan Rogers # Note, the smi_cycles "Event" is really a reference to the metric. 7617d616b7SIan Rogers Metric('smi_cycles', 7717d616b7SIan Rogers 'Percentage of cycles spent in System Management Interrupts. ' 7817d616b7SIan Rogers f'Requires /sys/bus/event_source/devices/{pmu}/freeze_on_smi to be 1.', 7917d616b7SIan Rogers smi_cycles, '100%', threshold=(MetricRef('smi_cycles') > 0.10)) 8017d616b7SIan Rogers ], description='System Management Interrupt metrics') 8117d616b7SIan Rogers 8217d616b7SIan Rogers 838c345f35SIan Rogersdef Tsx() -> Optional[MetricGroup]: 848c345f35SIan Rogers pmu = "cpu_core" if CheckPmu("cpu_core") else "cpu" 858c345f35SIan Rogers cycles = Event('cycles') 868c345f35SIan Rogers cycles_in_tx = Event(f'{pmu}/cycles\\-t/') 878c345f35SIan Rogers cycles_in_tx_cp = Event(f'{pmu}/cycles\\-ct/') 888c345f35SIan Rogers try: 898c345f35SIan Rogers # Test if the tsx event is present in the json, prefer the 908c345f35SIan Rogers # sysfs version so that we can detect its presence at runtime. 918c345f35SIan Rogers transaction_start = Event("RTM_RETIRED.START") 928c345f35SIan Rogers transaction_start = Event(f'{pmu}/tx\\-start/') 938c345f35SIan Rogers except: 948c345f35SIan Rogers return None 958c345f35SIan Rogers 968c345f35SIan Rogers elision_start = None 978c345f35SIan Rogers try: 988c345f35SIan Rogers # Elision start isn't supported by all models, but we'll not 998c345f35SIan Rogers # generate the tsx_cycles_per_elision metric in that 1008c345f35SIan Rogers # case. Again, prefer the sysfs encoding of the event. 1018c345f35SIan Rogers elision_start = Event("HLE_RETIRED.START") 1028c345f35SIan Rogers elision_start = Event(f'{pmu}/el\\-start/') 1038c345f35SIan Rogers except: 1048c345f35SIan Rogers pass 1058c345f35SIan Rogers 1068c345f35SIan Rogers return MetricGroup('transaction', [ 1078c345f35SIan Rogers Metric('tsx_transactional_cycles', 1088c345f35SIan Rogers 'Percentage of cycles within a transaction region.', 1098c345f35SIan Rogers Select(cycles_in_tx / cycles, has_event(cycles_in_tx), 0), 1108c345f35SIan Rogers '100%'), 1118c345f35SIan Rogers Metric('tsx_aborted_cycles', 'Percentage of cycles in aborted transactions.', 1128c345f35SIan Rogers Select(max(cycles_in_tx - cycles_in_tx_cp, 0) / cycles, 1138c345f35SIan Rogers has_event(cycles_in_tx), 1148c345f35SIan Rogers 0), 1158c345f35SIan Rogers '100%'), 1168c345f35SIan Rogers Metric('tsx_cycles_per_transaction', 1178c345f35SIan Rogers 'Number of cycles within a transaction divided by the number of transactions.', 1188c345f35SIan Rogers Select(cycles_in_tx / transaction_start, 1198c345f35SIan Rogers has_event(cycles_in_tx), 1208c345f35SIan Rogers 0), 1218c345f35SIan Rogers "cycles / transaction"), 1228c345f35SIan Rogers Metric('tsx_cycles_per_elision', 1238c345f35SIan Rogers 'Number of cycles within a transaction divided by the number of elisions.', 1248c345f35SIan Rogers Select(cycles_in_tx / elision_start, 1258c345f35SIan Rogers has_event(elision_start), 1268c345f35SIan Rogers 0), 1278c345f35SIan Rogers "cycles / elision") if elision_start else None, 1288c345f35SIan Rogers ], description="Breakdown of transactional memory statistics") 1298c345f35SIan Rogers 1308c345f35SIan Rogers 13137d0b00aSIan Rogersdef IntelBr(): 13237d0b00aSIan Rogers ins = Event("instructions") 13337d0b00aSIan Rogers 13437d0b00aSIan Rogers def Total() -> MetricGroup: 13537d0b00aSIan Rogers br_all = Event("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY") 13637d0b00aSIan Rogers br_m_all = Event("BR_MISP_RETIRED.ALL_BRANCHES", 13737d0b00aSIan Rogers "BR_INST_RETIRED.MISPRED", 13837d0b00aSIan Rogers "BR_MISP_EXEC.ANY") 13937d0b00aSIan Rogers br_clr = None 14037d0b00aSIan Rogers try: 14137d0b00aSIan Rogers br_clr = Event("BACLEARS.ANY", "BACLEARS.ALL") 14237d0b00aSIan Rogers except: 14337d0b00aSIan Rogers pass 14437d0b00aSIan Rogers 14537d0b00aSIan Rogers br_r = d_ratio(br_all, interval_sec) 14637d0b00aSIan Rogers ins_r = d_ratio(ins, br_all) 14737d0b00aSIan Rogers misp_r = d_ratio(br_m_all, br_all) 14837d0b00aSIan Rogers clr_r = d_ratio(br_clr, interval_sec) if br_clr else None 14937d0b00aSIan Rogers 15037d0b00aSIan Rogers return MetricGroup("lpm_br_total", [ 15137d0b00aSIan Rogers Metric("lpm_br_total_retired", 15237d0b00aSIan Rogers "The number of branch instructions retired per second.", br_r, 15337d0b00aSIan Rogers "insn/s"), 15437d0b00aSIan Rogers Metric( 15537d0b00aSIan Rogers "lpm_br_total_mispred", 15637d0b00aSIan Rogers "The number of branch instructions retired, of any type, that were " 15737d0b00aSIan Rogers "not correctly predicted as a percentage of all branch instrucions.", 15837d0b00aSIan Rogers misp_r, "100%"), 15937d0b00aSIan Rogers Metric("lpm_br_total_insn_between_branches", 16037d0b00aSIan Rogers "The number of instructions divided by the number of branches.", 16137d0b00aSIan Rogers ins_r, "insn"), 16237d0b00aSIan Rogers Metric("lpm_br_total_insn_fe_resteers", 16337d0b00aSIan Rogers "The number of resync branches per second.", clr_r, "req/s" 16437d0b00aSIan Rogers ) if clr_r else None 16537d0b00aSIan Rogers ]) 16637d0b00aSIan Rogers 16737d0b00aSIan Rogers def Taken() -> MetricGroup: 16837d0b00aSIan Rogers br_all = Event("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY") 16937d0b00aSIan Rogers br_m_tk = None 17037d0b00aSIan Rogers try: 17137d0b00aSIan Rogers br_m_tk = Event("BR_MISP_RETIRED.NEAR_TAKEN", 17237d0b00aSIan Rogers "BR_MISP_RETIRED.TAKEN_JCC", 17337d0b00aSIan Rogers "BR_INST_RETIRED.MISPRED_TAKEN") 17437d0b00aSIan Rogers except: 17537d0b00aSIan Rogers pass 17637d0b00aSIan Rogers br_r = d_ratio(br_all, interval_sec) 17737d0b00aSIan Rogers ins_r = d_ratio(ins, br_all) 17837d0b00aSIan Rogers misp_r = d_ratio(br_m_tk, br_all) if br_m_tk else None 17937d0b00aSIan Rogers return MetricGroup("lpm_br_taken", [ 18037d0b00aSIan Rogers Metric("lpm_br_taken_retired", 18137d0b00aSIan Rogers "The number of taken branches that were retired per second.", 18237d0b00aSIan Rogers br_r, "insn/s"), 18337d0b00aSIan Rogers Metric( 18437d0b00aSIan Rogers "lpm_br_taken_mispred", 18537d0b00aSIan Rogers "The number of retired taken branch instructions that were " 18637d0b00aSIan Rogers "mispredicted as a percentage of all taken branches.", misp_r, 18737d0b00aSIan Rogers "100%") if misp_r else None, 18837d0b00aSIan Rogers Metric( 18937d0b00aSIan Rogers "lpm_br_taken_insn_between_branches", 19037d0b00aSIan Rogers "The number of instructions divided by the number of taken branches.", 19137d0b00aSIan Rogers ins_r, "insn"), 19237d0b00aSIan Rogers ]) 19337d0b00aSIan Rogers 19437d0b00aSIan Rogers def Conditional() -> Optional[MetricGroup]: 19537d0b00aSIan Rogers try: 19637d0b00aSIan Rogers br_cond = Event("BR_INST_RETIRED.COND", 19737d0b00aSIan Rogers "BR_INST_RETIRED.CONDITIONAL", 19837d0b00aSIan Rogers "BR_INST_RETIRED.TAKEN_JCC") 19937d0b00aSIan Rogers br_m_cond = Event("BR_MISP_RETIRED.COND", 20037d0b00aSIan Rogers "BR_MISP_RETIRED.CONDITIONAL", 20137d0b00aSIan Rogers "BR_MISP_RETIRED.TAKEN_JCC") 20237d0b00aSIan Rogers except: 20337d0b00aSIan Rogers return None 20437d0b00aSIan Rogers 20537d0b00aSIan Rogers br_cond_nt = None 20637d0b00aSIan Rogers br_m_cond_nt = None 20737d0b00aSIan Rogers try: 20837d0b00aSIan Rogers br_cond_nt = Event("BR_INST_RETIRED.COND_NTAKEN") 20937d0b00aSIan Rogers br_m_cond_nt = Event("BR_MISP_RETIRED.COND_NTAKEN") 21037d0b00aSIan Rogers except: 21137d0b00aSIan Rogers pass 21237d0b00aSIan Rogers br_r = d_ratio(br_cond, interval_sec) 21337d0b00aSIan Rogers ins_r = d_ratio(ins, br_cond) 21437d0b00aSIan Rogers misp_r = d_ratio(br_m_cond, br_cond) 21537d0b00aSIan Rogers taken_metrics = [ 21637d0b00aSIan Rogers Metric("lpm_br_cond_retired", "Retired conditional branch instructions.", 21737d0b00aSIan Rogers br_r, "insn/s"), 21837d0b00aSIan Rogers Metric("lpm_br_cond_insn_between_branches", 21937d0b00aSIan Rogers "The number of instructions divided by the number of conditional " 22037d0b00aSIan Rogers "branches.", ins_r, "insn"), 22137d0b00aSIan Rogers Metric("lpm_br_cond_mispred", 22237d0b00aSIan Rogers "Retired conditional branch instructions mispredicted as a " 22337d0b00aSIan Rogers "percentage of all conditional branches.", misp_r, "100%"), 22437d0b00aSIan Rogers ] 22537d0b00aSIan Rogers if not br_m_cond_nt: 22637d0b00aSIan Rogers return MetricGroup("lpm_br_cond", taken_metrics) 22737d0b00aSIan Rogers 22837d0b00aSIan Rogers br_r = d_ratio(br_cond_nt, interval_sec) 22937d0b00aSIan Rogers ins_r = d_ratio(ins, br_cond_nt) 23037d0b00aSIan Rogers misp_r = d_ratio(br_m_cond_nt, br_cond_nt) 23137d0b00aSIan Rogers 23237d0b00aSIan Rogers not_taken_metrics = [ 23337d0b00aSIan Rogers Metric("lpm_br_cond_retired", "Retired conditional not taken branch instructions.", 23437d0b00aSIan Rogers br_r, "insn/s"), 23537d0b00aSIan Rogers Metric("lpm_br_cond_insn_between_branches", 23637d0b00aSIan Rogers "The number of instructions divided by the number of not taken conditional " 23737d0b00aSIan Rogers "branches.", ins_r, "insn"), 23837d0b00aSIan Rogers Metric("lpm_br_cond_mispred", 23937d0b00aSIan Rogers "Retired not taken conditional branch instructions mispredicted as a " 24037d0b00aSIan Rogers "percentage of all not taken conditional branches.", misp_r, "100%"), 24137d0b00aSIan Rogers ] 24237d0b00aSIan Rogers return MetricGroup("lpm_br_cond", [ 24337d0b00aSIan Rogers MetricGroup("lpm_br_cond_nt", not_taken_metrics), 24437d0b00aSIan Rogers MetricGroup("lpm_br_cond_tkn", taken_metrics), 24537d0b00aSIan Rogers ]) 24637d0b00aSIan Rogers 24737d0b00aSIan Rogers def Far() -> Optional[MetricGroup]: 24837d0b00aSIan Rogers try: 24937d0b00aSIan Rogers br_far = Event("BR_INST_RETIRED.FAR_BRANCH") 25037d0b00aSIan Rogers except: 25137d0b00aSIan Rogers return None 25237d0b00aSIan Rogers 25337d0b00aSIan Rogers br_r = d_ratio(br_far, interval_sec) 25437d0b00aSIan Rogers ins_r = d_ratio(ins, br_far) 25537d0b00aSIan Rogers return MetricGroup("lpm_br_far", [ 25637d0b00aSIan Rogers Metric("lpm_br_far_retired", "Retired far control transfers per second.", 25737d0b00aSIan Rogers br_r, "insn/s"), 25837d0b00aSIan Rogers Metric( 25937d0b00aSIan Rogers "lpm_br_far_insn_between_branches", 26037d0b00aSIan Rogers "The number of instructions divided by the number of far branches.", 26137d0b00aSIan Rogers ins_r, "insn"), 26237d0b00aSIan Rogers ]) 26337d0b00aSIan Rogers 26437d0b00aSIan Rogers return MetricGroup("lpm_br", [Total(), Taken(), Conditional(), Far()], 26537d0b00aSIan Rogers description="breakdown of retired branch instructions") 26637d0b00aSIan Rogers 26737d0b00aSIan Rogers 2682f3d6ea0SIan Rogersdef IntelCtxSw() -> MetricGroup: 2692f3d6ea0SIan Rogers cs = Event("context\\-switches") 2702f3d6ea0SIan Rogers metrics = [ 2712f3d6ea0SIan Rogers Metric("lpm_cs_rate", "Context switches per second", 2722f3d6ea0SIan Rogers d_ratio(cs, interval_sec), "ctxsw/s") 2732f3d6ea0SIan Rogers ] 2742f3d6ea0SIan Rogers 2752f3d6ea0SIan Rogers ev = Event("instructions") 2762f3d6ea0SIan Rogers metrics.append(Metric("lpm_cs_instr", "Instructions per context switch", 2772f3d6ea0SIan Rogers d_ratio(ev, cs), "instr/cs")) 2782f3d6ea0SIan Rogers 2792f3d6ea0SIan Rogers ev = Event("cycles") 2802f3d6ea0SIan Rogers metrics.append(Metric("lpm_cs_cycles", "Cycles per context switch", 2812f3d6ea0SIan Rogers d_ratio(ev, cs), "cycles/cs")) 2822f3d6ea0SIan Rogers 2832f3d6ea0SIan Rogers try: 2842f3d6ea0SIan Rogers ev = Event("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS") 2852f3d6ea0SIan Rogers metrics.append(Metric("lpm_cs_loads", "Loads per context switch", 2862f3d6ea0SIan Rogers d_ratio(ev, cs), "loads/cs")) 2872f3d6ea0SIan Rogers except: 2882f3d6ea0SIan Rogers pass 2892f3d6ea0SIan Rogers 2902f3d6ea0SIan Rogers try: 2912f3d6ea0SIan Rogers ev = Event("MEM_INST_RETIRED.ALL_STORES", 2922f3d6ea0SIan Rogers "MEM_UOPS_RETIRED.ALL_STORES") 2932f3d6ea0SIan Rogers metrics.append(Metric("lpm_cs_stores", "Stores per context switch", 2942f3d6ea0SIan Rogers d_ratio(ev, cs), "stores/cs")) 2952f3d6ea0SIan Rogers except: 2962f3d6ea0SIan Rogers pass 2972f3d6ea0SIan Rogers 2982f3d6ea0SIan Rogers try: 2992f3d6ea0SIan Rogers ev = Event("BR_INST_RETIRED.NEAR_TAKEN", "BR_INST_RETIRED.TAKEN_JCC") 3002f3d6ea0SIan Rogers metrics.append(Metric("lpm_cs_br_taken", "Branches taken per context switch", 3012f3d6ea0SIan Rogers d_ratio(ev, cs), "br_taken/cs")) 3022f3d6ea0SIan Rogers except: 3032f3d6ea0SIan Rogers pass 3042f3d6ea0SIan Rogers 3052f3d6ea0SIan Rogers try: 3062f3d6ea0SIan Rogers l2_misses = (Event("L2_RQSTS.DEMAND_DATA_RD_MISS") + 3072f3d6ea0SIan Rogers Event("L2_RQSTS.RFO_MISS") + 3082f3d6ea0SIan Rogers Event("L2_RQSTS.CODE_RD_MISS")) 3092f3d6ea0SIan Rogers try: 3102f3d6ea0SIan Rogers l2_misses += Event("L2_RQSTS.HWPF_MISS", 3112f3d6ea0SIan Rogers "L2_RQSTS.L2_PF_MISS", "L2_RQSTS.PF_MISS") 3122f3d6ea0SIan Rogers except: 3132f3d6ea0SIan Rogers pass 3142f3d6ea0SIan Rogers 3152f3d6ea0SIan Rogers metrics.append(Metric("lpm_cs_l2_misses", "L2 misses per context switch", 3162f3d6ea0SIan Rogers d_ratio(l2_misses, cs), "l2_misses/cs")) 3172f3d6ea0SIan Rogers except: 3182f3d6ea0SIan Rogers pass 3192f3d6ea0SIan Rogers 3202f3d6ea0SIan Rogers return MetricGroup("lpm_cs", metrics, 3212f3d6ea0SIan Rogers description=("Number of context switches per second, instructions " 3222f3d6ea0SIan Rogers "retired & core cycles between context switches")) 3232f3d6ea0SIan Rogers 3242f3d6ea0SIan Rogers 325d666f017SIan Rogersdef IntelFpu() -> Optional[MetricGroup]: 326d666f017SIan Rogers cyc = Event("cycles") 327d666f017SIan Rogers try: 328d666f017SIan Rogers s_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_SINGLE", 329d666f017SIan Rogers "SIMD_INST_RETIRED.SCALAR_SINGLE") 330d666f017SIan Rogers except: 331d666f017SIan Rogers return None 332d666f017SIan Rogers d_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", 333d666f017SIan Rogers "SIMD_INST_RETIRED.SCALAR_DOUBLE") 334d666f017SIan Rogers s_128 = Event("FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", 335d666f017SIan Rogers "SIMD_INST_RETIRED.PACKED_SINGLE") 336d666f017SIan Rogers 337d666f017SIan Rogers flop = s_64 + d_64 + 4 * s_128 338d666f017SIan Rogers 339d666f017SIan Rogers d_128 = None 340d666f017SIan Rogers s_256 = None 341d666f017SIan Rogers d_256 = None 342d666f017SIan Rogers s_512 = None 343d666f017SIan Rogers d_512 = None 344d666f017SIan Rogers try: 345d666f017SIan Rogers d_128 = Event("FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE") 346d666f017SIan Rogers flop += 2 * d_128 347d666f017SIan Rogers s_256 = Event("FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE") 348d666f017SIan Rogers flop += 8 * s_256 349d666f017SIan Rogers d_256 = Event("FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE") 350d666f017SIan Rogers flop += 4 * d_256 351d666f017SIan Rogers s_512 = Event("FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE") 352d666f017SIan Rogers flop += 16 * s_512 353d666f017SIan Rogers d_512 = Event("FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE") 354d666f017SIan Rogers flop += 8 * d_512 355d666f017SIan Rogers except: 356d666f017SIan Rogers pass 357d666f017SIan Rogers 358d666f017SIan Rogers f_assist = Event("ASSISTS.FP", "FP_ASSIST.ANY", "FP_ASSIST.S") 359d666f017SIan Rogers if f_assist in [ 360d666f017SIan Rogers "ASSISTS.FP", 361d666f017SIan Rogers "FP_ASSIST.S", 362d666f017SIan Rogers ]: 363d666f017SIan Rogers f_assist += "/cmask=1/" 364d666f017SIan Rogers 365d666f017SIan Rogers flop_r = d_ratio(flop, interval_sec) 366d666f017SIan Rogers flop_c = d_ratio(flop, cyc) 367d666f017SIan Rogers nmi_constraint = MetricConstraint.GROUPED_EVENTS 368d666f017SIan Rogers if f_assist.name == "ASSISTS.FP": # Icelake+ 369d666f017SIan Rogers nmi_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI 370d666f017SIan Rogers 371d666f017SIan Rogers def FpuMetrics(group: str, fl: Optional[Event], mult: int, desc: str) -> Optional[MetricGroup]: 372d666f017SIan Rogers if not fl: 373d666f017SIan Rogers return None 374d666f017SIan Rogers 375d666f017SIan Rogers f = fl * mult 376d666f017SIan Rogers fl_r = d_ratio(f, interval_sec) 377d666f017SIan Rogers r_s = d_ratio(fl, interval_sec) 378d666f017SIan Rogers return MetricGroup(group, [ 379d666f017SIan Rogers Metric(f"{group}_of_total", desc + " floating point operations per second", 380d666f017SIan Rogers d_ratio(f, flop), "100%"), 381d666f017SIan Rogers Metric(f"{group}_flops", desc + " floating point operations per second", 382d666f017SIan Rogers fl_r, "flops/s"), 383d666f017SIan Rogers Metric(f"{group}_ops", desc + " operations per second", 384d666f017SIan Rogers r_s, "ops/s"), 385d666f017SIan Rogers ]) 386d666f017SIan Rogers 387d666f017SIan Rogers return MetricGroup("lpm_fpu", [ 388d666f017SIan Rogers MetricGroup("lpm_fpu_total", [ 389d666f017SIan Rogers Metric("lpm_fpu_total_flops", "Floating point operations per second", 390d666f017SIan Rogers flop_r, "flops/s"), 391d666f017SIan Rogers Metric("lpm_fpu_total_flopc", "Floating point operations per cycle", 392d666f017SIan Rogers flop_c, "flops/cycle", constraint=nmi_constraint), 393d666f017SIan Rogers ]), 394d666f017SIan Rogers MetricGroup("lpm_fpu_64", [ 395d666f017SIan Rogers FpuMetrics("lpm_fpu_64_single", s_64, 1, "64-bit single"), 396d666f017SIan Rogers FpuMetrics("lpm_fpu_64_double", d_64, 1, "64-bit double"), 397d666f017SIan Rogers ]), 398d666f017SIan Rogers MetricGroup("lpm_fpu_128", [ 399d666f017SIan Rogers FpuMetrics("lpm_fpu_128_single", s_128, 400d666f017SIan Rogers 4, "128-bit packed single"), 401d666f017SIan Rogers FpuMetrics("lpm_fpu_128_double", d_128, 402d666f017SIan Rogers 2, "128-bit packed double"), 403d666f017SIan Rogers ]), 404d666f017SIan Rogers MetricGroup("lpm_fpu_256", [ 405d666f017SIan Rogers FpuMetrics("lpm_fpu_256_single", s_256, 406d666f017SIan Rogers 8, "128-bit packed single"), 407d666f017SIan Rogers FpuMetrics("lpm_fpu_256_double", d_256, 408d666f017SIan Rogers 4, "128-bit packed double"), 409d666f017SIan Rogers ]), 410d666f017SIan Rogers MetricGroup("lpm_fpu_512", [ 411d666f017SIan Rogers FpuMetrics("lpm_fpu_512_single", s_512, 412d666f017SIan Rogers 16, "128-bit packed single"), 413d666f017SIan Rogers FpuMetrics("lpm_fpu_512_double", d_512, 414d666f017SIan Rogers 8, "128-bit packed double"), 415d666f017SIan Rogers ]), 416d666f017SIan Rogers Metric("lpm_fpu_assists", "FP assists as a percentage of cycles", 417d666f017SIan Rogers d_ratio(f_assist, cyc), "100%"), 418d666f017SIan Rogers ]) 419d666f017SIan Rogers 420d666f017SIan Rogers 42159341f4eSIan Rogersdef IntelIlp() -> MetricGroup: 42259341f4eSIan Rogers tsc = Event("msr/tsc/") 42359341f4eSIan Rogers c0 = Event("msr/mperf/") 42459341f4eSIan Rogers low = tsc - c0 42559341f4eSIan Rogers inst_ret = Event("INST_RETIRED.ANY_P") 42659341f4eSIan Rogers inst_ret_c = [Event(f"{inst_ret.name}/cmask={x}/") for x in range(1, 6)] 42759341f4eSIan Rogers core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY", 42859341f4eSIan Rogers "CPU_CLK_UNHALTED.DISTRIBUTED", 42959341f4eSIan Rogers "cycles") 43059341f4eSIan Rogers ilp = [d_ratio(max(inst_ret_c[x] - inst_ret_c[x + 1], 0), core_cycles) 43159341f4eSIan Rogers for x in range(0, 4)] 43259341f4eSIan Rogers ilp.append(d_ratio(inst_ret_c[4], core_cycles)) 43359341f4eSIan Rogers ilp0 = 1 43459341f4eSIan Rogers for x in ilp: 43559341f4eSIan Rogers ilp0 -= x 43659341f4eSIan Rogers return MetricGroup("lpm_ilp", [ 43759341f4eSIan Rogers Metric("lpm_ilp_idle", "Lower power cycles as a percentage of all cycles", 43859341f4eSIan Rogers d_ratio(low, tsc), "100%"), 43959341f4eSIan Rogers Metric("lpm_ilp_inst_ret_0", 44059341f4eSIan Rogers "Instructions retired in 0 cycles as a percentage of all cycles", 44159341f4eSIan Rogers ilp0, "100%"), 44259341f4eSIan Rogers Metric("lpm_ilp_inst_ret_1", 44359341f4eSIan Rogers "Instructions retired in 1 cycles as a percentage of all cycles", 44459341f4eSIan Rogers ilp[0], "100%"), 44559341f4eSIan Rogers Metric("lpm_ilp_inst_ret_2", 44659341f4eSIan Rogers "Instructions retired in 2 cycles as a percentage of all cycles", 44759341f4eSIan Rogers ilp[1], "100%"), 44859341f4eSIan Rogers Metric("lpm_ilp_inst_ret_3", 44959341f4eSIan Rogers "Instructions retired in 3 cycles as a percentage of all cycles", 45059341f4eSIan Rogers ilp[2], "100%"), 45159341f4eSIan Rogers Metric("lpm_ilp_inst_ret_4", 45259341f4eSIan Rogers "Instructions retired in 4 cycles as a percentage of all cycles", 45359341f4eSIan Rogers ilp[3], "100%"), 45459341f4eSIan Rogers Metric("lpm_ilp_inst_ret_5", 45559341f4eSIan Rogers "Instructions retired in 5 or more cycles as a percentage of all cycles", 45659341f4eSIan Rogers ilp[4], "100%"), 45759341f4eSIan Rogers ]) 45859341f4eSIan Rogers 45959341f4eSIan Rogers 4607413633eSIan Rogersdef IntelL2() -> Optional[MetricGroup]: 4617413633eSIan Rogers try: 4627413633eSIan Rogers DC_HIT = Event("L2_RQSTS.DEMAND_DATA_RD_HIT") 4637413633eSIan Rogers except: 4647413633eSIan Rogers return None 4657413633eSIan Rogers try: 4667413633eSIan Rogers DC_MISS = Event("L2_RQSTS.DEMAND_DATA_RD_MISS") 4677413633eSIan Rogers l2_dmnd_miss = DC_MISS 4687413633eSIan Rogers l2_dmnd_rd_all = DC_MISS + DC_HIT 4697413633eSIan Rogers except: 4707413633eSIan Rogers DC_ALL = Event("L2_RQSTS.ALL_DEMAND_DATA_RD") 4717413633eSIan Rogers l2_dmnd_miss = DC_ALL - DC_HIT 4727413633eSIan Rogers l2_dmnd_rd_all = DC_ALL 4737413633eSIan Rogers l2_dmnd_mrate = d_ratio(l2_dmnd_miss, interval_sec) 4747413633eSIan Rogers l2_dmnd_rrate = d_ratio(l2_dmnd_rd_all, interval_sec) 4757413633eSIan Rogers 4767413633eSIan Rogers DC_PFH = None 4777413633eSIan Rogers DC_PFM = None 4787413633eSIan Rogers l2_pf_all = None 4797413633eSIan Rogers l2_pf_mrate = None 4807413633eSIan Rogers l2_pf_rrate = None 4817413633eSIan Rogers try: 4827413633eSIan Rogers DC_PFH = Event("L2_RQSTS.PF_HIT") 4837413633eSIan Rogers DC_PFM = Event("L2_RQSTS.PF_MISS") 4847413633eSIan Rogers l2_pf_all = DC_PFH + DC_PFM 4857413633eSIan Rogers l2_pf_mrate = d_ratio(DC_PFM, interval_sec) 4867413633eSIan Rogers l2_pf_rrate = d_ratio(l2_pf_all, interval_sec) 4877413633eSIan Rogers except: 4887413633eSIan Rogers pass 4897413633eSIan Rogers 4907413633eSIan Rogers DC_RFOH = None 4917413633eSIan Rogers DC_RFOM = None 4927413633eSIan Rogers l2_rfo_all = None 4937413633eSIan Rogers l2_rfo_mrate = None 4947413633eSIan Rogers l2_rfo_rrate = None 4957413633eSIan Rogers try: 4967413633eSIan Rogers DC_RFOH = Event("L2_RQSTS.RFO_HIT") 4977413633eSIan Rogers DC_RFOM = Event("L2_RQSTS.RFO_MISS") 4987413633eSIan Rogers l2_rfo_all = DC_RFOH + DC_RFOM 4997413633eSIan Rogers l2_rfo_mrate = d_ratio(DC_RFOM, interval_sec) 5007413633eSIan Rogers l2_rfo_rrate = d_ratio(l2_rfo_all, interval_sec) 5017413633eSIan Rogers except: 5027413633eSIan Rogers pass 5037413633eSIan Rogers 5047413633eSIan Rogers DC_CH = None 5057413633eSIan Rogers try: 5067413633eSIan Rogers DC_CH = Event("L2_RQSTS.CODE_RD_HIT") 5077413633eSIan Rogers except: 5087413633eSIan Rogers pass 5097413633eSIan Rogers DC_CM = Event("L2_RQSTS.CODE_RD_MISS") 5107413633eSIan Rogers DC_IN = Event("L2_LINES_IN.ALL") 5117413633eSIan Rogers DC_OUT_NS = None 5127413633eSIan Rogers DC_OUT_S = None 5137413633eSIan Rogers l2_lines_out = None 5147413633eSIan Rogers l2_out_rate = None 5157413633eSIan Rogers wbn = None 5167413633eSIan Rogers isd = None 5177413633eSIan Rogers try: 5187413633eSIan Rogers DC_OUT_NS = Event("L2_LINES_OUT.NON_SILENT", 5197413633eSIan Rogers "L2_LINES_OUT.DEMAND_DIRTY", 5207413633eSIan Rogers "L2_LINES_IN.S") 5217413633eSIan Rogers DC_OUT_S = Event("L2_LINES_OUT.SILENT", 5227413633eSIan Rogers "L2_LINES_OUT.DEMAND_CLEAN", 5237413633eSIan Rogers "L2_LINES_IN.I") 5247413633eSIan Rogers if DC_OUT_S.name == "L2_LINES_OUT.SILENT" and ( 5257413633eSIan Rogers args.model.startswith("skylake") or 5267413633eSIan Rogers args.model == "cascadelakex"): 5277413633eSIan Rogers DC_OUT_S.name = "L2_LINES_OUT.SILENT/any/" 5287413633eSIan Rogers # bring is back to per-CPU 5297413633eSIan Rogers l2_s = Select(DC_OUT_S / 2, Literal("#smt_on"), DC_OUT_S) 5307413633eSIan Rogers l2_ns = DC_OUT_NS 5317413633eSIan Rogers l2_lines_out = l2_s + l2_ns 5327413633eSIan Rogers l2_out_rate = d_ratio(l2_lines_out, interval_sec) 5337413633eSIan Rogers nlr = max(l2_ns - DC_WB_U - DC_WB_D, 0) 5347413633eSIan Rogers wbn = d_ratio(nlr, interval_sec) 5357413633eSIan Rogers isd = d_ratio(l2_s, interval_sec) 5367413633eSIan Rogers except: 5377413633eSIan Rogers pass 5387413633eSIan Rogers DC_OUT_U = None 5397413633eSIan Rogers l2_pf_useless = None 5407413633eSIan Rogers l2_useless_rate = None 5417413633eSIan Rogers try: 5427413633eSIan Rogers DC_OUT_U = Event("L2_LINES_OUT.USELESS_HWPF") 5437413633eSIan Rogers l2_pf_useless = DC_OUT_U 5447413633eSIan Rogers l2_useless_rate = d_ratio(l2_pf_useless, interval_sec) 5457413633eSIan Rogers except: 5467413633eSIan Rogers pass 5477413633eSIan Rogers DC_WB_U = None 5487413633eSIan Rogers DC_WB_D = None 5497413633eSIan Rogers wbu = None 5507413633eSIan Rogers wbd = None 5517413633eSIan Rogers try: 5527413633eSIan Rogers DC_WB_U = Event("IDI_MISC.WB_UPGRADE") 5537413633eSIan Rogers DC_WB_D = Event("IDI_MISC.WB_DOWNGRADE") 5547413633eSIan Rogers wbu = d_ratio(DC_WB_U, interval_sec) 5557413633eSIan Rogers wbd = d_ratio(DC_WB_D, interval_sec) 5567413633eSIan Rogers except: 5577413633eSIan Rogers pass 5587413633eSIan Rogers 5597413633eSIan Rogers l2_lines_in = DC_IN 5607413633eSIan Rogers l2_code_all = (DC_CH + DC_CM) if DC_CH else None 5617413633eSIan Rogers l2_code_rate = d_ratio(l2_code_all, interval_sec) if DC_CH else None 5627413633eSIan Rogers l2_code_miss_rate = d_ratio(DC_CM, interval_sec) 5637413633eSIan Rogers l2_in_rate = d_ratio(l2_lines_in, interval_sec) 5647413633eSIan Rogers 5657413633eSIan Rogers return MetricGroup("lpm_l2", [ 5667413633eSIan Rogers MetricGroup("lpm_l2_totals", [ 5677413633eSIan Rogers Metric("lpm_l2_totals_in", "L2 cache total in per second", 5687413633eSIan Rogers l2_in_rate, "In/s"), 5697413633eSIan Rogers Metric("lpm_l2_totals_out", "L2 cache total out per second", 5707413633eSIan Rogers l2_out_rate, "Out/s") if l2_out_rate else None, 5717413633eSIan Rogers ]), 5727413633eSIan Rogers MetricGroup("lpm_l2_rd", [ 5737413633eSIan Rogers Metric("lpm_l2_rd_hits", "L2 cache data read hits", 5747413633eSIan Rogers d_ratio(DC_HIT, l2_dmnd_rd_all), "100%"), 5757413633eSIan Rogers Metric("lpm_l2_rd_hits", "L2 cache data read hits", 5767413633eSIan Rogers d_ratio(l2_dmnd_miss, l2_dmnd_rd_all), "100%"), 5777413633eSIan Rogers Metric("lpm_l2_rd_requests", "L2 cache data read requests per second", 5787413633eSIan Rogers l2_dmnd_rrate, "requests/s"), 5797413633eSIan Rogers Metric("lpm_l2_rd_misses", "L2 cache data read misses per second", 5807413633eSIan Rogers l2_dmnd_mrate, "misses/s"), 5817413633eSIan Rogers ]), 5827413633eSIan Rogers MetricGroup("lpm_l2_hwpf", [ 5837413633eSIan Rogers Metric("lpm_l2_hwpf_hits", "L2 cache hardware prefetcher hits", 5847413633eSIan Rogers d_ratio(DC_PFH, l2_pf_all), "100%"), 5857413633eSIan Rogers Metric("lpm_l2_hwpf_misses", "L2 cache hardware prefetcher misses", 5867413633eSIan Rogers d_ratio(DC_PFM, l2_pf_all), "100%"), 5877413633eSIan Rogers Metric("lpm_l2_hwpf_useless", "L2 cache hardware prefetcher useless prefetches per second", 5887413633eSIan Rogers l2_useless_rate, "100%") if l2_useless_rate else None, 5897413633eSIan Rogers Metric("lpm_l2_hwpf_requests", "L2 cache hardware prefetcher requests per second", 5907413633eSIan Rogers l2_pf_rrate, "100%"), 5917413633eSIan Rogers Metric("lpm_l2_hwpf_misses", "L2 cache hardware prefetcher misses per second", 5927413633eSIan Rogers l2_pf_mrate, "100%"), 5937413633eSIan Rogers ]) if DC_PFH else None, 5947413633eSIan Rogers MetricGroup("lpm_l2_rfo", [ 5957413633eSIan Rogers Metric("lpm_l2_rfo_hits", "L2 cache request for ownership (RFO) hits", 5967413633eSIan Rogers d_ratio(DC_RFOH, l2_rfo_all), "100%"), 5977413633eSIan Rogers Metric("lpm_l2_rfo_misses", "L2 cache request for ownership (RFO) misses", 5987413633eSIan Rogers d_ratio(DC_RFOM, l2_rfo_all), "100%"), 5997413633eSIan Rogers Metric("lpm_l2_rfo_requests", "L2 cache request for ownership (RFO) requests per second", 6007413633eSIan Rogers l2_rfo_rrate, "requests/s"), 6017413633eSIan Rogers Metric("lpm_l2_rfo_misses", "L2 cache request for ownership (RFO) misses per second", 6027413633eSIan Rogers l2_rfo_mrate, "misses/s"), 6037413633eSIan Rogers ]) if DC_RFOH else None, 6047413633eSIan Rogers MetricGroup("lpm_l2_code", [ 6057413633eSIan Rogers Metric("lpm_l2_code_hits", "L2 cache code hits", 6067413633eSIan Rogers d_ratio(DC_CH, l2_code_all), "100%") if DC_CH else None, 6077413633eSIan Rogers Metric("lpm_l2_code_misses", "L2 cache code misses", 6087413633eSIan Rogers d_ratio(DC_CM, l2_code_all), "100%") if DC_CH else None, 6097413633eSIan Rogers Metric("lpm_l2_code_requests", "L2 cache code requests per second", 6107413633eSIan Rogers l2_code_rate, "requests/s") if DC_CH else None, 6117413633eSIan Rogers Metric("lpm_l2_code_misses", "L2 cache code misses per second", 6127413633eSIan Rogers l2_code_miss_rate, "misses/s"), 6137413633eSIan Rogers ]), 6147413633eSIan Rogers MetricGroup("lpm_l2_evict", [ 6157413633eSIan Rogers MetricGroup("lpm_l2_evict_mef_lines", [ 6167413633eSIan Rogers Metric("lpm_l2_evict_mef_lines_l3_hot_lru", "L2 evictions M/E/F lines L3 hot LRU per second", 6177413633eSIan Rogers wbu, "HotLRU/s") if wbu else None, 6187413633eSIan Rogers Metric("lpm_l2_evict_mef_lines_l3_norm_lru", "L2 evictions M/E/F lines L3 normal LRU per second", 6197413633eSIan Rogers wbn, "NormLRU/s") if wbn else None, 6207413633eSIan Rogers Metric("lpm_l2_evict_mef_lines_dropped", "L2 evictions M/E/F lines dropped per second", 6217413633eSIan Rogers wbd, "dropped/s") if wbd else None, 6227413633eSIan Rogers Metric("lpm_l2_evict_is_lines_dropped", "L2 evictions I/S lines dropped per second", 6237413633eSIan Rogers isd, "dropped/s") if isd else None, 6247413633eSIan Rogers ]), 6257413633eSIan Rogers ]), 6267413633eSIan Rogers ], description="L2 data cache analysis") 6277413633eSIan Rogers 6287413633eSIan Rogers 6296ec3058eSIan Rogersdef IntelMissLat() -> Optional[MetricGroup]: 6306ec3058eSIan Rogers try: 6316ec3058eSIan Rogers ticks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS") 6326ec3058eSIan Rogers data_rd_loc_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", 6336ec3058eSIan Rogers "UNC_CHA_TOR_OCCUPANCY.IA_MISS", 6346ec3058eSIan Rogers "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE", 6356ec3058eSIan Rogers "UNC_C_TOR_OCCUPANCY.MISS_OPCODE") 6366ec3058eSIan Rogers data_rd_loc_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL", 6376ec3058eSIan Rogers "UNC_CHA_TOR_INSERTS.IA_MISS", 6386ec3058eSIan Rogers "UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE", 6396ec3058eSIan Rogers "UNC_C_TOR_INSERTS.MISS_OPCODE") 6406ec3058eSIan Rogers data_rd_rem_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE", 6416ec3058eSIan Rogers "UNC_CHA_TOR_OCCUPANCY.IA_MISS", 6426ec3058eSIan Rogers "UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE", 6436ec3058eSIan Rogers "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE") 6446ec3058eSIan Rogers data_rd_rem_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE", 6456ec3058eSIan Rogers "UNC_CHA_TOR_INSERTS.IA_MISS", 6466ec3058eSIan Rogers "UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE", 6476ec3058eSIan Rogers "UNC_C_TOR_INSERTS.NID_MISS_OPCODE") 6486ec3058eSIan Rogers except: 6496ec3058eSIan Rogers return None 6506ec3058eSIan Rogers 6516ec3058eSIan Rogers if (data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE" or 6526ec3058eSIan Rogers data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_OPCODE"): 6536ec3058eSIan Rogers data_rd = 0x182 6546ec3058eSIan Rogers for e in [data_rd_loc_occ, data_rd_loc_ins, data_rd_rem_occ, data_rd_rem_ins]: 6556ec3058eSIan Rogers e.name += f"/filter_opc={hex(data_rd)}/" 6566ec3058eSIan Rogers elif data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS": 6576ec3058eSIan Rogers # Demand Data Read - Full cache-line read requests from core for 6586ec3058eSIan Rogers # lines to be cached in S or E, typically for data 6596ec3058eSIan Rogers demand_data_rd = 0x202 6606ec3058eSIan Rogers # LLC Prefetch Data - Uncore will first look up the line in the 6616ec3058eSIan Rogers # LLC; for a cache hit, the LRU will be updated, on a miss, the 6626ec3058eSIan Rogers # DRd will be initiated 6636ec3058eSIan Rogers llc_prefetch_data = 0x25a 6646ec3058eSIan Rogers local_filter = (f"/filter_opc0={hex(demand_data_rd)}," 6656ec3058eSIan Rogers f"filter_opc1={hex(llc_prefetch_data)}," 6666ec3058eSIan Rogers "filter_loc,filter_nm,filter_not_nm/") 6676ec3058eSIan Rogers remote_filter = (f"/filter_opc0={hex(demand_data_rd)}," 6686ec3058eSIan Rogers f"filter_opc1={hex(llc_prefetch_data)}," 6696ec3058eSIan Rogers "filter_rem,filter_nm,filter_not_nm/") 6706ec3058eSIan Rogers for e in [data_rd_loc_occ, data_rd_loc_ins]: 6716ec3058eSIan Rogers e.name += local_filter 6726ec3058eSIan Rogers for e in [data_rd_rem_occ, data_rd_rem_ins]: 6736ec3058eSIan Rogers e.name += remote_filter 6746ec3058eSIan Rogers else: 6756ec3058eSIan Rogers assert data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", data_rd_loc_occ 6766ec3058eSIan Rogers 6776ec3058eSIan Rogers ticks_per_cha = ticks / source_count(data_rd_loc_ins) 6786ec3058eSIan Rogers loc_lat = interval_sec * 1e9 * data_rd_loc_occ / \ 6796ec3058eSIan Rogers (ticks_per_cha * data_rd_loc_ins) 6806ec3058eSIan Rogers ticks_per_cha = ticks / source_count(data_rd_rem_ins) 6816ec3058eSIan Rogers rem_lat = interval_sec * 1e9 * data_rd_rem_occ / \ 6826ec3058eSIan Rogers (ticks_per_cha * data_rd_rem_ins) 6836ec3058eSIan Rogers return MetricGroup("lpm_miss_lat", [ 6846ec3058eSIan Rogers Metric("lpm_miss_lat_loc", "Local to a socket miss latency in nanoseconds", 6856ec3058eSIan Rogers loc_lat, "ns"), 6866ec3058eSIan Rogers Metric("lpm_miss_lat_rem", "Remote to a socket miss latency in nanoseconds", 6876ec3058eSIan Rogers rem_lat, "ns"), 6886ec3058eSIan Rogers ]) 6896ec3058eSIan Rogers 6906ec3058eSIan Rogers 691426b8442SIan Rogersdef IntelMlp() -> Optional[Metric]: 692426b8442SIan Rogers try: 693426b8442SIan Rogers l1d = Event("L1D_PEND_MISS.PENDING") 694426b8442SIan Rogers l1dc = Event("L1D_PEND_MISS.PENDING_CYCLES") 695426b8442SIan Rogers except: 696426b8442SIan Rogers return None 697426b8442SIan Rogers 698426b8442SIan Rogers l1dc = Select(l1dc / 2, Literal("#smt_on"), l1dc) 699426b8442SIan Rogers ml = d_ratio(l1d, l1dc) 700426b8442SIan Rogers return Metric("lpm_mlp", 701426b8442SIan Rogers "Miss level parallelism - number of outstanding load misses per cycle (higher is better)", 702426b8442SIan Rogers ml, "load_miss_pending/cycle") 703426b8442SIan Rogers 704426b8442SIan Rogers 705cd1c6a48SIan Rogersdef IntelPorts() -> Optional[MetricGroup]: 706cd1c6a48SIan Rogers pipeline_events = json.load( 707cd1c6a48SIan Rogers open(f"{_args.events_path}/x86/{_args.model}/pipeline.json")) 708cd1c6a48SIan Rogers 709cd1c6a48SIan Rogers core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY", 710cd1c6a48SIan Rogers "CPU_CLK_UNHALTED.DISTRIBUTED", 711cd1c6a48SIan Rogers "cycles") 712cd1c6a48SIan Rogers # Number of CPU cycles scaled for SMT. 713cd1c6a48SIan Rogers smt_cycles = Select(core_cycles / 2, Literal("#smt_on"), core_cycles) 714cd1c6a48SIan Rogers 715cd1c6a48SIan Rogers metrics = [] 716cd1c6a48SIan Rogers for x in pipeline_events: 717cd1c6a48SIan Rogers if "EventName" in x and re.search("^UOPS_DISPATCHED.PORT", x["EventName"]): 718cd1c6a48SIan Rogers name = x["EventName"] 719cd1c6a48SIan Rogers port = re.search(r"(PORT_[0-9].*)", name).group(0).lower() 720cd1c6a48SIan Rogers if name.endswith("_CORE"): 721cd1c6a48SIan Rogers cyc = core_cycles 722cd1c6a48SIan Rogers else: 723cd1c6a48SIan Rogers cyc = smt_cycles 724cd1c6a48SIan Rogers metrics.append(Metric(f"lpm_{port}", f"{port} utilization (higher is better)", 725cd1c6a48SIan Rogers d_ratio(Event(name), cyc), "100%")) 726cd1c6a48SIan Rogers if len(metrics) == 0: 727cd1c6a48SIan Rogers return None 728cd1c6a48SIan Rogers 729cd1c6a48SIan Rogers return MetricGroup("lpm_ports", metrics, "functional unit (port) utilization -- " 730cd1c6a48SIan Rogers "fraction of cycles each port is utilized (higher is better)") 731cd1c6a48SIan Rogers 732cd1c6a48SIan Rogers 733397fdb3aSIan Rogersdef IntelSwpf() -> Optional[MetricGroup]: 734397fdb3aSIan Rogers ins = Event("instructions") 735397fdb3aSIan Rogers try: 736397fdb3aSIan Rogers s_ld = Event("MEM_INST_RETIRED.ALL_LOADS", 737397fdb3aSIan Rogers "MEM_UOPS_RETIRED.ALL_LOADS") 738397fdb3aSIan Rogers s_nta = Event("SW_PREFETCH_ACCESS.NTA") 739397fdb3aSIan Rogers s_t0 = Event("SW_PREFETCH_ACCESS.T0") 740397fdb3aSIan Rogers s_t1 = Event("SW_PREFETCH_ACCESS.T1_T2") 741397fdb3aSIan Rogers s_w = Event("SW_PREFETCH_ACCESS.PREFETCHW") 742397fdb3aSIan Rogers except: 743397fdb3aSIan Rogers return None 744397fdb3aSIan Rogers 745397fdb3aSIan Rogers all_sw = s_nta + s_t0 + s_t1 + s_w 746397fdb3aSIan Rogers swp_r = d_ratio(all_sw, interval_sec) 747397fdb3aSIan Rogers ins_r = d_ratio(ins, all_sw) 748397fdb3aSIan Rogers ld_r = d_ratio(s_ld, all_sw) 749397fdb3aSIan Rogers 750397fdb3aSIan Rogers return MetricGroup("lpm_swpf", [ 751397fdb3aSIan Rogers MetricGroup("lpm_swpf_totals", [ 752397fdb3aSIan Rogers Metric("lpm_swpf_totals_exec", "Software prefetch instructions per second", 753397fdb3aSIan Rogers swp_r, "swpf/s"), 754397fdb3aSIan Rogers Metric("lpm_swpf_totals_insn_per_pf", 755397fdb3aSIan Rogers "Average number of instructions between software prefetches", 756397fdb3aSIan Rogers ins_r, "insn/swpf"), 757397fdb3aSIan Rogers Metric("lpm_swpf_totals_loads_per_pf", 758397fdb3aSIan Rogers "Average number of loads between software prefetches", 759397fdb3aSIan Rogers ld_r, "loads/swpf"), 760397fdb3aSIan Rogers ]), 761397fdb3aSIan Rogers MetricGroup("lpm_swpf_bkdwn", [ 762397fdb3aSIan Rogers MetricGroup("lpm_swpf_bkdwn_nta", [ 763397fdb3aSIan Rogers Metric("lpm_swpf_bkdwn_nta_per_swpf", 764397fdb3aSIan Rogers "Software prefetch NTA instructions as a percent of all prefetch instructions", 765397fdb3aSIan Rogers d_ratio(s_nta, all_sw), "100%"), 766397fdb3aSIan Rogers Metric("lpm_swpf_bkdwn_nta_rate", 767397fdb3aSIan Rogers "Software prefetch NTA instructions per second", 768397fdb3aSIan Rogers d_ratio(s_nta, interval_sec), "insn/s"), 769397fdb3aSIan Rogers ]), 770397fdb3aSIan Rogers MetricGroup("lpm_swpf_bkdwn_t0", [ 771397fdb3aSIan Rogers Metric("lpm_swpf_bkdwn_t0_per_swpf", 772397fdb3aSIan Rogers "Software prefetch T0 instructions as a percent of all prefetch instructions", 773397fdb3aSIan Rogers d_ratio(s_t0, all_sw), "100%"), 774397fdb3aSIan Rogers Metric("lpm_swpf_bkdwn_t0_rate", 775397fdb3aSIan Rogers "Software prefetch T0 instructions per second", 776397fdb3aSIan Rogers d_ratio(s_t0, interval_sec), "insn/s"), 777397fdb3aSIan Rogers ]), 778397fdb3aSIan Rogers MetricGroup("lpm_swpf_bkdwn_t1_t2", [ 779397fdb3aSIan Rogers Metric("lpm_swpf_bkdwn_t1_t2_per_swpf", 780397fdb3aSIan Rogers "Software prefetch T1 or T2 instructions as a percent of all prefetch instructions", 781397fdb3aSIan Rogers d_ratio(s_t1, all_sw), "100%"), 782397fdb3aSIan Rogers Metric("lpm_swpf_bkdwn_t1_t2_rate", 783397fdb3aSIan Rogers "Software prefetch T1 or T2 instructions per second", 784397fdb3aSIan Rogers d_ratio(s_t1, interval_sec), "insn/s"), 785397fdb3aSIan Rogers ]), 786397fdb3aSIan Rogers MetricGroup("lpm_swpf_bkdwn_w", [ 787397fdb3aSIan Rogers Metric("lpm_swpf_bkdwn_w_per_swpf", 788397fdb3aSIan Rogers "Software prefetch W instructions as a percent of all prefetch instructions", 789397fdb3aSIan Rogers d_ratio(s_w, all_sw), "100%"), 790397fdb3aSIan Rogers Metric("lpm_swpf_bkdwn_w_rate", 791397fdb3aSIan Rogers "Software prefetch W instructions per second", 792397fdb3aSIan Rogers d_ratio(s_w, interval_sec), "insn/s"), 793397fdb3aSIan Rogers ]), 794397fdb3aSIan Rogers ]), 795397fdb3aSIan Rogers ], description="Software prefetch instruction breakdown") 796397fdb3aSIan Rogers 797397fdb3aSIan Rogers 798d80edef2SIan Rogersdef IntelLdSt() -> Optional[MetricGroup]: 799d80edef2SIan Rogers if _args.model in [ 800d80edef2SIan Rogers "bonnell", 801d80edef2SIan Rogers "nehalemep", 802d80edef2SIan Rogers "nehalemex", 803d80edef2SIan Rogers "westmereep-dp", 804d80edef2SIan Rogers "westmereep-sp", 805d80edef2SIan Rogers "westmereex", 806d80edef2SIan Rogers ]: 807d80edef2SIan Rogers return None 808d80edef2SIan Rogers LDST_LD = Event("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS") 809d80edef2SIan Rogers LDST_ST = Event("MEM_INST_RETIRED.ALL_STORES", 810d80edef2SIan Rogers "MEM_UOPS_RETIRED.ALL_STORES") 811d80edef2SIan Rogers LDST_LDC1 = Event(f"{LDST_LD.name}/cmask=1/") 812d80edef2SIan Rogers LDST_STC1 = Event(f"{LDST_ST.name}/cmask=1/") 813d80edef2SIan Rogers LDST_LDC2 = Event(f"{LDST_LD.name}/cmask=2/") 814d80edef2SIan Rogers LDST_STC2 = Event(f"{LDST_ST.name}/cmask=2/") 815d80edef2SIan Rogers LDST_LDC3 = Event(f"{LDST_LD.name}/cmask=3/") 816d80edef2SIan Rogers LDST_STC3 = Event(f"{LDST_ST.name}/cmask=3/") 817d80edef2SIan Rogers ins = Event("instructions") 818d80edef2SIan Rogers LDST_CYC = Event("CPU_CLK_UNHALTED.THREAD", 819d80edef2SIan Rogers "CPU_CLK_UNHALTED.CORE_P", 820d80edef2SIan Rogers "CPU_CLK_UNHALTED.THREAD_P") 821d80edef2SIan Rogers LDST_PRE = None 822d80edef2SIan Rogers try: 823d80edef2SIan Rogers LDST_PRE = Event("LOAD_HIT_PREFETCH.SWPF", "LOAD_HIT_PRE.SW_PF") 824d80edef2SIan Rogers except: 825d80edef2SIan Rogers pass 826d80edef2SIan Rogers LDST_AT = None 827d80edef2SIan Rogers try: 828d80edef2SIan Rogers LDST_AT = Event("MEM_INST_RETIRED.LOCK_LOADS") 829d80edef2SIan Rogers except: 830d80edef2SIan Rogers pass 831d80edef2SIan Rogers cyc = LDST_CYC 832d80edef2SIan Rogers 833d80edef2SIan Rogers ld_rate = d_ratio(LDST_LD, interval_sec) 834d80edef2SIan Rogers st_rate = d_ratio(LDST_ST, interval_sec) 835d80edef2SIan Rogers pf_rate = d_ratio(LDST_PRE, interval_sec) if LDST_PRE else None 836d80edef2SIan Rogers at_rate = d_ratio(LDST_AT, interval_sec) if LDST_AT else None 837d80edef2SIan Rogers 838d80edef2SIan Rogers ldst_ret_constraint = MetricConstraint.GROUPED_EVENTS 839d80edef2SIan Rogers if LDST_LD.name == "MEM_UOPS_RETIRED.ALL_LOADS": 840d80edef2SIan Rogers ldst_ret_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI 841d80edef2SIan Rogers 842d80edef2SIan Rogers return MetricGroup("lpm_ldst", [ 843d80edef2SIan Rogers MetricGroup("lpm_ldst_total", [ 844d80edef2SIan Rogers Metric("lpm_ldst_total_loads", "Load/store instructions total loads", 845d80edef2SIan Rogers ld_rate, "loads"), 846d80edef2SIan Rogers Metric("lpm_ldst_total_stores", "Load/store instructions total stores", 847d80edef2SIan Rogers st_rate, "stores"), 848d80edef2SIan Rogers ]), 849d80edef2SIan Rogers MetricGroup("lpm_ldst_prcnt", [ 850d80edef2SIan Rogers Metric("lpm_ldst_prcnt_loads", "Percent of all instructions that are loads", 851d80edef2SIan Rogers d_ratio(LDST_LD, ins), "100%"), 852d80edef2SIan Rogers Metric("lpm_ldst_prcnt_stores", "Percent of all instructions that are stores", 853d80edef2SIan Rogers d_ratio(LDST_ST, ins), "100%"), 854d80edef2SIan Rogers ]), 855d80edef2SIan Rogers MetricGroup("lpm_ldst_ret_lds", [ 856d80edef2SIan Rogers Metric("lpm_ldst_ret_lds_1", "Retired loads in 1 cycle", 857d80edef2SIan Rogers d_ratio(max(LDST_LDC1 - LDST_LDC2, 0), cyc), "100%", 858d80edef2SIan Rogers constraint=ldst_ret_constraint), 859d80edef2SIan Rogers Metric("lpm_ldst_ret_lds_2", "Retired loads in 2 cycles", 860d80edef2SIan Rogers d_ratio(max(LDST_LDC2 - LDST_LDC3, 0), cyc), "100%", 861d80edef2SIan Rogers constraint=ldst_ret_constraint), 862d80edef2SIan Rogers Metric("lpm_ldst_ret_lds_3", "Retired loads in 3 or more cycles", 863d80edef2SIan Rogers d_ratio(LDST_LDC3, cyc), "100%"), 864d80edef2SIan Rogers ]), 865d80edef2SIan Rogers MetricGroup("lpm_ldst_ret_sts", [ 866d80edef2SIan Rogers Metric("lpm_ldst_ret_sts_1", "Retired stores in 1 cycle", 867d80edef2SIan Rogers d_ratio(max(LDST_STC1 - LDST_STC2, 0), cyc), "100%", 868d80edef2SIan Rogers constraint=ldst_ret_constraint), 869d80edef2SIan Rogers Metric("lpm_ldst_ret_sts_2", "Retired stores in 2 cycles", 870d80edef2SIan Rogers d_ratio(max(LDST_STC2 - LDST_STC3, 0), cyc), "100%", 871d80edef2SIan Rogers constraint=ldst_ret_constraint), 872d80edef2SIan Rogers Metric("lpm_ldst_ret_sts_3", "Retired stores in 3 more cycles", 873d80edef2SIan Rogers d_ratio(LDST_STC3, cyc), "100%"), 874d80edef2SIan Rogers ]), 875d80edef2SIan Rogers Metric("lpm_ldst_ld_hit_swpf", "Load hit software prefetches per second", 876d80edef2SIan Rogers pf_rate, "swpf/s") if pf_rate else None, 877d80edef2SIan Rogers Metric("lpm_ldst_atomic_lds", "Atomic loads per second", 878d80edef2SIan Rogers at_rate, "loads/s") if at_rate else None, 879d80edef2SIan Rogers ], description="Breakdown of load/store instructions") 880d80edef2SIan Rogers 881d80edef2SIan Rogers 8821fee2701SIan Rogersdef UncoreCState() -> Optional[MetricGroup]: 8831fee2701SIan Rogers try: 8841fee2701SIan Rogers pcu_ticks = Event("UNC_P_CLOCKTICKS") 8851fee2701SIan Rogers c0 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C0") 8861fee2701SIan Rogers c3 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C3") 8871fee2701SIan Rogers c6 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C6") 8881fee2701SIan Rogers except: 8891fee2701SIan Rogers return None 8901fee2701SIan Rogers 8911fee2701SIan Rogers num_cores = Literal("#num_cores") / Literal("#num_packages") 8921fee2701SIan Rogers 8931fee2701SIan Rogers max_cycles = pcu_ticks * num_cores 8941fee2701SIan Rogers total_cycles = c0 + c3 + c6 8951fee2701SIan Rogers 8961fee2701SIan Rogers # remove fused-off cores which show up in C6/C7. 8971fee2701SIan Rogers c6 = Select(max(c6 - (total_cycles - max_cycles), 0), 8981fee2701SIan Rogers total_cycles > max_cycles, 8991fee2701SIan Rogers c6) 9001fee2701SIan Rogers 9011fee2701SIan Rogers return MetricGroup("lpm_cstate", [ 9021fee2701SIan Rogers Metric("lpm_cstate_c0", "C-State cores in C0/C1", 9031fee2701SIan Rogers d_ratio(c0, pcu_ticks), "cores"), 9041fee2701SIan Rogers Metric("lpm_cstate_c3", "C-State cores in C3", 9051fee2701SIan Rogers d_ratio(c3, pcu_ticks), "cores"), 9061fee2701SIan Rogers Metric("lpm_cstate_c6", "C-State cores in C6/C7", 9071fee2701SIan Rogers d_ratio(c6, pcu_ticks), "cores"), 9081fee2701SIan Rogers ]) 9091fee2701SIan Rogers 9101fee2701SIan Rogers 9112166b44bSIan Rogersdef UncoreDir() -> Optional[MetricGroup]: 9122166b44bSIan Rogers try: 9132166b44bSIan Rogers m2m_upd = Event("UNC_M2M_DIRECTORY_UPDATE.ANY") 9142166b44bSIan Rogers m2m_hits = Event("UNC_M2M_DIRECTORY_HIT.DIRTY_I") 9152166b44bSIan Rogers # Turn the umask into a ANY rather than DIRTY_I filter. 9162166b44bSIan Rogers m2m_hits.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_HIT.ANY/" 9172166b44bSIan Rogers m2m_miss = Event("UNC_M2M_DIRECTORY_MISS.DIRTY_I") 9182166b44bSIan Rogers # Turn the umask into a ANY rather than DIRTY_I filter. 9192166b44bSIan Rogers m2m_miss.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_MISS.ANY/" 9202166b44bSIan Rogers cha_upd = Event("UNC_CHA_DIR_UPDATE.HA") 9212166b44bSIan Rogers # Turn the umask into a ANY rather than HA filter. 9222166b44bSIan Rogers cha_upd.name += "/umask=3,name=UNC_CHA_DIR_UPDATE.ANY/" 9232166b44bSIan Rogers except: 9242166b44bSIan Rogers return None 9252166b44bSIan Rogers 9262166b44bSIan Rogers m2m_total = m2m_hits + m2m_miss 9272166b44bSIan Rogers upd = m2m_upd + cha_upd # in cache lines 9282166b44bSIan Rogers upd_r = upd / interval_sec 9292166b44bSIan Rogers look_r = m2m_total / interval_sec 9302166b44bSIan Rogers 9312166b44bSIan Rogers scale = 64 / 1_000_000 # Cache lines to MB 9322166b44bSIan Rogers return MetricGroup("lpm_dir", [ 9332166b44bSIan Rogers Metric("lpm_dir_lookup_rate", "", 9342166b44bSIan Rogers d_ratio(m2m_total, interval_sec), "requests/s"), 9352166b44bSIan Rogers Metric("lpm_dir_lookup_hits", "", 9362166b44bSIan Rogers d_ratio(m2m_hits, m2m_total), "100%"), 9372166b44bSIan Rogers Metric("lpm_dir_lookup_misses", "", 9382166b44bSIan Rogers d_ratio(m2m_miss, m2m_total), "100%"), 9392166b44bSIan Rogers Metric("lpm_dir_update_requests", "", 9402166b44bSIan Rogers d_ratio(m2m_upd + cha_upd, interval_sec), "requests/s"), 9412166b44bSIan Rogers Metric("lpm_dir_update_bw", "", 9422166b44bSIan Rogers d_ratio(m2m_upd + cha_upd, interval_sec), f"{scale}MB/s"), 9432166b44bSIan Rogers ]) 9442166b44bSIan Rogers 9452166b44bSIan Rogers 946cde9c1a5SIan Rogersdef UncoreMem() -> Optional[MetricGroup]: 947cde9c1a5SIan Rogers try: 948cde9c1a5SIan Rogers loc_rds = Event("UNC_CHA_REQUESTS.READS_LOCAL", 949cde9c1a5SIan Rogers "UNC_H_REQUESTS.READS_LOCAL") 950cde9c1a5SIan Rogers rem_rds = Event("UNC_CHA_REQUESTS.READS_REMOTE", 951cde9c1a5SIan Rogers "UNC_H_REQUESTS.READS_REMOTE") 952cde9c1a5SIan Rogers loc_wrs = Event("UNC_CHA_REQUESTS.WRITES_LOCAL", 953cde9c1a5SIan Rogers "UNC_H_REQUESTS.WRITES_LOCAL") 954cde9c1a5SIan Rogers rem_wrs = Event("UNC_CHA_REQUESTS.WRITES_REMOTE", 955cde9c1a5SIan Rogers "UNC_H_REQUESTS.WRITES_REMOTE") 956cde9c1a5SIan Rogers except: 957cde9c1a5SIan Rogers return None 958cde9c1a5SIan Rogers 959cde9c1a5SIan Rogers scale = 64 / 1_000_000 960cde9c1a5SIan Rogers return MetricGroup("lpm_mem", [ 961cde9c1a5SIan Rogers MetricGroup("lpm_mem_local", [ 962cde9c1a5SIan Rogers Metric("lpm_mem_local_read", "Local memory read bandwidth not including directory updates", 963cde9c1a5SIan Rogers d_ratio(loc_rds, interval_sec), f"{scale}MB/s"), 964cde9c1a5SIan Rogers Metric("lpm_mem_local_write", "Local memory write bandwidth not including directory updates", 965cde9c1a5SIan Rogers d_ratio(loc_wrs, interval_sec), f"{scale}MB/s"), 966cde9c1a5SIan Rogers ]), 967cde9c1a5SIan Rogers MetricGroup("lpm_mem_remote", [ 968cde9c1a5SIan Rogers Metric("lpm_mem_remote_read", "Remote memory read bandwidth not including directory updates", 969cde9c1a5SIan Rogers d_ratio(rem_rds, interval_sec), f"{scale}MB/s"), 970cde9c1a5SIan Rogers Metric("lpm_mem_remote_write", "Remote memory write bandwidth not including directory updates", 971cde9c1a5SIan Rogers d_ratio(rem_wrs, interval_sec), f"{scale}MB/s"), 972cde9c1a5SIan Rogers ]), 973cde9c1a5SIan Rogers ], description="Memory Bandwidth breakdown local vs. remote (remote requests in). directory updates not included") 974cde9c1a5SIan Rogers 975cde9c1a5SIan Rogers 976130f4245SIan Rogersdef UncoreMemBw() -> Optional[MetricGroup]: 977130f4245SIan Rogers mem_events = [] 978130f4245SIan Rogers try: 979130f4245SIan Rogers mem_events = json.load(open(f"{os.path.dirname(os.path.realpath(__file__))}" 980130f4245SIan Rogers f"/arch/x86/{args.model}/uncore-memory.json")) 981130f4245SIan Rogers except: 982130f4245SIan Rogers pass 983130f4245SIan Rogers 984130f4245SIan Rogers ddr_rds = 0 985130f4245SIan Rogers ddr_wrs = 0 986130f4245SIan Rogers ddr_total = 0 987130f4245SIan Rogers for x in mem_events: 988130f4245SIan Rogers if "EventName" in x: 989130f4245SIan Rogers name = x["EventName"] 990130f4245SIan Rogers if re.search("^UNC_MC[0-9]+_RDCAS_COUNT_FREERUN", name): 991130f4245SIan Rogers ddr_rds += Event(name) 992130f4245SIan Rogers elif re.search("^UNC_MC[0-9]+_WRCAS_COUNT_FREERUN", name): 993130f4245SIan Rogers ddr_wrs += Event(name) 994130f4245SIan Rogers # elif re.search("^UNC_MC[0-9]+_TOTAL_REQCOUNT_FREERUN", name): 995130f4245SIan Rogers # ddr_total += Event(name) 996130f4245SIan Rogers 997130f4245SIan Rogers if ddr_rds == 0: 998130f4245SIan Rogers try: 999130f4245SIan Rogers ddr_rds = Event("UNC_M_CAS_COUNT.RD") 1000130f4245SIan Rogers ddr_wrs = Event("UNC_M_CAS_COUNT.WR") 1001130f4245SIan Rogers except: 1002130f4245SIan Rogers return None 1003130f4245SIan Rogers 1004130f4245SIan Rogers ddr_total = ddr_rds + ddr_wrs 1005130f4245SIan Rogers 1006130f4245SIan Rogers pmm_rds = 0 1007130f4245SIan Rogers pmm_wrs = 0 1008130f4245SIan Rogers try: 1009130f4245SIan Rogers pmm_rds = Event("UNC_M_PMM_RPQ_INSERTS") 1010130f4245SIan Rogers pmm_wrs = Event("UNC_M_PMM_WPQ_INSERTS") 1011130f4245SIan Rogers except: 1012130f4245SIan Rogers pass 1013130f4245SIan Rogers 1014130f4245SIan Rogers pmm_total = pmm_rds + pmm_wrs 1015130f4245SIan Rogers 1016130f4245SIan Rogers scale = 64 / 1_000_000 1017130f4245SIan Rogers return MetricGroup("lpm_mem_bw", [ 1018130f4245SIan Rogers MetricGroup("lpm_mem_bw_ddr", [ 1019130f4245SIan Rogers Metric("lpm_mem_bw_ddr_read", "DDR memory read bandwidth", 1020130f4245SIan Rogers d_ratio(ddr_rds, interval_sec), f"{scale}MB/s"), 1021130f4245SIan Rogers Metric("lpm_mem_bw_ddr_write", "DDR memory write bandwidth", 1022130f4245SIan Rogers d_ratio(ddr_wrs, interval_sec), f"{scale}MB/s"), 1023130f4245SIan Rogers Metric("lpm_mem_bw_ddr_total", "DDR memory write bandwidth", 1024130f4245SIan Rogers d_ratio(ddr_total, interval_sec), f"{scale}MB/s"), 1025130f4245SIan Rogers ], description="DDR Memory Bandwidth"), 1026130f4245SIan Rogers MetricGroup("lpm_mem_bw_pmm", [ 1027130f4245SIan Rogers Metric("lpm_mem_bw_pmm_read", "PMM memory read bandwidth", 1028130f4245SIan Rogers d_ratio(pmm_rds, interval_sec), f"{scale}MB/s"), 1029130f4245SIan Rogers Metric("lpm_mem_bw_pmm_write", "PMM memory write bandwidth", 1030130f4245SIan Rogers d_ratio(pmm_wrs, interval_sec), f"{scale}MB/s"), 1031130f4245SIan Rogers Metric("lpm_mem_bw_pmm_total", "PMM memory write bandwidth", 1032130f4245SIan Rogers d_ratio(pmm_total, interval_sec), f"{scale}MB/s"), 1033130f4245SIan Rogers ], description="PMM Memory Bandwidth") if pmm_rds != 0 else None, 1034130f4245SIan Rogers ], description="Memory Bandwidth") 1035130f4245SIan Rogers 1036130f4245SIan Rogers 1037e74f72a7SIan Rogersdef UncoreMemSat() -> Optional[Metric]: 1038e74f72a7SIan Rogers try: 1039e74f72a7SIan Rogers clocks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS") 1040e74f72a7SIan Rogers sat = Event("UNC_CHA_DISTRESS_ASSERTED.VERT", "UNC_CHA_FAST_ASSERTED.VERT", 1041e74f72a7SIan Rogers "UNC_C_FAST_ASSERTED") 1042e74f72a7SIan Rogers except: 1043e74f72a7SIan Rogers return None 1044e74f72a7SIan Rogers 1045e74f72a7SIan Rogers desc = ("Mesh Bandwidth saturation (% CBOX cycles with FAST signal asserted, " 1046e74f72a7SIan Rogers "include QPI bandwidth saturation), lower is better") 1047e74f72a7SIan Rogers if "UNC_CHA_" in sat.name: 1048e74f72a7SIan Rogers desc = ("Mesh Bandwidth saturation (% CHA cycles with FAST signal asserted, " 1049e74f72a7SIan Rogers "include UPI bandwidth saturation), lower is better") 1050e74f72a7SIan Rogers return Metric("lpm_mem_sat", desc, d_ratio(sat, clocks), "100%") 1051e74f72a7SIan Rogers 1052e74f72a7SIan Rogers 10535dc81578SIan Rogersdef UncoreUpiBw() -> Optional[MetricGroup]: 10545dc81578SIan Rogers try: 10555dc81578SIan Rogers upi_rds = Event("UNC_UPI_RxL_FLITS.ALL_DATA") 10565dc81578SIan Rogers upi_wrs = Event("UNC_UPI_TxL_FLITS.ALL_DATA") 10575dc81578SIan Rogers except: 10585dc81578SIan Rogers return None 10595dc81578SIan Rogers 10605dc81578SIan Rogers upi_total = upi_rds + upi_wrs 10615dc81578SIan Rogers 10625dc81578SIan Rogers # From "Uncore Performance Monitoring": When measuring the amount of 10635dc81578SIan Rogers # bandwidth consumed by transmission of the data (i.e. NOT including 10645dc81578SIan Rogers # the header), it should be .ALL_DATA / 9 * 64B. 10655dc81578SIan Rogers scale = (64 / 9) / 1_000_000 10665dc81578SIan Rogers return MetricGroup("lpm_upi_bw", [ 10675dc81578SIan Rogers Metric("lpm_upi_bw_read", "UPI read bandwidth", 10685dc81578SIan Rogers d_ratio(upi_rds, interval_sec), f"{scale}MB/s"), 10695dc81578SIan Rogers Metric("lpm_upi_bw_write", "DDR memory write bandwidth", 10705dc81578SIan Rogers d_ratio(upi_wrs, interval_sec), f"{scale}MB/s"), 10715dc81578SIan Rogers ], description="UPI Bandwidth") 10725dc81578SIan Rogers 10735dc81578SIan Rogers 107419eab0efSIan Rogersdef main() -> None: 107519eab0efSIan Rogers global _args 107619eab0efSIan Rogers 107719eab0efSIan Rogers def dir_path(path: str) -> str: 107819eab0efSIan Rogers """Validate path is a directory for argparse.""" 107919eab0efSIan Rogers if os.path.isdir(path): 108019eab0efSIan Rogers return path 108119eab0efSIan Rogers raise argparse.ArgumentTypeError( 108219eab0efSIan Rogers f'\'{path}\' is not a valid directory') 108319eab0efSIan Rogers 108419eab0efSIan Rogers parser = argparse.ArgumentParser(description="Intel perf json generator") 108519eab0efSIan Rogers parser.add_argument( 108619eab0efSIan Rogers "-metricgroups", help="Generate metricgroups data", action='store_true') 108719eab0efSIan Rogers parser.add_argument("model", help="e.g. skylakex") 108819eab0efSIan Rogers parser.add_argument( 108919eab0efSIan Rogers 'events_path', 109019eab0efSIan Rogers type=dir_path, 109119eab0efSIan Rogers help='Root of tree containing architecture directories containing json files' 109219eab0efSIan Rogers ) 109319eab0efSIan Rogers _args = parser.parse_args() 109419eab0efSIan Rogers 10956bd6c5efSIan Rogers directory = f"{_args.events_path}/x86/{_args.model}/" 10966bd6c5efSIan Rogers LoadEvents(directory) 10976bd6c5efSIan Rogers 1098bab90b3bSIan Rogers all_metrics = MetricGroup("", [ 1099*82e53e7aSIan Rogers Cycles(), 11001d519e5aSIan Rogers Idle(), 1101bab90b3bSIan Rogers Rapl(), 110217d616b7SIan Rogers Smi(), 11038c345f35SIan Rogers Tsx(), 110437d0b00aSIan Rogers IntelBr(), 11052f3d6ea0SIan Rogers IntelCtxSw(), 1106d666f017SIan Rogers IntelFpu(), 110759341f4eSIan Rogers IntelIlp(), 11087413633eSIan Rogers IntelL2(), 1109d80edef2SIan Rogers IntelLdSt(), 11106ec3058eSIan Rogers IntelMissLat(), 1111426b8442SIan Rogers IntelMlp(), 1112cd1c6a48SIan Rogers IntelPorts(), 1113397fdb3aSIan Rogers IntelSwpf(), 11141fee2701SIan Rogers UncoreCState(), 11152166b44bSIan Rogers UncoreDir(), 1116cde9c1a5SIan Rogers UncoreMem(), 1117130f4245SIan Rogers UncoreMemBw(), 1118e74f72a7SIan Rogers UncoreMemSat(), 11195dc81578SIan Rogers UncoreUpiBw(), 1120bab90b3bSIan Rogers ]) 112119eab0efSIan Rogers 112219eab0efSIan Rogers if _args.metricgroups: 112319eab0efSIan Rogers print(JsonEncodeMetricGroupDescriptions(all_metrics)) 112419eab0efSIan Rogers else: 112519eab0efSIan Rogers print(JsonEncodeMetric(all_metrics)) 112619eab0efSIan Rogers 112719eab0efSIan Rogers 112819eab0efSIan Rogersif __name__ == '__main__': 112919eab0efSIan Rogers main() 1130