xref: /linux/tools/perf/pmu-events/intel_metrics.py (revision c7decec2f2d2ab0366567f9e30c0e1418cece43f)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
3import argparse
4import json
5import math
6import os
7import re
8from typing import Optional
9from common_metrics import Cycles
10from metric import (d_ratio, has_event, max, source_count, CheckPmu, Event,
11                    JsonEncodeMetric, JsonEncodeMetricGroupDescriptions,
12                    Literal, LoadEvents, Metric, MetricConstraint, MetricGroup,
13                    MetricRef, Select)
14
# Global command line arguments. Starts out as None; functions in this file
# read attributes such as _args.model and _args.events_path from it.
_args = None
# Event for the "duration_time" counter, used throughout this file as the
# denominator of the per-second rate metrics.
interval_sec = Event("duration_time")
18
19
def Idle() -> Metric:
    """Build the ``lpm_idle`` metric.

    The metric is the difference between the msr/tsc/ and msr/mperf/
    events, clamped at zero, expressed as a ratio of msr/tsc/.
    """
    mperf = Event("msr/mperf/")
    tsc = Event("msr/tsc/")
    description = ("Percentage of total wallclock cycles where CPUs are in "
                   "low power state (C1 or deeper sleep state)")
    # max() here is the metric-expression max imported from the metric
    # module, not the Python builtin.
    idle_ratio = d_ratio(max(tsc - mperf, 0), tsc)
    return Metric("lpm_idle", description, idle_ratio, "100%")
28
29
def Rapl() -> MetricGroup:
    """Estimate processor power consumption per energy domain.

    Uses the energy events of the running average power limit (RAPL)
    driver.  One "Watts" metric is produced for each of the pkg, cores,
    ram, gpu and psys domains; each event is wrapped in a Select on
    has_event() with NaN as the alternative value.
    """
    # Watts = joules/second.  The literal below equals 2**-32 and is applied
    # to the raw energy count before dividing by the elapsed seconds.
    scale = 2.3283064365386962890625e-10
    domains = ("pkg", "cores", "ram", "gpu", "psys")

    # Create every event/Select pair first, then the metrics, in the same
    # domain order.
    guarded = {}
    for domain in domains:
        energy = Event(f"power/energy\\-{domain}/")
        guarded[domain] = Select(energy, has_event(energy), math.nan)

    metrics = [
        Metric(f"lpm_cpu_power_{domain}", "",
               d_ratio(guarded[domain] * scale, interval_sec), "Watts")
        for domain in domains
    ]

    return MetricGroup("lpm_cpu_power", metrics,
                       description="Running Average Power Limit (RAPL) power consumption estimates")
62
63
def Smi() -> MetricGroup:
    """Build the ``smi`` group of System Management Interrupt metrics.

    Contains the SMI count and the fraction of msr/aperf/ cycles that are
    not seen by the 'cycles' event, the latter with a threshold at 0.10.
    """
    # Only used to word the sysfs path in the description text.
    pmu = "cpu"
    if CheckPmu("cpu_core"):
        pmu = "<cpu_core or cpu_atom>"

    aperf = Event('msr/aperf/')
    cycles = Event('cycles')
    smi_num = Event('msr/smi/')

    # Built inside-out: the ratio is first selected on smi_num > 0, then the
    # result is selected on the aperf event being present; 0 otherwise.
    missing_ratio = (aperf - cycles) / aperf
    ratio_if_smi = Select(missing_ratio, smi_num > 0, 0)
    smi_cycles = Select(ratio_if_smi, has_event(aperf), 0)

    count_metric = Metric('smi_num', 'Number of SMI interrupts.',
                          Select(smi_num, has_event(smi_num), 0), 'SMI#')
    cycles_desc = ('Percentage of cycles spent in System Management Interrupts. '
                   f'Requires /sys/bus/event_source/devices/{pmu}/freeze_on_smi to be 1.')
    # Note, the threshold refers to the smi_cycles metric by name via
    # MetricRef rather than to an event.
    cycles_metric = Metric('smi_cycles', cycles_desc, smi_cycles, '100%',
                           threshold=(MetricRef('smi_cycles') > 0.10))

    return MetricGroup('smi', [count_metric, cycles_metric],
                       description='System Management Interrupt metrics')
81
82
def Tsx() -> Optional[MetricGroup]:
    """Build the ``transaction`` group of transactional memory metrics.

    Returns:
        The metric group, or None when the transaction start event cannot
        be created.  The tsx_cycles_per_elision metric is only included
        when an elision start event could be created as well.
    """
    pmu = "cpu_core" if CheckPmu("cpu_core") else "cpu"
    cycles = Event('cycles')
    cycles_in_tx = Event(f'{pmu}/cycles\\-t/')
    cycles_in_tx_cp = Event(f'{pmu}/cycles\\-ct/')
    try:
        # Test if the tsx event is present in the json, prefer the
        # sysfs version so that we can detect its presence at runtime.
        transaction_start = Event("RTM_RETIRED.START")
        transaction_start = Event(f'{pmu}/tx\\-start/')
    except Exception:
        # Event creation failed: no TSX metrics for this model. Catching
        # Exception (not a bare except) keeps Ctrl-C/SystemExit working.
        return None

    elision_start = None
    try:
        # Elision start isn't supported by all models, but we'll not
        # generate the tsx_cycles_per_elision metric in that
        # case. Again, prefer the sysfs encoding of the event.
        elision_start = Event("HLE_RETIRED.START")
        elision_start = Event(f'{pmu}/el\\-start/')
    except Exception:
        pass

    return MetricGroup('transaction', [
        Metric('tsx_transactional_cycles',
               'Percentage of cycles within a transaction region.',
               Select(cycles_in_tx / cycles, has_event(cycles_in_tx), 0),
               '100%'),
        Metric('tsx_aborted_cycles', 'Percentage of cycles in aborted transactions.',
               Select(max(cycles_in_tx - cycles_in_tx_cp, 0) / cycles,
                      has_event(cycles_in_tx),
                      0),
               '100%'),
        Metric('tsx_cycles_per_transaction',
               'Number of cycles within a transaction divided by the number of transactions.',
               Select(cycles_in_tx / transaction_start,
                      has_event(cycles_in_tx),
                      0),
               "cycles / transaction"),
        Metric('tsx_cycles_per_elision',
               'Number of cycles within a transaction divided by the number of elisions.',
               Select(cycles_in_tx / elision_start,
                      has_event(elision_start),
                      0),
               "cycles / elision") if elision_start else None,
    ], description="Breakdown of transactional memory statistics")
129
130
def IntelBr():
    """Build the ``lpm_br`` group describing retired branch instructions.

    The group is assembled from four nested builders (Total, Taken,
    Conditional, Far).  Conditional() and Far() return None when their
    events cannot be created, and that None is passed on to MetricGroup.
    """
    ins = Event("instructions")

    def Total() -> MetricGroup:
        """Metrics over all retired branches."""
        br_all = Event("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY")
        br_m_all = Event("BR_MISP_RETIRED.ALL_BRANCHES",
                         "BR_INST_RETIRED.MISPRED",
                         "BR_MISP_EXEC.ANY")
        # The front-end resteer metric is optional.
        br_clr = None
        try:
            br_clr = Event("BACLEARS.ANY", "BACLEARS.ALL")
        except Exception:
            pass

        br_r = d_ratio(br_all, interval_sec)
        ins_r = d_ratio(ins, br_all)
        misp_r = d_ratio(br_m_all, br_all)
        clr_r = d_ratio(br_clr, interval_sec) if br_clr else None

        return MetricGroup("lpm_br_total", [
            Metric("lpm_br_total_retired",
                   "The number of branch instructions retired per second.", br_r,
                   "insn/s"),
            Metric(
                "lpm_br_total_mispred",
                "The number of branch instructions retired, of any type, that were "
                "not correctly predicted as a percentage of all branch instructions.",
                misp_r, "100%"),
            Metric("lpm_br_total_insn_between_branches",
                   "The number of instructions divided by the number of branches.",
                   ins_r, "insn"),
            Metric("lpm_br_total_insn_fe_resteers",
                   "The number of resync branches per second.", clr_r, "req/s"
                   ) if clr_r else None
        ])

    def Taken() -> MetricGroup:
        """Metrics for the ``lpm_br_taken`` group."""
        # NOTE(review): the rate and insn-between-branches metrics below are
        # computed from the all-branches event although their descriptions
        # talk about taken branches - confirm this is intended.
        br_all = Event("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY")
        br_m_tk = None
        try:
            br_m_tk = Event("BR_MISP_RETIRED.NEAR_TAKEN",
                            "BR_MISP_RETIRED.TAKEN_JCC",
                            "BR_INST_RETIRED.MISPRED_TAKEN")
        except Exception:
            pass
        br_r = d_ratio(br_all, interval_sec)
        ins_r = d_ratio(ins, br_all)
        misp_r = d_ratio(br_m_tk, br_all) if br_m_tk else None
        return MetricGroup("lpm_br_taken", [
            Metric("lpm_br_taken_retired",
                   "The number of taken branches that were retired per second.",
                   br_r, "insn/s"),
            Metric(
                "lpm_br_taken_mispred",
                "The number of retired taken branch instructions that were "
                "mispredicted as a percentage of all taken branches.", misp_r,
                "100%") if misp_r else None,
            Metric(
                "lpm_br_taken_insn_between_branches",
                "The number of instructions divided by the number of taken branches.",
                ins_r, "insn"),
        ])

    def Conditional() -> Optional[MetricGroup]:
        """Metrics for conditional branches, or None without the events.

        When the not-taken events exist as well, the result is a group
        with two sub-groups (lpm_br_cond_nt and lpm_br_cond_tkn).
        """
        try:
            br_cond = Event("BR_INST_RETIRED.COND",
                            "BR_INST_RETIRED.CONDITIONAL",
                            "BR_INST_RETIRED.TAKEN_JCC")
            br_m_cond = Event("BR_MISP_RETIRED.COND",
                              "BR_MISP_RETIRED.CONDITIONAL",
                              "BR_MISP_RETIRED.TAKEN_JCC")
        except Exception:
            return None

        br_cond_nt = None
        br_m_cond_nt = None
        try:
            br_cond_nt = Event("BR_INST_RETIRED.COND_NTAKEN")
            br_m_cond_nt = Event("BR_MISP_RETIRED.COND_NTAKEN")
        except Exception:
            pass
        br_r = d_ratio(br_cond, interval_sec)
        ins_r = d_ratio(ins, br_cond)
        misp_r = d_ratio(br_m_cond, br_cond)
        taken_metrics = [
            Metric("lpm_br_cond_retired", "Retired conditional branch instructions.",
                   br_r, "insn/s"),
            Metric("lpm_br_cond_insn_between_branches",
                   "The number of instructions divided by the number of conditional "
                   "branches.", ins_r, "insn"),
            Metric("lpm_br_cond_mispred",
                   "Retired conditional branch instructions mispredicted as a "
                   "percentage of all conditional branches.", misp_r, "100%"),
        ]
        # Both not-taken events are needed; the second assignment is the one
        # that tells us the whole pair was created.
        if not br_m_cond_nt:
            return MetricGroup("lpm_br_cond", taken_metrics)

        br_r = d_ratio(br_cond_nt, interval_sec)
        ins_r = d_ratio(ins, br_cond_nt)
        misp_r = d_ratio(br_m_cond_nt, br_cond_nt)

        # NOTE(review): these metrics reuse the exact names of the metrics in
        # taken_metrics above, so the two sub-groups contain identically
        # named metrics - confirm whether distinct names were intended.
        not_taken_metrics = [
            Metric("lpm_br_cond_retired", "Retired conditional not taken branch instructions.",
                   br_r, "insn/s"),
            Metric("lpm_br_cond_insn_between_branches",
                   "The number of instructions divided by the number of not taken conditional "
                   "branches.", ins_r, "insn"),
            Metric("lpm_br_cond_mispred",
                   "Retired not taken conditional branch instructions mispredicted as a "
                   "percentage of all not taken conditional branches.", misp_r, "100%"),
        ]
        return MetricGroup("lpm_br_cond", [
            MetricGroup("lpm_br_cond_nt", not_taken_metrics),
            MetricGroup("lpm_br_cond_tkn", taken_metrics),
        ])

    def Far() -> Optional[MetricGroup]:
        """Metrics for far branches, or None without the event."""
        try:
            br_far = Event("BR_INST_RETIRED.FAR_BRANCH")
        except Exception:
            return None

        br_r = d_ratio(br_far, interval_sec)
        ins_r = d_ratio(ins, br_far)
        return MetricGroup("lpm_br_far", [
            Metric("lpm_br_far_retired", "Retired far control transfers per second.",
                   br_r, "insn/s"),
            Metric(
                "lpm_br_far_insn_between_branches",
                "The number of instructions divided by the number of far branches.",
                ins_r, "insn"),
        ])

    return MetricGroup("lpm_br", [Total(), Taken(), Conditional(), Far()],
                       description="breakdown of retired branch instructions")
266
267
def IntelCtxSw() -> MetricGroup:
    """Build the ``lpm_cs`` group of per-context-switch metrics.

    The rate, instruction and cycle metrics are always present.  The load,
    store, taken-branch and L2-miss metrics are only added when their
    events can be created.
    """
    cs = Event("context\\-switches")
    metrics = [
        Metric("lpm_cs_rate", "Context switches per second",
               d_ratio(cs, interval_sec), "ctxsw/s")
    ]

    ev = Event("instructions")
    metrics.append(Metric("lpm_cs_instr", "Instructions per context switch",
                          d_ratio(ev, cs), "instr/cs"))

    ev = Event("cycles")
    metrics.append(Metric("lpm_cs_cycles", "Cycles per context switch",
                          d_ratio(ev, cs), "cycles/cs"))

    # (candidate event names, metric name, description, unit) for the
    # metrics that are skipped when none of the candidate events exists.
    optional_metrics = [
        (("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS"),
         "lpm_cs_loads", "Loads per context switch", "loads/cs"),
        (("MEM_INST_RETIRED.ALL_STORES", "MEM_UOPS_RETIRED.ALL_STORES"),
         "lpm_cs_stores", "Stores per context switch", "stores/cs"),
        (("BR_INST_RETIRED.NEAR_TAKEN", "BR_INST_RETIRED.TAKEN_JCC"),
         "lpm_cs_br_taken", "Branches taken per context switch", "br_taken/cs"),
    ]
    for event_names, metric_name, desc, unit in optional_metrics:
        try:
            ev = Event(*event_names)
        except Exception:
            # Event not available on this model; skip the metric.
            continue
        metrics.append(Metric(metric_name, desc, d_ratio(ev, cs), unit))

    # The three demand miss events are all required; the prefetch miss
    # event is added on top only when one of its spellings exists.
    try:
        l2_misses = (Event("L2_RQSTS.DEMAND_DATA_RD_MISS") +
                     Event("L2_RQSTS.RFO_MISS") +
                     Event("L2_RQSTS.CODE_RD_MISS"))
    except Exception:
        l2_misses = None
    if l2_misses is not None:
        try:
            l2_misses += Event("L2_RQSTS.HWPF_MISS",
                               "L2_RQSTS.L2_PF_MISS", "L2_RQSTS.PF_MISS")
        except Exception:
            pass

        metrics.append(Metric("lpm_cs_l2_misses", "L2 misses per context switch",
                              d_ratio(l2_misses, cs), "l2_misses/cs"))

    return MetricGroup("lpm_cs", metrics,
                       description=("Number of context switches per second, instructions "
                                    "retired & core cycles between context switches"))
323
324
def IntelFpu() -> Optional[MetricGroup]:
    """Build the ``lpm_fpu`` group of floating point metrics.

    The total flop count is a weighted sum of the retired FP arithmetic
    events: 1 per scalar instruction and 4/2, 8/4 and 16/8 per
    single/double packed instruction of 128, 256 and 512 bits.

    Returns:
        The metric group, or None when the scalar single event cannot be
        created.
    """
    cyc = Event("cycles")
    try:
        s_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
                     "SIMD_INST_RETIRED.SCALAR_SINGLE")
    except Exception:
        return None
    d_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
                 "SIMD_INST_RETIRED.SCALAR_DOUBLE")
    s_128 = Event("FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
                  "SIMD_INST_RETIRED.PACKED_SINGLE")

    flop = s_64 + d_64 + 4 * s_128

    d_128 = None
    s_256 = None
    d_256 = None
    s_512 = None
    d_512 = None
    try:
        # Ordered from narrowest to widest: the first missing event stops
        # the sequence and everything gathered so far is kept.
        d_128 = Event("FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE")
        flop += 2 * d_128
        s_256 = Event("FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE")
        flop += 8 * s_256
        d_256 = Event("FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE")
        flop += 4 * d_256
        s_512 = Event("FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE")
        flop += 16 * s_512
        d_512 = Event("FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE")
        flop += 8 * d_512
    except Exception:
        pass

    f_assist = Event("ASSISTS.FP", "FP_ASSIST.ANY", "FP_ASSIST.S")
    # Pick the constraint from the unmodified event name, before a cmask
    # suffix may be appended to it below.
    nmi_constraint = MetricConstraint.GROUPED_EVENTS
    if f_assist.name == "ASSISTS.FP":  # Icelake+
        nmi_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI
    # Compare and extend the event *name*: testing the Event object itself
    # against strings never matched, which left the cmask branch dead.
    if f_assist.name in ("ASSISTS.FP", "FP_ASSIST.S"):
        f_assist.name += "/cmask=1/"

    flop_r = d_ratio(flop, interval_sec)
    flop_c = d_ratio(flop, cyc)

    def FpuMetrics(group: str, fl: Optional[Event], mult: int, desc: str) -> Optional[MetricGroup]:
        """Metrics for one instruction class.

        Args:
            group: name of the group, also the prefix of its metric names.
            fl: event counting the instructions, or None if unavailable.
            mult: floating point operations per counted instruction.
            desc: text used as the start of each metric description.

        Returns:
            The metric group, or None when fl is not available.
        """
        if not fl:
            return None

        f = fl * mult
        fl_r = d_ratio(f, interval_sec)
        r_s = d_ratio(fl, interval_sec)
        return MetricGroup(group, [
            Metric(f"{group}_of_total", desc + " floating point operations per second",
                   d_ratio(f, flop), "100%"),
            Metric(f"{group}_flops", desc + " floating point operations per second",
                   fl_r, "flops/s"),
            Metric(f"{group}_ops", desc + " operations per second",
                   r_s, "ops/s"),
        ])

    return MetricGroup("lpm_fpu", [
        MetricGroup("lpm_fpu_total", [
            Metric("lpm_fpu_total_flops", "Floating point operations per second",
                   flop_r, "flops/s"),
            Metric("lpm_fpu_total_flopc", "Floating point operations per cycle",
                   flop_c, "flops/cycle", constraint=nmi_constraint),
        ]),
        MetricGroup("lpm_fpu_64", [
            FpuMetrics("lpm_fpu_64_single", s_64, 1, "64-bit single"),
            FpuMetrics("lpm_fpu_64_double", d_64, 1, "64-bit double"),
        ]),
        MetricGroup("lpm_fpu_128", [
            FpuMetrics("lpm_fpu_128_single", s_128,
                       4, "128-bit packed single"),
            FpuMetrics("lpm_fpu_128_double", d_128,
                       2, "128-bit packed double"),
        ]),
        # The 256/512-bit descriptions previously said "128-bit".
        MetricGroup("lpm_fpu_256", [
            FpuMetrics("lpm_fpu_256_single", s_256,
                       8, "256-bit packed single"),
            FpuMetrics("lpm_fpu_256_double", d_256,
                       4, "256-bit packed double"),
        ]),
        MetricGroup("lpm_fpu_512", [
            FpuMetrics("lpm_fpu_512_single", s_512,
                       16, "512-bit packed single"),
            FpuMetrics("lpm_fpu_512_double", d_512,
                       8, "512-bit packed double"),
        ]),
        Metric("lpm_fpu_assists", "FP assists as a percentage of cycles",
               d_ratio(f_assist, cyc), "100%"),
    ])
419
420
def IntelIlp() -> MetricGroup:
    """Build the ``lpm_ilp`` group.

    INST_RETIRED.ANY_P is sampled with cmask values 1 to 5.  Adjacent
    cmask counts are subtracted (clamped at zero) and divided by core
    cycles to give the 1..4 buckets, the cmask=5 count gives the last
    bucket, and the 0 bucket is 1 minus all other buckets.
    """
    tsc = Event("msr/tsc/")
    mperf = Event("msr/mperf/")
    low_power = tsc - mperf
    retired = Event("INST_RETIRED.ANY_P")
    by_cmask = [Event(f"{retired.name}/cmask={mask}/") for mask in range(1, 6)]
    core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY",
                        "CPU_CLK_UNHALTED.DISTRIBUTED",
                        "cycles")

    # Pair every cmask event with its successor: (1,2), (2,3), (3,4), (4,5).
    ilp = [d_ratio(max(this - following, 0), core_cycles)
           for this, following in zip(by_cmask, by_cmask[1:])]
    ilp.append(d_ratio(by_cmask[-1], core_cycles))

    # Whatever is left after removing every other bucket from 1.
    ilp0 = 1
    for bucket in ilp:
        ilp0 -= bucket

    metrics = [
        Metric("lpm_ilp_idle", "Lower power cycles as a percentage of all cycles",
               d_ratio(low_power, tsc), "100%"),
        Metric("lpm_ilp_inst_ret_0",
               "Instructions retired in 0 cycles as a percentage of all cycles",
               ilp0, "100%"),
    ]
    for count, bucket in enumerate(ilp, start=1):
        # The final bucket is open ended.
        label = f"{count} or more" if count == len(ilp) else f"{count}"
        metrics.append(
            Metric(f"lpm_ilp_inst_ret_{count}",
                   f"Instructions retired in {label} cycles as a percentage of all cycles",
                   bucket, "100%"))
    return MetricGroup("lpm_ilp", metrics)
458
459
def IntelL2() -> Optional[MetricGroup]:
    """Build the ``lpm_l2`` group of L2 cache metrics.

    Demand read, code read miss and lines-in metrics are always built.
    Prefetch, RFO, code hit, lines-out, useless prefetch and write-back
    metrics are only built when their events can be created; otherwise
    None is passed to MetricGroup in their place.

    Returns:
        The metric group, or None when L2_RQSTS.DEMAND_DATA_RD_HIT cannot
        be created.
    """
    try:
        DC_HIT = Event("L2_RQSTS.DEMAND_DATA_RD_HIT")
    except Exception:
        return None
    try:
        DC_MISS = Event("L2_RQSTS.DEMAND_DATA_RD_MISS")
        l2_dmnd_miss = DC_MISS
        l2_dmnd_rd_all = DC_MISS + DC_HIT
    except Exception:
        # No miss event: derive the misses from the all-requests event.
        DC_ALL = Event("L2_RQSTS.ALL_DEMAND_DATA_RD")
        l2_dmnd_miss = DC_ALL - DC_HIT
        l2_dmnd_rd_all = DC_ALL
    l2_dmnd_mrate = d_ratio(l2_dmnd_miss, interval_sec)
    l2_dmnd_rrate = d_ratio(l2_dmnd_rd_all, interval_sec)

    DC_PFH = None
    DC_PFM = None
    l2_pf_all = None
    l2_pf_mrate = None
    l2_pf_rrate = None
    try:
        DC_PFH = Event("L2_RQSTS.PF_HIT")
        DC_PFM = Event("L2_RQSTS.PF_MISS")
        l2_pf_all = DC_PFH + DC_PFM
        l2_pf_mrate = d_ratio(DC_PFM, interval_sec)
        l2_pf_rrate = d_ratio(l2_pf_all, interval_sec)
    except Exception:
        pass

    DC_RFOH = None
    DC_RFOM = None
    l2_rfo_all = None
    l2_rfo_mrate = None
    l2_rfo_rrate = None
    try:
        DC_RFOH = Event("L2_RQSTS.RFO_HIT")
        DC_RFOM = Event("L2_RQSTS.RFO_MISS")
        l2_rfo_all = DC_RFOH + DC_RFOM
        l2_rfo_mrate = d_ratio(DC_RFOM, interval_sec)
        l2_rfo_rrate = d_ratio(l2_rfo_all, interval_sec)
    except Exception:
        pass

    DC_CH = None
    try:
        DC_CH = Event("L2_RQSTS.CODE_RD_HIT")
    except Exception:
        pass
    DC_CM = Event("L2_RQSTS.CODE_RD_MISS")
    DC_IN = Event("L2_LINES_IN.ALL")

    l2_s = None
    l2_ns = None
    l2_out_rate = None
    isd = None
    try:
        DC_OUT_NS = Event("L2_LINES_OUT.NON_SILENT",
                          "L2_LINES_OUT.DEMAND_DIRTY",
                          "L2_LINES_IN.S")
        DC_OUT_S = Event("L2_LINES_OUT.SILENT",
                         "L2_LINES_OUT.DEMAND_CLEAN",
                         "L2_LINES_IN.I")
        # The parsed command line lives in the module global _args; the
        # previous reference to an undefined "args" raised a NameError that
        # the except below swallowed, dropping all of these metrics.
        if DC_OUT_S.name == "L2_LINES_OUT.SILENT" and (
                _args.model.startswith("skylake") or
                _args.model == "cascadelakex"):
            DC_OUT_S.name = "L2_LINES_OUT.SILENT/any/"
        # bring it back to per-CPU
        l2_s = Select(DC_OUT_S / 2, Literal("#smt_on"), DC_OUT_S)
        l2_ns = DC_OUT_NS
        l2_out_rate = d_ratio(l2_s + l2_ns, interval_sec)
        isd = d_ratio(l2_s, interval_sec)
    except Exception:
        pass

    l2_useless_rate = None
    try:
        l2_useless_rate = d_ratio(Event("L2_LINES_OUT.USELESS_HWPF"),
                                  interval_sec)
    except Exception:
        pass

    DC_WB_U = None
    DC_WB_D = None
    wbu = None
    wbd = None
    try:
        DC_WB_U = Event("IDI_MISC.WB_UPGRADE")
        DC_WB_D = Event("IDI_MISC.WB_DOWNGRADE")
        wbu = d_ratio(DC_WB_U, interval_sec)
        wbd = d_ratio(DC_WB_D, interval_sec)
    except Exception:
        pass

    # Needs the non-silent out event and both write-back events, so it is
    # computed only after all of them have been looked up. Previously the
    # write-back events were read before being assigned, which always
    # failed inside the try block above and left wbn and isd unset.
    wbn = None
    if l2_ns is not None and DC_WB_U is not None and DC_WB_D is not None:
        nlr = max(l2_ns - DC_WB_U - DC_WB_D, 0)
        wbn = d_ratio(nlr, interval_sec)

    l2_code_all = (DC_CH + DC_CM) if DC_CH else None
    l2_code_rate = d_ratio(l2_code_all, interval_sec) if DC_CH else None
    l2_code_miss_rate = d_ratio(DC_CM, interval_sec)
    l2_in_rate = d_ratio(DC_IN, interval_sec)

    # NOTE(review): several metric names below are used twice inside one
    # group (lpm_l2_rd_hits, lpm_l2_hwpf_misses, lpm_l2_rfo_misses,
    # lpm_l2_code_misses), and the second lpm_l2_rd_hits is a miss ratio.
    # The names are left untouched here - confirm the intended names.
    return MetricGroup("lpm_l2", [
        MetricGroup("lpm_l2_totals", [
            Metric("lpm_l2_totals_in", "L2 cache total in per second",
                   l2_in_rate, "In/s"),
            Metric("lpm_l2_totals_out", "L2 cache total out per second",
                   l2_out_rate, "Out/s") if l2_out_rate else None,
        ]),
        MetricGroup("lpm_l2_rd", [
            Metric("lpm_l2_rd_hits", "L2 cache data read hits",
                   d_ratio(DC_HIT, l2_dmnd_rd_all), "100%"),
            Metric("lpm_l2_rd_hits", "L2 cache data read hits",
                   d_ratio(l2_dmnd_miss, l2_dmnd_rd_all), "100%"),
            Metric("lpm_l2_rd_requests", "L2 cache data read requests per second",
                   l2_dmnd_rrate, "requests/s"),
            Metric("lpm_l2_rd_misses", "L2 cache data read misses per second",
                   l2_dmnd_mrate, "misses/s"),
        ]),
        # Guarded on the last value set in the prefetch try block so the
        # group is only built when both the hit and miss events exist.
        MetricGroup("lpm_l2_hwpf", [
            Metric("lpm_l2_hwpf_hits", "L2 cache hardware prefetcher hits",
                   d_ratio(DC_PFH, l2_pf_all), "100%"),
            Metric("lpm_l2_hwpf_misses", "L2 cache hardware prefetcher misses",
                   d_ratio(DC_PFM, l2_pf_all), "100%"),
            Metric("lpm_l2_hwpf_useless", "L2 cache hardware prefetcher useless prefetches per second",
                   l2_useless_rate, "100%") if l2_useless_rate else None,
            Metric("lpm_l2_hwpf_requests", "L2 cache hardware prefetcher requests per second",
                   l2_pf_rrate, "100%"),
            Metric("lpm_l2_hwpf_misses", "L2 cache hardware prefetcher misses per second",
                   l2_pf_mrate, "100%"),
        ]) if l2_pf_rrate is not None else None,
        # Same guard idea as for the prefetch group above.
        MetricGroup("lpm_l2_rfo", [
            Metric("lpm_l2_rfo_hits", "L2 cache request for ownership (RFO) hits",
                   d_ratio(DC_RFOH, l2_rfo_all), "100%"),
            Metric("lpm_l2_rfo_misses", "L2 cache request for ownership (RFO) misses",
                   d_ratio(DC_RFOM, l2_rfo_all), "100%"),
            Metric("lpm_l2_rfo_requests", "L2 cache request for ownership (RFO) requests per second",
                   l2_rfo_rrate, "requests/s"),
            Metric("lpm_l2_rfo_misses", "L2 cache request for ownership (RFO) misses per second",
                   l2_rfo_mrate, "misses/s"),
        ]) if l2_rfo_rrate is not None else None,
        MetricGroup("lpm_l2_code", [
            Metric("lpm_l2_code_hits", "L2 cache code hits",
                   d_ratio(DC_CH, l2_code_all), "100%") if DC_CH else None,
            Metric("lpm_l2_code_misses", "L2 cache code misses",
                   d_ratio(DC_CM, l2_code_all), "100%") if DC_CH else None,
            Metric("lpm_l2_code_requests", "L2 cache code requests per second",
                   l2_code_rate, "requests/s") if DC_CH else None,
            Metric("lpm_l2_code_misses", "L2 cache code misses per second",
                   l2_code_miss_rate, "misses/s"),
        ]),
        MetricGroup("lpm_l2_evict", [
            MetricGroup("lpm_l2_evict_mef_lines", [
                Metric("lpm_l2_evict_mef_lines_l3_hot_lru", "L2 evictions M/E/F lines L3 hot LRU per second",
                       wbu, "HotLRU/s") if wbu else None,
                Metric("lpm_l2_evict_mef_lines_l3_norm_lru", "L2 evictions M/E/F lines L3 normal LRU per second",
                       wbn, "NormLRU/s") if wbn else None,
                Metric("lpm_l2_evict_mef_lines_dropped", "L2 evictions M/E/F lines dropped per second",
                       wbd, "dropped/s") if wbd else None,
                Metric("lpm_l2_evict_is_lines_dropped", "L2 evictions I/S lines dropped per second",
                       isd, "dropped/s") if isd else None,
            ]),
        ]),
    ], description="L2 data cache analysis")
627
628
def IntelMissLat() -> Optional[MetricGroup]:
    """Build the ``lpm_miss_lat`` group of local/remote miss latencies.

    Each latency is interval seconds * 1e9 * TOR occupancy divided by
    (clock ticks per source of the insert event * TOR inserts).

    Returns:
        The metric group, or None when one of the uncore events cannot be
        created.
    """
    try:
        ticks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS")
        data_rd_loc_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL",
                                "UNC_CHA_TOR_OCCUPANCY.IA_MISS",
                                "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE",
                                "UNC_C_TOR_OCCUPANCY.MISS_OPCODE")
        data_rd_loc_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL",
                                "UNC_CHA_TOR_INSERTS.IA_MISS",
                                "UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE",
                                "UNC_C_TOR_INSERTS.MISS_OPCODE")
        data_rd_rem_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE",
                                "UNC_CHA_TOR_OCCUPANCY.IA_MISS",
                                "UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE",
                                "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE")
        data_rd_rem_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE",
                                "UNC_CHA_TOR_INSERTS.IA_MISS",
                                "UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE",
                                "UNC_C_TOR_INSERTS.NID_MISS_OPCODE")
    except Exception:
        # Catch Exception rather than everything so that Ctrl-C and
        # SystemExit are not swallowed.
        return None

    # The event that was picked decides which opcode filter, if any, has to
    # be appended to the four TOR event names.
    if (data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE" or
            data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_OPCODE"):
        data_rd = 0x182
        for e in [data_rd_loc_occ, data_rd_loc_ins, data_rd_rem_occ, data_rd_rem_ins]:
            e.name += f"/filter_opc={hex(data_rd)}/"
    elif data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS":
        # Demand Data Read - Full cache-line read requests from core for
        # lines to be cached in S or E, typically for data
        demand_data_rd = 0x202
        #  LLC Prefetch Data - Uncore will first look up the line in the
        #  LLC; for a cache hit, the LRU will be updated, on a miss, the
        #  DRd will be initiated
        llc_prefetch_data = 0x25a
        local_filter = (f"/filter_opc0={hex(demand_data_rd)},"
                        f"filter_opc1={hex(llc_prefetch_data)},"
                        "filter_loc,filter_nm,filter_not_nm/")
        remote_filter = (f"/filter_opc0={hex(demand_data_rd)},"
                         f"filter_opc1={hex(llc_prefetch_data)},"
                         "filter_rem,filter_nm,filter_not_nm/")
        for e in [data_rd_loc_occ, data_rd_loc_ins]:
            e.name += local_filter
        for e in [data_rd_rem_occ, data_rd_rem_ins]:
            e.name += remote_filter
    else:
        # Sanity check: the only remaining candidate needs no filter.
        assert data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", data_rd_loc_occ

    ticks_per_cha = ticks / source_count(data_rd_loc_ins)
    loc_lat = interval_sec * 1e9 * data_rd_loc_occ / \
        (ticks_per_cha * data_rd_loc_ins)
    ticks_per_cha = ticks / source_count(data_rd_rem_ins)
    rem_lat = interval_sec * 1e9 * data_rd_rem_occ / \
        (ticks_per_cha * data_rd_rem_ins)
    return MetricGroup("lpm_miss_lat", [
        Metric("lpm_miss_lat_loc", "Local to a socket miss latency in nanoseconds",
               loc_lat, "ns"),
        Metric("lpm_miss_lat_rem", "Remote to a socket miss latency in nanoseconds",
               rem_lat, "ns"),
    ])
689
690
def IntelMlp() -> Optional[Metric]:
    """Build the ``lpm_mlp`` metric.

    Divides L1D_PEND_MISS.PENDING by L1D_PEND_MISS.PENDING_CYCLES, with the
    cycle count halved under the "#smt_on" literal.

    Returns:
        The metric, or None when either event cannot be created.
    """
    try:
        l1d = Event("L1D_PEND_MISS.PENDING")
        l1dc = Event("L1D_PEND_MISS.PENDING_CYCLES")
    except Exception:
        # Catch Exception rather than everything so that Ctrl-C and
        # SystemExit are not swallowed.
        return None

    l1dc = Select(l1dc / 2, Literal("#smt_on"), l1dc)
    ml = d_ratio(l1d, l1dc)
    return Metric("lpm_mlp",
                  "Miss level parallelism - number of outstanding load misses per cycle (higher is better)",
                  ml, "load_miss_pending/cycle")
703
704
def IntelPorts() -> Optional[MetricGroup]:
    """Build the ``lpm_ports`` group of per-port utilization metrics.

    Reads x86/<model>/pipeline.json below _args.events_path and creates one
    metric for every event whose name matches "^UOPS_DISPATCHED.PORT".

    Returns:
        The metric group, or None when no such event is listed.
    """
    # Use a context manager so the file handle is closed right after the
    # JSON has been parsed instead of being left to the garbage collector.
    with open(f"{_args.events_path}/x86/{_args.model}/pipeline.json") as f:
        pipeline_events = json.load(f)

    core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY",
                        "CPU_CLK_UNHALTED.DISTRIBUTED",
                        "cycles")
    # Number of CPU cycles scaled for SMT.
    smt_cycles = Select(core_cycles / 2, Literal("#smt_on"), core_cycles)

    metrics = []
    for x in pipeline_events:
        if "EventName" in x and re.search("^UOPS_DISPATCHED.PORT", x["EventName"]):
            name = x["EventName"]
            # The metric is named after the "PORT_<digit>..." tail of the
            # event name, lower-cased.
            port = re.search(r"(PORT_[0-9].*)", name).group(0).lower()
            # Events ending in _CORE use the unscaled core cycles.
            if name.endswith("_CORE"):
                cyc = core_cycles
            else:
                cyc = smt_cycles
            metrics.append(Metric(f"lpm_{port}", f"{port} utilization (higher is better)",
                                  d_ratio(Event(name), cyc), "100%"))
    if len(metrics) == 0:
        return None

    return MetricGroup("lpm_ports", metrics, "functional unit (port) utilization -- "
                       "fraction of cycles each port is utilized (higher is better)")
731
732
def IntelSwpf() -> Optional[MetricGroup]:
    """Build the "lpm_swpf" software prefetch metric group.

    The group holds "lpm_swpf_totals" (prefetches per second, instructions
    per prefetch, loads per prefetch) and "lpm_swpf_bkdwn", which has one
    sub-group per prefetch kind (nta, t0, t1_t2, w) giving that kind's share
    of all software prefetches and its rate per second.

    Returns:
        The metric group, or None when constructing the load event or any
        SW_PREFETCH_ACCESS event raises.
    """
    insns = Event("instructions")
    try:
        loads = Event("MEM_INST_RETIRED.ALL_LOADS",
                      "MEM_UOPS_RETIRED.ALL_LOADS")
        pf_nta = Event("SW_PREFETCH_ACCESS.NTA")
        pf_t0 = Event("SW_PREFETCH_ACCESS.T0")
        pf_t1_t2 = Event("SW_PREFETCH_ACCESS.T1_T2")
        pf_w = Event("SW_PREFETCH_ACCESS.PREFETCHW")
    except:
        return None

    pf_all = pf_nta + pf_t0 + pf_t1_t2 + pf_w
    pf_per_sec = d_ratio(pf_all, interval_sec)
    insns_per_pf = d_ratio(insns, pf_all)
    loads_per_pf = d_ratio(loads, pf_all)

    def breakdown(key: str, label: str, pf) -> MetricGroup:
        """Return the lpm_swpf_bkdwn_<key> group for one prefetch kind."""
        prefix = f"lpm_swpf_bkdwn_{key}"
        return MetricGroup(prefix, [
            Metric(f"{prefix}_per_swpf",
                   f"Software prefetch {label} instructions as a percent of all prefetch instructions",
                   d_ratio(pf, pf_all), "100%"),
            Metric(f"{prefix}_rate",
                   f"Software prefetch {label} instructions per second",
                   d_ratio(pf, interval_sec), "insn/s"),
        ])

    totals = MetricGroup("lpm_swpf_totals", [
        Metric("lpm_swpf_totals_exec", "Software prefetch instructions per second",
               pf_per_sec, "swpf/s"),
        Metric("lpm_swpf_totals_insn_per_pf",
               "Average number of instructions between software prefetches",
               insns_per_pf, "insn/swpf"),
        Metric("lpm_swpf_totals_loads_per_pf",
               "Average number of loads between software prefetches",
               loads_per_pf, "loads/swpf"),
    ])

    # (metric name suffix, wording used in the description, event)
    kinds = (
        ("nta", "NTA", pf_nta),
        ("t0", "T0", pf_t0),
        ("t1_t2", "T1 or T2", pf_t1_t2),
        ("w", "W", pf_w),
    )
    by_kind = MetricGroup("lpm_swpf_bkdwn",
                          [breakdown(key, label, pf) for key, label, pf in kinds])

    return MetricGroup("lpm_swpf", [totals, by_kind],
                       description="Software prefetch instruction breakdown")
796
797
def IntelLdSt() -> Optional[MetricGroup]:
    """Build the "lpm_ldst" load/store instruction breakdown group.

    The group reports load and store rates, loads and stores as a fraction of
    all instructions, and the fraction of cycles in which 1, 2, or 3-or-more
    loads (and likewise stores) retired. The last set is derived from
    cmask=1/2/3 variants of the load and store events. A software-prefetch-hit
    rate and an atomic-load rate are added only when their events exist.

    Returns:
        None for the models listed below, otherwise the metric group.

    Raises:
        Whatever Event() raises if one of the mandatory load, store,
        instruction or cycle events cannot be constructed; only the two
        optional events are guarded.
    """
    if _args.model in [
        "bonnell",
        "nehalemep",
        "nehalemex",
        "westmereep-dp",
        "westmereep-sp",
        "westmereex",
    ]:
        return None
    loads = Event("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS")
    stores = Event("MEM_INST_RETIRED.ALL_STORES",
                   "MEM_UOPS_RETIRED.ALL_STORES")
    # cmask=N variants of whichever load/store event was found above.
    # Presumably cmask=N counts cycles with at least N occurrences, which is
    # why "exactly N" below is computed as cmask=N minus cmask=N+1.
    loads_c1 = Event(f"{loads.name}/cmask=1/")
    stores_c1 = Event(f"{stores.name}/cmask=1/")
    loads_c2 = Event(f"{loads.name}/cmask=2/")
    stores_c2 = Event(f"{stores.name}/cmask=2/")
    loads_c3 = Event(f"{loads.name}/cmask=3/")
    stores_c3 = Event(f"{stores.name}/cmask=3/")
    ins = Event("instructions")
    cyc = Event("CPU_CLK_UNHALTED.THREAD",
                "CPU_CLK_UNHALTED.CORE_P",
                "CPU_CLK_UNHALTED.THREAD_P")

    # Optional events: a missing event just drops the corresponding metric.
    # Catch Exception rather than everything so that KeyboardInterrupt and
    # SystemExit still propagate.
    try:
        sw_pf_hits = Event("LOAD_HIT_PREFETCH.SWPF", "LOAD_HIT_PRE.SW_PF")
    except Exception:
        sw_pf_hits = None
    try:
        lock_loads = Event("MEM_INST_RETIRED.LOCK_LOADS")
    except Exception:
        lock_loads = None

    ld_rate = d_ratio(loads, interval_sec)
    st_rate = d_ratio(stores, interval_sec)
    # Compare against None explicitly; the truth value of an expression
    # object is not a reliable "is present" test.
    pf_rate = (d_ratio(sw_pf_hits, interval_sec)
               if sw_pf_hits is not None else None)
    at_rate = (d_ratio(lock_loads, interval_sec)
               if lock_loads is not None else None)

    # The MEM_UOPS_RETIRED fallback gets a different grouping constraint for
    # the metrics that subtract two cmask variants.
    ldst_ret_constraint = MetricConstraint.GROUPED_EVENTS
    if loads.name == "MEM_UOPS_RETIRED.ALL_LOADS":
        ldst_ret_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI

    return MetricGroup("lpm_ldst", [
        # NOTE(review): these two values are per-second rates although the
        # descriptions and units say "total"/"loads"/"stores" - confirm
        # intent before changing the emitted units.
        MetricGroup("lpm_ldst_total", [
            Metric("lpm_ldst_total_loads", "Load/store instructions total loads",
                   ld_rate, "loads"),
            Metric("lpm_ldst_total_stores", "Load/store instructions total stores",
                   st_rate, "stores"),
        ]),
        MetricGroup("lpm_ldst_prcnt", [
            Metric("lpm_ldst_prcnt_loads", "Percent of all instructions that are loads",
                   d_ratio(loads, ins), "100%"),
            Metric("lpm_ldst_prcnt_stores", "Percent of all instructions that are stores",
                   d_ratio(stores, ins), "100%"),
        ]),
        MetricGroup("lpm_ldst_ret_lds", [
            Metric("lpm_ldst_ret_lds_1", "Retired loads in 1 cycle",
                   d_ratio(max(loads_c1 - loads_c2, 0), cyc), "100%",
                   constraint=ldst_ret_constraint),
            Metric("lpm_ldst_ret_lds_2", "Retired loads in 2 cycles",
                   d_ratio(max(loads_c2 - loads_c3, 0), cyc), "100%",
                   constraint=ldst_ret_constraint),
            Metric("lpm_ldst_ret_lds_3", "Retired loads in 3 or more cycles",
                   d_ratio(loads_c3, cyc), "100%"),
        ]),
        MetricGroup("lpm_ldst_ret_sts", [
            Metric("lpm_ldst_ret_sts_1", "Retired stores in 1 cycle",
                   d_ratio(max(stores_c1 - stores_c2, 0), cyc), "100%",
                   constraint=ldst_ret_constraint),
            Metric("lpm_ldst_ret_sts_2", "Retired stores in 2 cycles",
                   d_ratio(max(stores_c2 - stores_c3, 0), cyc), "100%",
                   constraint=ldst_ret_constraint),
            # Description fixed to match the loads variant ("3 or more").
            Metric("lpm_ldst_ret_sts_3", "Retired stores in 3 or more cycles",
                   d_ratio(stores_c3, cyc), "100%"),
        ]),
        Metric("lpm_ldst_ld_hit_swpf", "Load hit software prefetches per second",
               pf_rate, "swpf/s") if pf_rate is not None else None,
        Metric("lpm_ldst_atomic_lds", "Atomic loads per second",
               at_rate, "loads/s") if at_rate is not None else None,
    ], description="Breakdown of load/store instructions")
880
881
def UncoreCState() -> Optional[MetricGroup]:
    """Build the "lpm_cstate" group of per-C-state core occupancy metrics.

    Each metric divides a UNC_P_POWER_STATE_OCCUPANCY.CORES_* count by
    UNC_P_CLOCKTICKS. The C6 count is first reduced by any excess of the
    summed occupancies over ticks * (#num_cores / #num_packages).

    Returns:
        The metric group, or None when constructing any of the events raises.
    """
    try:
        pcu_ticks = Event("UNC_P_CLOCKTICKS")
        in_c0 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C0")
        in_c3 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C3")
        in_c6 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C6")
    except:
        return None

    cores_per_package = Literal("#num_cores") / Literal("#num_packages")
    cycle_budget = pcu_ticks * cores_per_package
    observed = in_c0 + in_c3 + in_c6

    # remove fused-off cores which show up in C6/C7: when the observed
    # occupancy exceeds the budget, subtract the surplus from C6 (floored
    # at 0); otherwise keep C6 unchanged.
    surplus = observed - cycle_budget
    usable_c6 = Select(max(in_c6 - surplus, 0),
                       observed > cycle_budget,
                       in_c6)

    states = (
        ("lpm_cstate_c0", "C-State cores in C0/C1", in_c0),
        ("lpm_cstate_c3", "C-State cores in C3", in_c3),
        ("lpm_cstate_c6", "C-State cores in C6/C7", usable_c6),
    )
    return MetricGroup("lpm_cstate", [
        Metric(name, desc, d_ratio(occupancy, pcu_ticks), "cores")
        for name, desc, occupancy in states
    ])
909
910
def UncoreDir() -> Optional[MetricGroup]:
    """Build the "lpm_dir" group of directory lookup and update metrics.

    Lookups are the sum of the M2M directory hit and miss events; updates are
    the sum of the M2M and CHA directory update events. The hit, miss and CHA
    update events are created from a specific sub-event and then have their
    name extended with a umask/name override so they count the ANY variant.

    Returns:
        The metric group, or None when constructing any of the events raises.
    """
    try:
        m2m_upd = Event("UNC_M2M_DIRECTORY_UPDATE.ANY")
        m2m_hits = Event("UNC_M2M_DIRECTORY_HIT.DIRTY_I")
        # Turn the umask into a ANY rather than DIRTY_I filter.
        m2m_hits.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_HIT.ANY/"
        m2m_miss = Event("UNC_M2M_DIRECTORY_MISS.DIRTY_I")
        # Turn the umask into a ANY rather than DIRTY_I filter.
        m2m_miss.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_MISS.ANY/"
        cha_upd = Event("UNC_CHA_DIR_UPDATE.HA")
        # Turn the umask into a ANY rather than HA filter.
        cha_upd.name += "/umask=3,name=UNC_CHA_DIR_UPDATE.ANY/"
    except Exception:
        # Catch Exception, not everything, so KeyboardInterrupt and
        # SystemExit still propagate.
        return None

    m2m_total = m2m_hits + m2m_miss
    upd = m2m_upd + cha_upd  # in cache lines

    scale = 64 / 1_000_000  # Cache lines to MB
    return MetricGroup("lpm_dir", [
        Metric("lpm_dir_lookup_rate", "",
               d_ratio(m2m_total, interval_sec), "requests/s"),
        Metric("lpm_dir_lookup_hits", "",
               d_ratio(m2m_hits, m2m_total), "100%"),
        Metric("lpm_dir_lookup_misses", "",
               d_ratio(m2m_miss, m2m_total), "100%"),
        # Same expression twice: once as a request rate, once scaled to MB/s.
        Metric("lpm_dir_update_requests", "",
               d_ratio(upd, interval_sec), "requests/s"),
        Metric("lpm_dir_update_bw", "",
               d_ratio(upd, interval_sec), f"{scale}MB/s"),
    ])
944
945
def UncoreMem() -> Optional[MetricGroup]:
    """Build the "lpm_mem" local vs. remote memory bandwidth group.

    Four metrics are produced from the UNC_CHA_REQUESTS.* events (or their
    UNC_H_REQUESTS.* alternatives): local read, local write, remote read and
    remote write, each divided by the measurement interval.

    Returns:
        The metric group, or None when constructing any of the events raises.
    """
    try:
        local_reads = Event("UNC_CHA_REQUESTS.READS_LOCAL",
                            "UNC_H_REQUESTS.READS_LOCAL")
        remote_reads = Event("UNC_CHA_REQUESTS.READS_REMOTE",
                             "UNC_H_REQUESTS.READS_REMOTE")
        local_writes = Event("UNC_CHA_REQUESTS.WRITES_LOCAL",
                             "UNC_H_REQUESTS.WRITES_LOCAL")
        remote_writes = Event("UNC_CHA_REQUESTS.WRITES_REMOTE",
                              "UNC_H_REQUESTS.WRITES_REMOTE")
    except:
        return None

    # Presumably 64-byte requests converted to MB; the factor is carried in
    # the unit string rather than applied to the expression.
    scale = 64 / 1_000_000
    unit = f"{scale}MB/s"

    def bandwidth(name: str, desc: str, requests) -> Metric:
        """Return a requests-per-second metric reported with the MB/s unit."""
        return Metric(name, desc, d_ratio(requests, interval_sec), unit)

    local = MetricGroup("lpm_mem_local", [
        bandwidth("lpm_mem_local_read",
                  "Local memory read bandwidth not including directory updates",
                  local_reads),
        bandwidth("lpm_mem_local_write",
                  "Local memory write bandwidth not including directory updates",
                  local_writes),
    ])
    remote = MetricGroup("lpm_mem_remote", [
        bandwidth("lpm_mem_remote_read",
                  "Remote memory read bandwidth not including directory updates",
                  remote_reads),
        bandwidth("lpm_mem_remote_write",
                  "Remote memory write bandwidth not including directory updates",
                  remote_writes),
    ])
    return MetricGroup(
        "lpm_mem", [local, remote],
        description="Memory Bandwidth breakdown local vs. remote (remote requests in). directory updates not included")
974
975
def UncoreMemBw() -> Optional[MetricGroup]:
    """Build the "lpm_mem_bw" DDR and PMM memory bandwidth group.

    DDR reads and writes are the sums of the per-controller
    UNC_MC<n>_RDCAS_COUNT_FREERUN / UNC_MC<n>_WRCAS_COUNT_FREERUN events
    listed in the model's uncore-memory.json. If no free-running read event
    is found, UNC_M_CAS_COUNT.RD and UNC_M_CAS_COUNT.WR are used instead.
    A PMM sub-group is added when UNC_M_PMM_RPQ_INSERTS exists.

    Returns:
        The metric group, or None when neither DDR event source is available.
    """
    mem_events = []
    try:
        # The parsed arguments live in the module global _args. Read the
        # file from the directory main() passes to LoadEvents(), so the
        # names found here are presumably also known to Event().
        with open(f"{_args.events_path}/x86/{_args.model}/"
                  "uncore-memory.json") as f:
            mem_events = json.load(f)
    except (OSError, ValueError):
        # Missing or unparsable file: fall through to the named events.
        # Deliberately narrow so programming errors are not hidden.
        pass

    rd_freerun = re.compile("^UNC_MC[0-9]+_RDCAS_COUNT_FREERUN")
    wr_freerun = re.compile("^UNC_MC[0-9]+_WRCAS_COUNT_FREERUN")
    ddr_rds = 0
    ddr_wrs = 0
    found_freerun_reads = False
    for x in mem_events:
        if "EventName" not in x:
            continue
        name = x["EventName"]
        if rd_freerun.search(name):
            ddr_rds += Event(name)
            found_freerun_reads = True
        elif wr_freerun.search(name):
            ddr_wrs += Event(name)

    if not found_freerun_reads:
        try:
            ddr_rds = Event("UNC_M_CAS_COUNT.RD")
            ddr_wrs = Event("UNC_M_CAS_COUNT.WR")
        except Exception:
            return None

    ddr_total = ddr_rds + ddr_wrs

    pmm_rds = 0
    pmm_wrs = 0
    have_pmm = False
    try:
        pmm_rds = Event("UNC_M_PMM_RPQ_INSERTS")
        # The PMM group is keyed on the read event alone; if the write event
        # is missing its term simply stays 0.
        have_pmm = True
        pmm_wrs = Event("UNC_M_PMM_WPQ_INSERTS")
    except Exception:
        pass

    pmm_total = pmm_rds + pmm_wrs

    # Presumably 64-byte transfers converted to MB; the factor is carried in
    # the unit string.
    scale = 64 / 1_000_000
    return MetricGroup("lpm_mem_bw", [
        MetricGroup("lpm_mem_bw_ddr", [
            Metric("lpm_mem_bw_ddr_read", "DDR memory read bandwidth",
                   d_ratio(ddr_rds, interval_sec), f"{scale}MB/s"),
            Metric("lpm_mem_bw_ddr_write", "DDR memory write bandwidth",
                   d_ratio(ddr_wrs, interval_sec), f"{scale}MB/s"),
            Metric("lpm_mem_bw_ddr_total", "DDR memory total bandwidth",
                   d_ratio(ddr_total, interval_sec), f"{scale}MB/s"),
        ], description="DDR Memory Bandwidth"),
        MetricGroup("lpm_mem_bw_pmm", [
            Metric("lpm_mem_bw_pmm_read", "PMM memory read bandwidth",
                   d_ratio(pmm_rds, interval_sec), f"{scale}MB/s"),
            Metric("lpm_mem_bw_pmm_write", "PMM memory write bandwidth",
                   d_ratio(pmm_wrs, interval_sec), f"{scale}MB/s"),
            Metric("lpm_mem_bw_pmm_total", "PMM memory total bandwidth",
                   d_ratio(pmm_total, interval_sec), f"{scale}MB/s"),
        ], description="PMM Memory Bandwidth") if have_pmm else None,
    ], description="Memory Bandwidth")
1035
1036
def UncoreMemSat() -> Optional[Metric]:
    """Build the "lpm_mem_sat" mesh bandwidth saturation metric.

    The value is the saturation event count divided by the uncore clock
    ticks. The description wording depends on whether the saturation event
    that was found is a UNC_CHA_* event.

    Returns:
        The metric, or None when constructing either event raises.
    """
    try:
        clocks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS")
        sat = Event("UNC_CHA_DISTRESS_ASSERTED.VERT", "UNC_CHA_FAST_ASSERTED.VERT",
                    "UNC_C_FAST_ASSERTED")
    except:
        return None

    # CHA events are described in CHA/UPI terms, the UNC_C_* fallback in
    # CBOX/QPI terms.
    if "UNC_CHA_" in sat.name:
        box, link = "CHA", "UPI"
    else:
        box, link = "CBOX", "QPI"
    desc = (f"Mesh Bandwidth saturation (% {box} cycles with FAST signal asserted, "
            f"include {link} bandwidth saturation), lower is better")
    saturation = d_ratio(sat, clocks)
    return Metric("lpm_mem_sat", desc, saturation, "100%")
1051
1052
def UncoreUpiBw() -> Optional[MetricGroup]:
    """Build the "lpm_upi_bw" UPI read/write bandwidth group.

    Reads use UNC_UPI_RxL_FLITS.ALL_DATA and writes use
    UNC_UPI_TxL_FLITS.ALL_DATA, each divided by the measurement interval.

    Returns:
        The metric group, or None when constructing either event raises.
    """
    try:
        upi_rds = Event("UNC_UPI_RxL_FLITS.ALL_DATA")
        upi_wrs = Event("UNC_UPI_TxL_FLITS.ALL_DATA")
    except Exception:
        # Catch Exception, not everything, so KeyboardInterrupt and
        # SystemExit still propagate.
        return None

    # From "Uncore Performance Monitoring": When measuring the amount of
    # bandwidth consumed by transmission of the data (i.e. NOT including
    # the header), it should be .ALL_DATA / 9 * 64B.
    scale = (64 / 9) / 1_000_000
    return MetricGroup("lpm_upi_bw", [
        Metric("lpm_upi_bw_read", "UPI read bandwidth",
               d_ratio(upi_rds, interval_sec), f"{scale}MB/s"),
        # Description previously said "DDR memory write bandwidth".
        Metric("lpm_upi_bw_write", "UPI write bandwidth",
               d_ratio(upi_wrs, interval_sec), f"{scale}MB/s"),
    ], description="UPI Bandwidth")
1072
1073
def main() -> None:
    """Parse the command line, build every metric and print the JSON.

    Stores the parsed arguments in the module global ``_args``, loads the
    events for ``<events_path>/x86/<model>/`` and prints either the metric
    group descriptions (with -metricgroups) or the metrics themselves.
    """
    global _args

    def dir_path(path: str) -> str:
        """Validate path is a directory for argparse."""
        if not os.path.isdir(path):
            raise argparse.ArgumentTypeError(
                f'\'{path}\' is not a valid directory')
        return path

    cli = argparse.ArgumentParser(description="Intel perf json generator")
    cli.add_argument("-metricgroups",
                     help="Generate metricgroups data",
                     action='store_true')
    cli.add_argument("model", help="e.g. skylakex")
    cli.add_argument(
        'events_path',
        type=dir_path,
        help='Root of tree containing architecture directories containing json files')
    _args = cli.parse_args()

    LoadEvents(f"{_args.events_path}/x86/{_args.model}/")

    # Builders are invoked in this order; the group is given each result
    # as-is, including None.
    builders = (
        Cycles,
        Idle,
        Rapl,
        Smi,
        Tsx,
        IntelBr,
        IntelCtxSw,
        IntelFpu,
        IntelIlp,
        IntelL2,
        IntelLdSt,
        IntelMissLat,
        IntelMlp,
        IntelPorts,
        IntelSwpf,
        UncoreCState,
        UncoreDir,
        UncoreMem,
        UncoreMemBw,
        UncoreMemSat,
        UncoreUpiBw,
    )
    all_metrics = MetricGroup("", [build() for build in builders])

    encode = (JsonEncodeMetricGroupDescriptions if _args.metricgroups
              else JsonEncodeMetric)
    print(encode(all_metrics))
1126
1127
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
1130