xref: /linux/tools/perf/pmu-events/intel_metrics.py (revision c7decec2f2d2ab0366567f9e30c0e1418cece43f)
119eab0efSIan Rogers#!/usr/bin/env python3
219eab0efSIan Rogers# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
319eab0efSIan Rogersimport argparse
4cd1c6a48SIan Rogersimport json
5bab90b3bSIan Rogersimport math
619eab0efSIan Rogersimport os
7cd1c6a48SIan Rogersimport re
88c345f35SIan Rogersfrom typing import Optional
9*82e53e7aSIan Rogersfrom common_metrics import Cycles
106ec3058eSIan Rogersfrom metric import (d_ratio, has_event, max, source_count, CheckPmu, Event,
116ec3058eSIan Rogers                    JsonEncodeMetric, JsonEncodeMetricGroupDescriptions,
126ec3058eSIan Rogers                    Literal, LoadEvents, Metric, MetricConstraint, MetricGroup,
136ec3058eSIan Rogers                    MetricRef, Select)
1419eab0efSIan Rogers
# Length of the measurement interval; used below as the denominator of every
# per-second rate metric.
interval_sec = Event("duration_time")
# Global command line arguments.
# NOTE(review): presumably assigned by the script's entry point, which is not
# visible in this part of the file.
_args = None
18bab90b3bSIan Rogers
19bab90b3bSIan Rogers
def Idle() -> Metric:
    """Build the ``lpm_idle`` metric.

    The metric is ``max(msr/tsc/ - msr/mperf/, 0)`` divided by ``msr/tsc/``
    and is reported with the "100%" scale/unit string.

    Returns:
        The ``lpm_idle`` Metric.
    """
    unhalted = Event("msr/mperf/")
    wallclock = Event("msr/tsc/")
    # `max` is the expression helper imported from `metric`, not the builtin;
    # clamping at 0 keeps the reported fraction from going negative.
    idle_cycles = max(wallclock - unhalted, 0)
    description = ("Percentage of total wallclock cycles where CPUs are in "
                   "low power state (C1 or deeper sleep state)")
    return Metric("lpm_idle", description,
                  d_ratio(idle_cycles, wallclock), "100%")
281d519e5aSIan Rogers
291d519e5aSIan Rogers
def Rapl() -> MetricGroup:
    """Processor power consumption estimate.

    Use events from the running average power limit (RAPL) driver. One
    metric is produced for each of the pkg, cores, ram, gpu and psys energy
    events. Each metric evaluates to NaN when ``has_event`` reports that its
    energy event is missing.

    Returns:
        The ``lpm_cpu_power`` MetricGroup.
    """
    # Watts = joules/second
    # The factor equals 2**-32.
    # NOTE(review): presumably this converts the raw energy counter to
    # joules - confirm against the RAPL PMU's advertised scale.
    unit_scale = 2.3283064365386962890625e-10

    def power_metric(domain: str) -> Metric:
        # Guard the event so an absent energy domain yields NaN.
        energy = Event(f"power/energy\\-{domain}/")
        guarded = Select(energy, has_event(energy), math.nan)
        return Metric(f"lpm_cpu_power_{domain}", "",
                      d_ratio(guarded * unit_scale, interval_sec), "Watts")

    domains = ("pkg", "cores", "ram", "gpu", "psys")
    return MetricGroup(
        "lpm_cpu_power",
        [power_metric(domain) for domain in domains],
        description="Running Average Power Limit (RAPL) power consumption estimates")
6219eab0efSIan Rogers
6319eab0efSIan Rogers
def Smi() -> MetricGroup:
    """Build the ``smi`` group of System Management Interrupt metrics.

    ``smi_num`` reports the ``msr/smi/`` count (0 when the event is missing).
    ``smi_cycles`` reports ``(aperf - cycles) / aperf`` when ``msr/smi/`` is
    above zero and ``msr/aperf/`` is present, otherwise 0.

    Returns:
        The ``smi`` MetricGroup.
    """
    # The PMU name is only used in the help text of smi_cycles.
    pmu = "cpu"
    if CheckPmu("cpu_core"):
        pmu = "<cpu_core or cpu_atom>"

    aperf = Event('msr/aperf/')
    cycles = Event('cycles')
    smi_num = Event('msr/smi/')

    # Share of aperf cycles that the 'cycles' event did not count.
    uncounted = (aperf - cycles) / aperf
    only_with_smis = Select(uncounted, smi_num > 0, 0)
    smi_cycles = Select(only_with_smis, has_event(aperf), 0)

    count_metric = Metric('smi_num', 'Number of SMI interrupts.',
                          Select(smi_num, has_event(smi_num), 0), 'SMI#')
    # Note, the smi_cycles "Event" in the threshold is really a reference to
    # the metric of the same name.
    cycles_metric = Metric(
        'smi_cycles',
        'Percentage of cycles spent in System Management Interrupts. '
        f'Requires /sys/bus/event_source/devices/{pmu}/freeze_on_smi to be 1.',
        smi_cycles, '100%', threshold=(MetricRef('smi_cycles') > 0.10))

    return MetricGroup('smi', [count_metric, cycles_metric],
                       description='System Management Interrupt metrics')
8117d616b7SIan Rogers
8217d616b7SIan Rogers
def Tsx() -> Optional[MetricGroup]:
    """Build the ``transaction`` group of transactional memory metrics.

    Returns:
        The ``transaction`` MetricGroup, or None when the
        ``RTM_RETIRED.START`` event (or its sysfs form) cannot be
        constructed. The ``tsx_cycles_per_elision`` entry is None when the
        elision start event cannot be constructed.
    """
    pmu = "cpu_core" if CheckPmu("cpu_core") else "cpu"
    cycles = Event('cycles')
    cycles_in_tx = Event(f'{pmu}/cycles\\-t/')
    cycles_in_tx_cp = Event(f'{pmu}/cycles\\-ct/')
    try:
        # Test if the tsx event is present in the json, prefer the
        # sysfs version so that we can detect its presence at runtime.
        Event("RTM_RETIRED.START")
        transaction_start = Event(f'{pmu}/tx\\-start/')
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        return None

    elision_start = None
    try:
        # Elision start isn't supported by all models, but we'll not
        # generate the tsx_cycles_per_elision metric in that
        # case. Again, prefer the sysfs encoding of the event.
        Event("HLE_RETIRED.START")
        elision_start = Event(f'{pmu}/el\\-start/')
    except Exception:
        pass

    return MetricGroup('transaction', [
        Metric('tsx_transactional_cycles',
               'Percentage of cycles within a transaction region.',
               Select(cycles_in_tx / cycles, has_event(cycles_in_tx), 0),
               '100%'),
        Metric('tsx_aborted_cycles', 'Percentage of cycles in aborted transactions.',
               Select(max(cycles_in_tx - cycles_in_tx_cp, 0) / cycles,
                      has_event(cycles_in_tx),
                      0),
               '100%'),
        Metric('tsx_cycles_per_transaction',
               'Number of cycles within a transaction divided by the number of transactions.',
               Select(cycles_in_tx / transaction_start,
                      has_event(cycles_in_tx),
                      0),
               "cycles / transaction"),
        Metric('tsx_cycles_per_elision',
               'Number of cycles within a transaction divided by the number of elisions.',
               Select(cycles_in_tx / elision_start,
                      has_event(elision_start),
                      0),
               "cycles / elision") if elision_start else None,
    ], description="Breakdown of transactional memory statistics")
1298c345f35SIan Rogers
1308c345f35SIan Rogers
def IntelBr() -> MetricGroup:
    """Build the ``lpm_br`` group breaking down retired branch instructions.

    The group combines four sub-groups (total, taken, conditional and far
    branches). A sub-group, or an individual metric inside one, is None when
    the events it needs cannot be constructed.

    Returns:
        The ``lpm_br`` MetricGroup.
    """
    ins = Event("instructions")

    def Total() -> MetricGroup:
        """Metrics over all retired branches."""
        br_all = Event("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY")
        br_m_all = Event("BR_MISP_RETIRED.ALL_BRANCHES",
                         "BR_INST_RETIRED.MISPRED",
                         "BR_MISP_EXEC.ANY")
        br_clr = None
        try:
            br_clr = Event("BACLEARS.ANY", "BACLEARS.ALL")
        except Exception:
            # The resteer metric is optional; it is dropped when no
            # BACLEARS event can be constructed.
            pass

        br_r = d_ratio(br_all, interval_sec)
        ins_r = d_ratio(ins, br_all)
        misp_r = d_ratio(br_m_all, br_all)
        clr_r = d_ratio(br_clr, interval_sec) if br_clr else None

        return MetricGroup("lpm_br_total", [
            Metric("lpm_br_total_retired",
                   "The number of branch instructions retired per second.", br_r,
                   "insn/s"),
            Metric(
                "lpm_br_total_mispred",
                "The number of branch instructions retired, of any type, that were "
                "not correctly predicted as a percentage of all branch instructions.",
                misp_r, "100%"),
            Metric("lpm_br_total_insn_between_branches",
                   "The number of instructions divided by the number of branches.",
                   ins_r, "insn"),
            Metric("lpm_br_total_insn_fe_resteers",
                   "The number of resync branches per second.", clr_r, "req/s"
                   ) if clr_r else None
        ])

    def Taken() -> MetricGroup:
        """Metrics described as covering taken branches."""
        # NOTE(review): the rates below are computed from the all-branches
        # event although the descriptions talk about taken branches -
        # confirm this is intended.
        br_all = Event("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY")
        br_m_tk = None
        try:
            br_m_tk = Event("BR_MISP_RETIRED.NEAR_TAKEN",
                            "BR_MISP_RETIRED.TAKEN_JCC",
                            "BR_INST_RETIRED.MISPRED_TAKEN")
        except Exception:
            pass
        br_r = d_ratio(br_all, interval_sec)
        ins_r = d_ratio(ins, br_all)
        misp_r = d_ratio(br_m_tk, br_all) if br_m_tk else None
        return MetricGroup("lpm_br_taken", [
            Metric("lpm_br_taken_retired",
                   "The number of taken branches that were retired per second.",
                   br_r, "insn/s"),
            Metric(
                "lpm_br_taken_mispred",
                "The number of retired taken branch instructions that were "
                "mispredicted as a percentage of all taken branches.", misp_r,
                "100%") if misp_r else None,
            Metric(
                "lpm_br_taken_insn_between_branches",
                "The number of instructions divided by the number of taken branches.",
                ins_r, "insn"),
        ])

    def Conditional() -> Optional[MetricGroup]:
        """Metrics over conditional branches, or None without the events."""
        try:
            br_cond = Event("BR_INST_RETIRED.COND",
                            "BR_INST_RETIRED.CONDITIONAL",
                            "BR_INST_RETIRED.TAKEN_JCC")
            br_m_cond = Event("BR_MISP_RETIRED.COND",
                              "BR_MISP_RETIRED.CONDITIONAL",
                              "BR_MISP_RETIRED.TAKEN_JCC")
        except Exception:
            return None

        br_cond_nt = None
        br_m_cond_nt = None
        try:
            br_cond_nt = Event("BR_INST_RETIRED.COND_NTAKEN")
            br_m_cond_nt = Event("BR_MISP_RETIRED.COND_NTAKEN")
        except Exception:
            pass
        br_r = d_ratio(br_cond, interval_sec)
        ins_r = d_ratio(ins, br_cond)
        misp_r = d_ratio(br_m_cond, br_cond)
        taken_metrics = [
            Metric("lpm_br_cond_retired", "Retired conditional branch instructions.",
                   br_r, "insn/s"),
            Metric("lpm_br_cond_insn_between_branches",
                   "The number of instructions divided by the number of conditional "
                   "branches.", ins_r, "insn"),
            Metric("lpm_br_cond_mispred",
                   "Retired conditional branch instructions mispredicted as a "
                   "percentage of all conditional branches.", misp_r, "100%"),
        ]
        if not br_m_cond_nt:
            return MetricGroup("lpm_br_cond", taken_metrics)

        br_r = d_ratio(br_cond_nt, interval_sec)
        ins_r = d_ratio(ins, br_cond_nt)
        misp_r = d_ratio(br_m_cond_nt, br_cond_nt)

        # The not-taken metrics carry the "lpm_br_cond_nt_" group prefix so
        # their names do not collide with the metrics in taken_metrics.
        not_taken_metrics = [
            Metric("lpm_br_cond_nt_retired",
                   "Retired conditional not taken branch instructions.",
                   br_r, "insn/s"),
            Metric("lpm_br_cond_nt_insn_between_branches",
                   "The number of instructions divided by the number of not taken conditional "
                   "branches.", ins_r, "insn"),
            Metric("lpm_br_cond_nt_mispred",
                   "Retired not taken conditional branch instructions mispredicted as a "
                   "percentage of all not taken conditional branches.", misp_r, "100%"),
        ]
        return MetricGroup("lpm_br_cond", [
            MetricGroup("lpm_br_cond_nt", not_taken_metrics),
            MetricGroup("lpm_br_cond_tkn", taken_metrics),
        ])

    def Far() -> Optional[MetricGroup]:
        """Metrics over far branches, or None without the event."""
        try:
            br_far = Event("BR_INST_RETIRED.FAR_BRANCH")
        except Exception:
            return None

        br_r = d_ratio(br_far, interval_sec)
        ins_r = d_ratio(ins, br_far)
        return MetricGroup("lpm_br_far", [
            Metric("lpm_br_far_retired", "Retired far control transfers per second.",
                   br_r, "insn/s"),
            Metric(
                "lpm_br_far_insn_between_branches",
                "The number of instructions divided by the number of far branches.",
                ins_r, "insn"),
        ])

    return MetricGroup("lpm_br", [Total(), Taken(), Conditional(), Far()],
                       description="breakdown of retired branch instructions")
26637d0b00aSIan Rogers
26737d0b00aSIan Rogers
def IntelCtxSw() -> MetricGroup:
    """Build the ``lpm_cs`` group of per-context-switch metrics.

    The rate, instruction and cycle metrics are always present. The load,
    store, taken-branch and L2-miss metrics are added only when their events
    can be constructed.

    Returns:
        The ``lpm_cs`` MetricGroup.
    """
    cs = Event("context\\-switches")
    metrics = [
        Metric("lpm_cs_rate", "Context switches per second",
               d_ratio(cs, interval_sec), "ctxsw/s"),
        Metric("lpm_cs_instr", "Instructions per context switch",
               d_ratio(Event("instructions"), cs), "instr/cs"),
        Metric("lpm_cs_cycles", "Cycles per context switch",
               d_ratio(Event("cycles"), cs), "cycles/cs"),
    ]

    # (metric name, description, unit, event names handed to Event()).
    optional_metrics = (
        ("lpm_cs_loads", "Loads per context switch", "loads/cs",
         ("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS")),
        ("lpm_cs_stores", "Stores per context switch", "stores/cs",
         ("MEM_INST_RETIRED.ALL_STORES", "MEM_UOPS_RETIRED.ALL_STORES")),
        ("lpm_cs_br_taken", "Branches taken per context switch", "br_taken/cs",
         ("BR_INST_RETIRED.NEAR_TAKEN", "BR_INST_RETIRED.TAKEN_JCC")),
    )
    for name, desc, unit, event_names in optional_metrics:
        try:
            ev = Event(*event_names)
        except Exception:
            # Best effort: skip the metric when none of its events can be
            # constructed. Only the Event lookup is guarded, so unrelated
            # errors are no longer hidden.
            continue
        metrics.append(Metric(name, desc, d_ratio(ev, cs), unit))

    try:
        l2_misses = (Event("L2_RQSTS.DEMAND_DATA_RD_MISS") +
                     Event("L2_RQSTS.RFO_MISS") +
                     Event("L2_RQSTS.CODE_RD_MISS"))
    except Exception:
        pass
    else:
        try:
            # Prefetch misses are added only when such an event exists.
            l2_misses += Event("L2_RQSTS.HWPF_MISS",
                               "L2_RQSTS.L2_PF_MISS", "L2_RQSTS.PF_MISS")
        except Exception:
            pass
        metrics.append(Metric("lpm_cs_l2_misses", "L2 misses per context switch",
                              d_ratio(l2_misses, cs), "l2_misses/cs"))

    return MetricGroup("lpm_cs", metrics,
                       description=("Number of context switches per second, instructions "
                                    "retired & core cycles between context switches"))
3232f3d6ea0SIan Rogers
3242f3d6ea0SIan Rogers
def IntelFpu() -> Optional[MetricGroup]:
    """Build the ``lpm_fpu`` group of floating point metrics.

    The total FLOP count is a weighted sum of the scalar and packed events,
    each multiplied by the factor used in the code below. The wider packed
    events are optional: they are looked up in order and the first missing
    one ends the lookup.

    Returns:
        The ``lpm_fpu`` MetricGroup, or None when no scalar-single event
        can be constructed.
    """
    cyc = Event("cycles")
    try:
        s_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
                     "SIMD_INST_RETIRED.SCALAR_SINGLE")
    except Exception:
        return None
    d_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
                 "SIMD_INST_RETIRED.SCALAR_DOUBLE")
    s_128 = Event("FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
                  "SIMD_INST_RETIRED.PACKED_SINGLE")

    flop = s_64 + d_64 + 4 * s_128

    # (key, event name suffix, operations per instruction).
    packed_events = (
        ("d_128", "128B_PACKED_DOUBLE", 2),
        ("s_256", "256B_PACKED_SINGLE", 8),
        ("d_256", "256B_PACKED_DOUBLE", 4),
        ("s_512", "512B_PACKED_SINGLE", 16),
        ("d_512", "512B_PACKED_DOUBLE", 8),
    )
    packed = {}
    for key, suffix, mult in packed_events:
        try:
            ev = Event(f"FP_ARITH_INST_RETIRED.{suffix}")
        except Exception:
            # Stop at the first missing event; the later ones stay absent.
            break
        packed[key] = ev
        flop += mult * ev

    f_assist = Event("ASSISTS.FP", "FP_ASSIST.ANY", "FP_ASSIST.S")
    # Remember which event was selected before the name is extended.
    assist_name = f_assist.name
    nmi_constraint = MetricConstraint.GROUPED_EVENTS
    if assist_name == "ASSISTS.FP":  # Icelake+
        nmi_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI
    # Compare and extend the event *name*: testing the Event object itself
    # against strings never applied the cmask.
    if assist_name in ("ASSISTS.FP", "FP_ASSIST.S"):
        f_assist.name = assist_name + "/cmask=1/"

    flop_r = d_ratio(flop, interval_sec)
    flop_c = d_ratio(flop, cyc)

    def FpuMetrics(group: str, fl: Optional[Event], mult: int, desc: str) -> Optional[MetricGroup]:
        """Return the metrics for one instruction class, or None if absent.

        Args:
            group: Name of the group and prefix of its metric names.
            fl: Event counting the instructions, or None when unavailable.
            mult: Floating point operations per counted instruction.
            desc: Human readable label used at the start of descriptions.
        """
        if not fl:
            return None

        f = fl * mult
        fl_r = d_ratio(f, interval_sec)
        r_s = d_ratio(fl, interval_sec)
        return MetricGroup(group, [
            Metric(f"{group}_of_total",
                   desc + " floating point operations as a percentage of "
                   "total floating point operations",
                   d_ratio(f, flop), "100%"),
            Metric(f"{group}_flops", desc + " floating point operations per second",
                   fl_r, "flops/s"),
            Metric(f"{group}_ops", desc + " operations per second",
                   r_s, "ops/s"),
        ])

    return MetricGroup("lpm_fpu", [
        MetricGroup("lpm_fpu_total", [
            Metric("lpm_fpu_total_flops", "Floating point operations per second",
                   flop_r, "flops/s"),
            Metric("lpm_fpu_total_flopc", "Floating point operations per cycle",
                   flop_c, "flops/cycle", constraint=nmi_constraint),
        ]),
        MetricGroup("lpm_fpu_64", [
            FpuMetrics("lpm_fpu_64_single", s_64, 1, "64-bit single"),
            FpuMetrics("lpm_fpu_64_double", d_64, 1, "64-bit double"),
        ]),
        MetricGroup("lpm_fpu_128", [
            FpuMetrics("lpm_fpu_128_single", s_128,
                       4, "128-bit packed single"),
            FpuMetrics("lpm_fpu_128_double", packed.get("d_128"),
                       2, "128-bit packed double"),
        ]),
        MetricGroup("lpm_fpu_256", [
            FpuMetrics("lpm_fpu_256_single", packed.get("s_256"),
                       8, "256-bit packed single"),
            FpuMetrics("lpm_fpu_256_double", packed.get("d_256"),
                       4, "256-bit packed double"),
        ]),
        MetricGroup("lpm_fpu_512", [
            FpuMetrics("lpm_fpu_512_single", packed.get("s_512"),
                       16, "512-bit packed single"),
            FpuMetrics("lpm_fpu_512_double", packed.get("d_512"),
                       8, "512-bit packed double"),
        ]),
        Metric("lpm_fpu_assists", "FP assists as a percentage of cycles",
               d_ratio(f_assist, cyc), "100%"),
    ])
419d666f017SIan Rogers
420d666f017SIan Rogers
def IntelIlp() -> MetricGroup:
    """Build the ``lpm_ilp`` group of instruction retirement metrics.

    ``INST_RETIRED.ANY_P`` is sampled with cmask values 1 to 5. Differences
    between consecutive cmask counts, divided by core cycles, give the
    ``inst_ret_1`` to ``inst_ret_4`` metrics; the cmask=5 count gives
    ``inst_ret_5``; one minus all of those gives ``inst_ret_0``.

    Returns:
        The ``lpm_ilp`` MetricGroup.
    """
    wallclock = Event("msr/tsc/")
    unhalted = Event("msr/mperf/")
    idle = wallclock - unhalted

    retired = Event("INST_RETIRED.ANY_P")
    # NOTE(review): presumably cmask=n counts cycles retiring at least n
    # instructions - confirm against the event documentation.
    thresholds = []
    for cmask in range(1, 6):
        thresholds.append(Event(f"{retired.name}/cmask={cmask}/"))
    core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY",
                        "CPU_CLK_UNHALTED.DISTRIBUTED",
                        "cycles")

    # Adjacent cmask counts are subtracted and clamped at zero.
    fractions = [d_ratio(max(this_many - one_more, 0), core_cycles)
                 for this_many, one_more in zip(thresholds, thresholds[1:])]
    fractions.append(d_ratio(thresholds[-1], core_cycles))

    # Whatever is left over after removing every other bucket.
    none_retired = 1
    for fraction in fractions:
        none_retired -= fraction

    metrics = [
        Metric("lpm_ilp_idle", "Lower power cycles as a percentage of all cycles",
               d_ratio(idle, wallclock), "100%"),
        Metric("lpm_ilp_inst_ret_0",
               "Instructions retired in 0 cycles as a percentage of all cycles",
               none_retired, "100%"),
    ]
    for count, fraction in enumerate(fractions[:-1], start=1):
        metrics.append(
            Metric(f"lpm_ilp_inst_ret_{count}",
                   f"Instructions retired in {count} cycles as a percentage of all cycles",
                   fraction, "100%"))
    metrics.append(
        Metric("lpm_ilp_inst_ret_5",
               "Instructions retired in 5 or more cycles as a percentage of all cycles",
               fractions[-1], "100%"))
    return MetricGroup("lpm_ilp", metrics)
45859341f4eSIan Rogers
45959341f4eSIan Rogers
4607413633eSIan Rogersdef IntelL2() -> Optional[MetricGroup]:
4617413633eSIan Rogers    try:
4627413633eSIan Rogers        DC_HIT = Event("L2_RQSTS.DEMAND_DATA_RD_HIT")
4637413633eSIan Rogers    except:
4647413633eSIan Rogers        return None
4657413633eSIan Rogers    try:
4667413633eSIan Rogers        DC_MISS = Event("L2_RQSTS.DEMAND_DATA_RD_MISS")
4677413633eSIan Rogers        l2_dmnd_miss = DC_MISS
4687413633eSIan Rogers        l2_dmnd_rd_all = DC_MISS + DC_HIT
4697413633eSIan Rogers    except:
4707413633eSIan Rogers        DC_ALL = Event("L2_RQSTS.ALL_DEMAND_DATA_RD")
4717413633eSIan Rogers        l2_dmnd_miss = DC_ALL - DC_HIT
4727413633eSIan Rogers        l2_dmnd_rd_all = DC_ALL
4737413633eSIan Rogers    l2_dmnd_mrate = d_ratio(l2_dmnd_miss, interval_sec)
4747413633eSIan Rogers    l2_dmnd_rrate = d_ratio(l2_dmnd_rd_all, interval_sec)
4757413633eSIan Rogers
4767413633eSIan Rogers    DC_PFH = None
4777413633eSIan Rogers    DC_PFM = None
4787413633eSIan Rogers    l2_pf_all = None
4797413633eSIan Rogers    l2_pf_mrate = None
4807413633eSIan Rogers    l2_pf_rrate = None
4817413633eSIan Rogers    try:
4827413633eSIan Rogers        DC_PFH = Event("L2_RQSTS.PF_HIT")
4837413633eSIan Rogers        DC_PFM = Event("L2_RQSTS.PF_MISS")
4847413633eSIan Rogers        l2_pf_all = DC_PFH + DC_PFM
4857413633eSIan Rogers        l2_pf_mrate = d_ratio(DC_PFM, interval_sec)
4867413633eSIan Rogers        l2_pf_rrate = d_ratio(l2_pf_all, interval_sec)
4877413633eSIan Rogers    except:
4887413633eSIan Rogers        pass
4897413633eSIan Rogers
4907413633eSIan Rogers    DC_RFOH = None
4917413633eSIan Rogers    DC_RFOM = None
4927413633eSIan Rogers    l2_rfo_all = None
4937413633eSIan Rogers    l2_rfo_mrate = None
4947413633eSIan Rogers    l2_rfo_rrate = None
4957413633eSIan Rogers    try:
4967413633eSIan Rogers        DC_RFOH = Event("L2_RQSTS.RFO_HIT")
4977413633eSIan Rogers        DC_RFOM = Event("L2_RQSTS.RFO_MISS")
4987413633eSIan Rogers        l2_rfo_all = DC_RFOH + DC_RFOM
4997413633eSIan Rogers        l2_rfo_mrate = d_ratio(DC_RFOM, interval_sec)
5007413633eSIan Rogers        l2_rfo_rrate = d_ratio(l2_rfo_all, interval_sec)
5017413633eSIan Rogers    except:
5027413633eSIan Rogers        pass
5037413633eSIan Rogers
5047413633eSIan Rogers    DC_CH = None
5057413633eSIan Rogers    try:
5067413633eSIan Rogers        DC_CH = Event("L2_RQSTS.CODE_RD_HIT")
5077413633eSIan Rogers    except:
5087413633eSIan Rogers        pass
5097413633eSIan Rogers    DC_CM = Event("L2_RQSTS.CODE_RD_MISS")
5107413633eSIan Rogers    DC_IN = Event("L2_LINES_IN.ALL")
5117413633eSIan Rogers    DC_OUT_NS = None
5127413633eSIan Rogers    DC_OUT_S = None
5137413633eSIan Rogers    l2_lines_out = None
5147413633eSIan Rogers    l2_out_rate = None
5157413633eSIan Rogers    wbn = None
5167413633eSIan Rogers    isd = None
5177413633eSIan Rogers    try:
5187413633eSIan Rogers        DC_OUT_NS = Event("L2_LINES_OUT.NON_SILENT",
5197413633eSIan Rogers                          "L2_LINES_OUT.DEMAND_DIRTY",
5207413633eSIan Rogers                          "L2_LINES_IN.S")
5217413633eSIan Rogers        DC_OUT_S = Event("L2_LINES_OUT.SILENT",
5227413633eSIan Rogers                         "L2_LINES_OUT.DEMAND_CLEAN",
5237413633eSIan Rogers                         "L2_LINES_IN.I")
5247413633eSIan Rogers        if DC_OUT_S.name == "L2_LINES_OUT.SILENT" and (
5257413633eSIan Rogers                args.model.startswith("skylake") or
5267413633eSIan Rogers                args.model == "cascadelakex"):
5277413633eSIan Rogers            DC_OUT_S.name = "L2_LINES_OUT.SILENT/any/"
5287413633eSIan Rogers        # bring is back to per-CPU
5297413633eSIan Rogers        l2_s = Select(DC_OUT_S / 2, Literal("#smt_on"), DC_OUT_S)
5307413633eSIan Rogers        l2_ns = DC_OUT_NS
5317413633eSIan Rogers        l2_lines_out = l2_s + l2_ns
5327413633eSIan Rogers        l2_out_rate = d_ratio(l2_lines_out, interval_sec)
5337413633eSIan Rogers        nlr = max(l2_ns - DC_WB_U - DC_WB_D, 0)
5347413633eSIan Rogers        wbn = d_ratio(nlr, interval_sec)
5357413633eSIan Rogers        isd = d_ratio(l2_s, interval_sec)
5367413633eSIan Rogers    except:
5377413633eSIan Rogers        pass
5387413633eSIan Rogers    DC_OUT_U = None
5397413633eSIan Rogers    l2_pf_useless = None
5407413633eSIan Rogers    l2_useless_rate = None
5417413633eSIan Rogers    try:
5427413633eSIan Rogers        DC_OUT_U = Event("L2_LINES_OUT.USELESS_HWPF")
5437413633eSIan Rogers        l2_pf_useless = DC_OUT_U
5447413633eSIan Rogers        l2_useless_rate = d_ratio(l2_pf_useless, interval_sec)
5457413633eSIan Rogers    except:
5467413633eSIan Rogers        pass
5477413633eSIan Rogers    DC_WB_U = None
5487413633eSIan Rogers    DC_WB_D = None
5497413633eSIan Rogers    wbu = None
5507413633eSIan Rogers    wbd = None
5517413633eSIan Rogers    try:
5527413633eSIan Rogers        DC_WB_U = Event("IDI_MISC.WB_UPGRADE")
5537413633eSIan Rogers        DC_WB_D = Event("IDI_MISC.WB_DOWNGRADE")
5547413633eSIan Rogers        wbu = d_ratio(DC_WB_U, interval_sec)
5557413633eSIan Rogers        wbd = d_ratio(DC_WB_D, interval_sec)
5567413633eSIan Rogers    except:
5577413633eSIan Rogers        pass
5587413633eSIan Rogers
5597413633eSIan Rogers    l2_lines_in = DC_IN
5607413633eSIan Rogers    l2_code_all = (DC_CH + DC_CM) if DC_CH else None
5617413633eSIan Rogers    l2_code_rate = d_ratio(l2_code_all, interval_sec) if DC_CH else None
5627413633eSIan Rogers    l2_code_miss_rate = d_ratio(DC_CM, interval_sec)
5637413633eSIan Rogers    l2_in_rate = d_ratio(l2_lines_in, interval_sec)
5647413633eSIan Rogers
5657413633eSIan Rogers    return MetricGroup("lpm_l2", [
5667413633eSIan Rogers        MetricGroup("lpm_l2_totals", [
5677413633eSIan Rogers            Metric("lpm_l2_totals_in", "L2 cache total in per second",
5687413633eSIan Rogers                   l2_in_rate, "In/s"),
5697413633eSIan Rogers            Metric("lpm_l2_totals_out", "L2 cache total out per second",
5707413633eSIan Rogers                   l2_out_rate, "Out/s") if l2_out_rate else None,
5717413633eSIan Rogers        ]),
5727413633eSIan Rogers        MetricGroup("lpm_l2_rd", [
5737413633eSIan Rogers            Metric("lpm_l2_rd_hits", "L2 cache data read hits",
5747413633eSIan Rogers                   d_ratio(DC_HIT, l2_dmnd_rd_all), "100%"),
5757413633eSIan Rogers            Metric("lpm_l2_rd_hits", "L2 cache data read hits",
5767413633eSIan Rogers                   d_ratio(l2_dmnd_miss, l2_dmnd_rd_all), "100%"),
5777413633eSIan Rogers            Metric("lpm_l2_rd_requests", "L2 cache data read requests per second",
5787413633eSIan Rogers                   l2_dmnd_rrate, "requests/s"),
5797413633eSIan Rogers            Metric("lpm_l2_rd_misses", "L2 cache data read misses per second",
5807413633eSIan Rogers                   l2_dmnd_mrate, "misses/s"),
5817413633eSIan Rogers        ]),
5827413633eSIan Rogers        MetricGroup("lpm_l2_hwpf", [
5837413633eSIan Rogers            Metric("lpm_l2_hwpf_hits", "L2 cache hardware prefetcher hits",
5847413633eSIan Rogers                   d_ratio(DC_PFH, l2_pf_all), "100%"),
5857413633eSIan Rogers            Metric("lpm_l2_hwpf_misses", "L2 cache hardware prefetcher misses",
5867413633eSIan Rogers                   d_ratio(DC_PFM, l2_pf_all), "100%"),
5877413633eSIan Rogers            Metric("lpm_l2_hwpf_useless", "L2 cache hardware prefetcher useless prefetches per second",
5887413633eSIan Rogers                   l2_useless_rate, "100%") if l2_useless_rate else None,
5897413633eSIan Rogers            Metric("lpm_l2_hwpf_requests", "L2 cache hardware prefetcher requests per second",
5907413633eSIan Rogers                   l2_pf_rrate, "100%"),
5917413633eSIan Rogers            Metric("lpm_l2_hwpf_misses", "L2 cache hardware prefetcher misses per second",
5927413633eSIan Rogers                   l2_pf_mrate, "100%"),
5937413633eSIan Rogers        ]) if DC_PFH else None,
5947413633eSIan Rogers        MetricGroup("lpm_l2_rfo", [
5957413633eSIan Rogers            Metric("lpm_l2_rfo_hits", "L2 cache request for ownership (RFO) hits",
5967413633eSIan Rogers                   d_ratio(DC_RFOH, l2_rfo_all), "100%"),
5977413633eSIan Rogers            Metric("lpm_l2_rfo_misses", "L2 cache request for ownership (RFO) misses",
5987413633eSIan Rogers                   d_ratio(DC_RFOM, l2_rfo_all), "100%"),
5997413633eSIan Rogers            Metric("lpm_l2_rfo_requests", "L2 cache request for ownership (RFO) requests per second",
6007413633eSIan Rogers                   l2_rfo_rrate, "requests/s"),
6017413633eSIan Rogers            Metric("lpm_l2_rfo_misses", "L2 cache request for ownership (RFO) misses per second",
6027413633eSIan Rogers                   l2_rfo_mrate, "misses/s"),
6037413633eSIan Rogers        ]) if DC_RFOH else None,
6047413633eSIan Rogers        MetricGroup("lpm_l2_code", [
6057413633eSIan Rogers            Metric("lpm_l2_code_hits", "L2 cache code hits",
6067413633eSIan Rogers                   d_ratio(DC_CH, l2_code_all), "100%") if DC_CH else None,
6077413633eSIan Rogers            Metric("lpm_l2_code_misses", "L2 cache code misses",
6087413633eSIan Rogers                   d_ratio(DC_CM, l2_code_all), "100%") if DC_CH else None,
6097413633eSIan Rogers            Metric("lpm_l2_code_requests", "L2 cache code requests per second",
6107413633eSIan Rogers                   l2_code_rate, "requests/s") if DC_CH else None,
6117413633eSIan Rogers            Metric("lpm_l2_code_misses", "L2 cache code misses per second",
6127413633eSIan Rogers                   l2_code_miss_rate, "misses/s"),
6137413633eSIan Rogers        ]),
6147413633eSIan Rogers        MetricGroup("lpm_l2_evict", [
6157413633eSIan Rogers            MetricGroup("lpm_l2_evict_mef_lines", [
6167413633eSIan Rogers                Metric("lpm_l2_evict_mef_lines_l3_hot_lru", "L2 evictions M/E/F lines L3 hot LRU per second",
6177413633eSIan Rogers                       wbu, "HotLRU/s") if wbu else None,
6187413633eSIan Rogers                Metric("lpm_l2_evict_mef_lines_l3_norm_lru", "L2 evictions M/E/F lines L3 normal LRU per second",
6197413633eSIan Rogers                       wbn, "NormLRU/s") if wbn else None,
6207413633eSIan Rogers                Metric("lpm_l2_evict_mef_lines_dropped", "L2 evictions M/E/F lines dropped per second",
6217413633eSIan Rogers                       wbd, "dropped/s") if wbd else None,
6227413633eSIan Rogers                Metric("lpm_l2_evict_is_lines_dropped", "L2 evictions I/S lines dropped per second",
6237413633eSIan Rogers                       isd, "dropped/s") if isd else None,
6247413633eSIan Rogers            ]),
6257413633eSIan Rogers        ]),
6267413633eSIan Rogers    ], description="L2 data cache analysis")
6277413633eSIan Rogers
6287413633eSIan Rogers
def IntelMissLat() -> Optional[MetricGroup]:
    """Build the "lpm_miss_lat" group: local and remote miss latency in ns.

    Each latency is computed as

        interval_sec * 1e9 * occupancy / (ticks_per_cha * inserts)

    where ticks_per_cha is the uncore clocktick event divided by
    source_count() of the matching inserts event.

    Several alternative event names are tried for every input. When one of
    the generic (non DRD_LOCAL/DRD_REMOTE) alternatives was selected, an
    opcode / locality filter is appended to the event names.

    Returns:
        The metric group, or None when Event() fails for any of the required
        uncore events (i.e. the model does not provide them).
    """
    try:
        ticks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS")
        data_rd_loc_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL",
                                "UNC_CHA_TOR_OCCUPANCY.IA_MISS",
                                "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE",
                                "UNC_C_TOR_OCCUPANCY.MISS_OPCODE")
        data_rd_loc_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL",
                                "UNC_CHA_TOR_INSERTS.IA_MISS",
                                "UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE",
                                "UNC_C_TOR_INSERTS.MISS_OPCODE")
        data_rd_rem_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE",
                                "UNC_CHA_TOR_OCCUPANCY.IA_MISS",
                                "UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE",
                                "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE")
        data_rd_rem_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE",
                                "UNC_CHA_TOR_INSERTS.IA_MISS",
                                "UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE",
                                "UNC_C_TOR_INSERTS.NID_MISS_OPCODE")
    except Exception:
        # Only a failed event lookup should disable the metric; Ctrl-C and
        # interpreter exit must still propagate.
        return None

    # Which alternative was picked for the local occupancy event decides the
    # filter that has to be appended to all four TOR events.
    selected = data_rd_loc_occ.name
    local_events = [data_rd_loc_occ, data_rd_loc_ins]
    remote_events = [data_rd_rem_occ, data_rd_rem_ins]

    if selected in ("UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE",
                    "UNC_C_TOR_OCCUPANCY.MISS_OPCODE"):
        data_rd = 0x182
        for e in local_events + remote_events:
            e.name += f"/filter_opc={hex(data_rd)}/"
    elif selected == "UNC_CHA_TOR_OCCUPANCY.IA_MISS":
        # Demand Data Read - Full cache-line read requests from core for
        # lines to be cached in S or E, typically for data
        demand_data_rd = 0x202
        # LLC Prefetch Data - Uncore will first look up the line in the
        # LLC; for a cache hit, the LRU will be updated, on a miss, the
        # DRd will be initiated
        llc_prefetch_data = 0x25a
        opcodes = (f"/filter_opc0={hex(demand_data_rd)},"
                   f"filter_opc1={hex(llc_prefetch_data)},")
        for e in local_events:
            e.name += opcodes + "filter_loc,filter_nm,filter_not_nm/"
        for e in remote_events:
            e.name += opcodes + "filter_rem,filter_nm,filter_not_nm/"
    else:
        # The dedicated DRD_LOCAL/DRD_REMOTE events need no extra filter.
        assert selected == "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", data_rd_loc_occ

    def latency_ns(occupancy, inserts):
        # occupancy / inserts is in uncore ticks; interval_sec * 1e9 divided
        # by the tick count converts that into nanoseconds.
        ticks_per_cha = ticks / source_count(inserts)
        return interval_sec * 1e9 * occupancy / (ticks_per_cha * inserts)

    return MetricGroup("lpm_miss_lat", [
        Metric("lpm_miss_lat_loc", "Local to a socket miss latency in nanoseconds",
               latency_ns(data_rd_loc_occ, data_rd_loc_ins), "ns"),
        Metric("lpm_miss_lat_rem", "Remote to a socket miss latency in nanoseconds",
               latency_ns(data_rd_rem_occ, data_rd_rem_ins), "ns"),
    ])
6896ec3058eSIan Rogers
6906ec3058eSIan Rogers
def IntelMlp() -> Optional[Metric]:
    """Build the "lpm_mlp" (miss level parallelism) metric.

    The metric is L1D_PEND_MISS.PENDING divided by
    L1D_PEND_MISS.PENDING_CYCLES, with the cycle count halved when the
    "#smt_on" literal is set.

    Returns:
        The metric, or None when Event() fails for either event (i.e. the
        model does not provide them).
    """
    try:
        pending = Event("L1D_PEND_MISS.PENDING")
        pending_cycles = Event("L1D_PEND_MISS.PENDING_CYCLES")
    except Exception:
        # Only a failed event lookup should disable the metric; Ctrl-C and
        # interpreter exit must still propagate.
        return None

    # Scale the cycle count for SMT: use half of it when SMT is enabled.
    scaled_cycles = Select(pending_cycles / 2, Literal("#smt_on"),
                           pending_cycles)
    return Metric("lpm_mlp",
                  "Miss level parallelism - number of outstanding load misses per cycle (higher is better)",
                  d_ratio(pending, scaled_cycles), "load_miss_pending/cycle")
703426b8442SIan Rogers
704426b8442SIan Rogers
def IntelPorts() -> Optional[MetricGroup]:
    """Build the "lpm_ports" group with one utilization metric per port.

    The model's pipeline.json (under <events_path>/x86/<model>/) is scanned
    for events whose name matches ^UOPS_DISPATCHED.PORT; each one becomes a
    metric of that event divided by a cycle count. Events ending in "_CORE"
    are divided by the raw core cycles, all others by the SMT-scaled cycles.

    Returns:
        The metric group, or None when the model has no matching events.

    Raises:
        OSError: if pipeline.json cannot be opened.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(f"{_args.events_path}/x86/{_args.model}/pipeline.json") as f:
        pipeline_events = json.load(f)

    core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY",
                        "CPU_CLK_UNHALTED.DISTRIBUTED",
                        "cycles")
    # Number of CPU cycles scaled for SMT.
    smt_cycles = Select(core_cycles / 2, Literal("#smt_on"), core_cycles)

    metrics = []
    for x in pipeline_events:
        if "EventName" not in x:
            continue
        name = x["EventName"]
        if not re.search("^UOPS_DISPATCHED.PORT", name):
            continue
        # Metric name suffix is the "PORT_<n>..." tail of the event name.
        port = re.search(r"(PORT_[0-9].*)", name).group(0).lower()
        cyc = core_cycles if name.endswith("_CORE") else smt_cycles
        metrics.append(Metric(f"lpm_{port}", f"{port} utilization (higher is better)",
                              d_ratio(Event(name), cyc), "100%"))
    if not metrics:
        return None

    return MetricGroup("lpm_ports", metrics, "functional unit (port) utilization -- "
                       "fraction of cycles each port is utilized (higher is better)")
731cd1c6a48SIan Rogers
732cd1c6a48SIan Rogers
def IntelSwpf() -> Optional[MetricGroup]:
    """Build the "lpm_swpf" group describing software prefetch instructions.

    The group contains totals (prefetches per second, instructions per
    prefetch, loads per prefetch) and, for each SW_PREFETCH_ACCESS.* event,
    its share of all software prefetches and its rate per second.

    Returns:
        The metric group, or None when Event() fails for the retired-loads
        event or any SW_PREFETCH_ACCESS.* event.
    """
    ins = Event("instructions")
    try:
        s_ld = Event("MEM_INST_RETIRED.ALL_LOADS",
                     "MEM_UOPS_RETIRED.ALL_LOADS")
        s_nta = Event("SW_PREFETCH_ACCESS.NTA")
        s_t0 = Event("SW_PREFETCH_ACCESS.T0")
        s_t1 = Event("SW_PREFETCH_ACCESS.T1_T2")
        s_w = Event("SW_PREFETCH_ACCESS.PREFETCHW")
    except Exception:
        # Only a failed event lookup should disable the group; Ctrl-C and
        # interpreter exit must still propagate.
        return None

    all_sw = s_nta + s_t0 + s_t1 + s_w

    def breakdown(key: str, label: str, event) -> MetricGroup:
        # One sub-group per prefetch flavour: share of all prefetches + rate.
        return MetricGroup(f"lpm_swpf_bkdwn_{key}", [
            Metric(f"lpm_swpf_bkdwn_{key}_per_swpf",
                   f"Software prefetch {label} instructions as a percent of all prefetch instructions",
                   d_ratio(event, all_sw), "100%"),
            Metric(f"lpm_swpf_bkdwn_{key}_rate",
                   f"Software prefetch {label} instructions per second",
                   d_ratio(event, interval_sec), "insn/s"),
        ])

    totals = MetricGroup("lpm_swpf_totals", [
        Metric("lpm_swpf_totals_exec", "Software prefetch instructions per second",
               d_ratio(all_sw, interval_sec), "swpf/s"),
        Metric("lpm_swpf_totals_insn_per_pf",
               "Average number of instructions between software prefetches",
               d_ratio(ins, all_sw), "insn/swpf"),
        Metric("lpm_swpf_totals_loads_per_pf",
               "Average number of loads between software prefetches",
               d_ratio(s_ld, all_sw), "loads/swpf"),
    ])
    per_kind = MetricGroup("lpm_swpf_bkdwn", [
        breakdown("nta", "NTA", s_nta),
        breakdown("t0", "T0", s_t0),
        breakdown("t1_t2", "T1 or T2", s_t1),
        breakdown("w", "W", s_w),
    ])

    return MetricGroup("lpm_swpf", [totals, per_kind],
                       description="Software prefetch instruction breakdown")
796397fdb3aSIan Rogers
797397fdb3aSIan Rogers
def IntelLdSt() -> Optional[MetricGroup]:
    """Build the "lpm_ldst" group breaking down load/store instructions.

    The group reports load and store rates, loads/stores as a fraction of
    all instructions, a histogram of cycles retiring 1, 2 or 3+ loads (and
    stores) built from cmask=1/2/3 variants of the retired load/store
    events, and - when the events exist - rates of loads hitting software
    prefetches and of locked (atomic) loads.

    Returns:
        The metric group, or None for the models listed below, for which
        the group is not generated.
    """
    if _args.model in [
        "bonnell",
        "nehalemep",
        "nehalemex",
        "westmereep-dp",
        "westmereep-sp",
        "westmereex",
    ]:
        return None

    def optional_event(*names: str):
        # Events that only some models have: None when Event() rejects them.
        try:
            return Event(*names)
        except Exception:
            return None

    LDST_LD = Event("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS")
    LDST_ST = Event("MEM_INST_RETIRED.ALL_STORES",
                    "MEM_UOPS_RETIRED.ALL_STORES")
    # cmask=N variants of the selected load/store events.
    LDST_LDC1 = Event(f"{LDST_LD.name}/cmask=1/")
    LDST_STC1 = Event(f"{LDST_ST.name}/cmask=1/")
    LDST_LDC2 = Event(f"{LDST_LD.name}/cmask=2/")
    LDST_STC2 = Event(f"{LDST_ST.name}/cmask=2/")
    LDST_LDC3 = Event(f"{LDST_LD.name}/cmask=3/")
    LDST_STC3 = Event(f"{LDST_ST.name}/cmask=3/")
    ins = Event("instructions")
    cyc = Event("CPU_CLK_UNHALTED.THREAD",
                "CPU_CLK_UNHALTED.CORE_P",
                "CPU_CLK_UNHALTED.THREAD_P")
    LDST_PRE = optional_event("LOAD_HIT_PREFETCH.SWPF", "LOAD_HIT_PRE.SW_PF")
    LDST_AT = optional_event("MEM_INST_RETIRED.LOCK_LOADS")

    ld_rate = d_ratio(LDST_LD, interval_sec)
    st_rate = d_ratio(LDST_ST, interval_sec)
    pf_rate = d_ratio(LDST_PRE, interval_sec) if LDST_PRE else None
    at_rate = d_ratio(LDST_AT, interval_sec) if LDST_AT else None

    # The MEM_UOPS_RETIRED flavour of the events gets a different grouping
    # constraint for the metrics that subtract two cmask variants.
    if LDST_LD.name == "MEM_UOPS_RETIRED.ALL_LOADS":
        ldst_ret_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI
    else:
        ldst_ret_constraint = MetricConstraint.GROUPED_EVENTS

    return MetricGroup("lpm_ldst", [
        MetricGroup("lpm_ldst_total", [
            Metric("lpm_ldst_total_loads", "Load/store instructions total loads",
                   ld_rate, "loads"),
            Metric("lpm_ldst_total_stores", "Load/store instructions total stores",
                   st_rate, "stores"),
        ]),
        MetricGroup("lpm_ldst_prcnt", [
            Metric("lpm_ldst_prcnt_loads", "Percent of all instructions that are loads",
                   d_ratio(LDST_LD, ins), "100%"),
            Metric("lpm_ldst_prcnt_stores", "Percent of all instructions that are stores",
                   d_ratio(LDST_ST, ins), "100%"),
        ]),
        MetricGroup("lpm_ldst_ret_lds", [
            # cmask=N minus cmask=N+1, clamped at 0, divided by cycles.
            Metric("lpm_ldst_ret_lds_1", "Retired loads in 1 cycle",
                   d_ratio(max(LDST_LDC1 - LDST_LDC2, 0), cyc), "100%",
                   constraint=ldst_ret_constraint),
            Metric("lpm_ldst_ret_lds_2", "Retired loads in 2 cycles",
                   d_ratio(max(LDST_LDC2 - LDST_LDC3, 0), cyc), "100%",
                   constraint=ldst_ret_constraint),
            Metric("lpm_ldst_ret_lds_3", "Retired loads in 3 or more cycles",
                   d_ratio(LDST_LDC3, cyc), "100%"),
        ]),
        MetricGroup("lpm_ldst_ret_sts", [
            Metric("lpm_ldst_ret_sts_1", "Retired stores in 1 cycle",
                   d_ratio(max(LDST_STC1 - LDST_STC2, 0), cyc), "100%",
                   constraint=ldst_ret_constraint),
            Metric("lpm_ldst_ret_sts_2", "Retired stores in 2 cycles",
                   d_ratio(max(LDST_STC2 - LDST_STC3, 0), cyc), "100%",
                   constraint=ldst_ret_constraint),
            # Description fixed: it previously read "3 more cycles".
            Metric("lpm_ldst_ret_sts_3", "Retired stores in 3 or more cycles",
                   d_ratio(LDST_STC3, cyc), "100%"),
        ]),
        Metric("lpm_ldst_ld_hit_swpf", "Load hit software prefetches per second",
               pf_rate, "swpf/s") if pf_rate else None,
        Metric("lpm_ldst_atomic_lds", "Atomic loads per second",
               at_rate, "loads/s") if at_rate else None,
    ], description="Breakdown of load/store instructions")
880d80edef2SIan Rogers
881d80edef2SIan Rogers
def UncoreCState() -> Optional[MetricGroup]:
    """Build the "lpm_cstate" group: cores in C0/C1, C3 and C6/C7.

    Each metric is a UNC_P_POWER_STATE_OCCUPANCY.CORES_* event divided by
    UNC_P_CLOCKTICKS. The C6 occupancy is reduced first when the summed
    occupancy exceeds what "#num_cores / #num_packages" cores could
    accumulate in the measured ticks.

    Returns:
        The metric group, or None when Event() fails for any of the PCU
        events (i.e. the model does not provide them).
    """
    try:
        pcu_ticks = Event("UNC_P_CLOCKTICKS")
        c0 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C0")
        c3 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C3")
        c6 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C6")
    except Exception:
        # Only a failed event lookup should disable the group; Ctrl-C and
        # interpreter exit must still propagate.
        return None

    cores_per_package = Literal("#num_cores") / Literal("#num_packages")
    max_cycles = pcu_ticks * cores_per_package
    total_cycles = c0 + c3 + c6

    # remove fused-off cores which show up in C6/C7: when the total exceeds
    # the maximum, subtract the excess from C6 (clamped at 0).
    c6_adjusted = Select(max(c6 - (total_cycles - max_cycles), 0),
                         total_cycles > max_cycles,
                         c6)

    return MetricGroup("lpm_cstate", [
        Metric("lpm_cstate_c0", "C-State cores in C0/C1",
               d_ratio(c0, pcu_ticks), "cores"),
        Metric("lpm_cstate_c3", "C-State cores in C3",
               d_ratio(c3, pcu_ticks), "cores"),
        Metric("lpm_cstate_c6", "C-State cores in C6/C7",
               d_ratio(c6_adjusted, pcu_ticks), "cores"),
    ])
9091fee2701SIan Rogers
9101fee2701SIan Rogers
def UncoreDir() -> Optional[MetricGroup]:
    """Build the "lpm_dir" group of directory lookup and update metrics.

    Lookups are the sum of the M2M directory hit and miss events; updates
    are the sum of the M2M and CHA directory update events. The hit, miss
    and CHA update events are created from an existing sub-event and then
    have their umask overridden so that they count every sub-event.

    Returns:
        The metric group, or None when Event() fails for any of the
        required uncore events (i.e. the model does not provide them).
    """
    try:
        m2m_upd = Event("UNC_M2M_DIRECTORY_UPDATE.ANY")
        m2m_hits = Event("UNC_M2M_DIRECTORY_HIT.DIRTY_I")
        # Turn the umask into a ANY rather than DIRTY_I filter.
        m2m_hits.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_HIT.ANY/"
        m2m_miss = Event("UNC_M2M_DIRECTORY_MISS.DIRTY_I")
        # Turn the umask into a ANY rather than DIRTY_I filter.
        m2m_miss.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_MISS.ANY/"
        cha_upd = Event("UNC_CHA_DIR_UPDATE.HA")
        # Turn the umask into a ANY rather than HA filter.
        cha_upd.name += "/umask=3,name=UNC_CHA_DIR_UPDATE.ANY/"
    except Exception:
        # Only a failed event lookup should disable the group; Ctrl-C and
        # interpreter exit must still propagate.
        return None

    lookups = m2m_hits + m2m_miss
    updates = m2m_upd + cha_upd  # in cache lines

    scale = 64 / 1_000_000  # Cache lines to MB
    # NOTE(review): the metric descriptions are empty strings; left as-is so
    # the generated output does not change.
    return MetricGroup("lpm_dir", [
        Metric("lpm_dir_lookup_rate", "",
               d_ratio(lookups, interval_sec), "requests/s"),
        Metric("lpm_dir_lookup_hits", "",
               d_ratio(m2m_hits, lookups), "100%"),
        Metric("lpm_dir_lookup_misses", "",
               d_ratio(m2m_miss, lookups), "100%"),
        Metric("lpm_dir_update_requests", "",
               d_ratio(updates, interval_sec), "requests/s"),
        Metric("lpm_dir_update_bw", "",
               d_ratio(updates, interval_sec), f"{scale}MB/s"),
    ])
9442166b44bSIan Rogers
9452166b44bSIan Rogers
def UncoreMem() -> Optional[MetricGroup]:
    """Build the "lpm_mem" group: local vs. remote memory bandwidth.

    Each metric is a read/write request event divided by the interval
    length, reported with a 64 / 1_000_000 scale factor in MB/s.

    Returns:
        The metric group, or None when Event() fails for any of the four
        request events (i.e. the model does not provide them).
    """
    try:
        loc_rds = Event("UNC_CHA_REQUESTS.READS_LOCAL",
                        "UNC_H_REQUESTS.READS_LOCAL")
        rem_rds = Event("UNC_CHA_REQUESTS.READS_REMOTE",
                        "UNC_H_REQUESTS.READS_REMOTE")
        loc_wrs = Event("UNC_CHA_REQUESTS.WRITES_LOCAL",
                        "UNC_H_REQUESTS.WRITES_LOCAL")
        rem_wrs = Event("UNC_CHA_REQUESTS.WRITES_REMOTE",
                        "UNC_H_REQUESTS.WRITES_REMOTE")
    except Exception:
        # Only a failed event lookup should disable the group; Ctrl-C and
        # interpreter exit must still propagate.
        return None

    scale = 64 / 1_000_000
    unit = f"{scale}MB/s"

    def per_second(requests):
        return d_ratio(requests, interval_sec)

    local = MetricGroup("lpm_mem_local", [
        Metric("lpm_mem_local_read", "Local memory read bandwidth not including directory updates",
               per_second(loc_rds), unit),
        Metric("lpm_mem_local_write", "Local memory write bandwidth not including directory updates",
               per_second(loc_wrs), unit),
    ])
    remote = MetricGroup("lpm_mem_remote", [
        Metric("lpm_mem_remote_read", "Remote memory read bandwidth not including directory updates",
               per_second(rem_rds), unit),
        Metric("lpm_mem_remote_write", "Remote memory write bandwidth not including directory updates",
               per_second(rem_wrs), unit),
    ])
    return MetricGroup("lpm_mem", [local, remote],
                       description="Memory Bandwidth breakdown local vs. remote (remote requests in). directory updates not included")
974cde9c1a5SIan Rogers
975cde9c1a5SIan Rogers
def UncoreMemBw() -> Optional[MetricGroup]:
    """Build the "lpm_mem_bw" group: DDR and (optionally) PMM bandwidth.

    DDR reads/writes are the sums of the model's
    UNC_MC<n>_RDCAS_COUNT_FREERUN / UNC_MC<n>_WRCAS_COUNT_FREERUN events
    listed in its uncore-memory.json. When no such read event is found,
    UNC_M_CAS_COUNT.RD / UNC_M_CAS_COUNT.WR are used instead. The PMM
    sub-group is only emitted when the PMM read event exists.

    Returns:
        The metric group, or None when neither the free-running counters
        nor the UNC_M_CAS_COUNT events are available.
    """
    # The original code read the undefined name "args" here; the resulting
    # NameError was swallowed by a bare except, so the JSON was never loaded.
    # Use the global _args and the same events tree as IntelPorts().
    mem_events = []
    try:
        with open(f"{_args.events_path}/x86/{_args.model}/uncore-memory.json") as f:
            mem_events = json.load(f)
    except (OSError, ValueError):
        # Best effort: a missing or unparsable file just means there are no
        # free-running counters to scan.
        pass

    ddr_rds = 0
    ddr_wrs = 0
    for x in mem_events:
        if "EventName" not in x:
            continue
        name = x["EventName"]
        if re.search("^UNC_MC[0-9]+_RDCAS_COUNT_FREERUN", name):
            ddr_rds += Event(name)
        elif re.search("^UNC_MC[0-9]+_WRCAS_COUNT_FREERUN", name):
            ddr_wrs += Event(name)

    if ddr_rds == 0:
        # No free-running read counters were found: fall back to CAS counts.
        try:
            ddr_rds = Event("UNC_M_CAS_COUNT.RD")
            ddr_wrs = Event("UNC_M_CAS_COUNT.WR")
        except Exception:
            return None

    # The total is derived from reads + writes rather than from the
    # UNC_MC<n>_TOTAL_REQCOUNT_FREERUN events.
    ddr_total = ddr_rds + ddr_wrs

    pmm_rds = 0
    pmm_wrs = 0
    try:
        pmm_rds = Event("UNC_M_PMM_RPQ_INSERTS")
        pmm_wrs = Event("UNC_M_PMM_WPQ_INSERTS")
    except Exception:
        # PMM events are optional; the sub-group is dropped below.
        pass

    pmm_total = pmm_rds + pmm_wrs

    scale = 64 / 1_000_000
    return MetricGroup("lpm_mem_bw", [
        MetricGroup("lpm_mem_bw_ddr", [
            Metric("lpm_mem_bw_ddr_read", "DDR memory read bandwidth",
                   d_ratio(ddr_rds, interval_sec), f"{scale}MB/s"),
            Metric("lpm_mem_bw_ddr_write", "DDR memory write bandwidth",
                   d_ratio(ddr_wrs, interval_sec), f"{scale}MB/s"),
            # Description fixed: it previously duplicated the write text.
            Metric("lpm_mem_bw_ddr_total", "DDR memory total bandwidth",
                   d_ratio(ddr_total, interval_sec), f"{scale}MB/s"),
        ], description="DDR Memory Bandwidth"),
        MetricGroup("lpm_mem_bw_pmm", [
            Metric("lpm_mem_bw_pmm_read", "PMM memory read bandwidth",
                   d_ratio(pmm_rds, interval_sec), f"{scale}MB/s"),
            Metric("lpm_mem_bw_pmm_write", "PMM memory write bandwidth",
                   d_ratio(pmm_wrs, interval_sec), f"{scale}MB/s"),
            # Description fixed: it previously duplicated the write text.
            Metric("lpm_mem_bw_pmm_total", "PMM memory total bandwidth",
                   d_ratio(pmm_total, interval_sec), f"{scale}MB/s"),
        ], description="PMM Memory Bandwidth") if pmm_rds != 0 else None,
    ], description="Memory Bandwidth")
1035130f4245SIan Rogers
1036130f4245SIan Rogers
def UncoreMemSat() -> Optional[Metric]:
    """Build the "lpm_mem_sat" mesh bandwidth saturation metric.

    The metric is the ratio of the saturation event to the uncore
    clockticks event, reported as a percentage.

    Returns:
        The metric, or None when the clockticks or saturation event cannot
        be created.
    """
    try:
        clocks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS")
        sat = Event("UNC_CHA_DISTRESS_ASSERTED.VERT", "UNC_CHA_FAST_ASSERTED.VERT",
                    "UNC_C_FAST_ASSERTED")
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        return None

    # The wording depends on which event name was selected: CHA/UPI for the
    # UNC_CHA_ events, CBOX/QPI otherwise.
    if "UNC_CHA_" in sat.name:
        desc = ("Mesh Bandwidth saturation (% CHA cycles with FAST signal asserted, "
                "include UPI bandwidth saturation), lower is better")
    else:
        desc = ("Mesh Bandwidth saturation (% CBOX cycles with FAST signal asserted, "
                "include QPI bandwidth saturation), lower is better")
    return Metric("lpm_mem_sat", desc, d_ratio(sat, clocks), "100%")
1051e74f72a7SIan Rogers
1052e74f72a7SIan Rogers
def UncoreUpiBw() -> Optional[MetricGroup]:
    """Build the "lpm_upi_bw" group of UPI read and write bandwidth metrics.

    Returns:
        The metric group, or None when the UNC_UPI_RxL_FLITS.ALL_DATA or
        UNC_UPI_TxL_FLITS.ALL_DATA event cannot be created.
    """
    try:
        upi_rds = Event("UNC_UPI_RxL_FLITS.ALL_DATA")
        upi_wrs = Event("UNC_UPI_TxL_FLITS.ALL_DATA")
    except Exception:
        return None

    # From "Uncore Performance Monitoring": When measuring the amount of
    # bandwidth consumed by transmission of the data (i.e. NOT including
    # the header), it should be .ALL_DATA / 9 * 64B.
    scale = (64 / 9) / 1_000_000
    return MetricGroup("lpm_upi_bw", [
        Metric("lpm_upi_bw_read", "UPI read bandwidth",
               d_ratio(upi_rds, interval_sec), f"{scale}MB/s"),
        Metric("lpm_upi_bw_write", "UPI write bandwidth",
               d_ratio(upi_wrs, interval_sec), f"{scale}MB/s"),
    ], description="UPI Bandwidth")
10725dc81578SIan Rogers
10735dc81578SIan Rogers
def main() -> None:
    """Parse the command line, build every metric and print them as json.

    With -metricgroups the metric group descriptions are printed instead
    of the metrics themselves.
    """
    global _args

    def dir_path(path: str) -> str:
        """argparse type callback that only accepts existing directories."""
        if not os.path.isdir(path):
            raise argparse.ArgumentTypeError(
                f"'{path}' is not a valid directory")
        return path

    cli = argparse.ArgumentParser(description="Intel perf json generator")
    cli.add_argument("-metricgroups", action="store_true",
                     help="Generate metricgroups data")
    cli.add_argument("model", help="e.g. skylakex")
    cli.add_argument(
        "events_path",
        type=dir_path,
        help="Root of tree containing architecture directories containing json files",
    )
    _args = cli.parse_args()

    # LoadEvents is called on the model's directory before any of the
    # metric constructors below run.
    LoadEvents(f"{_args.events_path}/x86/{_args.model}/")

    # Constructors are invoked in this order; some may return None.
    constructors = (
        Cycles,
        Idle,
        Rapl,
        Smi,
        Tsx,
        IntelBr,
        IntelCtxSw,
        IntelFpu,
        IntelIlp,
        IntelL2,
        IntelLdSt,
        IntelMissLat,
        IntelMlp,
        IntelPorts,
        IntelSwpf,
        UncoreCState,
        UncoreDir,
        UncoreMem,
        UncoreMemBw,
        UncoreMemSat,
        UncoreUpiBw,
    )
    all_metrics = MetricGroup("", [build() for build in constructors])

    encode = (JsonEncodeMetricGroupDescriptions if _args.metricgroups
              else JsonEncodeMetric)
    print(encode(all_metrics))
112619eab0efSIan Rogers
112719eab0efSIan Rogers
# Only generate output when executed as a script, not when imported.
if __name__ == "__main__":
    main()
1130